Compare commits
17 Commits
feat/crd-a
...
feat/ceph-
| Author | SHA1 | Date | |
|---|---|---|---|
| ce5e5ea6ab | |||
| cd3ea6fc10 | |||
| 89eb88d10e | |||
| d1a274b705 | |||
| b43ca7c740 | |||
|
|
67f3a23071 | ||
| d86970f81b | |||
| 623a3f019b | |||
|
|
bd214f8fb8 | ||
| f0ed548755 | |||
| 1de96027a1 | |||
| 0812937a67 | |||
| 29a261575b | |||
| dcf8335240 | |||
|
|
f876b5e67b | ||
| 440c1bce12 | |||
| 024084859e |
@@ -9,7 +9,7 @@ jobs:
|
||||
check:
|
||||
runs-on: docker
|
||||
container:
|
||||
image: hub.nationtech.io/harmony/harmony_composer:latest@sha256:eb0406fcb95c63df9b7c4b19bc50ad7914dd8232ce98e9c9abef628e07c69386
|
||||
image: hub.nationtech.io/harmony/harmony_composer:latest
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@v4
|
||||
|
||||
@@ -7,7 +7,7 @@ on:
|
||||
jobs:
|
||||
package_harmony_composer:
|
||||
container:
|
||||
image: hub.nationtech.io/harmony/harmony_composer:latest@sha256:eb0406fcb95c63df9b7c4b19bc50ad7914dd8232ce98e9c9abef628e07c69386
|
||||
image: hub.nationtech.io/harmony/harmony_composer:latest
|
||||
runs-on: dind
|
||||
steps:
|
||||
- name: Checkout code
|
||||
|
||||
86
Cargo.lock
generated
86
Cargo.lock
generated
@@ -96,6 +96,12 @@ dependencies = [
|
||||
"libc",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "ansi_term"
|
||||
version = "0.10.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "6b3568b48b7cefa6b8ce125f9bb4989e52fbcc29ebea88df04cc7c5f12f70455"
|
||||
|
||||
[[package]]
|
||||
name = "anstream"
|
||||
version = "0.6.19"
|
||||
@@ -1259,6 +1265,18 @@ dependencies = [
|
||||
name = "example"
|
||||
version = "0.0.0"
|
||||
|
||||
[[package]]
|
||||
name = "example-application-monitoring-with-tenant"
|
||||
version = "0.1.0"
|
||||
dependencies = [
|
||||
"env_logger",
|
||||
"harmony",
|
||||
"harmony_cli",
|
||||
"logging",
|
||||
"tokio",
|
||||
"url",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "example-cli"
|
||||
version = "0.1.0"
|
||||
@@ -1779,6 +1797,7 @@ dependencies = [
|
||||
"k3d-rs",
|
||||
"k8s-openapi",
|
||||
"kube",
|
||||
"kube-derive",
|
||||
"lazy_static",
|
||||
"libredfish",
|
||||
"log",
|
||||
@@ -1791,6 +1810,7 @@ dependencies = [
|
||||
"reqwest 0.11.27",
|
||||
"russh",
|
||||
"rust-ipmi",
|
||||
"schemars 0.8.22",
|
||||
"semver",
|
||||
"serde",
|
||||
"serde-value",
|
||||
@@ -2669,6 +2689,7 @@ dependencies = [
|
||||
"k8s-openapi",
|
||||
"kube-client",
|
||||
"kube-core",
|
||||
"kube-derive",
|
||||
"kube-runtime",
|
||||
]
|
||||
|
||||
@@ -2722,12 +2743,27 @@ dependencies = [
|
||||
"http 1.3.1",
|
||||
"json-patch",
|
||||
"k8s-openapi",
|
||||
"schemars 0.8.22",
|
||||
"serde",
|
||||
"serde-value",
|
||||
"serde_json",
|
||||
"thiserror 2.0.12",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "kube-derive"
|
||||
version = "1.1.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "079fc8c1c397538628309cfdee20696ebdcc26745f9fb17f89b78782205bd995"
|
||||
dependencies = [
|
||||
"darling",
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"serde",
|
||||
"serde_json",
|
||||
"syn",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "kube-runtime"
|
||||
version = "1.1.0"
|
||||
@@ -2843,6 +2879,15 @@ dependencies = [
|
||||
"log",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "logging"
|
||||
version = "0.1.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "461a8beca676e8ab1bd468c92e9b4436d6368e11e96ae038209e520cfe665e46"
|
||||
dependencies = [
|
||||
"ansi_term",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "lru"
|
||||
version = "0.12.5"
|
||||
@@ -4140,6 +4185,18 @@ dependencies = [
|
||||
"windows-sys 0.59.0",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "schemars"
|
||||
version = "0.8.22"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "3fbf2ae1b8bc8e02df939598064d22402220cd5bbcca1c76f7d6a310974d5615"
|
||||
dependencies = [
|
||||
"dyn-clone",
|
||||
"schemars_derive",
|
||||
"serde",
|
||||
"serde_json",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "schemars"
|
||||
version = "0.9.0"
|
||||
@@ -4154,9 +4211,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "schemars"
|
||||
version = "1.0.3"
|
||||
version = "1.0.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "1375ba8ef45a6f15d83fa8748f1079428295d403d6ea991d09ab100155fbc06d"
|
||||
checksum = "82d20c4491bc164fa2f6c5d44565947a52ad80b9505d8e36f8d54c27c739fcd0"
|
||||
dependencies = [
|
||||
"dyn-clone",
|
||||
"ref-cast",
|
||||
@@ -4164,6 +4221,18 @@ dependencies = [
|
||||
"serde_json",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "schemars_derive"
|
||||
version = "0.8.22"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "32e265784ad618884abaea0600a9adf15393368d840e0222d101a072f3f7534d"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"serde_derive_internals",
|
||||
"syn",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "scopeguard"
|
||||
version = "1.2.0"
|
||||
@@ -4296,6 +4365,17 @@ dependencies = [
|
||||
"syn",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "serde_derive_internals"
|
||||
version = "0.29.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "18d26a20a969b9e3fdf2fc2d9f21eda6c40e2de84c9408bb5d3b05d499aae711"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "serde_json"
|
||||
version = "1.0.140"
|
||||
@@ -4374,7 +4454,7 @@ dependencies = [
|
||||
"indexmap 1.9.3",
|
||||
"indexmap 2.10.0",
|
||||
"schemars 0.9.0",
|
||||
"schemars 1.0.3",
|
||||
"schemars 1.0.4",
|
||||
"serde",
|
||||
"serde_derive",
|
||||
"serde_json",
|
||||
|
||||
@@ -13,6 +13,7 @@ WORKDIR /app
|
||||
RUN rustup target add x86_64-pc-windows-gnu
|
||||
RUN rustup target add x86_64-unknown-linux-gnu
|
||||
RUN rustup component add rustfmt
|
||||
RUN rustup component add clippy
|
||||
|
||||
RUN apt update
|
||||
|
||||
|
||||
2
check.sh
2
check.sh
@@ -1,5 +1,7 @@
|
||||
#!/bin/sh
|
||||
set -e
|
||||
|
||||
cargo check --all-targets --all-features --keep-going
|
||||
cargo fmt --check
|
||||
cargo clippy
|
||||
cargo test
|
||||
|
||||
14
examples/application_monitoring_with_tenant/Cargo.toml
Normal file
14
examples/application_monitoring_with_tenant/Cargo.toml
Normal file
@@ -0,0 +1,14 @@
|
||||
[package]
|
||||
name = "example-application-monitoring-with-tenant"
|
||||
edition = "2024"
|
||||
version.workspace = true
|
||||
readme.workspace = true
|
||||
license.workspace = true
|
||||
|
||||
[dependencies]
|
||||
env_logger.workspace = true
|
||||
harmony = { version = "0.1.0", path = "../../harmony" }
|
||||
harmony_cli = { version = "0.1.0", path = "../../harmony_cli" }
|
||||
logging = "0.1.0"
|
||||
tokio.workspace = true
|
||||
url.workspace = true
|
||||
55
examples/application_monitoring_with_tenant/src/main.rs
Normal file
55
examples/application_monitoring_with_tenant/src/main.rs
Normal file
@@ -0,0 +1,55 @@
|
||||
use std::{path::PathBuf, str::FromStr, sync::Arc};
|
||||
|
||||
use harmony::{
|
||||
data::Id,
|
||||
inventory::Inventory,
|
||||
modules::{
|
||||
application::{ApplicationScore, RustWebFramework, RustWebapp, features::Monitoring},
|
||||
monitoring::alert_channel::webhook_receiver::WebhookReceiver,
|
||||
tenant::TenantScore,
|
||||
},
|
||||
topology::{K8sAnywhereTopology, Url, tenant::TenantConfig},
|
||||
};
|
||||
|
||||
#[tokio::main]
|
||||
async fn main() {
|
||||
//TODO there is a bug where the application is deployed into the namespace matching the
|
||||
//application name and the tenant is created in the namespace matching the tenant name
|
||||
//in order for the application to be deployed in the tenant namespace the application.name and
|
||||
//the TenantConfig.name must match
|
||||
let tenant = TenantScore {
|
||||
config: TenantConfig {
|
||||
id: Id::from_str("test-tenant-id").unwrap(),
|
||||
name: "example-monitoring".to_string(),
|
||||
..Default::default()
|
||||
},
|
||||
};
|
||||
let application = Arc::new(RustWebapp {
|
||||
name: "example-monitoring".to_string(),
|
||||
domain: Url::Url(url::Url::parse("https://rustapp.harmony.example.com").unwrap()),
|
||||
project_root: PathBuf::from("./examples/rust/webapp"),
|
||||
framework: Some(RustWebFramework::Leptos),
|
||||
});
|
||||
|
||||
let webhook_receiver = WebhookReceiver {
|
||||
name: "sample-webhook-receiver".to_string(),
|
||||
url: Url::Url(url::Url::parse("https://webhook-doesnt-exist.com").unwrap()),
|
||||
};
|
||||
|
||||
let app = ApplicationScore {
|
||||
features: vec![Box::new(Monitoring {
|
||||
alert_receiver: vec![Box::new(webhook_receiver)],
|
||||
application: application.clone(),
|
||||
})],
|
||||
application,
|
||||
};
|
||||
|
||||
harmony_cli::run(
|
||||
Inventory::autoload(),
|
||||
K8sAnywhereTopology::from_env(),
|
||||
vec![Box::new(tenant), Box::new(app)],
|
||||
None,
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
}
|
||||
@@ -125,40 +125,47 @@ spec:
|
||||
name: nginx"#,
|
||||
)
|
||||
.unwrap();
|
||||
return deployment;
|
||||
deployment
|
||||
}
|
||||
fn nginx_deployment_2() -> Deployment {
|
||||
let mut pod_template = PodTemplateSpec::default();
|
||||
pod_template.metadata = Some(ObjectMeta {
|
||||
let pod_template = PodTemplateSpec {
|
||||
metadata: Some(ObjectMeta {
|
||||
labels: Some(BTreeMap::from([(
|
||||
"app".to_string(),
|
||||
"nginx-test".to_string(),
|
||||
)])),
|
||||
..Default::default()
|
||||
});
|
||||
pod_template.spec = Some(PodSpec {
|
||||
}),
|
||||
spec: Some(PodSpec {
|
||||
containers: vec![Container {
|
||||
name: "nginx".to_string(),
|
||||
image: Some("nginx".to_string()),
|
||||
..Default::default()
|
||||
}],
|
||||
..Default::default()
|
||||
});
|
||||
let mut spec = DeploymentSpec::default();
|
||||
spec.template = pod_template;
|
||||
spec.selector = LabelSelector {
|
||||
}),
|
||||
};
|
||||
|
||||
let spec = DeploymentSpec {
|
||||
template: pod_template,
|
||||
selector: LabelSelector {
|
||||
match_expressions: None,
|
||||
match_labels: Some(BTreeMap::from([(
|
||||
"app".to_string(),
|
||||
"nginx-test".to_string(),
|
||||
)])),
|
||||
},
|
||||
..Default::default()
|
||||
};
|
||||
|
||||
let mut deployment = Deployment::default();
|
||||
deployment.spec = Some(spec);
|
||||
deployment.metadata.name = Some("nginx-test".to_string());
|
||||
|
||||
deployment
|
||||
Deployment {
|
||||
spec: Some(spec),
|
||||
metadata: ObjectMeta {
|
||||
name: Some("nginx-test".to_string()),
|
||||
..Default::default()
|
||||
},
|
||||
..Default::default()
|
||||
}
|
||||
}
|
||||
|
||||
fn nginx_deployment() -> Deployment {
|
||||
|
||||
@@ -23,7 +23,7 @@ async fn main() {
|
||||
// This config can be extended as needed for more complicated configurations
|
||||
config: LAMPConfig {
|
||||
project_root: "./php".into(),
|
||||
database_size: format!("4Gi").into(),
|
||||
database_size: "4Gi".to_string().into(),
|
||||
..Default::default()
|
||||
},
|
||||
};
|
||||
|
||||
@@ -50,8 +50,8 @@ async fn main() {
|
||||
|
||||
let service_monitor_endpoint = ServiceMonitorEndpoint {
|
||||
port: Some("80".to_string()),
|
||||
path: "/metrics".to_string(),
|
||||
scheme: HTTPScheme::HTTP,
|
||||
path: Some("/metrics".to_string()),
|
||||
scheme: Some(HTTPScheme::HTTP),
|
||||
..Default::default()
|
||||
};
|
||||
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
use std::collections::HashMap;
|
||||
use std::{collections::HashMap, str::FromStr};
|
||||
|
||||
use harmony::{
|
||||
data::Id,
|
||||
@@ -28,7 +28,7 @@ use harmony::{
|
||||
async fn main() {
|
||||
let tenant = TenantScore {
|
||||
config: TenantConfig {
|
||||
id: Id::from_string("1234".to_string()),
|
||||
id: Id::from_str("1234").unwrap(),
|
||||
name: "test-tenant".to_string(),
|
||||
resource_limits: ResourceLimits {
|
||||
cpu_request_cores: 6.0,
|
||||
@@ -53,8 +53,8 @@ async fn main() {
|
||||
|
||||
let service_monitor_endpoint = ServiceMonitorEndpoint {
|
||||
port: Some("80".to_string()),
|
||||
path: "/metrics".to_string(),
|
||||
scheme: HTTPScheme::HTTP,
|
||||
path: Some("/metrics".to_string()),
|
||||
scheme: Some(HTTPScheme::HTTP),
|
||||
..Default::default()
|
||||
};
|
||||
|
||||
|
||||
12
examples/remove_rook_osd/Cargo.toml
Normal file
12
examples/remove_rook_osd/Cargo.toml
Normal file
@@ -0,0 +1,12 @@
|
||||
[package]
|
||||
name = "example_remove_rook_osd"
|
||||
edition = "2024"
|
||||
version.workspace = true
|
||||
readme.workspace = true
|
||||
license.workspace = true
|
||||
|
||||
[dependencies]
|
||||
harmony = { version = "0.1.0", path = "../../harmony" }
|
||||
harmony_cli = { version = "0.1.0", path = "../../harmony_cli" }
|
||||
harmony_tui = { version = "0.1.0", path = "../../harmony_tui" }
|
||||
tokio.workspace = true
|
||||
18
examples/remove_rook_osd/src/main.rs
Normal file
18
examples/remove_rook_osd/src/main.rs
Normal file
@@ -0,0 +1,18 @@
|
||||
use harmony::{
|
||||
inventory::Inventory, modules::storage::ceph::ceph_remove_osd_score::CephRemoveOsd,
|
||||
topology::K8sAnywhereTopology,
|
||||
};
|
||||
|
||||
#[tokio::main]
|
||||
async fn main() {
|
||||
let ceph_score = CephRemoveOsd {
|
||||
osd_deployment_name: "rook-ceph-osd-2".to_string(),
|
||||
rook_ceph_namespace: "rook-ceph".to_string(),
|
||||
};
|
||||
|
||||
let topology = K8sAnywhereTopology::from_env();
|
||||
let inventory = Inventory::autoload();
|
||||
harmony_cli::run(inventory, topology, vec![Box::new(ceph_score)], None)
|
||||
.await
|
||||
.unwrap();
|
||||
}
|
||||
@@ -2,10 +2,15 @@ use std::{path::PathBuf, sync::Arc};
|
||||
|
||||
use harmony::{
|
||||
inventory::Inventory,
|
||||
modules::application::{
|
||||
modules::{
|
||||
application::{
|
||||
ApplicationScore, RustWebFramework, RustWebapp,
|
||||
features::{ContinuousDelivery, Monitoring},
|
||||
},
|
||||
monitoring::alert_channel::{
|
||||
discord_alert_channel::DiscordWebhook, webhook_receiver::WebhookReceiver,
|
||||
},
|
||||
},
|
||||
topology::{K8sAnywhereTopology, Url},
|
||||
};
|
||||
|
||||
@@ -18,6 +23,16 @@ async fn main() {
|
||||
framework: Some(RustWebFramework::Leptos),
|
||||
});
|
||||
|
||||
let discord_receiver = DiscordWebhook {
|
||||
name: "test-discord".to_string(),
|
||||
url: Url::Url(url::Url::parse("https://discord.doesnt.exist.com").unwrap()),
|
||||
};
|
||||
|
||||
let webhook_receiver = WebhookReceiver {
|
||||
name: "sample-webhook-receiver".to_string(),
|
||||
url: Url::Url(url::Url::parse("https://webhook-doesnt-exist.com").unwrap()),
|
||||
};
|
||||
|
||||
let app = ApplicationScore {
|
||||
features: vec![
|
||||
Box::new(ContinuousDelivery {
|
||||
@@ -25,7 +40,9 @@ async fn main() {
|
||||
}),
|
||||
Box::new(Monitoring {
|
||||
application: application.clone(),
|
||||
}), // TODO: add backups, multisite ha, etc.
|
||||
alert_receiver: vec![Box::new(discord_receiver), Box::new(webhook_receiver)],
|
||||
}),
|
||||
// TODO add backups, multisite ha, etc
|
||||
],
|
||||
application,
|
||||
};
|
||||
|
||||
@@ -1,3 +1,5 @@
|
||||
use std::str::FromStr;
|
||||
|
||||
use harmony::{
|
||||
data::Id,
|
||||
inventory::Inventory,
|
||||
@@ -9,7 +11,7 @@ use harmony::{
|
||||
async fn main() {
|
||||
let tenant = TenantScore {
|
||||
config: TenantConfig {
|
||||
id: Id::from_str("test-tenant-id"),
|
||||
id: Id::from_str("test-tenant-id").unwrap(),
|
||||
name: "testtenant".to_string(),
|
||||
..Default::default()
|
||||
},
|
||||
|
||||
@@ -5,6 +5,9 @@ version.workspace = true
|
||||
readme.workspace = true
|
||||
license.workspace = true
|
||||
|
||||
[features]
|
||||
testing = []
|
||||
|
||||
[dependencies]
|
||||
rand = "0.9"
|
||||
hex = "0.4"
|
||||
@@ -27,7 +30,7 @@ harmony_macros = { path = "../harmony_macros" }
|
||||
harmony_types = { path = "../harmony_types" }
|
||||
uuid.workspace = true
|
||||
url.workspace = true
|
||||
kube.workspace = true
|
||||
kube = { workspace = true, features = ["derive"] }
|
||||
k8s-openapi.workspace = true
|
||||
serde_yaml.workspace = true
|
||||
http.workspace = true
|
||||
@@ -58,6 +61,8 @@ tokio-util = "0.7.15"
|
||||
strum = { version = "0.27.1", features = ["derive"] }
|
||||
tempfile = "3.20.0"
|
||||
serde_with = "3.14.0"
|
||||
schemars = "0.8.22"
|
||||
kube-derive = "1.1.0"
|
||||
bollard.workspace = true
|
||||
tar.workspace = true
|
||||
base64.workspace = true
|
||||
|
||||
@@ -11,5 +11,5 @@ lazy_static! {
|
||||
pub static ref REGISTRY_PROJECT: String =
|
||||
std::env::var("HARMONY_REGISTRY_PROJECT").unwrap_or_else(|_| "harmony".to_string());
|
||||
pub static ref DRY_RUN: bool =
|
||||
std::env::var("HARMONY_DRY_RUN").map_or(true, |value| value.parse().unwrap_or(true));
|
||||
std::env::var("HARMONY_DRY_RUN").is_ok_and(|value| value.parse().unwrap_or(false));
|
||||
}
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
use rand::distr::Alphanumeric;
|
||||
use rand::distr::SampleString;
|
||||
use std::str::FromStr;
|
||||
use std::time::SystemTime;
|
||||
use std::time::UNIX_EPOCH;
|
||||
|
||||
@@ -23,13 +24,13 @@ pub struct Id {
|
||||
value: String,
|
||||
}
|
||||
|
||||
impl Id {
|
||||
pub fn from_string(value: String) -> Self {
|
||||
Self { value }
|
||||
}
|
||||
impl FromStr for Id {
|
||||
type Err = ();
|
||||
|
||||
pub fn from_str(value: &str) -> Self {
|
||||
Self::from_string(value.to_string())
|
||||
fn from_str(s: &str) -> Result<Self, Self::Err> {
|
||||
Ok(Id {
|
||||
value: s.to_string(),
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -47,7 +47,7 @@ impl serde::Serialize for Version {
|
||||
|
||||
impl std::fmt::Display for Version {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
return self.value.fmt(f);
|
||||
self.value.fmt(f)
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -35,10 +35,9 @@ impl PhysicalHost {
|
||||
|
||||
pub fn cluster_mac(&self) -> MacAddress {
|
||||
self.network
|
||||
.get(0)
|
||||
.first()
|
||||
.expect("Cluster physical host should have a network interface")
|
||||
.mac_address
|
||||
.clone()
|
||||
}
|
||||
|
||||
pub fn cpu(mut self, cpu_count: Option<u64>) -> Self {
|
||||
|
||||
@@ -2,28 +2,42 @@ use log::debug;
|
||||
use once_cell::sync::Lazy;
|
||||
use tokio::sync::broadcast;
|
||||
|
||||
use super::interpret::{InterpretError, Outcome};
|
||||
use crate::modules::application::ApplicationFeatureStatus;
|
||||
|
||||
use super::{
|
||||
interpret::{InterpretError, Outcome},
|
||||
topology::TopologyStatus,
|
||||
};
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub enum HarmonyEvent {
|
||||
HarmonyStarted,
|
||||
PrepareTopologyStarted {
|
||||
topology: String,
|
||||
},
|
||||
TopologyPrepared {
|
||||
topology: String,
|
||||
outcome: Outcome,
|
||||
},
|
||||
HarmonyFinished,
|
||||
InterpretExecutionStarted {
|
||||
execution_id: String,
|
||||
topology: String,
|
||||
interpret: String,
|
||||
score: String,
|
||||
message: String,
|
||||
},
|
||||
InterpretExecutionFinished {
|
||||
execution_id: String,
|
||||
topology: String,
|
||||
interpret: String,
|
||||
score: String,
|
||||
outcome: Result<Outcome, InterpretError>,
|
||||
},
|
||||
TopologyStateChanged {
|
||||
topology: String,
|
||||
status: TopologyStatus,
|
||||
message: Option<String>,
|
||||
},
|
||||
ApplicationFeatureStateChanged {
|
||||
topology: String,
|
||||
application: String,
|
||||
feature: String,
|
||||
status: ApplicationFeatureStatus,
|
||||
},
|
||||
}
|
||||
|
||||
static HARMONY_EVENT_BUS: Lazy<broadcast::Sender<HarmonyEvent>> = Lazy::new(|| {
|
||||
@@ -33,10 +47,15 @@ static HARMONY_EVENT_BUS: Lazy<broadcast::Sender<HarmonyEvent>> = Lazy::new(|| {
|
||||
});
|
||||
|
||||
pub fn instrument(event: HarmonyEvent) -> Result<(), &'static str> {
|
||||
if cfg!(any(test, feature = "testing")) {
|
||||
let _ = event; // Suppress the "unused variable" warning for `event`
|
||||
Ok(())
|
||||
} else {
|
||||
match HARMONY_EVENT_BUS.send(event) {
|
||||
Ok(_) => Ok(()),
|
||||
Err(_) => Err("send error: no subscribers"),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn subscribe<F, Fut>(name: &str, mut handler: F)
|
||||
|
||||
@@ -7,6 +7,7 @@ use super::{
|
||||
data::{Id, Version},
|
||||
executors::ExecutorError,
|
||||
inventory::Inventory,
|
||||
topology::PreparationError,
|
||||
};
|
||||
|
||||
pub enum InterpretName {
|
||||
@@ -23,6 +24,14 @@ pub enum InterpretName {
|
||||
TenantInterpret,
|
||||
Application,
|
||||
ArgoCD,
|
||||
Alerting,
|
||||
Ntfy,
|
||||
HelmChart,
|
||||
HelmCommand,
|
||||
K8sResource,
|
||||
Lamp,
|
||||
ApplicationMonitoring,
|
||||
K8sPrometheusCrdAlerting,
|
||||
}
|
||||
|
||||
impl std::fmt::Display for InterpretName {
|
||||
@@ -41,6 +50,14 @@ impl std::fmt::Display for InterpretName {
|
||||
InterpretName::TenantInterpret => f.write_str("Tenant"),
|
||||
InterpretName::Application => f.write_str("Application"),
|
||||
InterpretName::ArgoCD => f.write_str("ArgoCD"),
|
||||
InterpretName::Alerting => f.write_str("Alerting"),
|
||||
InterpretName::Ntfy => f.write_str("Ntfy"),
|
||||
InterpretName::HelmChart => f.write_str("HelmChart"),
|
||||
InterpretName::HelmCommand => f.write_str("HelmCommand"),
|
||||
InterpretName::K8sResource => f.write_str("K8sResource"),
|
||||
InterpretName::Lamp => f.write_str("LAMP"),
|
||||
InterpretName::ApplicationMonitoring => f.write_str("ApplicationMonitoring"),
|
||||
InterpretName::K8sPrometheusCrdAlerting => f.write_str("K8sPrometheusCrdAlerting"),
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -113,6 +130,14 @@ impl std::fmt::Display for InterpretError {
|
||||
}
|
||||
impl Error for InterpretError {}
|
||||
|
||||
impl From<PreparationError> for InterpretError {
|
||||
fn from(value: PreparationError) -> Self {
|
||||
Self {
|
||||
msg: format!("InterpretError : {value}"),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl From<ExecutorError> for InterpretError {
|
||||
fn from(value: ExecutorError) -> Self {
|
||||
Self {
|
||||
|
||||
@@ -1,14 +1,14 @@
|
||||
use std::sync::{Arc, Mutex, RwLock};
|
||||
use std::sync::{Arc, RwLock};
|
||||
|
||||
use log::{debug, warn};
|
||||
|
||||
use crate::instrumentation::{self, HarmonyEvent};
|
||||
use crate::topology::TopologyStatus;
|
||||
|
||||
use super::{
|
||||
interpret::{InterpretError, InterpretStatus, Outcome},
|
||||
interpret::{InterpretError, Outcome},
|
||||
inventory::Inventory,
|
||||
score::Score,
|
||||
topology::Topology,
|
||||
topology::{PreparationError, PreparationOutcome, Topology, TopologyState},
|
||||
};
|
||||
|
||||
type ScoreVec<T> = Vec<Box<dyn Score<T>>>;
|
||||
@@ -17,7 +17,7 @@ pub struct Maestro<T: Topology> {
|
||||
inventory: Inventory,
|
||||
topology: T,
|
||||
scores: Arc<RwLock<ScoreVec<T>>>,
|
||||
topology_preparation_result: Mutex<Option<Outcome>>,
|
||||
topology_state: TopologyState,
|
||||
}
|
||||
|
||||
impl<T: Topology> Maestro<T> {
|
||||
@@ -25,42 +25,47 @@ impl<T: Topology> Maestro<T> {
|
||||
///
|
||||
/// This should rarely be used. Most of the time Maestro::initialize should be used instead.
|
||||
pub fn new_without_initialization(inventory: Inventory, topology: T) -> Self {
|
||||
let topology_name = topology.name().to_string();
|
||||
|
||||
Self {
|
||||
inventory,
|
||||
topology,
|
||||
scores: Arc::new(RwLock::new(Vec::new())),
|
||||
topology_preparation_result: None.into(),
|
||||
topology_state: TopologyState::new(topology_name),
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn initialize(inventory: Inventory, topology: T) -> Result<Self, InterpretError> {
|
||||
let instance = Self::new_without_initialization(inventory, topology);
|
||||
pub async fn initialize(inventory: Inventory, topology: T) -> Result<Self, PreparationError> {
|
||||
let mut instance = Self::new_without_initialization(inventory, topology);
|
||||
instance.prepare_topology().await?;
|
||||
Ok(instance)
|
||||
}
|
||||
|
||||
/// Ensures the associated Topology is ready for operations.
|
||||
/// Delegates the readiness check and potential setup actions to the Topology.
|
||||
pub async fn prepare_topology(&self) -> Result<Outcome, InterpretError> {
|
||||
instrumentation::instrument(HarmonyEvent::PrepareTopologyStarted {
|
||||
topology: self.topology.name().to_string(),
|
||||
})
|
||||
.unwrap();
|
||||
async fn prepare_topology(&mut self) -> Result<PreparationOutcome, PreparationError> {
|
||||
self.topology_state.prepare();
|
||||
|
||||
let outcome = self.topology.ensure_ready().await?;
|
||||
let result = self.topology.ensure_ready().await;
|
||||
|
||||
instrumentation::instrument(HarmonyEvent::TopologyPrepared {
|
||||
topology: self.topology.name().to_string(),
|
||||
outcome: outcome.clone(),
|
||||
})
|
||||
.unwrap();
|
||||
|
||||
self.topology_preparation_result
|
||||
.lock()
|
||||
.unwrap()
|
||||
.replace(outcome.clone());
|
||||
match result {
|
||||
Ok(outcome) => {
|
||||
match outcome.clone() {
|
||||
PreparationOutcome::Success { details } => {
|
||||
self.topology_state.success(details);
|
||||
}
|
||||
PreparationOutcome::Noop => {
|
||||
self.topology_state.noop();
|
||||
}
|
||||
};
|
||||
Ok(outcome)
|
||||
}
|
||||
Err(err) => {
|
||||
self.topology_state.error(err.to_string());
|
||||
Err(err)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub fn register_all(&mut self, mut scores: ScoreVec<T>) {
|
||||
let mut score_mut = self.scores.write().expect("Should acquire lock");
|
||||
@@ -68,15 +73,7 @@ impl<T: Topology> Maestro<T> {
|
||||
}
|
||||
|
||||
fn is_topology_initialized(&self) -> bool {
|
||||
let result = self.topology_preparation_result.lock().unwrap();
|
||||
if let Some(outcome) = result.as_ref() {
|
||||
match outcome.status {
|
||||
InterpretStatus::SUCCESS => return true,
|
||||
_ => return false,
|
||||
}
|
||||
} else {
|
||||
false
|
||||
}
|
||||
self.topology_state.status == TopologyStatus::Success
|
||||
}
|
||||
|
||||
pub async fn interpret(&self, score: Box<dyn Score<T>>) -> Result<Outcome, InterpretError> {
|
||||
@@ -87,10 +84,8 @@ impl<T: Topology> Maestro<T> {
|
||||
self.topology.name(),
|
||||
);
|
||||
}
|
||||
debug!("Running score {score:?}");
|
||||
let interpret = score.create_interpret();
|
||||
debug!("Launching interpret {interpret:?}");
|
||||
let result = interpret.execute(&self.inventory, &self.topology).await;
|
||||
debug!("Interpreting score {score:?}");
|
||||
let result = score.interpret(&self.inventory, &self.topology).await;
|
||||
debug!("Got result {result:?}");
|
||||
result
|
||||
}
|
||||
|
||||
@@ -1,22 +1,62 @@
|
||||
use std::collections::BTreeMap;
|
||||
|
||||
use async_trait::async_trait;
|
||||
use serde::Serialize;
|
||||
use serde_value::Value;
|
||||
|
||||
use super::{interpret::Interpret, topology::Topology};
|
||||
use super::{
|
||||
data::Id,
|
||||
instrumentation::{self, HarmonyEvent},
|
||||
interpret::{Interpret, InterpretError, Outcome},
|
||||
inventory::Inventory,
|
||||
topology::Topology,
|
||||
};
|
||||
|
||||
#[async_trait]
|
||||
pub trait Score<T: Topology>:
|
||||
std::fmt::Debug + ScoreToString<T> + Send + Sync + CloneBoxScore<T> + SerializeScore<T>
|
||||
{
|
||||
fn create_interpret(&self) -> Box<dyn Interpret<T>>;
|
||||
async fn interpret(
|
||||
&self,
|
||||
inventory: &Inventory,
|
||||
topology: &T,
|
||||
) -> Result<Outcome, InterpretError> {
|
||||
let id = Id::default();
|
||||
let interpret = self.create_interpret();
|
||||
|
||||
instrumentation::instrument(HarmonyEvent::InterpretExecutionStarted {
|
||||
execution_id: id.clone().to_string(),
|
||||
topology: topology.name().into(),
|
||||
interpret: interpret.get_name().to_string(),
|
||||
score: self.name(),
|
||||
message: format!("{} running...", interpret.get_name()),
|
||||
})
|
||||
.unwrap();
|
||||
let result = interpret.execute(inventory, topology).await;
|
||||
|
||||
instrumentation::instrument(HarmonyEvent::InterpretExecutionFinished {
|
||||
execution_id: id.clone().to_string(),
|
||||
topology: topology.name().into(),
|
||||
interpret: interpret.get_name().to_string(),
|
||||
score: self.name(),
|
||||
outcome: result.clone(),
|
||||
})
|
||||
.unwrap();
|
||||
|
||||
result
|
||||
}
|
||||
|
||||
fn name(&self) -> String;
|
||||
|
||||
#[doc(hidden)]
|
||||
fn create_interpret(&self) -> Box<dyn Interpret<T>>;
|
||||
}
|
||||
|
||||
pub trait SerializeScore<T: Topology> {
|
||||
fn serialize(&self) -> Value;
|
||||
}
|
||||
|
||||
impl<'de, S, T> SerializeScore<T> for S
|
||||
impl<S, T> SerializeScore<T> for S
|
||||
where
|
||||
T: Topology,
|
||||
S: Score<T> + Serialize,
|
||||
@@ -24,7 +64,7 @@ where
|
||||
fn serialize(&self) -> Value {
|
||||
// TODO not sure if this is the right place to handle the error or it should bubble
|
||||
// up?
|
||||
serde_value::to_value(&self).expect("Score should serialize successfully")
|
||||
serde_value::to_value(self).expect("Score should serialize successfully")
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -4,8 +4,6 @@ use harmony_types::net::MacAddress;
|
||||
use log::info;
|
||||
|
||||
use crate::executors::ExecutorError;
|
||||
use crate::interpret::InterpretError;
|
||||
use crate::interpret::Outcome;
|
||||
|
||||
use super::DHCPStaticEntry;
|
||||
use super::DhcpServer;
|
||||
@@ -19,6 +17,8 @@ use super::K8sclient;
|
||||
use super::LoadBalancer;
|
||||
use super::LoadBalancerService;
|
||||
use super::LogicalHost;
|
||||
use super::PreparationError;
|
||||
use super::PreparationOutcome;
|
||||
use super::Router;
|
||||
use super::TftpServer;
|
||||
|
||||
@@ -48,7 +48,7 @@ impl Topology for HAClusterTopology {
|
||||
fn name(&self) -> &str {
|
||||
"HAClusterTopology"
|
||||
}
|
||||
async fn ensure_ready(&self) -> Result<Outcome, InterpretError> {
|
||||
async fn ensure_ready(&self) -> Result<PreparationOutcome, PreparationError> {
|
||||
todo!(
|
||||
"ensure_ready, not entirely sure what it should do here, probably something like verify that the hosts are reachable and all services are up and ready."
|
||||
)
|
||||
@@ -244,10 +244,12 @@ impl Topology for DummyInfra {
|
||||
todo!()
|
||||
}
|
||||
|
||||
async fn ensure_ready(&self) -> Result<Outcome, InterpretError> {
|
||||
async fn ensure_ready(&self) -> Result<PreparationOutcome, PreparationError> {
|
||||
let dummy_msg = "This is a dummy infrastructure that does nothing";
|
||||
info!("{dummy_msg}");
|
||||
Ok(Outcome::success(dummy_msg.to_string()))
|
||||
Ok(PreparationOutcome::Success {
|
||||
details: dummy_msg.into(),
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -1,12 +1,11 @@
|
||||
use derive_new::new;
|
||||
use futures_util::StreamExt;
|
||||
use k8s_openapi::{
|
||||
ClusterResourceScope, NamespaceResourceScope,
|
||||
api::{apps::v1::Deployment, core::v1::Pod},
|
||||
};
|
||||
use kube::{
|
||||
Client, Config, Error, Resource,
|
||||
api::{Api, AttachParams, ListParams, Patch, PatchParams, ResourceExt},
|
||||
api::{Api, AttachParams, DeleteParams, ListParams, Patch, PatchParams, ResourceExt},
|
||||
config::{KubeConfigOptions, Kubeconfig},
|
||||
core::ErrorResponse,
|
||||
runtime::reflector::Lookup,
|
||||
@@ -17,14 +16,25 @@ use kube::{
|
||||
runtime::wait::await_condition,
|
||||
};
|
||||
use log::{debug, error, trace};
|
||||
use serde::de::DeserializeOwned;
|
||||
use similar::{DiffableStr, TextDiff};
|
||||
use serde::{Serialize, de::DeserializeOwned};
|
||||
use serde_json::json;
|
||||
use similar::TextDiff;
|
||||
use tokio::io::AsyncReadExt;
|
||||
|
||||
#[derive(new, Clone)]
|
||||
pub struct K8sClient {
|
||||
client: Client,
|
||||
}
|
||||
|
||||
impl Serialize for K8sClient {
|
||||
fn serialize<S>(&self, _serializer: S) -> Result<S::Ok, S::Error>
|
||||
where
|
||||
S: serde::Serializer,
|
||||
{
|
||||
todo!()
|
||||
}
|
||||
}
|
||||
|
||||
impl std::fmt::Debug for K8sClient {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
// This is a poor man's debug implementation for now as kube::Client does not provide much
|
||||
@@ -43,6 +53,66 @@ impl K8sClient {
|
||||
})
|
||||
}
|
||||
|
||||
pub async fn get_deployment(
|
||||
&self,
|
||||
name: &str,
|
||||
namespace: Option<&str>,
|
||||
) -> Result<Option<Deployment>, Error> {
|
||||
let deps: Api<Deployment> = if let Some(ns) = namespace {
|
||||
Api::namespaced(self.client.clone(), ns)
|
||||
} else {
|
||||
Api::default_namespaced(self.client.clone())
|
||||
};
|
||||
Ok(deps.get_opt(name).await?)
|
||||
}
|
||||
|
||||
pub async fn get_pod(&self, name: &str, namespace: Option<&str>) -> Result<Option<Pod>, Error> {
|
||||
let pods: Api<Pod> = if let Some(ns) = namespace {
|
||||
Api::namespaced(self.client.clone(), ns)
|
||||
} else {
|
||||
Api::default_namespaced(self.client.clone())
|
||||
};
|
||||
Ok(pods.get_opt(name).await?)
|
||||
}
|
||||
|
||||
pub async fn scale_deployment(
|
||||
&self,
|
||||
name: &str,
|
||||
namespace: Option<&str>,
|
||||
replicas: u32,
|
||||
) -> Result<(), Error> {
|
||||
let deployments: Api<Deployment> = if let Some(ns) = namespace {
|
||||
Api::namespaced(self.client.clone(), ns)
|
||||
} else {
|
||||
Api::default_namespaced(self.client.clone())
|
||||
};
|
||||
|
||||
let patch = json!({
|
||||
"spec": {
|
||||
"replicas": replicas
|
||||
}
|
||||
});
|
||||
let pp = PatchParams::default();
|
||||
let scale = Patch::Apply(&patch);
|
||||
deployments.patch_scale(name, &pp, &scale).await?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub async fn delete_deployment(
|
||||
&self,
|
||||
name: &str,
|
||||
namespace: Option<&str>,
|
||||
) -> Result<(), Error> {
|
||||
let deployments: Api<Deployment> = if let Some(ns) = namespace {
|
||||
Api::namespaced(self.client.clone(), ns)
|
||||
} else {
|
||||
Api::default_namespaced(self.client.clone())
|
||||
};
|
||||
let delete_params = DeleteParams::default();
|
||||
deployments.delete(name, &delete_params).await?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub async fn wait_until_deployment_ready(
|
||||
&self,
|
||||
name: String,
|
||||
@@ -58,13 +128,75 @@ impl K8sClient {
|
||||
}
|
||||
|
||||
let establish = await_condition(api, name.as_str(), conditions::is_deployment_completed());
|
||||
let t = if let Some(t) = timeout { t } else { 300 };
|
||||
let t = timeout.unwrap_or(300);
|
||||
let res = tokio::time::timeout(std::time::Duration::from_secs(t), establish).await;
|
||||
|
||||
if let Ok(r) = res {
|
||||
return Ok(());
|
||||
if res.is_ok() {
|
||||
Ok(())
|
||||
} else {
|
||||
return Err("timed out while waiting for deployment".to_string());
|
||||
Err("timed out while waiting for deployment".to_string())
|
||||
}
|
||||
}
|
||||
|
||||
/// Will execute a commond in the first pod found that matches the specified label
|
||||
/// '{label}={name}'
|
||||
pub async fn exec_app_capture_output(
|
||||
&self,
|
||||
name: String,
|
||||
label: String,
|
||||
namespace: Option<&str>,
|
||||
command: Vec<&str>,
|
||||
) -> Result<String, String> {
|
||||
let api: Api<Pod>;
|
||||
|
||||
if let Some(ns) = namespace {
|
||||
api = Api::namespaced(self.client.clone(), ns);
|
||||
} else {
|
||||
api = Api::default_namespaced(self.client.clone());
|
||||
}
|
||||
let pod_list = api
|
||||
.list(&ListParams::default().labels(format!("{label}={name}").as_str()))
|
||||
.await
|
||||
.expect("couldn't get list of pods");
|
||||
|
||||
let res = api
|
||||
.exec(
|
||||
pod_list
|
||||
.items
|
||||
.first()
|
||||
.expect("couldn't get pod")
|
||||
.name()
|
||||
.expect("couldn't get pod name")
|
||||
.into_owned()
|
||||
.as_str(),
|
||||
command,
|
||||
&AttachParams::default().stdout(true).stderr(true),
|
||||
)
|
||||
.await;
|
||||
match res {
|
||||
Err(e) => Err(e.to_string()),
|
||||
Ok(mut process) => {
|
||||
let status = process
|
||||
.take_status()
|
||||
.expect("Couldn't get status")
|
||||
.await
|
||||
.expect("Couldn't unwrap status");
|
||||
|
||||
if let Some(s) = status.status {
|
||||
let mut stdout_buf = String::new();
|
||||
if let Some(mut stdout) = process.stdout().take() {
|
||||
stdout.read_to_string(&mut stdout_buf).await;
|
||||
}
|
||||
debug!("Status: {} - {:?}", s, status.details);
|
||||
if s == "Success" {
|
||||
Ok(stdout_buf)
|
||||
} else {
|
||||
Err(s)
|
||||
}
|
||||
} else {
|
||||
Err("Couldn't get inner status of pod exec".to_string())
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -103,7 +235,7 @@ impl K8sClient {
|
||||
.await;
|
||||
|
||||
match res {
|
||||
Err(e) => return Err(e.to_string()),
|
||||
Err(e) => Err(e.to_string()),
|
||||
Ok(mut process) => {
|
||||
let status = process
|
||||
.take_status()
|
||||
@@ -112,14 +244,10 @@ impl K8sClient {
|
||||
.expect("Couldn't unwrap status");
|
||||
|
||||
if let Some(s) = status.status {
|
||||
debug!("Status: {}", s);
|
||||
if s == "Success" {
|
||||
return Ok(());
|
||||
debug!("Status: {} - {:?}", s, status.details);
|
||||
if s == "Success" { Ok(()) } else { Err(s) }
|
||||
} else {
|
||||
return Err(s);
|
||||
}
|
||||
} else {
|
||||
return Err("Couldn't get inner status of pod exec".to_string());
|
||||
Err("Couldn't get inner status of pod exec".to_string())
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -160,8 +288,9 @@ impl K8sClient {
|
||||
trace!("Received current value {current:#?}");
|
||||
// The resource exists, so we calculate and display a diff.
|
||||
println!("\nPerforming dry-run for resource: '{}'", name);
|
||||
let mut current_yaml = serde_yaml::to_value(¤t)
|
||||
.expect(&format!("Could not serialize current value : {current:#?}"));
|
||||
let mut current_yaml = serde_yaml::to_value(¤t).unwrap_or_else(|_| {
|
||||
panic!("Could not serialize current value : {current:#?}")
|
||||
});
|
||||
if current_yaml.is_mapping() && current_yaml.get("status").is_some() {
|
||||
let map = current_yaml.as_mapping_mut().unwrap();
|
||||
let removed = map.remove_entry("status");
|
||||
@@ -228,7 +357,7 @@ impl K8sClient {
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn apply_many<K>(&self, resource: &Vec<K>, ns: Option<&str>) -> Result<Vec<K>, Error>
|
||||
pub async fn apply_many<K>(&self, resource: &[K], ns: Option<&str>) -> Result<Vec<K>, Error>
|
||||
where
|
||||
K: Resource + Clone + std::fmt::Debug + DeserializeOwned + serde::Serialize,
|
||||
<K as Resource>::Scope: ApplyStrategy<K>,
|
||||
@@ -244,7 +373,7 @@ impl K8sClient {
|
||||
|
||||
pub async fn apply_yaml_many(
|
||||
&self,
|
||||
yaml: &Vec<serde_yaml::Value>,
|
||||
#[allow(clippy::ptr_arg)] yaml: &Vec<serde_yaml::Value>,
|
||||
ns: Option<&str>,
|
||||
) -> Result<(), Error> {
|
||||
for y in yaml.iter() {
|
||||
|
||||
@@ -7,22 +7,40 @@ use tokio::sync::OnceCell;
|
||||
|
||||
use crate::{
|
||||
executors::ExecutorError,
|
||||
interpret::{InterpretError, Outcome},
|
||||
interpret::InterpretStatus,
|
||||
inventory::Inventory,
|
||||
modules::k3d::K3DInstallationScore,
|
||||
modules::{
|
||||
k3d::K3DInstallationScore,
|
||||
monitoring::kube_prometheus::crd::{
|
||||
crd_alertmanager_config::CRDPrometheus,
|
||||
prometheus_operator::prometheus_operator_helm_chart_score,
|
||||
},
|
||||
prometheus::{
|
||||
k8s_prometheus_alerting_score::K8sPrometheusCRDAlertingScore,
|
||||
prometheus::PrometheusApplicationMonitoring,
|
||||
},
|
||||
},
|
||||
score::Score,
|
||||
};
|
||||
|
||||
use super::{
|
||||
DeploymentTarget, HelmCommand, K8sclient, MultiTargetTopology, Topology,
|
||||
DeploymentTarget, HelmCommand, K8sclient, MultiTargetTopology, PreparationError,
|
||||
PreparationOutcome, Topology,
|
||||
k8s::K8sClient,
|
||||
tenant::{TenantConfig, TenantManager, k8s::K8sTenantManager},
|
||||
oberservability::monitoring::AlertReceiver,
|
||||
tenant::{
|
||||
TenantConfig, TenantManager,
|
||||
k8s::K8sTenantManager,
|
||||
network_policy::{
|
||||
K3dNetworkPolicyStrategy, NetworkPolicyStrategy, NoopNetworkPolicyStrategy,
|
||||
},
|
||||
},
|
||||
};
|
||||
|
||||
#[derive(Clone, Debug)]
|
||||
struct K8sState {
|
||||
client: Arc<K8sClient>,
|
||||
_source: K8sSource,
|
||||
source: K8sSource,
|
||||
message: String,
|
||||
}
|
||||
|
||||
@@ -56,8 +74,42 @@ impl K8sclient for K8sAnywhereTopology {
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl PrometheusApplicationMonitoring<CRDPrometheus> for K8sAnywhereTopology {
|
||||
async fn install_prometheus(
|
||||
&self,
|
||||
sender: &CRDPrometheus,
|
||||
inventory: &Inventory,
|
||||
receivers: Option<Vec<Box<dyn AlertReceiver<CRDPrometheus>>>>,
|
||||
) -> Result<PreparationOutcome, PreparationError> {
|
||||
let po_result = self.ensure_prometheus_operator(sender).await?;
|
||||
|
||||
if po_result == PreparationOutcome::Noop {
|
||||
debug!("Skipping Prometheus CR installation due to missing operator.");
|
||||
return Ok(po_result);
|
||||
}
|
||||
|
||||
let result = self
|
||||
.get_k8s_prometheus_application_score(sender.clone(), receivers)
|
||||
.await
|
||||
.interpret(inventory, self)
|
||||
.await;
|
||||
|
||||
match result {
|
||||
Ok(outcome) => match outcome.status {
|
||||
InterpretStatus::SUCCESS => Ok(PreparationOutcome::Success {
|
||||
details: outcome.message,
|
||||
}),
|
||||
InterpretStatus::NOOP => Ok(PreparationOutcome::Noop),
|
||||
_ => Err(PreparationError::new(outcome.message)),
|
||||
},
|
||||
Err(err) => Err(PreparationError::new(err.to_string())),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Serialize for K8sAnywhereTopology {
|
||||
fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
|
||||
fn serialize<S>(&self, _serializer: S) -> Result<S::Ok, S::Error>
|
||||
where
|
||||
S: serde::Serializer,
|
||||
{
|
||||
@@ -82,6 +134,19 @@ impl K8sAnywhereTopology {
|
||||
}
|
||||
}
|
||||
|
||||
async fn get_k8s_prometheus_application_score(
|
||||
&self,
|
||||
sender: CRDPrometheus,
|
||||
receivers: Option<Vec<Box<dyn AlertReceiver<CRDPrometheus>>>>,
|
||||
) -> K8sPrometheusCRDAlertingScore {
|
||||
K8sPrometheusCRDAlertingScore {
|
||||
sender,
|
||||
receivers: receivers.unwrap_or_default(),
|
||||
service_monitors: vec![],
|
||||
prometheus_rules: vec![],
|
||||
}
|
||||
}
|
||||
|
||||
fn is_helm_available(&self) -> Result<(), String> {
|
||||
let version_result = Command::new("helm")
|
||||
.arg("version")
|
||||
@@ -110,15 +175,23 @@ impl K8sAnywhereTopology {
|
||||
K3DInstallationScore::default()
|
||||
}
|
||||
|
||||
async fn try_install_k3d(&self) -> Result<(), InterpretError> {
|
||||
self.get_k3d_installation_score()
|
||||
.create_interpret()
|
||||
.execute(&Inventory::empty(), self)
|
||||
.await?;
|
||||
Ok(())
|
||||
async fn try_install_k3d(&self) -> Result<(), PreparationError> {
|
||||
let result = self
|
||||
.get_k3d_installation_score()
|
||||
.interpret(&Inventory::empty(), self)
|
||||
.await;
|
||||
|
||||
match result {
|
||||
Ok(outcome) => match outcome.status {
|
||||
InterpretStatus::SUCCESS => Ok(()),
|
||||
InterpretStatus::NOOP => Ok(()),
|
||||
_ => Err(PreparationError::new(outcome.message)),
|
||||
},
|
||||
Err(err) => Err(PreparationError::new(err.to_string())),
|
||||
}
|
||||
}
|
||||
|
||||
async fn try_get_or_install_k8s_client(&self) -> Result<Option<K8sState>, InterpretError> {
|
||||
async fn try_get_or_install_k8s_client(&self) -> Result<Option<K8sState>, PreparationError> {
|
||||
let k8s_anywhere_config = &self.config;
|
||||
|
||||
// TODO this deserves some refactoring, it is becoming a bit hard to figure out
|
||||
@@ -128,16 +201,16 @@ impl K8sAnywhereTopology {
|
||||
} else {
|
||||
if let Some(kubeconfig) = &k8s_anywhere_config.kubeconfig {
|
||||
debug!("Loading kubeconfig {kubeconfig}");
|
||||
match self.try_load_kubeconfig(&kubeconfig).await {
|
||||
match self.try_load_kubeconfig(kubeconfig).await {
|
||||
Some(client) => {
|
||||
return Ok(Some(K8sState {
|
||||
client: Arc::new(client),
|
||||
_source: K8sSource::Kubeconfig,
|
||||
source: K8sSource::Kubeconfig,
|
||||
message: format!("Loaded k8s client from kubeconfig {kubeconfig}"),
|
||||
}));
|
||||
}
|
||||
None => {
|
||||
return Err(InterpretError::new(format!(
|
||||
return Err(PreparationError::new(format!(
|
||||
"Failed to load kubeconfig from {kubeconfig}"
|
||||
)));
|
||||
}
|
||||
@@ -174,7 +247,7 @@ impl K8sAnywhereTopology {
|
||||
let state = match k3d.get_client().await {
|
||||
Ok(client) => K8sState {
|
||||
client: Arc::new(K8sClient::new(client)),
|
||||
_source: K8sSource::LocalK3d,
|
||||
source: K8sSource::LocalK3d,
|
||||
message: "K8s client ready".to_string(),
|
||||
},
|
||||
Err(_) => todo!(),
|
||||
@@ -183,15 +256,21 @@ impl K8sAnywhereTopology {
|
||||
Ok(Some(state))
|
||||
}
|
||||
|
||||
async fn ensure_k8s_tenant_manager(&self) -> Result<(), String> {
|
||||
if let Some(_) = self.tenant_manager.get() {
|
||||
async fn ensure_k8s_tenant_manager(&self, k8s_state: &K8sState) -> Result<(), String> {
|
||||
if self.tenant_manager.get().is_some() {
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
self.tenant_manager
|
||||
.get_or_try_init(async || -> Result<K8sTenantManager, String> {
|
||||
let k8s_client = self.k8s_client().await?;
|
||||
Ok(K8sTenantManager::new(k8s_client))
|
||||
let network_policy_strategy: Box<dyn NetworkPolicyStrategy> = match k8s_state.source
|
||||
{
|
||||
K8sSource::LocalK3d => Box::new(K3dNetworkPolicyStrategy::new()),
|
||||
K8sSource::Kubeconfig => Box::new(NoopNetworkPolicyStrategy::new()),
|
||||
};
|
||||
|
||||
Ok(K8sTenantManager::new(k8s_client, network_policy_strategy))
|
||||
})
|
||||
.await?;
|
||||
|
||||
@@ -206,6 +285,55 @@ impl K8sAnywhereTopology {
|
||||
)),
|
||||
}
|
||||
}
|
||||
|
||||
async fn ensure_prometheus_operator(
|
||||
&self,
|
||||
sender: &CRDPrometheus,
|
||||
) -> Result<PreparationOutcome, PreparationError> {
|
||||
let status = Command::new("sh")
|
||||
.args(["-c", "kubectl get crd -A | grep -i prometheuses"])
|
||||
.status()
|
||||
.map_err(|e| PreparationError::new(format!("could not connect to cluster: {}", e)))?;
|
||||
|
||||
if !status.success() {
|
||||
if let Some(Some(k8s_state)) = self.k8s_state.get() {
|
||||
match k8s_state.source {
|
||||
K8sSource::LocalK3d => {
|
||||
debug!("installing prometheus operator");
|
||||
let op_score =
|
||||
prometheus_operator_helm_chart_score(sender.namespace.clone());
|
||||
let result = op_score.interpret(&Inventory::empty(), self).await;
|
||||
|
||||
return match result {
|
||||
Ok(outcome) => match outcome.status {
|
||||
InterpretStatus::SUCCESS => Ok(PreparationOutcome::Success {
|
||||
details: "installed prometheus operator".into(),
|
||||
}),
|
||||
InterpretStatus::NOOP => Ok(PreparationOutcome::Noop),
|
||||
_ => Err(PreparationError::new(
|
||||
"failed to install prometheus operator (unknown error)".into(),
|
||||
)),
|
||||
},
|
||||
Err(err) => Err(PreparationError::new(err.to_string())),
|
||||
};
|
||||
}
|
||||
K8sSource::Kubeconfig => {
|
||||
debug!("unable to install prometheus operator, contact cluster admin");
|
||||
return Ok(PreparationOutcome::Noop);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
warn!("Unable to detect k8s_state. Skipping Prometheus Operator install.");
|
||||
return Ok(PreparationOutcome::Noop);
|
||||
}
|
||||
}
|
||||
|
||||
debug!("Prometheus operator is already present, skipping install");
|
||||
|
||||
Ok(PreparationOutcome::Success {
|
||||
details: "prometheus operator present in cluster".into(),
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug)]
|
||||
@@ -263,26 +391,25 @@ impl Topology for K8sAnywhereTopology {
|
||||
"K8sAnywhereTopology"
|
||||
}
|
||||
|
||||
async fn ensure_ready(&self) -> Result<Outcome, InterpretError> {
|
||||
async fn ensure_ready(&self) -> Result<PreparationOutcome, PreparationError> {
|
||||
let k8s_state = self
|
||||
.k8s_state
|
||||
.get_or_try_init(|| self.try_get_or_install_k8s_client())
|
||||
.await?;
|
||||
|
||||
let k8s_state: &K8sState = k8s_state.as_ref().ok_or(InterpretError::new(
|
||||
"No K8s client could be found or installed".to_string(),
|
||||
let k8s_state: &K8sState = k8s_state.as_ref().ok_or(PreparationError::new(
|
||||
"no K8s client could be found or installed".to_string(),
|
||||
))?;
|
||||
|
||||
self.ensure_k8s_tenant_manager()
|
||||
self.ensure_k8s_tenant_manager(k8s_state)
|
||||
.await
|
||||
.map_err(|e| InterpretError::new(e))?;
|
||||
.map_err(PreparationError::new)?;
|
||||
|
||||
match self.is_helm_available() {
|
||||
Ok(()) => Ok(Outcome::success(format!(
|
||||
"{} + helm available",
|
||||
k8s_state.message.clone()
|
||||
))),
|
||||
Err(e) => Err(InterpretError::new(format!("helm unavailable: {}", e))),
|
||||
Ok(()) => Ok(PreparationOutcome::Success {
|
||||
details: format!("{} + helm available", k8s_state.message.clone()),
|
||||
}),
|
||||
Err(e) => Err(PreparationError::new(format!("helm unavailable: {}", e))),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,9 +1,7 @@
|
||||
use async_trait::async_trait;
|
||||
use derive_new::new;
|
||||
|
||||
use crate::interpret::{InterpretError, Outcome};
|
||||
|
||||
use super::{HelmCommand, Topology};
|
||||
use super::{HelmCommand, PreparationError, PreparationOutcome, Topology};
|
||||
|
||||
#[derive(new)]
|
||||
pub struct LocalhostTopology;
|
||||
@@ -14,10 +12,10 @@ impl Topology for LocalhostTopology {
|
||||
"LocalHostTopology"
|
||||
}
|
||||
|
||||
async fn ensure_ready(&self) -> Result<Outcome, InterpretError> {
|
||||
Ok(Outcome::success(
|
||||
"Localhost is Chuck Norris, always ready.".to_string(),
|
||||
))
|
||||
async fn ensure_ready(&self) -> Result<PreparationOutcome, PreparationError> {
|
||||
Ok(PreparationOutcome::Success {
|
||||
details: "Localhost is Chuck Norris, always ready.".into(),
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -6,6 +6,7 @@ mod k8s_anywhere;
|
||||
mod localhost;
|
||||
pub mod oberservability;
|
||||
pub mod tenant;
|
||||
use derive_new::new;
|
||||
pub use k8s_anywhere::*;
|
||||
pub use localhost::*;
|
||||
pub mod k8s;
|
||||
@@ -26,10 +27,13 @@ pub use tftp::*;
|
||||
mod helm_command;
|
||||
pub use helm_command::*;
|
||||
|
||||
use super::{
|
||||
executors::ExecutorError,
|
||||
instrumentation::{self, HarmonyEvent},
|
||||
};
|
||||
use std::error::Error;
|
||||
use std::net::IpAddr;
|
||||
|
||||
use super::interpret::{InterpretError, Outcome};
|
||||
|
||||
/// Represents a logical view of an infrastructure environment providing specific capabilities.
|
||||
///
|
||||
/// A Topology acts as a self-contained "package" responsible for managing access
|
||||
@@ -57,9 +61,128 @@ pub trait Topology: Send + Sync {
|
||||
/// * **Internal Orchestration:** For complex topologies, this method might manage dependencies on other sub-topologies, ensuring *their* `ensure_ready` is called first. Using nested `Maestros` to run setup `Scores` against these sub-topologies is the recommended pattern for non-trivial bootstrapping, allowing reuse of Harmony's core orchestration logic.
|
||||
///
|
||||
/// # Returns
|
||||
/// - `Ok(Outcome)`: Indicates the topology is now ready. The `Outcome` status might be `SUCCESS` if actions were taken, or `NOOP` if it was already ready. The message should provide context.
|
||||
/// - `Err(TopologyError)`: Indicates the topology could not reach a ready state due to configuration issues, discovery failures, bootstrap errors, or unsupported environments.
|
||||
async fn ensure_ready(&self) -> Result<Outcome, InterpretError>;
|
||||
/// - `Ok(PreparationOutcome)`: Indicates the topology is now ready. The outcome is `Success` if actions were taken, or `Noop` if it was already ready. The `details` of a `Success` should provide context.
|
||||
/// - `Err(PreparationError)`: Indicates the topology could not reach a ready state due to configuration issues, discovery failures, bootstrap errors, or unsupported environments.
|
||||
async fn ensure_ready(&self) -> Result<PreparationOutcome, PreparationError>;
|
||||
}
|
||||
|
||||
/// Value returned by a successful `Topology::ensure_ready` call.
#[derive(Debug, Clone, PartialEq, Eq)]
|
||||
pub enum PreparationOutcome {
|
||||
/// Preparation succeeded; `details` is a human-readable summary.
Success { details: String },
|
||||
/// No preparation action was taken.
Noop,
|
||||
}
|
||||
|
||||
/// Error returned by `Topology::ensure_ready` when preparation fails.
///
/// The `new` derive generates `PreparationError::new(msg)`.
#[derive(Debug, Clone, new)]
|
||||
pub struct PreparationError {
|
||||
// Message written verbatim by the `Display` implementation.
msg: String,
|
||||
}
|
||||
|
||||
impl std::fmt::Display for PreparationError {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
f.write_str(&self.msg)
|
||||
}
|
||||
}
|
||||
|
||||
impl Error for PreparationError {}
|
||||
|
||||
impl From<ExecutorError> for PreparationError {
    /// Wraps an `ExecutorError`, keeping its display text in the message.
    ///
    /// The prefix names the error type that is actually being wrapped; it used
    /// to say "InterpretError", which mislabelled the source of the failure.
    fn from(value: ExecutorError) -> Self {
        Self {
            msg: format!("ExecutorError : {value}"),
        }
    }
}
|
||||
|
||||
impl From<kube::Error> for PreparationError {
|
||||
fn from(value: kube::Error) -> Self {
|
||||
Self {
|
||||
msg: format!("PreparationError : {value}"),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl From<String> for PreparationError {
|
||||
fn from(value: String) -> Self {
|
||||
Self {
|
||||
msg: format!("PreparationError : {value}"),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Status values a `TopologyState` can be in.
#[derive(Clone, Debug, PartialEq)]
|
||||
pub enum TopologyStatus {
|
||||
/// Initial status assigned by `TopologyState::new`.
Queued,
|
||||
/// Set by `TopologyState::prepare`.
Preparing,
|
||||
/// Set by `TopologyState::success`.
Success,
|
||||
/// Set by `TopologyState::noop`.
Noop,
|
||||
/// Set by `TopologyState::error`.
Error,
|
||||
}
|
||||
|
||||
/// Current status of a topology; every status change is reported through
/// `instrumentation::instrument`.
pub struct TopologyState {
|
||||
/// Topology identifier sent with every `TopologyStateChanged` event.
pub topology: String,
|
||||
/// Current status.
pub status: TopologyStatus,
|
||||
}
|
||||
|
||||
impl TopologyState {
|
||||
pub fn new(topology: String) -> Self {
|
||||
let instance = Self {
|
||||
topology,
|
||||
status: TopologyStatus::Queued,
|
||||
};
|
||||
|
||||
instrumentation::instrument(HarmonyEvent::TopologyStateChanged {
|
||||
topology: instance.topology.clone(),
|
||||
status: instance.status.clone(),
|
||||
message: None,
|
||||
})
|
||||
.unwrap();
|
||||
|
||||
instance
|
||||
}
|
||||
|
||||
pub fn prepare(&mut self) {
|
||||
self.status = TopologyStatus::Preparing;
|
||||
|
||||
instrumentation::instrument(HarmonyEvent::TopologyStateChanged {
|
||||
topology: self.topology.clone(),
|
||||
status: self.status.clone(),
|
||||
message: None,
|
||||
})
|
||||
.unwrap();
|
||||
}
|
||||
|
||||
pub fn success(&mut self, message: String) {
|
||||
self.status = TopologyStatus::Success;
|
||||
|
||||
instrumentation::instrument(HarmonyEvent::TopologyStateChanged {
|
||||
topology: self.topology.clone(),
|
||||
status: self.status.clone(),
|
||||
message: Some(message),
|
||||
})
|
||||
.unwrap();
|
||||
}
|
||||
|
||||
pub fn noop(&mut self) {
|
||||
self.status = TopologyStatus::Noop;
|
||||
|
||||
instrumentation::instrument(HarmonyEvent::TopologyStateChanged {
|
||||
topology: self.topology.clone(),
|
||||
status: self.status.clone(),
|
||||
message: None,
|
||||
})
|
||||
.unwrap();
|
||||
}
|
||||
|
||||
pub fn error(&mut self, message: String) {
|
||||
self.status = TopologyStatus::Error;
|
||||
|
||||
instrumentation::instrument(HarmonyEvent::TopologyStateChanged {
|
||||
topology: self.topology.clone(),
|
||||
status: self.status.clone(),
|
||||
message: Some(message),
|
||||
})
|
||||
.unwrap();
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
@@ -88,7 +211,7 @@ impl Serialize for Url {
|
||||
{
|
||||
match self {
|
||||
Url::LocalFolder(path) => serializer.serialize_str(path),
|
||||
Url::Url(url) => serializer.serialize_str(&url.as_str()),
|
||||
Url::Url(url) => serializer.serialize_str(url.as_str()),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,3 +1,5 @@
|
||||
use std::any::Any;
|
||||
|
||||
use async_trait::async_trait;
|
||||
use log::debug;
|
||||
|
||||
@@ -43,7 +45,7 @@ impl<S: AlertSender + Installable<T>, T: Topology> Interpret<T> for AlertingInte
|
||||
}
|
||||
|
||||
fn get_name(&self) -> InterpretName {
|
||||
todo!()
|
||||
InterpretName::Alerting
|
||||
}
|
||||
|
||||
fn get_version(&self) -> Version {
|
||||
@@ -62,7 +64,9 @@ impl<S: AlertSender + Installable<T>, T: Topology> Interpret<T> for AlertingInte
|
||||
/// A receiver that can be installed for an alert sender of type `S`.
#[async_trait]
|
||||
pub trait AlertReceiver<S: AlertSender>: std::fmt::Debug + Send + Sync {
|
||||
/// Installs this receiver for `sender`; what that involves is up to the
/// implementation.
async fn install(&self, sender: &S) -> Result<Outcome, InterpretError>;
|
||||
/// Returns a name for this receiver (presumably used for identification —
/// confirm against implementors).
fn name(&self) -> String;
|
||||
/// Returns a boxed copy, so `Box<dyn AlertReceiver<S>>` values can be
/// duplicated.
fn clone_box(&self) -> Box<dyn AlertReceiver<S>>;
|
||||
/// Exposes the receiver as `&dyn Any`, e.g. for downcasting to a concrete type.
fn as_any(&self) -> &dyn Any;
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
@@ -72,6 +76,6 @@ pub trait AlertRule<S: AlertSender>: std::fmt::Debug + Send + Sync {
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
pub trait ScrapeTarger<S: AlertSender> {
|
||||
pub trait ScrapeTarget<S: AlertSender> {
|
||||
async fn install(&self, sender: &S) -> Result<(), InterpretError>;
|
||||
}
|
||||
|
||||
@@ -27,11 +27,11 @@ pub struct UnmanagedRouter {
|
||||
|
||||
impl Router for UnmanagedRouter {
|
||||
fn get_gateway(&self) -> IpAddress {
|
||||
self.gateway.clone()
|
||||
self.gateway
|
||||
}
|
||||
|
||||
fn get_cidr(&self) -> Ipv4Cidr {
|
||||
self.cidr.clone()
|
||||
self.cidr
|
||||
}
|
||||
|
||||
fn get_host(&self) -> LogicalHost {
|
||||
|
||||
@@ -15,36 +15,38 @@ use k8s_openapi::{
|
||||
apimachinery::pkg::util::intstr::IntOrString,
|
||||
};
|
||||
use kube::Resource;
|
||||
use log::{debug, info, warn};
|
||||
use log::debug;
|
||||
use serde::de::DeserializeOwned;
|
||||
use serde_json::json;
|
||||
use tokio::sync::OnceCell;
|
||||
|
||||
use super::{TenantConfig, TenantManager};
|
||||
use super::{TenantConfig, TenantManager, network_policy::NetworkPolicyStrategy};
|
||||
|
||||
#[derive(Clone, Debug)]
|
||||
#[derive(Debug)]
|
||||
pub struct K8sTenantManager {
|
||||
k8s_client: Arc<K8sClient>,
|
||||
k8s_tenant_config: Arc<OnceCell<TenantConfig>>,
|
||||
network_policy_strategy: Box<dyn NetworkPolicyStrategy>,
|
||||
}
|
||||
|
||||
impl K8sTenantManager {
|
||||
pub fn new(client: Arc<K8sClient>) -> Self {
|
||||
pub fn new(
|
||||
client: Arc<K8sClient>,
|
||||
network_policy_strategy: Box<dyn NetworkPolicyStrategy>,
|
||||
) -> Self {
|
||||
Self {
|
||||
k8s_client: client,
|
||||
k8s_tenant_config: Arc::new(OnceCell::new()),
|
||||
network_policy_strategy,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl K8sTenantManager {
|
||||
fn get_namespace_name(&self, config: &TenantConfig) -> String {
|
||||
config.name.clone()
|
||||
}
|
||||
|
||||
fn ensure_constraints(&self, _namespace: &Namespace) -> Result<(), ExecutorError> {
|
||||
warn!("Validate that when tenant already exists (by id) that name has not changed");
|
||||
warn!("Make sure other Tenant constraints are respected by this k8s implementation");
|
||||
// TODO: Ensure constraints are applied to namespace (https://git.nationtech.io/NationTech/harmony/issues/98)
|
||||
Ok(())
|
||||
}
|
||||
|
||||
@@ -219,24 +221,6 @@ impl K8sTenantManager {
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"to": [
|
||||
{
|
||||
"ipBlock": {
|
||||
"cidr": "10.43.0.1/32",
|
||||
}
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"to": [
|
||||
{
|
||||
"ipBlock": {
|
||||
"cidr": "172.23.0.0/16",
|
||||
}
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"to": [
|
||||
{
|
||||
@@ -304,19 +288,19 @@ impl K8sTenantManager {
|
||||
let ports: Option<Vec<NetworkPolicyPort>> =
|
||||
c.1.as_ref().map(|spec| match &spec.data {
|
||||
super::PortSpecData::SinglePort(port) => vec![NetworkPolicyPort {
|
||||
port: Some(IntOrString::Int(port.clone().into())),
|
||||
port: Some(IntOrString::Int((*port).into())),
|
||||
..Default::default()
|
||||
}],
|
||||
super::PortSpecData::PortRange(start, end) => vec![NetworkPolicyPort {
|
||||
port: Some(IntOrString::Int(start.clone().into())),
|
||||
end_port: Some(end.clone().into()),
|
||||
port: Some(IntOrString::Int((*start).into())),
|
||||
end_port: Some((*end).into()),
|
||||
protocol: None, // Not currently supported by Harmony
|
||||
}],
|
||||
|
||||
super::PortSpecData::ListOfPorts(items) => items
|
||||
.iter()
|
||||
.map(|i| NetworkPolicyPort {
|
||||
port: Some(IntOrString::Int(i.clone().into())),
|
||||
port: Some(IntOrString::Int((*i).into())),
|
||||
..Default::default()
|
||||
})
|
||||
.collect(),
|
||||
@@ -361,19 +345,19 @@ impl K8sTenantManager {
|
||||
let ports: Option<Vec<NetworkPolicyPort>> =
|
||||
c.1.as_ref().map(|spec| match &spec.data {
|
||||
super::PortSpecData::SinglePort(port) => vec![NetworkPolicyPort {
|
||||
port: Some(IntOrString::Int(port.clone().into())),
|
||||
port: Some(IntOrString::Int((*port).into())),
|
||||
..Default::default()
|
||||
}],
|
||||
super::PortSpecData::PortRange(start, end) => vec![NetworkPolicyPort {
|
||||
port: Some(IntOrString::Int(start.clone().into())),
|
||||
end_port: Some(end.clone().into()),
|
||||
port: Some(IntOrString::Int((*start).into())),
|
||||
end_port: Some((*end).into()),
|
||||
protocol: None, // Not currently supported by Harmony
|
||||
}],
|
||||
|
||||
super::PortSpecData::ListOfPorts(items) => items
|
||||
.iter()
|
||||
.map(|i| NetworkPolicyPort {
|
||||
port: Some(IntOrString::Int(i.clone().into())),
|
||||
port: Some(IntOrString::Int((*i).into())),
|
||||
..Default::default()
|
||||
})
|
||||
.collect(),
|
||||
@@ -406,12 +390,27 @@ impl K8sTenantManager {
|
||||
}
|
||||
}
|
||||
|
||||
impl Clone for K8sTenantManager {
|
||||
fn clone(&self) -> Self {
|
||||
Self {
|
||||
k8s_client: self.k8s_client.clone(),
|
||||
k8s_tenant_config: self.k8s_tenant_config.clone(),
|
||||
network_policy_strategy: self.network_policy_strategy.clone_box(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl TenantManager for K8sTenantManager {
|
||||
async fn provision_tenant(&self, config: &TenantConfig) -> Result<(), ExecutorError> {
|
||||
let namespace = self.build_namespace(config)?;
|
||||
let resource_quota = self.build_resource_quota(config)?;
|
||||
|
||||
let network_policy = self.build_network_policy(config)?;
|
||||
let network_policy = self
|
||||
.network_policy_strategy
|
||||
.adjust_policy(network_policy, config);
|
||||
|
||||
let resource_limit_range = self.build_limit_range(config)?;
|
||||
|
||||
self.ensure_constraints(&namespace)?;
|
||||
@@ -428,13 +427,14 @@ impl TenantManager for K8sTenantManager {
|
||||
debug!("Creating network_policy for tenant {}", config.name);
|
||||
self.apply_resource(network_policy, config).await?;
|
||||
|
||||
info!(
|
||||
debug!(
|
||||
"Success provisionning K8s tenant id {} name {}",
|
||||
config.id, config.name
|
||||
);
|
||||
self.store_config(config);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Returns a copy of the tenant configuration held in the once-cell, or
/// `None` when the cell has not been set.
async fn get_tenant_config(&self) -> Option<TenantConfig> {
    let stored = self.k8s_tenant_config.get();
    stored.cloned()
}
|
||||
|
||||
@@ -1,11 +1,11 @@
|
||||
pub mod k8s;
|
||||
mod manager;
|
||||
use std::str::FromStr;
|
||||
|
||||
pub use manager::*;
|
||||
use serde::{Deserialize, Serialize};
|
||||
pub mod network_policy;
|
||||
|
||||
use crate::data::Id;
|
||||
pub use manager::*;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use std::str::FromStr;
|
||||
|
||||
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] // Assuming serde for Scores
|
||||
pub struct TenantConfig {
|
||||
|
||||
120
harmony/src/domain/topology/tenant/network_policy.rs
Normal file
120
harmony/src/domain/topology/tenant/network_policy.rs
Normal file
@@ -0,0 +1,120 @@
|
||||
use k8s_openapi::api::networking::v1::{
|
||||
IPBlock, NetworkPolicy, NetworkPolicyEgressRule, NetworkPolicyPeer, NetworkPolicySpec,
|
||||
};
|
||||
|
||||
use super::TenantConfig;
|
||||
|
||||
/// Hook that lets a cluster flavour adjust a tenant `NetworkPolicy`.
pub trait NetworkPolicyStrategy: Send + Sync + std::fmt::Debug {
|
||||
/// Returns a boxed copy, so `Box<dyn NetworkPolicyStrategy>` values can be
/// duplicated.
fn clone_box(&self) -> Box<dyn NetworkPolicyStrategy>;
|
||||
|
||||
/// Takes ownership of `policy` and returns the policy to use for the tenant
/// described by `config`.
fn adjust_policy(&self, policy: NetworkPolicy, config: &TenantConfig) -> NetworkPolicy;
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct NoopNetworkPolicyStrategy {}
|
||||
|
||||
impl NoopNetworkPolicyStrategy {
|
||||
pub fn new() -> Self {
|
||||
Self {}
|
||||
}
|
||||
}
|
||||
|
||||
impl Default for NoopNetworkPolicyStrategy {
|
||||
fn default() -> Self {
|
||||
Self::new()
|
||||
}
|
||||
}
|
||||
|
||||
impl NetworkPolicyStrategy for NoopNetworkPolicyStrategy {
|
||||
fn clone_box(&self) -> Box<dyn NetworkPolicyStrategy> {
|
||||
Box::new(self.clone())
|
||||
}
|
||||
|
||||
fn adjust_policy(&self, policy: NetworkPolicy, _config: &TenantConfig) -> NetworkPolicy {
|
||||
policy
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct K3dNetworkPolicyStrategy {}
|
||||
|
||||
impl K3dNetworkPolicyStrategy {
|
||||
pub fn new() -> Self {
|
||||
Self {}
|
||||
}
|
||||
}
|
||||
|
||||
impl Default for K3dNetworkPolicyStrategy {
|
||||
fn default() -> Self {
|
||||
Self::new()
|
||||
}
|
||||
}
|
||||
|
||||
impl NetworkPolicyStrategy for K3dNetworkPolicyStrategy {
|
||||
fn clone_box(&self) -> Box<dyn NetworkPolicyStrategy> {
|
||||
Box::new(self.clone())
|
||||
}
|
||||
|
||||
fn adjust_policy(&self, policy: NetworkPolicy, _config: &TenantConfig) -> NetworkPolicy {
|
||||
let mut egress = policy
|
||||
.spec
|
||||
.clone()
|
||||
.unwrap_or_default()
|
||||
.egress
|
||||
.clone()
|
||||
.unwrap_or_default();
|
||||
egress.push(NetworkPolicyEgressRule {
|
||||
to: Some(vec![NetworkPolicyPeer {
|
||||
ip_block: Some(IPBlock {
|
||||
cidr: "172.18.0.0/16".into(), // TODO: query the IP range https://git.nationtech.io/NationTech/harmony/issues/108
|
||||
..Default::default()
|
||||
}),
|
||||
..Default::default()
|
||||
}]),
|
||||
..Default::default()
|
||||
});
|
||||
|
||||
NetworkPolicy {
|
||||
spec: Some(NetworkPolicySpec {
|
||||
egress: Some(egress),
|
||||
..policy.spec.unwrap_or_default()
|
||||
}),
|
||||
..policy
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use k8s_openapi::api::networking::v1::{
        IPBlock, NetworkPolicy, NetworkPolicyEgressRule, NetworkPolicyPeer, NetworkPolicySpec,
    };

    use super::{K3dNetworkPolicyStrategy, NetworkPolicyStrategy};

    /// An empty policy adjusted by the k3d strategy must contain exactly one
    /// egress rule pointing at the `172.18.0.0/16` block.
    #[test]
    pub fn should_add_ip_block_for_k3d_harmony_server() {
        // Build the expectation bottom-up: peer -> rule -> spec -> policy.
        let k3d_peer = NetworkPolicyPeer {
            ip_block: Some(IPBlock {
                cidr: "172.18.0.0/16".into(),
                ..Default::default()
            }),
            ..Default::default()
        };
        let k3d_rule = NetworkPolicyEgressRule {
            to: Some(vec![k3d_peer]),
            ..Default::default()
        };
        let expected_policy = NetworkPolicy {
            spec: Some(NetworkPolicySpec {
                egress: Some(vec![k3d_rule]),
                ..Default::default()
            }),
            ..Default::default()
        };

        let tenant = super::TenantConfig::default();
        let policy = K3dNetworkPolicyStrategy::new().adjust_policy(NetworkPolicy::default(), &tenant);

        assert_eq!(expected_policy, policy);
    }
}
|
||||
@@ -60,7 +60,7 @@ impl DnsServer for OPNSenseFirewall {
|
||||
}
|
||||
|
||||
fn get_ip(&self) -> IpAddress {
|
||||
OPNSenseFirewall::get_ip(&self)
|
||||
OPNSenseFirewall::get_ip(self)
|
||||
}
|
||||
|
||||
fn get_host(&self) -> LogicalHost {
|
||||
|
||||
@@ -48,7 +48,7 @@ impl HttpServer for OPNSenseFirewall {
|
||||
async fn ensure_initialized(&self) -> Result<(), ExecutorError> {
|
||||
let mut config = self.opnsense_config.write().await;
|
||||
let caddy = config.caddy();
|
||||
if let None = caddy.get_full_config() {
|
||||
if caddy.get_full_config().is_none() {
|
||||
info!("Http config not available in opnsense config, installing package");
|
||||
config.install_package("os-caddy").await.map_err(|e| {
|
||||
ExecutorError::UnexpectedError(format!(
|
||||
|
||||
@@ -121,10 +121,12 @@ pub(crate) fn haproxy_xml_config_to_harmony_loadbalancer(
|
||||
|
||||
LoadBalancerService {
|
||||
backend_servers,
|
||||
listening_port: frontend.bind.parse().expect(&format!(
|
||||
listening_port: frontend.bind.parse().unwrap_or_else(|_| {
|
||||
panic!(
|
||||
"HAProxy frontend address should be a valid SocketAddr, got {}",
|
||||
frontend.bind
|
||||
)),
|
||||
)
|
||||
}),
|
||||
health_check,
|
||||
}
|
||||
})
|
||||
@@ -167,28 +169,28 @@ pub(crate) fn get_health_check_for_backend(
|
||||
None => return None,
|
||||
};
|
||||
|
||||
let haproxy_health_check = match haproxy
|
||||
let haproxy_health_check = haproxy
|
||||
.healthchecks
|
||||
.healthchecks
|
||||
.iter()
|
||||
.find(|h| &h.uuid == health_check_uuid)
|
||||
{
|
||||
Some(health_check) => health_check,
|
||||
None => return None,
|
||||
};
|
||||
.find(|h| &h.uuid == health_check_uuid)?;
|
||||
|
||||
let binding = haproxy_health_check.health_check_type.to_uppercase();
|
||||
let uppercase = binding.as_str();
|
||||
match uppercase {
|
||||
"TCP" => {
|
||||
if let Some(checkport) = haproxy_health_check.checkport.content.as_ref() {
|
||||
if checkport.len() > 0 {
|
||||
return Some(HealthCheck::TCP(Some(checkport.parse().expect(&format!(
|
||||
if !checkport.is_empty() {
|
||||
return Some(HealthCheck::TCP(Some(checkport.parse().unwrap_or_else(
|
||||
|_| {
|
||||
panic!(
|
||||
"HAProxy check port should be a valid port number, got {checkport}"
|
||||
)))));
|
||||
)
|
||||
},
|
||||
))));
|
||||
}
|
||||
}
|
||||
return Some(HealthCheck::TCP(None));
|
||||
Some(HealthCheck::TCP(None))
|
||||
}
|
||||
"HTTP" => {
|
||||
let path: String = haproxy_health_check
|
||||
@@ -355,16 +357,13 @@ mod tests {
|
||||
|
||||
// Create an HAProxy instance with servers
|
||||
let mut haproxy = HAProxy::default();
|
||||
let mut server = HAProxyServer::default();
|
||||
server.uuid = "server1".to_string();
|
||||
server.address = "192.168.1.1".to_string();
|
||||
server.port = 80;
|
||||
|
||||
let server = HAProxyServer {
|
||||
uuid: "server1".to_string(),
|
||||
address: "192.168.1.1".to_string(),
|
||||
port: 80,
|
||||
..Default::default()
|
||||
};
|
||||
haproxy.servers.servers.push(server);
|
||||
let mut server = HAProxyServer::default();
|
||||
server.uuid = "server3".to_string();
|
||||
server.address = "192.168.1.3".to_string();
|
||||
server.port = 8080;
|
||||
|
||||
// Call the function
|
||||
let result = get_servers_for_backend(&backend, &haproxy);
|
||||
@@ -384,10 +383,12 @@ mod tests {
|
||||
let backend = HAProxyBackend::default();
|
||||
// Create an HAProxy instance with servers
|
||||
let mut haproxy = HAProxy::default();
|
||||
let mut server = HAProxyServer::default();
|
||||
server.uuid = "server1".to_string();
|
||||
server.address = "192.168.1.1".to_string();
|
||||
server.port = 80;
|
||||
let server = HAProxyServer {
|
||||
uuid: "server1".to_string(),
|
||||
address: "192.168.1.1".to_string(),
|
||||
port: 80,
|
||||
..Default::default()
|
||||
};
|
||||
haproxy.servers.servers.push(server);
|
||||
// Call the function
|
||||
let result = get_servers_for_backend(&backend, &haproxy);
|
||||
@@ -402,10 +403,12 @@ mod tests {
|
||||
backend.linked_servers.content = Some("server4,server5".to_string());
|
||||
// Create an HAProxy instance with servers
|
||||
let mut haproxy = HAProxy::default();
|
||||
let mut server = HAProxyServer::default();
|
||||
server.uuid = "server1".to_string();
|
||||
server.address = "192.168.1.1".to_string();
|
||||
server.port = 80;
|
||||
let server = HAProxyServer {
|
||||
uuid: "server1".to_string(),
|
||||
address: "192.168.1.1".to_string(),
|
||||
port: 80,
|
||||
..Default::default()
|
||||
};
|
||||
haproxy.servers.servers.push(server);
|
||||
// Call the function
|
||||
let result = get_servers_for_backend(&backend, &haproxy);
|
||||
@@ -416,20 +419,28 @@ mod tests {
|
||||
#[test]
|
||||
fn test_get_servers_for_backend_multiple_linked_servers() {
|
||||
// Create a backend with multiple linked servers
|
||||
#[allow(clippy::field_reassign_with_default)]
|
||||
let mut backend = HAProxyBackend::default();
|
||||
backend.linked_servers.content = Some("server1,server2".to_string());
|
||||
//
|
||||
// Create an HAProxy instance with matching servers
|
||||
let mut haproxy = HAProxy::default();
|
||||
let mut server = HAProxyServer::default();
|
||||
server.uuid = "server1".to_string();
|
||||
server.address = "some-hostname.test.mcd".to_string();
|
||||
server.port = 80;
|
||||
let server = HAProxyServer {
|
||||
uuid: "server1".to_string(),
|
||||
address: "some-hostname.test.mcd".to_string(),
|
||||
port: 80,
|
||||
..Default::default()
|
||||
};
|
||||
haproxy.servers.servers.push(server);
|
||||
let mut server = HAProxyServer::default();
|
||||
server.uuid = "server2".to_string();
|
||||
server.address = "192.168.1.2".to_string();
|
||||
server.port = 8080;
|
||||
|
||||
let server = HAProxyServer {
|
||||
uuid: "server2".to_string(),
|
||||
address: "192.168.1.2".to_string(),
|
||||
port: 8080,
|
||||
..Default::default()
|
||||
};
|
||||
haproxy.servers.servers.push(server);
|
||||
|
||||
// Call the function
|
||||
let result = get_servers_for_backend(&backend, &haproxy);
|
||||
// Check the result
|
||||
|
||||
@@ -58,7 +58,7 @@ impl TftpServer for OPNSenseFirewall {
|
||||
async fn ensure_initialized(&self) -> Result<(), ExecutorError> {
|
||||
let mut config = self.opnsense_config.write().await;
|
||||
let tftp = config.tftp();
|
||||
if let None = tftp.get_full_config() {
|
||||
if tftp.get_full_config().is_none() {
|
||||
info!("Tftp config not available in opnsense config, installing package");
|
||||
config.install_package("os-tftp").await.map_err(|e| {
|
||||
ExecutorError::UnexpectedError(format!(
|
||||
|
||||
@@ -13,7 +13,7 @@ pub trait ApplicationFeature<T: Topology>:
|
||||
fn name(&self) -> String;
|
||||
}
|
||||
|
||||
trait ApplicationFeatureClone<T: Topology> {
|
||||
pub trait ApplicationFeatureClone<T: Topology> {
|
||||
fn clone_box(&self) -> Box<dyn ApplicationFeature<T>>;
|
||||
}
|
||||
|
||||
@@ -27,7 +27,7 @@ where
|
||||
}
|
||||
|
||||
impl<T: Topology> Serialize for Box<dyn ApplicationFeature<T>> {
|
||||
fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
|
||||
fn serialize<S>(&self, _serializer: S) -> Result<S::Ok, S::Error>
|
||||
where
|
||||
S: serde::Serializer,
|
||||
{
|
||||
|
||||
@@ -184,12 +184,11 @@ impl ArgoApplication {
|
||||
pub fn to_yaml(&self) -> serde_yaml::Value {
|
||||
let name = &self.name;
|
||||
let namespace = if let Some(ns) = self.namespace.as_ref() {
|
||||
&ns
|
||||
ns
|
||||
} else {
|
||||
"argocd"
|
||||
};
|
||||
let project = &self.project;
|
||||
let source = &self.source;
|
||||
|
||||
let yaml_str = format!(
|
||||
r#"
|
||||
@@ -228,7 +227,7 @@ spec:
|
||||
serde_yaml::to_value(&self.source).expect("couldn't serialize source to value");
|
||||
let sync_policy = serde_yaml::to_value(&self.sync_policy)
|
||||
.expect("couldn't serialize sync_policy to value");
|
||||
let revision_history_limit = serde_yaml::to_value(&self.revision_history_limit)
|
||||
let revision_history_limit = serde_yaml::to_value(self.revision_history_limit)
|
||||
.expect("couldn't serialize revision_history_limit to value");
|
||||
|
||||
spec.insert(
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
use std::{io::Write, process::Command, sync::Arc};
|
||||
|
||||
use async_trait::async_trait;
|
||||
use log::{debug, error};
|
||||
use log::info;
|
||||
use serde_yaml::Value;
|
||||
use tempfile::NamedTempFile;
|
||||
|
||||
@@ -10,7 +10,7 @@ use crate::{
|
||||
data::Version,
|
||||
inventory::Inventory,
|
||||
modules::application::{
|
||||
Application, ApplicationFeature, HelmPackage, OCICompliant,
|
||||
ApplicationFeature, HelmPackage, OCICompliant,
|
||||
features::{ArgoApplication, ArgoHelmScore},
|
||||
},
|
||||
score::Score,
|
||||
@@ -56,14 +56,11 @@ impl<A: OCICompliant + HelmPackage> ContinuousDelivery<A> {
|
||||
chart_url: String,
|
||||
image_name: String,
|
||||
) -> Result<(), String> {
|
||||
error!(
|
||||
"FIXME This works only with local k3d installations, which is fine only for current demo purposes. We assume usage of K8sAnywhereTopology"
|
||||
);
|
||||
|
||||
error!("TODO hardcoded k3d bin path is wrong");
|
||||
// TODO: This works only with local k3d installations, which is fine only for current demo purposes. We assume usage of K8sAnywhereTopology
|
||||
// https://git.nationtech.io/NationTech/harmony/issues/106
|
||||
let k3d_bin_path = (*HARMONY_DATA_DIR).join("k3d").join("k3d");
|
||||
// --- 1. Import the container image into the k3d cluster ---
|
||||
debug!(
|
||||
info!(
|
||||
"Importing image '{}' into k3d cluster 'harmony'",
|
||||
image_name
|
||||
);
|
||||
@@ -80,7 +77,7 @@ impl<A: OCICompliant + HelmPackage> ContinuousDelivery<A> {
|
||||
}
|
||||
|
||||
// --- 2. Get the kubeconfig for the k3d cluster and write it to a temp file ---
|
||||
debug!("Retrieving kubeconfig for k3d cluster 'harmony'");
|
||||
info!("Retrieving kubeconfig for k3d cluster 'harmony'");
|
||||
let kubeconfig_output = Command::new(&k3d_bin_path)
|
||||
.args(["kubeconfig", "get", "harmony"])
|
||||
.output()
|
||||
@@ -101,7 +98,7 @@ impl<A: OCICompliant + HelmPackage> ContinuousDelivery<A> {
|
||||
let kubeconfig_path = temp_kubeconfig.path().to_str().unwrap();
|
||||
|
||||
// --- 3. Install or upgrade the Helm chart in the cluster ---
|
||||
debug!(
|
||||
info!(
|
||||
"Deploying Helm chart '{}' to namespace '{}'",
|
||||
chart_url, app_name
|
||||
);
|
||||
@@ -131,7 +128,7 @@ impl<A: OCICompliant + HelmPackage> ContinuousDelivery<A> {
|
||||
));
|
||||
}
|
||||
|
||||
debug!("Successfully deployed '{}' to local k3d cluster.", app_name);
|
||||
info!("Successfully deployed '{}' to local k3d cluster.", app_name);
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
@@ -151,14 +148,12 @@ impl<
|
||||
// Or ask for it when unknown
|
||||
|
||||
let helm_chart = self.application.build_push_helm_package(&image).await?;
|
||||
debug!("Pushed new helm chart {helm_chart}");
|
||||
|
||||
error!("TODO Make building image configurable/skippable if image already exists (prompt)");
|
||||
// TODO: Make building image configurable/skippable if image already exists (prompt)
|
||||
// https://git.nationtech.io/NationTech/harmony/issues/104
|
||||
let image = self.application.build_push_oci_image().await?;
|
||||
debug!("Pushed new docker image {image}");
|
||||
|
||||
debug!("Installing ContinuousDelivery feature");
|
||||
// TODO this is a temporary hack for demo purposes, the deployment target should be driven
|
||||
// TODO: this is a temporary hack for demo purposes, the deployment target should be driven
|
||||
// by the topology only and we should not have to know how to perform tasks like this for
|
||||
// which the topology should be responsible.
|
||||
//
|
||||
@@ -171,17 +166,20 @@ impl<
|
||||
// access it. This forces every Topology to understand the concept of targets though... So
|
||||
// instead I'll create a new Capability which is MultiTargetTopology and we'll see how it
|
||||
// goes. It still does not feel right though.
|
||||
//
|
||||
// https://git.nationtech.io/NationTech/harmony/issues/106
|
||||
match topology.current_target() {
|
||||
DeploymentTarget::LocalDev => {
|
||||
info!("Deploying {} locally...", self.application.name());
|
||||
self.deploy_to_local_k3d(self.application.name(), helm_chart, image)
|
||||
.await?;
|
||||
}
|
||||
target => {
|
||||
debug!("Deploying to target {target:?}");
|
||||
info!("Deploying {} to target {target:?}", self.application.name());
|
||||
let score = ArgoHelmScore {
|
||||
namespace: "harmonydemo-staging".to_string(),
|
||||
openshift: false,
|
||||
domain: "argo.harmonydemo.apps.st.mcd".to_string(),
|
||||
namespace: "harmony-example-rust-webapp".to_string(),
|
||||
openshift: true,
|
||||
domain: "argo.harmonydemo.apps.ncd0.harmony.mcd".to_string(),
|
||||
argo_apps: vec![ArgoApplication::from(CDApplicationConfig {
|
||||
// helm pull oci://hub.nationtech.io/harmony/harmony-example-rust-webapp-chart --version 0.1.0
|
||||
version: Version::from("0.1.0").unwrap(),
|
||||
@@ -189,12 +187,11 @@ impl<
|
||||
helm_chart_name: "harmony-example-rust-webapp-chart".to_string(),
|
||||
values_overrides: None,
|
||||
name: "harmony-demo-rust-webapp".to_string(),
|
||||
namespace: "harmonydemo-staging".to_string(),
|
||||
namespace: "harmony-example-rust-webapp".to_string(),
|
||||
})],
|
||||
};
|
||||
score
|
||||
.create_interpret()
|
||||
.execute(&Inventory::empty(), topology)
|
||||
.interpret(&Inventory::empty(), topology)
|
||||
.await
|
||||
.unwrap();
|
||||
}
|
||||
|
||||
@@ -1,5 +1,4 @@
|
||||
use async_trait::async_trait;
|
||||
use log::error;
|
||||
use non_blank_string_rs::NonBlankString;
|
||||
use serde::Serialize;
|
||||
use std::str::FromStr;
|
||||
@@ -50,20 +49,21 @@ impl<T: Topology + K8sclient + HelmCommand> Interpret<T> for ArgoInterpret {
|
||||
inventory: &Inventory,
|
||||
topology: &T,
|
||||
) -> Result<Outcome, InterpretError> {
|
||||
error!("Uncomment below, only disabled for debugging");
|
||||
self.score
|
||||
.create_interpret()
|
||||
.execute(inventory, topology)
|
||||
.await?;
|
||||
self.score.interpret(inventory, topology).await?;
|
||||
|
||||
let k8s_client = topology.k8s_client().await?;
|
||||
k8s_client
|
||||
.apply_yaml_many(&self.argo_apps.iter().map(|a| a.to_yaml()).collect(), None)
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
Ok(Outcome::success(format!(
|
||||
"Successfully installed ArgoCD and {} Applications",
|
||||
self.argo_apps.len()
|
||||
"ArgoCD installed with {} {}",
|
||||
self.argo_apps.len(),
|
||||
match self.argo_apps.len() {
|
||||
1 => "application",
|
||||
_ => "applications",
|
||||
}
|
||||
)))
|
||||
}
|
||||
|
||||
@@ -986,7 +986,7 @@ commitServer:
|
||||
);
|
||||
|
||||
HelmChartScore {
|
||||
namespace: Some(NonBlankString::from_str(&namespace).unwrap()),
|
||||
namespace: Some(NonBlankString::from_str(namespace).unwrap()),
|
||||
release_name: NonBlankString::from_str("argo-cd").unwrap(),
|
||||
chart_name: NonBlankString::from_str("argo/argo-cd").unwrap(),
|
||||
chart_version: Some(NonBlankString::from_str("8.1.2").unwrap()),
|
||||
|
||||
@@ -1,51 +1,67 @@
|
||||
use std::sync::Arc;
|
||||
|
||||
use async_trait::async_trait;
|
||||
use base64::{Engine as _, engine::general_purpose};
|
||||
use log::{debug, info};
|
||||
use crate::modules::application::{Application, ApplicationFeature};
|
||||
use crate::modules::monitoring::application_monitoring::application_monitoring_score::ApplicationMonitoringScore;
|
||||
use crate::modules::monitoring::kube_prometheus::crd::crd_alertmanager_config::CRDPrometheus;
|
||||
|
||||
use crate::topology::MultiTargetTopology;
|
||||
use crate::{
|
||||
inventory::Inventory,
|
||||
modules::{
|
||||
application::{ApplicationFeature, OCICompliant},
|
||||
monitoring::{
|
||||
alert_channel::webhook_receiver::WebhookReceiver,
|
||||
kube_prometheus::{
|
||||
helm_prometheus_alert_score::HelmPrometheusAlertingScore,
|
||||
types::{NamespaceSelector, ServiceMonitor},
|
||||
},
|
||||
ntfy::ntfy::NtfyScore,
|
||||
},
|
||||
modules::monitoring::{
|
||||
alert_channel::webhook_receiver::WebhookReceiver, ntfy::ntfy::NtfyScore,
|
||||
},
|
||||
score::Score,
|
||||
topology::{HelmCommand, K8sclient, Topology, Url, tenant::TenantManager},
|
||||
};
|
||||
use crate::{
|
||||
modules::prometheus::prometheus::PrometheusApplicationMonitoring,
|
||||
topology::oberservability::monitoring::AlertReceiver,
|
||||
};
|
||||
use async_trait::async_trait;
|
||||
use base64::{Engine as _, engine::general_purpose};
|
||||
use log::{debug, info};
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct Monitoring {
|
||||
pub application: Arc<dyn OCICompliant>,
|
||||
pub application: Arc<dyn Application>,
|
||||
pub alert_receiver: Vec<Box<dyn AlertReceiver<CRDPrometheus>>>,
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl<T: Topology + HelmCommand + K8sclient + 'static + TenantManager> ApplicationFeature<T>
|
||||
for Monitoring
|
||||
impl<
|
||||
T: Topology
|
||||
+ HelmCommand
|
||||
+ 'static
|
||||
+ TenantManager
|
||||
+ K8sclient
|
||||
+ MultiTargetTopology
|
||||
+ std::fmt::Debug
|
||||
+ PrometheusApplicationMonitoring<CRDPrometheus>,
|
||||
> ApplicationFeature<T> for Monitoring
|
||||
{
|
||||
async fn ensure_installed(&self, topology: &T) -> Result<(), String> {
|
||||
info!("Ensuring monitoring is available for application");
|
||||
|
||||
let ntfy = NtfyScore {
|
||||
// namespace: topology
|
||||
// .get_tenant_config()
|
||||
// .await
|
||||
// .expect("couldn't get tenant config")
|
||||
// .name,
|
||||
namespace: self.application.name(),
|
||||
host: "localhost".to_string(),
|
||||
};
|
||||
ntfy.create_interpret()
|
||||
.execute(&Inventory::empty(), topology)
|
||||
let namespace = topology
|
||||
.get_tenant_config()
|
||||
.await
|
||||
.expect("couldn't create interpret for ntfy");
|
||||
.map(|ns| ns.name.clone())
|
||||
.unwrap_or_else(|| self.application.name());
|
||||
|
||||
let mut alerting_score = ApplicationMonitoringScore {
|
||||
sender: CRDPrometheus {
|
||||
namespace: namespace.clone(),
|
||||
client: topology.k8s_client().await.unwrap(),
|
||||
},
|
||||
application: self.application.clone(),
|
||||
receivers: self.alert_receiver.clone(),
|
||||
};
|
||||
let ntfy = NtfyScore {
|
||||
namespace: namespace.clone(),
|
||||
host: "ntfy.harmonydemo.apps.ncd0.harmony.mcd".to_string(),
|
||||
};
|
||||
ntfy.interpret(&Inventory::empty(), topology)
|
||||
.await
|
||||
.map_err(|e| e.to_string())?;
|
||||
|
||||
let ntfy_default_auth_username = "harmony";
|
||||
let ntfy_default_auth_password = "harmony";
|
||||
@@ -70,7 +86,7 @@ impl<T: Topology + HelmCommand + K8sclient + 'static + TenantManager> Applicatio
|
||||
url::Url::parse(
|
||||
format!(
|
||||
"http://ntfy.{}.svc.cluster.local/rust-web-app?auth={ntfy_default_auth_param}",
|
||||
self.application.name()
|
||||
namespace.clone()
|
||||
)
|
||||
.as_str(),
|
||||
)
|
||||
@@ -78,31 +94,11 @@ impl<T: Topology + HelmCommand + K8sclient + 'static + TenantManager> Applicatio
|
||||
),
|
||||
};
|
||||
|
||||
let mut service_monitor = ServiceMonitor::default();
|
||||
service_monitor.namespace_selector = Some(NamespaceSelector {
|
||||
any: true,
|
||||
match_names: vec![],
|
||||
});
|
||||
|
||||
service_monitor.name = "rust-webapp".to_string();
|
||||
|
||||
// let alerting_score = ApplicationPrometheusMonitoringScore {
|
||||
// receivers: vec![Box::new(ntfy_receiver)],
|
||||
// rules: vec![],
|
||||
// service_monitors: vec![service_monitor],
|
||||
// };
|
||||
|
||||
let alerting_score = HelmPrometheusAlertingScore {
|
||||
receivers: vec![Box::new(ntfy_receiver)],
|
||||
rules: vec![],
|
||||
service_monitors: vec![service_monitor],
|
||||
};
|
||||
|
||||
alerting_score.receivers.push(Box::new(ntfy_receiver));
|
||||
alerting_score
|
||||
.create_interpret()
|
||||
.execute(&Inventory::empty(), topology)
|
||||
.interpret(&Inventory::empty(), topology)
|
||||
.await
|
||||
.unwrap();
|
||||
.map_err(|e| e.to_string())?;
|
||||
Ok(())
|
||||
}
|
||||
fn name(&self) -> String {
|
||||
|
||||
@@ -10,14 +10,23 @@ pub use oci::*;
|
||||
pub use rust::*;
|
||||
|
||||
use async_trait::async_trait;
|
||||
use serde::Serialize;
|
||||
|
||||
use crate::{
|
||||
data::{Id, Version},
|
||||
instrumentation::{self, HarmonyEvent},
|
||||
interpret::{Interpret, InterpretError, InterpretName, InterpretStatus, Outcome},
|
||||
inventory::Inventory,
|
||||
topology::Topology,
|
||||
};
|
||||
|
||||
/// Lifecycle state of a single application feature, as reported through
/// `HarmonyEvent::ApplicationFeatureStateChanged`.
#[derive(Debug, Clone)]
pub enum ApplicationFeatureStatus {
    /// Installation of the feature is about to start.
    Installing,
    /// The feature reported a successful installation.
    Installed,
    /// Installation failed.
    Failed {
        /// Error message returned by the feature.
        details: String,
    },
}
|
||||
|
||||
/// Minimal contract shared by every application.
///
/// Implementors must be debuggable and safe to share across threads.
pub trait Application: Send + Sync + std::fmt::Debug {
    /// Returns the application's name as an owned string.
    fn name(&self) -> String;
}
|
||||
@@ -46,20 +55,41 @@ impl<A: Application, T: Topology + std::fmt::Debug> Interpret<T> for Application
|
||||
.join(", ")
|
||||
);
|
||||
for feature in self.features.iter() {
|
||||
debug!(
|
||||
"Installing feature {} for application {app_name}",
|
||||
feature.name()
|
||||
);
|
||||
instrumentation::instrument(HarmonyEvent::ApplicationFeatureStateChanged {
|
||||
topology: topology.name().into(),
|
||||
application: self.application.name(),
|
||||
feature: feature.name(),
|
||||
status: ApplicationFeatureStatus::Installing,
|
||||
})
|
||||
.unwrap();
|
||||
|
||||
let _ = match feature.ensure_installed(topology).await {
|
||||
Ok(()) => (),
|
||||
Ok(()) => {
|
||||
instrumentation::instrument(HarmonyEvent::ApplicationFeatureStateChanged {
|
||||
topology: topology.name().into(),
|
||||
application: self.application.name(),
|
||||
feature: feature.name(),
|
||||
status: ApplicationFeatureStatus::Installed,
|
||||
})
|
||||
.unwrap();
|
||||
}
|
||||
Err(msg) => {
|
||||
instrumentation::instrument(HarmonyEvent::ApplicationFeatureStateChanged {
|
||||
topology: topology.name().into(),
|
||||
application: self.application.name(),
|
||||
feature: feature.name(),
|
||||
status: ApplicationFeatureStatus::Failed {
|
||||
details: msg.clone(),
|
||||
},
|
||||
})
|
||||
.unwrap();
|
||||
return Err(InterpretError::new(format!(
|
||||
"Application Interpret failed to install feature : {msg}"
|
||||
)));
|
||||
}
|
||||
};
|
||||
}
|
||||
Ok(Outcome::success("successfully created app".to_string()))
|
||||
Ok(Outcome::success("Application created".to_string()))
|
||||
}
|
||||
|
||||
fn get_name(&self) -> InterpretName {
|
||||
@@ -78,3 +108,12 @@ impl<A: Application, T: Topology + std::fmt::Debug> Interpret<T> for Application
|
||||
todo!()
|
||||
}
|
||||
}
|
||||
|
||||
impl Serialize for dyn Application {
|
||||
fn serialize<S>(&self, _serializer: S) -> Result<S::Ok, S::Error>
|
||||
where
|
||||
S: serde::Serializer,
|
||||
{
|
||||
todo!()
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
use std::fs;
|
||||
use std::path::PathBuf;
|
||||
use std::path::{Path, PathBuf};
|
||||
use std::process;
|
||||
use std::sync::Arc;
|
||||
|
||||
@@ -10,7 +10,7 @@ use dockerfile_builder::Dockerfile;
|
||||
use dockerfile_builder::instruction::{CMD, COPY, ENV, EXPOSE, FROM, RUN, USER, WORKDIR};
|
||||
use dockerfile_builder::instruction_builder::CopyBuilder;
|
||||
use futures_util::StreamExt;
|
||||
use log::{debug, error, log_enabled};
|
||||
use log::{debug, info, log_enabled};
|
||||
use serde::Serialize;
|
||||
use tar::Archive;
|
||||
|
||||
@@ -46,7 +46,7 @@ where
|
||||
}
|
||||
|
||||
fn name(&self) -> String {
|
||||
format!("Application: {}", self.application.name())
|
||||
format!("{} [ApplicationScore]", self.application.name())
|
||||
}
|
||||
}
|
||||
|
||||
@@ -73,19 +73,19 @@ impl Application for RustWebapp {
|
||||
#[async_trait]
|
||||
impl HelmPackage for RustWebapp {
|
||||
async fn build_push_helm_package(&self, image_url: &str) -> Result<String, String> {
|
||||
debug!("Starting Helm chart build and push for '{}'", self.name);
|
||||
info!("Starting Helm chart build and push for '{}'", self.name);
|
||||
|
||||
// 1. Create the Helm chart files on disk.
|
||||
let chart_dir = self
|
||||
.create_helm_chart_files(image_url)
|
||||
.map_err(|e| format!("Failed to create Helm chart files: {}", e))?;
|
||||
debug!("Successfully created Helm chart files in {:?}", chart_dir);
|
||||
info!("Successfully created Helm chart files in {:?}", chart_dir);
|
||||
|
||||
// 2. Package the chart into a .tgz archive.
|
||||
let packaged_chart_path = self
|
||||
.package_helm_chart(&chart_dir)
|
||||
.map_err(|e| format!("Failed to package Helm chart: {}", e))?;
|
||||
debug!(
|
||||
info!(
|
||||
"Successfully packaged Helm chart: {}",
|
||||
packaged_chart_path.to_string_lossy()
|
||||
);
|
||||
@@ -94,7 +94,7 @@ impl HelmPackage for RustWebapp {
|
||||
let oci_chart_url = self
|
||||
.push_helm_chart(&packaged_chart_path)
|
||||
.map_err(|e| format!("Failed to push Helm chart: {}", e))?;
|
||||
debug!("Successfully pushed Helm chart to: {}", oci_chart_url);
|
||||
info!("Successfully pushed Helm chart to: {}", oci_chart_url);
|
||||
|
||||
Ok(oci_chart_url)
|
||||
}
|
||||
@@ -107,20 +107,20 @@ impl OCICompliant for RustWebapp {
|
||||
async fn build_push_oci_image(&self) -> Result<String, String> {
|
||||
// This function orchestrates the build and push process.
|
||||
// It's async to match the trait definition, though the underlying docker commands are blocking.
|
||||
debug!("Starting OCI image build and push for '{}'", self.name);
|
||||
info!("Starting OCI image build and push for '{}'", self.name);
|
||||
|
||||
// 1. Build the image by calling the synchronous helper function.
|
||||
let image_tag = self.image_name();
|
||||
self.build_docker_image(&image_tag)
|
||||
.await
|
||||
.map_err(|e| format!("Failed to build Docker image: {}", e))?;
|
||||
debug!("Successfully built Docker image: {}", image_tag);
|
||||
info!("Successfully built Docker image: {}", image_tag);
|
||||
|
||||
// 2. Push the image to the registry.
|
||||
self.push_docker_image(&image_tag)
|
||||
.await
|
||||
.map_err(|e| format!("Failed to push Docker image: {}", e))?;
|
||||
debug!("Successfully pushed Docker image to: {}", image_tag);
|
||||
info!("Successfully pushed Docker image to: {}", image_tag);
|
||||
|
||||
Ok(image_tag)
|
||||
}
|
||||
@@ -174,7 +174,7 @@ impl RustWebapp {
|
||||
.platform("linux/x86_64");
|
||||
|
||||
let mut temp_tar_builder = tar::Builder::new(Vec::new());
|
||||
let _ = temp_tar_builder
|
||||
temp_tar_builder
|
||||
.append_dir_all("", self.project_root.clone())
|
||||
.unwrap();
|
||||
let archive = temp_tar_builder
|
||||
@@ -195,7 +195,7 @@ impl RustWebapp {
|
||||
);
|
||||
|
||||
while let Some(msg) = image_build_stream.next().await {
|
||||
println!("Message: {msg:?}");
|
||||
debug!("Message: {msg:?}");
|
||||
}
|
||||
|
||||
Ok(image_name.to_string())
|
||||
@@ -219,7 +219,7 @@ impl RustWebapp {
|
||||
);
|
||||
|
||||
while let Some(msg) = push_image_stream.next().await {
|
||||
println!("Message: {msg:?}");
|
||||
debug!("Message: {msg:?}");
|
||||
}
|
||||
|
||||
Ok(image_tag.to_string())
|
||||
@@ -288,9 +288,8 @@ impl RustWebapp {
|
||||
.unwrap(),
|
||||
);
|
||||
// Copy the compiled binary from the builder stage.
|
||||
error!(
|
||||
"FIXME Should not be using score name here, instead should use name from Cargo.toml"
|
||||
);
|
||||
// TODO: Should not be using score name here, instead should use name from Cargo.toml
|
||||
// https://git.nationtech.io/NationTech/harmony/issues/105
|
||||
let binary_path_in_builder = format!("/app/target/release/{}", self.name);
|
||||
let binary_path_in_final = format!("/home/appuser/{}", self.name);
|
||||
dockerfile.push(
|
||||
@@ -328,9 +327,8 @@ impl RustWebapp {
|
||||
));
|
||||
|
||||
// Copy only the compiled binary from the builder stage.
|
||||
error!(
|
||||
"FIXME Should not be using score name here, instead should use name from Cargo.toml"
|
||||
);
|
||||
// TODO: Should not be using score name here, instead should use name from Cargo.toml
|
||||
// https://git.nationtech.io/NationTech/harmony/issues/105
|
||||
let binary_path_in_builder = format!("/app/target/release/{}", self.name);
|
||||
let binary_path_in_final = format!("/usr/local/bin/{}", self.name);
|
||||
dockerfile.push(
|
||||
@@ -530,10 +528,7 @@ spec:
|
||||
}
|
||||
|
||||
/// Packages a Helm chart directory into a .tgz file.
|
||||
fn package_helm_chart(
|
||||
&self,
|
||||
chart_dir: &PathBuf,
|
||||
) -> Result<PathBuf, Box<dyn std::error::Error>> {
|
||||
fn package_helm_chart(&self, chart_dir: &Path) -> Result<PathBuf, Box<dyn std::error::Error>> {
|
||||
let chart_dirname = chart_dir.file_name().expect("Should find a chart dirname");
|
||||
debug!(
|
||||
"Launching `helm package {}` cli with CWD {}",
|
||||
@@ -546,14 +541,13 @@ spec:
|
||||
);
|
||||
let output = process::Command::new("helm")
|
||||
.args(["package", chart_dirname.to_str().unwrap()])
|
||||
.current_dir(&self.project_root.join(".harmony_generated").join("helm")) // Run package from the parent dir
|
||||
.current_dir(self.project_root.join(".harmony_generated").join("helm")) // Run package from the parent dir
|
||||
.output()?;
|
||||
|
||||
self.check_output(&output, "Failed to package Helm chart")?;
|
||||
|
||||
// Helm prints the path of the created chart to stdout.
|
||||
let tgz_name = String::from_utf8(output.stdout)?
|
||||
.trim()
|
||||
.split_whitespace()
|
||||
.last()
|
||||
.unwrap_or_default()
|
||||
@@ -573,7 +567,7 @@ spec:
|
||||
/// Pushes a packaged Helm chart to an OCI registry.
|
||||
fn push_helm_chart(
|
||||
&self,
|
||||
packaged_chart_path: &PathBuf,
|
||||
packaged_chart_path: &Path,
|
||||
) -> Result<String, Box<dyn std::error::Error>> {
|
||||
// The chart name is the file stem of the .tgz file
|
||||
let chart_file_name = packaged_chart_path.file_stem().unwrap().to_str().unwrap();
|
||||
|
||||
@@ -41,6 +41,6 @@ impl<T: Topology + HelmCommand> Score<T> for CertManagerHelmScore {
|
||||
}
|
||||
|
||||
fn name(&self) -> String {
|
||||
format!("CertManagerHelmScore")
|
||||
"CertManagerHelmScore".to_string()
|
||||
}
|
||||
}
|
||||
|
||||
@@ -111,7 +111,7 @@ impl DhcpInterpret {
|
||||
|
||||
let boot_filename_outcome = match &self.score.boot_filename {
|
||||
Some(boot_filename) => {
|
||||
dhcp_server.set_boot_filename(&boot_filename).await?;
|
||||
dhcp_server.set_boot_filename(boot_filename).await?;
|
||||
Outcome::new(
|
||||
InterpretStatus::SUCCESS,
|
||||
format!("Dhcp Interpret Set boot filename to {boot_filename}"),
|
||||
@@ -122,7 +122,7 @@ impl DhcpInterpret {
|
||||
|
||||
let filename_outcome = match &self.score.filename {
|
||||
Some(filename) => {
|
||||
dhcp_server.set_filename(&filename).await?;
|
||||
dhcp_server.set_filename(filename).await?;
|
||||
Outcome::new(
|
||||
InterpretStatus::SUCCESS,
|
||||
format!("Dhcp Interpret Set filename to {filename}"),
|
||||
@@ -133,7 +133,7 @@ impl DhcpInterpret {
|
||||
|
||||
let filename64_outcome = match &self.score.filename64 {
|
||||
Some(filename64) => {
|
||||
dhcp_server.set_filename64(&filename64).await?;
|
||||
dhcp_server.set_filename64(filename64).await?;
|
||||
Outcome::new(
|
||||
InterpretStatus::SUCCESS,
|
||||
format!("Dhcp Interpret Set filename64 to {filename64}"),
|
||||
@@ -144,7 +144,7 @@ impl DhcpInterpret {
|
||||
|
||||
let filenameipxe_outcome = match &self.score.filenameipxe {
|
||||
Some(filenameipxe) => {
|
||||
dhcp_server.set_filenameipxe(&filenameipxe).await?;
|
||||
dhcp_server.set_filenameipxe(filenameipxe).await?;
|
||||
Outcome::new(
|
||||
InterpretStatus::SUCCESS,
|
||||
format!("Dhcp Interpret Set filenameipxe to {filenameipxe}"),
|
||||
@@ -209,7 +209,7 @@ impl<T: DhcpServer> Interpret<T> for DhcpInterpret {
|
||||
|
||||
Ok(Outcome::new(
|
||||
InterpretStatus::SUCCESS,
|
||||
format!("Dhcp Interpret execution successful"),
|
||||
"Dhcp Interpret execution successful".to_string(),
|
||||
))
|
||||
}
|
||||
}
|
||||
|
||||
@@ -112,7 +112,7 @@ impl<T: Topology + DnsServer> Interpret<T> for DnsInterpret {
|
||||
|
||||
Ok(Outcome::new(
|
||||
InterpretStatus::SUCCESS,
|
||||
format!("Dns Interpret execution successful"),
|
||||
"Dns Interpret execution successful".to_string(),
|
||||
))
|
||||
}
|
||||
}
|
||||
|
||||
@@ -55,7 +55,7 @@ impl<T: Topology + HelmCommand> Score<T> for HelmChartScore {
|
||||
}
|
||||
|
||||
fn name(&self) -> String {
|
||||
format!("{} {} HelmChartScore", self.release_name, self.chart_name)
|
||||
format!("{} [HelmChartScore]", self.release_name)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -90,14 +90,10 @@ impl HelmChartInterpret {
|
||||
);
|
||||
|
||||
match add_output.status.success() {
|
||||
true => {
|
||||
return Ok(());
|
||||
}
|
||||
false => {
|
||||
return Err(InterpretError::new(format!(
|
||||
true => Ok(()),
|
||||
false => Err(InterpretError::new(format!(
|
||||
"Failed to add helm repository!\n{full_output}"
|
||||
)));
|
||||
}
|
||||
))),
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -212,7 +208,7 @@ impl<T: Topology + HelmCommand> Interpret<T> for HelmChartInterpret {
|
||||
}
|
||||
|
||||
let res = helm_executor.install_or_upgrade(
|
||||
&ns,
|
||||
ns,
|
||||
&self.score.release_name,
|
||||
&self.score.chart_name,
|
||||
self.score.chart_version.as_ref(),
|
||||
@@ -229,24 +225,27 @@ impl<T: Topology + HelmCommand> Interpret<T> for HelmChartInterpret {
|
||||
match status {
|
||||
helm_wrapper_rs::HelmDeployStatus::Deployed => Ok(Outcome::new(
|
||||
InterpretStatus::SUCCESS,
|
||||
"Helm Chart deployed".to_string(),
|
||||
format!("Helm Chart {} deployed", self.score.release_name),
|
||||
)),
|
||||
helm_wrapper_rs::HelmDeployStatus::PendingInstall => Ok(Outcome::new(
|
||||
InterpretStatus::RUNNING,
|
||||
"Helm Chart Pending install".to_string(),
|
||||
format!("Helm Chart {} pending install...", self.score.release_name),
|
||||
)),
|
||||
helm_wrapper_rs::HelmDeployStatus::PendingUpgrade => Ok(Outcome::new(
|
||||
InterpretStatus::RUNNING,
|
||||
"Helm Chart pending upgrade".to_string(),
|
||||
)),
|
||||
helm_wrapper_rs::HelmDeployStatus::Failed => Err(InterpretError::new(
|
||||
"Failed to install helm chart".to_string(),
|
||||
format!("Helm Chart {} pending upgrade...", self.score.release_name),
|
||||
)),
|
||||
helm_wrapper_rs::HelmDeployStatus::Failed => Err(InterpretError::new(format!(
|
||||
"Helm Chart {} installation failed",
|
||||
self.score.release_name
|
||||
))),
|
||||
}
|
||||
}
|
||||
|
||||
fn get_name(&self) -> InterpretName {
|
||||
todo!()
|
||||
InterpretName::HelmChart
|
||||
}
|
||||
|
||||
fn get_version(&self) -> Version {
|
||||
todo!()
|
||||
}
|
||||
|
||||
@@ -77,14 +77,11 @@ impl HelmCommandExecutor {
|
||||
)?;
|
||||
}
|
||||
|
||||
let out = match self.clone().run_command(
|
||||
let out = self.clone().run_command(
|
||||
self.chart
|
||||
.clone()
|
||||
.helm_args(self.globals.chart_home.clone().unwrap()),
|
||||
) {
|
||||
Ok(out) => out,
|
||||
Err(e) => return Err(e),
|
||||
};
|
||||
)?;
|
||||
|
||||
// TODO: don't use unwrap here
|
||||
let s = String::from_utf8(out.stdout).unwrap();
|
||||
@@ -98,14 +95,11 @@ impl HelmCommandExecutor {
|
||||
}
|
||||
|
||||
pub fn version(self) -> Result<String, std::io::Error> {
|
||||
let out = match self.run_command(vec![
|
||||
let out = self.run_command(vec![
|
||||
"version".to_string(),
|
||||
"-c".to_string(),
|
||||
"--short".to_string(),
|
||||
]) {
|
||||
Ok(out) => out,
|
||||
Err(e) => return Err(e),
|
||||
};
|
||||
])?;
|
||||
|
||||
// TODO: don't use unwrap
|
||||
Ok(String::from_utf8(out.stdout).unwrap())
|
||||
@@ -129,15 +123,11 @@ impl HelmCommandExecutor {
|
||||
None => PathBuf::from(TempDir::new()?.path()),
|
||||
};
|
||||
|
||||
match self.chart.values_inline {
|
||||
Some(yaml_str) => {
|
||||
let tf: TempFile;
|
||||
tf = temp_file::with_contents(yaml_str.as_bytes());
|
||||
if let Some(yaml_str) = self.chart.values_inline {
|
||||
let tf: TempFile = temp_file::with_contents(yaml_str.as_bytes());
|
||||
self.chart
|
||||
.additional_values_files
|
||||
.push(PathBuf::from(tf.path()));
|
||||
}
|
||||
None => (),
|
||||
};
|
||||
|
||||
self.env.insert(
|
||||
@@ -180,9 +170,9 @@ impl HelmChart {
|
||||
match self.repo {
|
||||
Some(r) => {
|
||||
if r.starts_with("oci://") {
|
||||
args.push(String::from(
|
||||
args.push(
|
||||
r.trim_end_matches("/").to_string() + "/" + self.name.clone().as_str(),
|
||||
));
|
||||
);
|
||||
} else {
|
||||
args.push("--repo".to_string());
|
||||
args.push(r.to_string());
|
||||
@@ -193,13 +183,10 @@ impl HelmChart {
|
||||
None => args.push(self.name),
|
||||
};
|
||||
|
||||
match self.version {
|
||||
Some(v) => {
|
||||
if let Some(v) = self.version {
|
||||
args.push("--version".to_string());
|
||||
args.push(v.to_string());
|
||||
}
|
||||
None => (),
|
||||
}
|
||||
|
||||
args
|
||||
}
|
||||
@@ -362,7 +349,7 @@ impl<T: Topology + K8sclient + HelmCommand> Interpret<T> for HelmChartInterpretV
|
||||
}
|
||||
|
||||
fn get_name(&self) -> InterpretName {
|
||||
todo!()
|
||||
InterpretName::HelmCommand
|
||||
}
|
||||
fn get_version(&self) -> Version {
|
||||
todo!()
|
||||
|
||||
@@ -1,13 +1,12 @@
|
||||
use std::path::PathBuf;
|
||||
|
||||
use async_trait::async_trait;
|
||||
use log::{debug, info};
|
||||
use log::debug;
|
||||
use serde::Serialize;
|
||||
|
||||
use crate::{
|
||||
config::HARMONY_DATA_DIR,
|
||||
data::{Id, Version},
|
||||
instrumentation::{self, HarmonyEvent},
|
||||
interpret::{Interpret, InterpretError, InterpretName, InterpretStatus, Outcome},
|
||||
inventory::Inventory,
|
||||
score::Score,
|
||||
@@ -30,14 +29,14 @@ impl Default for K3DInstallationScore {
|
||||
}
|
||||
|
||||
impl<T: Topology> Score<T> for K3DInstallationScore {
|
||||
fn create_interpret(&self) -> Box<dyn crate::interpret::Interpret<T>> {
|
||||
fn create_interpret(&self) -> Box<dyn Interpret<T>> {
|
||||
Box::new(K3dInstallationInterpret {
|
||||
score: self.clone(),
|
||||
})
|
||||
}
|
||||
|
||||
fn name(&self) -> String {
|
||||
todo!()
|
||||
"K3dInstallationScore".into()
|
||||
}
|
||||
}
|
||||
|
||||
@@ -51,20 +50,14 @@ impl<T: Topology> Interpret<T> for K3dInstallationInterpret {
|
||||
async fn execute(
|
||||
&self,
|
||||
_inventory: &Inventory,
|
||||
topology: &T,
|
||||
_topology: &T,
|
||||
) -> Result<Outcome, InterpretError> {
|
||||
instrumentation::instrument(HarmonyEvent::InterpretExecutionStarted {
|
||||
topology: topology.name().into(),
|
||||
interpret: "k3d-installation".into(),
|
||||
message: "installing k3d...".into(),
|
||||
})
|
||||
.unwrap();
|
||||
|
||||
let k3d = k3d_rs::K3d::new(
|
||||
self.score.installation_path.clone(),
|
||||
Some(self.score.cluster_name.clone()),
|
||||
);
|
||||
let outcome = match k3d.ensure_installed().await {
|
||||
|
||||
match k3d.ensure_installed().await {
|
||||
Ok(_client) => {
|
||||
let msg = format!("k3d cluster '{}' installed ", self.score.cluster_name);
|
||||
debug!("{msg}");
|
||||
@@ -73,16 +66,7 @@ impl<T: Topology> Interpret<T> for K3dInstallationInterpret {
|
||||
Err(msg) => Err(InterpretError::new(format!(
|
||||
"failed to ensure k3d is installed : {msg}"
|
||||
))),
|
||||
};
|
||||
|
||||
instrumentation::instrument(HarmonyEvent::InterpretExecutionFinished {
|
||||
topology: topology.name().into(),
|
||||
interpret: "k3d-installation".into(),
|
||||
outcome: outcome.clone(),
|
||||
})
|
||||
.unwrap();
|
||||
|
||||
outcome
|
||||
}
|
||||
}
|
||||
fn get_name(&self) -> InterpretName {
|
||||
InterpretName::K3dInstallation
|
||||
|
||||
@@ -89,7 +89,7 @@ where
|
||||
))
|
||||
}
|
||||
fn get_name(&self) -> InterpretName {
|
||||
todo!()
|
||||
InterpretName::K8sResource
|
||||
}
|
||||
fn get_version(&self) -> Version {
|
||||
todo!()
|
||||
|
||||
@@ -128,13 +128,12 @@ impl<T: Topology + K8sclient + HelmCommand> Interpret<T> for LAMPInterpret {
|
||||
|
||||
info!("Deploying score {deployment_score:#?}");
|
||||
|
||||
deployment_score
|
||||
.create_interpret()
|
||||
.execute(inventory, topology)
|
||||
.await?;
|
||||
deployment_score.interpret(inventory, topology).await?;
|
||||
|
||||
info!("LAMP deployment_score {deployment_score:?}");
|
||||
|
||||
let ingress_path = ingress_path!("/");
|
||||
|
||||
let lamp_ingress = K8sIngressScore {
|
||||
name: fqdn!("lamp-ingress"),
|
||||
host: fqdn!("test"),
|
||||
@@ -144,17 +143,14 @@ impl<T: Topology + K8sclient + HelmCommand> Interpret<T> for LAMPInterpret {
|
||||
.as_str()
|
||||
),
|
||||
port: 8080,
|
||||
path: Some(ingress_path!("/")),
|
||||
path: Some(ingress_path),
|
||||
path_type: None,
|
||||
namespace: self
|
||||
.get_namespace()
|
||||
.map(|nbs| fqdn!(nbs.to_string().as_str())),
|
||||
};
|
||||
|
||||
lamp_ingress
|
||||
.create_interpret()
|
||||
.execute(inventory, topology)
|
||||
.await?;
|
||||
lamp_ingress.interpret(inventory, topology).await?;
|
||||
|
||||
info!("LAMP lamp_ingress {lamp_ingress:?}");
|
||||
|
||||
@@ -164,7 +160,7 @@ impl<T: Topology + K8sclient + HelmCommand> Interpret<T> for LAMPInterpret {
|
||||
}
|
||||
|
||||
fn get_name(&self) -> InterpretName {
|
||||
todo!()
|
||||
InterpretName::Lamp
|
||||
}
|
||||
|
||||
fn get_version(&self) -> Version {
|
||||
@@ -213,7 +209,7 @@ impl LAMPInterpret {
|
||||
repository: None,
|
||||
};
|
||||
|
||||
score.create_interpret().execute(inventory, topology).await
|
||||
score.interpret(inventory, topology).await
|
||||
}
|
||||
fn build_dockerfile(&self, score: &LAMPScore) -> Result<PathBuf, Box<dyn std::error::Error>> {
|
||||
let mut dockerfile = Dockerfile::new();
|
||||
|
||||
@@ -14,5 +14,6 @@ pub mod monitoring;
|
||||
pub mod okd;
|
||||
pub mod opnsense;
|
||||
pub mod prometheus;
|
||||
pub mod storage;
|
||||
pub mod tenant;
|
||||
pub mod tftp;
|
||||
|
||||
@@ -1,7 +1,16 @@
|
||||
use std::any::Any;
|
||||
use std::collections::BTreeMap;
|
||||
|
||||
use async_trait::async_trait;
|
||||
use k8s_openapi::api::core::v1::Secret;
|
||||
use kube::api::ObjectMeta;
|
||||
use serde::Serialize;
|
||||
use serde_json::json;
|
||||
use serde_yaml::{Mapping, Value};
|
||||
|
||||
use crate::modules::monitoring::kube_prometheus::crd::crd_alertmanager_config::{
|
||||
AlertmanagerConfig, AlertmanagerConfigSpec, CRDPrometheus,
|
||||
};
|
||||
use crate::{
|
||||
interpret::{InterpretError, Outcome},
|
||||
modules::monitoring::{
|
||||
@@ -20,14 +29,98 @@ pub struct DiscordWebhook {
|
||||
pub url: Url,
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl AlertReceiver<CRDPrometheus> for DiscordWebhook {
|
||||
async fn install(&self, sender: &CRDPrometheus) -> Result<Outcome, InterpretError> {
|
||||
let ns = sender.namespace.clone();
|
||||
let secret_name = format!("{}-secret", self.name.clone());
|
||||
let webhook_key = format!("{}", self.url.clone());
|
||||
|
||||
let mut string_data = BTreeMap::new();
|
||||
string_data.insert("webhook-url".to_string(), webhook_key.clone());
|
||||
|
||||
let secret = Secret {
|
||||
metadata: kube::core::ObjectMeta {
|
||||
name: Some(secret_name.clone()),
|
||||
..Default::default()
|
||||
},
|
||||
string_data: Some(string_data),
|
||||
type_: Some("Opaque".to_string()),
|
||||
..Default::default()
|
||||
};
|
||||
|
||||
let _ = sender.client.apply(&secret, Some(&ns)).await;
|
||||
|
||||
let spec = AlertmanagerConfigSpec {
|
||||
data: json!({
|
||||
"route": {
|
||||
"receiver": self.name,
|
||||
},
|
||||
"receivers": [
|
||||
{
|
||||
"name": self.name,
|
||||
"discordConfigs": [
|
||||
{
|
||||
"apiURL": {
|
||||
"name": secret_name,
|
||||
"key": "webhook-url",
|
||||
},
|
||||
"title": "{{ template \"discord.default.title\" . }}",
|
||||
"message": "{{ template \"discord.default.message\" . }}"
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
}),
|
||||
};
|
||||
|
||||
let alertmanager_configs = AlertmanagerConfig {
|
||||
metadata: ObjectMeta {
|
||||
name: Some(self.name.clone()),
|
||||
labels: Some(std::collections::BTreeMap::from([(
|
||||
"alertmanagerConfig".to_string(),
|
||||
"enabled".to_string(),
|
||||
)])),
|
||||
namespace: Some(ns),
|
||||
..Default::default()
|
||||
},
|
||||
spec,
|
||||
};
|
||||
|
||||
sender
|
||||
.client
|
||||
.apply(&alertmanager_configs, Some(&sender.namespace))
|
||||
.await?;
|
||||
Ok(Outcome::success(format!(
|
||||
"installed crd-alertmanagerconfigs for {}",
|
||||
self.name
|
||||
)))
|
||||
}
|
||||
fn name(&self) -> String {
|
||||
"discord-webhook".to_string()
|
||||
}
|
||||
fn clone_box(&self) -> Box<dyn AlertReceiver<CRDPrometheus>> {
|
||||
Box::new(self.clone())
|
||||
}
|
||||
fn as_any(&self) -> &dyn Any {
|
||||
self
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl AlertReceiver<Prometheus> for DiscordWebhook {
|
||||
async fn install(&self, sender: &Prometheus) -> Result<Outcome, InterpretError> {
|
||||
sender.install_receiver(self).await
|
||||
}
|
||||
fn name(&self) -> String {
|
||||
"discord-webhook".to_string()
|
||||
}
|
||||
fn clone_box(&self) -> Box<dyn AlertReceiver<Prometheus>> {
|
||||
Box::new(self.clone())
|
||||
}
|
||||
fn as_any(&self) -> &dyn Any {
|
||||
self
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
@@ -48,6 +141,12 @@ impl AlertReceiver<KubePrometheus> for DiscordWebhook {
|
||||
fn clone_box(&self) -> Box<dyn AlertReceiver<KubePrometheus>> {
|
||||
Box::new(self.clone())
|
||||
}
|
||||
fn name(&self) -> String {
|
||||
"discord-webhook".to_string()
|
||||
}
|
||||
fn as_any(&self) -> &dyn Any {
|
||||
self
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
|
||||
@@ -1,11 +1,19 @@
|
||||
use std::any::Any;
|
||||
|
||||
use async_trait::async_trait;
|
||||
use kube::api::ObjectMeta;
|
||||
use log::debug;
|
||||
use serde::Serialize;
|
||||
use serde_json::json;
|
||||
use serde_yaml::{Mapping, Value};
|
||||
|
||||
use crate::{
|
||||
interpret::{InterpretError, Outcome},
|
||||
modules::monitoring::{
|
||||
kube_prometheus::{
|
||||
crd::crd_alertmanager_config::{
|
||||
AlertmanagerConfig, AlertmanagerConfigSpec, CRDPrometheus,
|
||||
},
|
||||
prometheus::{KubePrometheus, KubePrometheusReceiver},
|
||||
types::{AlertChannelConfig, AlertManagerChannelConfig},
|
||||
},
|
||||
@@ -20,14 +28,81 @@ pub struct WebhookReceiver {
|
||||
pub url: Url,
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl AlertReceiver<CRDPrometheus> for WebhookReceiver {
|
||||
async fn install(&self, sender: &CRDPrometheus) -> Result<Outcome, InterpretError> {
|
||||
let spec = AlertmanagerConfigSpec {
|
||||
data: json!({
|
||||
"route": {
|
||||
"receiver": self.name,
|
||||
},
|
||||
"receivers": [
|
||||
{
|
||||
"name": self.name,
|
||||
"webhookConfigs": [
|
||||
{
|
||||
"url": self.url,
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
}),
|
||||
};
|
||||
|
||||
let alertmanager_configs = AlertmanagerConfig {
|
||||
metadata: ObjectMeta {
|
||||
name: Some(self.name.clone()),
|
||||
labels: Some(std::collections::BTreeMap::from([(
|
||||
"alertmanagerConfig".to_string(),
|
||||
"enabled".to_string(),
|
||||
)])),
|
||||
namespace: Some(sender.namespace.clone()),
|
||||
..Default::default()
|
||||
},
|
||||
spec,
|
||||
};
|
||||
debug!(
|
||||
"alert manager configs: \n{:#?}",
|
||||
alertmanager_configs.clone()
|
||||
);
|
||||
|
||||
sender
|
||||
.client
|
||||
.apply(&alertmanager_configs, Some(&sender.namespace))
|
||||
.await?;
|
||||
Ok(Outcome::success(format!(
|
||||
"installed crd-alertmanagerconfigs for {}",
|
||||
self.name
|
||||
)))
|
||||
}
|
||||
|
||||
fn name(&self) -> String {
|
||||
"webhook-receiver".to_string()
|
||||
}
|
||||
|
||||
fn clone_box(&self) -> Box<dyn AlertReceiver<CRDPrometheus>> {
|
||||
Box::new(self.clone())
|
||||
}
|
||||
|
||||
fn as_any(&self) -> &dyn Any {
|
||||
self
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl AlertReceiver<Prometheus> for WebhookReceiver {
|
||||
async fn install(&self, sender: &Prometheus) -> Result<Outcome, InterpretError> {
|
||||
sender.install_receiver(self).await
|
||||
}
|
||||
fn name(&self) -> String {
|
||||
"webhook-receiver".to_string()
|
||||
}
|
||||
fn clone_box(&self) -> Box<dyn AlertReceiver<Prometheus>> {
|
||||
Box::new(self.clone())
|
||||
}
|
||||
fn as_any(&self) -> &dyn Any {
|
||||
self
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
@@ -44,9 +119,15 @@ impl AlertReceiver<KubePrometheus> for WebhookReceiver {
|
||||
async fn install(&self, sender: &KubePrometheus) -> Result<Outcome, InterpretError> {
|
||||
sender.install_receiver(self).await
|
||||
}
|
||||
fn name(&self) -> String {
|
||||
"webhook-receiver".to_string()
|
||||
}
|
||||
fn clone_box(&self) -> Box<dyn AlertReceiver<KubePrometheus>> {
|
||||
Box::new(self.clone())
|
||||
}
|
||||
fn as_any(&self) -> &dyn Any {
|
||||
self
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
|
||||
@@ -18,7 +18,7 @@ use crate::{
|
||||
#[async_trait]
|
||||
impl AlertRule<KubePrometheus> for AlertManagerRuleGroup {
|
||||
async fn install(&self, sender: &KubePrometheus) -> Result<Outcome, InterpretError> {
|
||||
sender.install_rule(&self).await
|
||||
sender.install_rule(self).await
|
||||
}
|
||||
fn clone_box(&self) -> Box<dyn AlertRule<KubePrometheus>> {
|
||||
Box::new(self.clone())
|
||||
@@ -28,7 +28,7 @@ impl AlertRule<KubePrometheus> for AlertManagerRuleGroup {
|
||||
#[async_trait]
|
||||
impl AlertRule<Prometheus> for AlertManagerRuleGroup {
|
||||
async fn install(&self, sender: &Prometheus) -> Result<Outcome, InterpretError> {
|
||||
sender.install_rule(&self).await
|
||||
sender.install_rule(self).await
|
||||
}
|
||||
fn clone_box(&self) -> Box<dyn AlertRule<Prometheus>> {
|
||||
Box::new(self.clone())
|
||||
|
||||
@@ -0,0 +1,91 @@
|
||||
use std::sync::Arc;
|
||||
|
||||
use async_trait::async_trait;
|
||||
use serde::Serialize;
|
||||
|
||||
use crate::{
|
||||
data::{Id, Version},
|
||||
interpret::{Interpret, InterpretError, InterpretName, InterpretStatus, Outcome},
|
||||
inventory::Inventory,
|
||||
modules::{
|
||||
application::Application,
|
||||
monitoring::kube_prometheus::crd::crd_alertmanager_config::CRDPrometheus,
|
||||
prometheus::prometheus::PrometheusApplicationMonitoring,
|
||||
},
|
||||
score::Score,
|
||||
topology::{PreparationOutcome, Topology, oberservability::monitoring::AlertReceiver},
|
||||
};
|
||||
|
||||
#[derive(Debug, Clone, Serialize)]
|
||||
pub struct ApplicationMonitoringScore {
|
||||
pub sender: CRDPrometheus,
|
||||
pub application: Arc<dyn Application>,
|
||||
pub receivers: Vec<Box<dyn AlertReceiver<CRDPrometheus>>>,
|
||||
}
|
||||
|
||||
impl<T: Topology + PrometheusApplicationMonitoring<CRDPrometheus>> Score<T>
|
||||
for ApplicationMonitoringScore
|
||||
{
|
||||
fn create_interpret(&self) -> Box<dyn Interpret<T>> {
|
||||
Box::new(ApplicationMonitoringInterpret {
|
||||
score: self.clone(),
|
||||
})
|
||||
}
|
||||
|
||||
fn name(&self) -> String {
|
||||
format!(
|
||||
"{} monitoring [ApplicationMonitoringScore]",
|
||||
self.application.name()
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct ApplicationMonitoringInterpret {
|
||||
score: ApplicationMonitoringScore,
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl<T: Topology + PrometheusApplicationMonitoring<CRDPrometheus>> Interpret<T>
|
||||
for ApplicationMonitoringInterpret
|
||||
{
|
||||
async fn execute(
|
||||
&self,
|
||||
inventory: &Inventory,
|
||||
topology: &T,
|
||||
) -> Result<Outcome, InterpretError> {
|
||||
let result = topology
|
||||
.install_prometheus(
|
||||
&self.score.sender,
|
||||
inventory,
|
||||
Some(self.score.receivers.clone()),
|
||||
)
|
||||
.await;
|
||||
|
||||
match result {
|
||||
Ok(outcome) => match outcome {
|
||||
PreparationOutcome::Success { details: _ } => {
|
||||
Ok(Outcome::success("Prometheus installed".into()))
|
||||
}
|
||||
PreparationOutcome::Noop => Ok(Outcome::noop()),
|
||||
},
|
||||
Err(err) => Err(InterpretError::from(err)),
|
||||
}
|
||||
}
|
||||
|
||||
fn get_name(&self) -> InterpretName {
|
||||
InterpretName::ApplicationMonitoring
|
||||
}
|
||||
|
||||
fn get_version(&self) -> Version {
|
||||
todo!()
|
||||
}
|
||||
|
||||
fn get_status(&self) -> InterpretStatus {
|
||||
todo!()
|
||||
}
|
||||
|
||||
fn get_children(&self) -> Vec<Id> {
|
||||
todo!()
|
||||
}
|
||||
}
|
||||
@@ -1,44 +0,0 @@
|
||||
use std::sync::{Arc, Mutex};
|
||||
|
||||
use serde::Serialize;
|
||||
|
||||
use crate::{
|
||||
modules::monitoring::{
|
||||
kube_prometheus::types::ServiceMonitor,
|
||||
prometheus::{prometheus::Prometheus, prometheus_config::PrometheusConfig},
|
||||
},
|
||||
score::Score,
|
||||
topology::{
|
||||
HelmCommand, Topology,
|
||||
oberservability::monitoring::{AlertReceiver, AlertRule, AlertingInterpret},
|
||||
tenant::TenantManager,
|
||||
},
|
||||
};
|
||||
|
||||
#[derive(Clone, Debug, Serialize)]
|
||||
pub struct ApplicationPrometheusMonitoringScore {
|
||||
pub receivers: Vec<Box<dyn AlertReceiver<Prometheus>>>,
|
||||
pub rules: Vec<Box<dyn AlertRule<Prometheus>>>,
|
||||
pub service_monitors: Vec<ServiceMonitor>,
|
||||
}
|
||||
|
||||
impl<T: Topology + HelmCommand + TenantManager> Score<T> for ApplicationPrometheusMonitoringScore {
|
||||
fn create_interpret(&self) -> Box<dyn crate::interpret::Interpret<T>> {
|
||||
let mut prom_config = PrometheusConfig::new();
|
||||
prom_config.alert_manager = true;
|
||||
|
||||
let config = Arc::new(Mutex::new(prom_config));
|
||||
config
|
||||
.try_lock()
|
||||
.expect("couldn't lock config")
|
||||
.additional_service_monitors = self.service_monitors.clone();
|
||||
Box::new(AlertingInterpret {
|
||||
sender: Prometheus::new(),
|
||||
receivers: self.receivers.clone(),
|
||||
rules: self.rules.clone(),
|
||||
})
|
||||
}
|
||||
fn name(&self) -> String {
|
||||
"ApplicationPrometheusMonitoringScore".to_string()
|
||||
}
|
||||
}
|
||||
@@ -1 +1 @@
|
||||
pub mod k8s_application_monitoring_score;
|
||||
pub mod application_monitoring_score;
|
||||
|
||||
@@ -4,15 +4,14 @@ use std::str::FromStr;
|
||||
use crate::modules::helm::chart::HelmChartScore;
|
||||
|
||||
pub fn grafana_helm_chart_score(ns: &str) -> HelmChartScore {
|
||||
let values = format!(
|
||||
r#"
|
||||
let values = r#"
|
||||
rbac:
|
||||
namespaced: true
|
||||
sidecar:
|
||||
dashboards:
|
||||
enabled: true
|
||||
"#
|
||||
);
|
||||
.to_string();
|
||||
|
||||
HelmChartScore {
|
||||
namespace: Some(NonBlankString::from_str(ns).unwrap()),
|
||||
|
||||
@@ -0,0 +1,50 @@
|
||||
use std::sync::Arc;
|
||||
|
||||
use kube::CustomResource;
|
||||
use schemars::JsonSchema;
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
use crate::topology::{
|
||||
k8s::K8sClient,
|
||||
oberservability::monitoring::{AlertReceiver, AlertSender},
|
||||
};
|
||||
|
||||
#[derive(CustomResource, Serialize, Deserialize, Debug, Clone, JsonSchema)]
|
||||
#[kube(
|
||||
group = "monitoring.coreos.com",
|
||||
version = "v1alpha1",
|
||||
kind = "AlertmanagerConfig",
|
||||
plural = "alertmanagerconfigs",
|
||||
namespaced
|
||||
)]
|
||||
pub struct AlertmanagerConfigSpec {
|
||||
#[serde(flatten)]
|
||||
pub data: serde_json::Value,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize)]
|
||||
pub struct CRDPrometheus {
|
||||
pub namespace: String,
|
||||
pub client: Arc<K8sClient>,
|
||||
}
|
||||
|
||||
impl AlertSender for CRDPrometheus {
    /// Static name identifying this sender.
    fn name(&self) -> String {
        String::from("CRDAlertManager")
    }
}
|
||||
|
||||
/// Makes boxed receivers cloneable by delegating to the trait's `clone_box`.
impl Clone for Box<dyn AlertReceiver<CRDPrometheus>> {
    fn clone(&self) -> Self {
        // Deref explicitly to the trait object so `clone_box` is the one called.
        (**self).clone_box()
    }
}
|
||||
|
||||
impl Serialize for Box<dyn AlertReceiver<CRDPrometheus>> {
|
||||
fn serialize<S>(&self, _serializer: S) -> Result<S::Ok, S::Error>
|
||||
where
|
||||
S: serde::Serializer,
|
||||
{
|
||||
todo!()
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,52 @@
|
||||
use kube::CustomResource;
|
||||
use schemars::JsonSchema;
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
use super::crd_prometheuses::LabelSelector;
|
||||
|
||||
/// Rust CRD for `Alertmanager` from Prometheus Operator
|
||||
#[derive(CustomResource, Serialize, Deserialize, Debug, Clone, JsonSchema)]
|
||||
#[kube(
|
||||
group = "monitoring.coreos.com",
|
||||
version = "v1",
|
||||
kind = "Alertmanager",
|
||||
plural = "alertmanagers",
|
||||
namespaced
|
||||
)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
pub struct AlertmanagerSpec {
|
||||
/// Number of replicas for HA
|
||||
pub replicas: i32,
|
||||
|
||||
/// Selectors for AlertmanagerConfig CRDs
|
||||
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||||
pub alertmanager_config_selector: Option<LabelSelector>,
|
||||
|
||||
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||||
pub alertmanager_config_namespace_selector: Option<LabelSelector>,
|
||||
|
||||
/// Optional pod template metadata (annotations, labels)
|
||||
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||||
pub pod_metadata: Option<LabelSelector>,
|
||||
|
||||
/// Optional topology spread settings
|
||||
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||||
pub version: Option<String>,
|
||||
}
|
||||
|
||||
impl Default for AlertmanagerSpec {
|
||||
fn default() -> Self {
|
||||
AlertmanagerSpec {
|
||||
replicas: 1,
|
||||
|
||||
// Match all AlertmanagerConfigs in the same namespace
|
||||
alertmanager_config_namespace_selector: None,
|
||||
|
||||
// Empty selector matches all AlertmanagerConfigs in that namespace
|
||||
alertmanager_config_selector: Some(LabelSelector::default()),
|
||||
|
||||
pod_metadata: None,
|
||||
version: None,
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,25 @@
|
||||
use crate::modules::prometheus::alerts::k8s::{
|
||||
deployment::alert_deployment_unavailable,
|
||||
pod::{alert_container_restarting, alert_pod_not_ready, pod_failed},
|
||||
pvc::high_pvc_fill_rate_over_two_days,
|
||||
service::alert_service_down,
|
||||
};
|
||||
|
||||
use super::crd_prometheus_rules::Rule;
|
||||
|
||||
pub fn build_default_application_rules() -> Vec<Rule> {
|
||||
let pod_failed: Rule = pod_failed().into();
|
||||
let container_restarting: Rule = alert_container_restarting().into();
|
||||
let pod_not_ready: Rule = alert_pod_not_ready().into();
|
||||
let service_down: Rule = alert_service_down().into();
|
||||
let deployment_unavailable: Rule = alert_deployment_unavailable().into();
|
||||
let high_pvc_fill_rate: Rule = high_pvc_fill_rate_over_two_days().into();
|
||||
vec![
|
||||
pod_failed,
|
||||
container_restarting,
|
||||
pod_not_ready,
|
||||
service_down,
|
||||
deployment_unavailable,
|
||||
high_pvc_fill_rate,
|
||||
]
|
||||
}
|
||||
@@ -0,0 +1,153 @@
|
||||
use std::collections::BTreeMap;
|
||||
|
||||
use kube::CustomResource;
|
||||
use schemars::JsonSchema;
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
use super::crd_prometheuses::LabelSelector;
|
||||
|
||||
#[derive(CustomResource, Serialize, Deserialize, Debug, Clone, JsonSchema)]
|
||||
#[kube(
|
||||
group = "grafana.integreatly.org",
|
||||
version = "v1beta1",
|
||||
kind = "Grafana",
|
||||
plural = "grafanas",
|
||||
namespaced
|
||||
)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
pub struct GrafanaSpec {
|
||||
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||||
pub config: Option<GrafanaConfig>,
|
||||
|
||||
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||||
pub admin_user: Option<String>,
|
||||
|
||||
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||||
pub admin_password: Option<String>,
|
||||
|
||||
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||||
pub ingress: Option<GrafanaIngress>,
|
||||
|
||||
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||||
pub persistence: Option<GrafanaPersistence>,
|
||||
|
||||
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||||
pub resources: Option<ResourceRequirements>,
|
||||
}
|
||||
|
||||
#[derive(Serialize, Deserialize, Debug, Clone, JsonSchema)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
pub struct GrafanaConfig {
|
||||
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||||
pub log: Option<GrafanaLogConfig>,
|
||||
|
||||
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||||
pub security: Option<GrafanaSecurityConfig>,
|
||||
}
|
||||
|
||||
#[derive(Serialize, Deserialize, Debug, Clone, JsonSchema)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
pub struct GrafanaLogConfig {
|
||||
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||||
pub mode: Option<String>,
|
||||
|
||||
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||||
pub level: Option<String>,
|
||||
}
|
||||
|
||||
#[derive(Serialize, Deserialize, Debug, Clone, JsonSchema)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
pub struct GrafanaSecurityConfig {
|
||||
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||||
pub admin_user: Option<String>,
|
||||
|
||||
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||||
pub admin_password: Option<String>,
|
||||
}
|
||||
|
||||
#[derive(Serialize, Deserialize, Debug, Clone, JsonSchema)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
pub struct GrafanaIngress {
|
||||
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||||
pub enabled: Option<bool>,
|
||||
|
||||
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||||
pub hosts: Option<Vec<String>>,
|
||||
}
|
||||
|
||||
#[derive(Serialize, Deserialize, Debug, Clone, JsonSchema)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
pub struct GrafanaPersistence {
|
||||
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||||
pub enabled: Option<bool>,
|
||||
|
||||
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||||
pub storage_class_name: Option<String>,
|
||||
|
||||
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||||
pub size: Option<String>,
|
||||
}
|
||||
|
||||
// ------------------------------------------------------------------------------------------------
|
||||
|
||||
#[derive(CustomResource, Serialize, Deserialize, Debug, Clone, JsonSchema)]
|
||||
#[kube(
|
||||
group = "grafana.integreatly.org",
|
||||
version = "v1beta1",
|
||||
kind = "GrafanaDashboard",
|
||||
plural = "grafanadashboards",
|
||||
namespaced
|
||||
)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
pub struct GrafanaDashboardSpec {
|
||||
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||||
pub resync_period: Option<String>,
|
||||
|
||||
pub instance_selector: LabelSelector,
|
||||
|
||||
pub json: String,
|
||||
}
|
||||
|
||||
// ------------------------------------------------------------------------------------------------
|
||||
|
||||
#[derive(CustomResource, Serialize, Deserialize, Debug, Clone, JsonSchema)]
|
||||
#[kube(
|
||||
group = "grafana.integreatly.org",
|
||||
version = "v1beta1",
|
||||
kind = "GrafanaDatasource",
|
||||
plural = "grafanadatasources",
|
||||
namespaced
|
||||
)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
pub struct GrafanaDatasourceSpec {
|
||||
pub instance_selector: LabelSelector,
|
||||
|
||||
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||||
pub allow_cross_namespace_import: Option<bool>,
|
||||
|
||||
pub datasource: GrafanaDatasourceConfig,
|
||||
}
|
||||
|
||||
#[derive(Serialize, Deserialize, Debug, Clone, JsonSchema)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
pub struct GrafanaDatasourceConfig {
|
||||
pub access: String,
|
||||
pub database: Option<String>,
|
||||
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||||
pub json_data: Option<BTreeMap<String, String>>,
|
||||
pub name: String,
|
||||
pub r#type: String,
|
||||
pub url: String,
|
||||
}
|
||||
|
||||
// ------------------------------------------------------------------------------------------------
|
||||
|
||||
#[derive(Serialize, Deserialize, Debug, Clone, JsonSchema, Default)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
pub struct ResourceRequirements {
|
||||
#[serde(default, skip_serializing_if = "BTreeMap::is_empty")]
|
||||
pub limits: BTreeMap<String, String>,
|
||||
|
||||
#[serde(default, skip_serializing_if = "BTreeMap::is_empty")]
|
||||
pub requests: BTreeMap<String, String>,
|
||||
}
|
||||
@@ -0,0 +1,57 @@
|
||||
use std::collections::BTreeMap;
|
||||
|
||||
use kube::CustomResource;
|
||||
use schemars::JsonSchema;
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
use crate::modules::monitoring::alert_rule::prometheus_alert_rule::PrometheusAlertRule;
|
||||
|
||||
#[derive(CustomResource, Debug, Serialize, Deserialize, Clone, JsonSchema)]
|
||||
#[kube(
|
||||
group = "monitoring.coreos.com",
|
||||
version = "v1",
|
||||
kind = "PrometheusRule",
|
||||
plural = "prometheusrules",
|
||||
namespaced
|
||||
)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
pub struct PrometheusRuleSpec {
|
||||
pub groups: Vec<RuleGroup>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
|
||||
pub struct RuleGroup {
|
||||
pub name: String,
|
||||
pub rules: Vec<Rule>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
pub struct Rule {
|
||||
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||||
pub alert: Option<String>,
|
||||
|
||||
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||||
pub expr: Option<String>,
|
||||
|
||||
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||||
pub for_: Option<String>,
|
||||
|
||||
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||||
pub labels: Option<std::collections::BTreeMap<String, String>>,
|
||||
|
||||
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||||
pub annotations: Option<std::collections::BTreeMap<String, String>>,
|
||||
}
|
||||
|
||||
impl From<PrometheusAlertRule> for Rule {
    /// Converts an alert rule into a CRD [`Rule`].
    ///
    /// `alert`, `expr`, `labels` and `annotations` are always wrapped in `Some`; `for` is
    /// passed through unchanged. Labels and annotations are re-collected into `BTreeMap`s.
    fn from(value: PrometheusAlertRule) -> Self {
        // Target map type is inferred from the `Rule` field types.
        let labels = value.labels.into_iter().collect();
        let annotations = value.annotations.into_iter().collect();

        Self {
            alert: Some(value.alert),
            expr: Some(value.expr),
            for_: value.r#for,
            labels: Some(labels),
            annotations: Some(annotations),
        }
    }
}
|
||||
@@ -0,0 +1,118 @@
|
||||
use std::collections::BTreeMap;
|
||||
|
||||
use kube::CustomResource;
|
||||
use schemars::JsonSchema;
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
use crate::modules::monitoring::kube_prometheus::types::Operator;
|
||||
|
||||
#[derive(CustomResource, Serialize, Deserialize, Debug, Clone, JsonSchema)]
|
||||
#[kube(
|
||||
group = "monitoring.coreos.com",
|
||||
version = "v1",
|
||||
kind = "Prometheus",
|
||||
plural = "prometheuses",
|
||||
namespaced
|
||||
)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
pub struct PrometheusSpec {
|
||||
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||||
pub alerting: Option<PrometheusSpecAlerting>,
|
||||
|
||||
pub service_account_name: String,
|
||||
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||||
pub service_monitor_namespace_selector: Option<LabelSelector>,
|
||||
|
||||
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||||
pub service_monitor_selector: Option<LabelSelector>,
|
||||
|
||||
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||||
pub service_discovery_role: Option<String>,
|
||||
|
||||
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||||
pub pod_monitor_selector: Option<LabelSelector>,
|
||||
|
||||
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||||
pub rule_selector: Option<LabelSelector>,
|
||||
|
||||
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||||
pub rule_namespace_selector: Option<LabelSelector>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema, Default)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
pub struct NamespaceSelector {
|
||||
#[serde(default, skip_serializing_if = "Vec::is_empty")]
|
||||
pub match_names: Vec<String>,
|
||||
}
|
||||
|
||||
/// Contains alerting configuration, specifically Alertmanager endpoints.
|
||||
#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema, Default)]
|
||||
pub struct PrometheusSpecAlerting {
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub alertmanagers: Option<Vec<AlertmanagerEndpoints>>,
|
||||
}
|
||||
|
||||
/// Represents an Alertmanager endpoint configuration used by Prometheus.
|
||||
#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema, Default)]
|
||||
pub struct AlertmanagerEndpoints {
|
||||
/// Name of the Alertmanager Service.
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub name: Option<String>,
|
||||
|
||||
/// Namespace of the Alertmanager Service.
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub namespace: Option<String>,
|
||||
|
||||
/// Port to access on the Alertmanager Service (e.g. "web").
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub port: Option<String>,
|
||||
|
||||
/// Scheme to use for connecting (e.g. "http").
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub scheme: Option<String>,
|
||||
// Other fields like `tls_config`, `path_prefix`, etc., can be added if needed.
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema, Default)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
pub struct LabelSelector {
|
||||
#[serde(default, skip_serializing_if = "BTreeMap::is_empty")]
|
||||
pub match_labels: BTreeMap<String, String>,
|
||||
|
||||
#[serde(default, skip_serializing_if = "Vec::is_empty")]
|
||||
pub match_expressions: Vec<LabelSelectorRequirement>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
pub struct LabelSelectorRequirement {
|
||||
pub key: String,
|
||||
pub operator: Operator,
|
||||
#[serde(default, skip_serializing_if = "Vec::is_empty")]
|
||||
pub values: Vec<String>,
|
||||
}
|
||||
|
||||
impl Default for PrometheusSpec {
|
||||
fn default() -> Self {
|
||||
PrometheusSpec {
|
||||
alerting: None,
|
||||
|
||||
service_account_name: "prometheus".into(),
|
||||
|
||||
// null means "only my namespace"
|
||||
service_monitor_namespace_selector: None,
|
||||
|
||||
// empty selector means match all ServiceMonitors in that namespace
|
||||
service_monitor_selector: Some(LabelSelector::default()),
|
||||
|
||||
service_discovery_role: Some("Endpoints".into()),
|
||||
|
||||
pod_monitor_selector: None,
|
||||
|
||||
rule_selector: None,
|
||||
|
||||
rule_namespace_selector: Some(LabelSelector::default()),
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,203 @@
|
||||
/// Renders the default "Tenant Namespace Overview" Grafana dashboard as a JSON string.
///
/// `namespace` is substituted verbatim into the PromQL label matchers of all five panels
/// (running pods, failed pods, deployment health, container restarts, firing alerts).
/// No escaping is applied, so the caller must pass a value that is safe inside a quoted
/// JSON/PromQL string.
///
/// In the template, `{{` / `}}` are `format!` escapes for literal braces; the
/// `{{{{pod}}}}` legend therefore renders as Grafana's `{{pod}}` placeholder.
pub fn build_default_dashboard(namespace: &str) -> String {
    format!(
        r#"{{
"annotations": {{
"list": []
}},
"editable": true,
"gnetId": null,
"graphTooltip": 0,
"id": null,
"iteration": 171105,
"panels": [
{{
"datasource": "$datasource",
"fieldConfig": {{
"defaults": {{
"unit": "short"
}},
"overrides": []
}},
"gridPos": {{
"h": 6,
"w": 6,
"x": 0,
"y": 0
}},
"id": 1,
"options": {{
"reduceOptions": {{
"calcs": ["lastNotNull"],
"fields": "",
"values": false
}}
}},
"pluginVersion": "9.0.0",
"targets": [
{{
"expr": "sum(kube_pod_status_phase{{namespace=\"{namespace}\", phase=\"Running\"}})",
"legendFormat": "",
"refId": "A"
}}
],
"title": "Pods in Namespace",
"type": "stat"
}},
{{
"datasource": "$datasource",
"fieldConfig": {{
"defaults": {{
"unit": "short"
}},
"overrides": []
}},
"gridPos": {{
"h": 6,
"w": 6,
"x": 6,
"y": 0
}},
"id": 2,
"options": {{
"reduceOptions": {{
"calcs": ["lastNotNull"],
"fields": "",
"values": false
}}
}},
"pluginVersion": "9.0.0",
"targets": [
{{
"expr": "sum(kube_pod_status_phase{{phase=\"Failed\", namespace=\"{namespace}\"}})",
"legendFormat": "",
"refId": "A"
}}
],
"title": "Pods in Failed State",
"type": "stat"
}},
{{
"datasource": "$datasource",
"fieldConfig": {{
"defaults": {{
"unit": "percentunit"
}},
"overrides": []
}},
"gridPos": {{
"h": 6,
"w": 12,
"x": 0,
"y": 6
}},
"id": 3,
"options": {{
"reduceOptions": {{
"calcs": ["lastNotNull"],
"fields": "",
"values": false
}}
}},
"pluginVersion": "9.0.0",
"targets": [
{{
"expr": "sum(kube_deployment_status_replicas_available{{namespace=\"{namespace}\"}}) / sum(kube_deployment_spec_replicas{{namespace=\"{namespace}\"}})",
"legendFormat": "",
"refId": "A"
}}
],
"title": "Deployment Health (Available / Desired)",
"type": "stat"
}},
{{
"datasource": "$datasource",
"fieldConfig": {{
"defaults": {{
"unit": "short"
}},
"overrides": []
}},
"gridPos": {{
"h": 6,
"w": 12,
"x": 0,
"y": 12
}},
"id": 4,
"options": {{
"reduceOptions": {{
"calcs": ["lastNotNull"],
"fields": "",
"values": false
}}
}},
"pluginVersion": "9.0.0",
"targets": [
{{
"expr": "sum by(pod) (rate(kube_pod_container_status_restarts_total{{namespace=\"{namespace}\"}}[5m]))",
"legendFormat": "{{{{pod}}}}",
"refId": "A"
}}
],
"title": "Container Restarts (per pod)",
"type": "timeseries"
}},
{{
"datasource": "$datasource",
"fieldConfig": {{
"defaults": {{
"unit": "short"
}},
"overrides": []
}},
"gridPos": {{
"h": 6,
"w": 12,
"x": 0,
"y": 18
}},
"id": 5,
"options": {{
"reduceOptions": {{
"calcs": ["lastNotNull"],
"fields": "",
"values": false
}}
}},
"pluginVersion": "9.0.0",
"targets": [
{{
"expr": "sum(ALERTS{{alertstate=\"firing\", namespace=\"{namespace}\"}}) or vector(0)",
"legendFormat": "",
"refId": "A"
}}
],
"title": "Firing Alerts in Namespace",
"type": "stat"
}}
],
"schemaVersion": 36,
"templating": {{
"list": [
{{
"name": "datasource",
"type": "datasource",
"pluginId": "prometheus",
"label": "Prometheus",
"query": "prometheus",
"refresh": 1,
"hide": 0,
"current": {{
"selected": true,
"text": "Prometheus",
"value": "Prometheus"
}}
}}
]
}},
"title": "Tenant Namespace Overview",
"version": 1
}}"#
    )
}
|
||||
@@ -0,0 +1,20 @@
|
||||
use std::str::FromStr;
|
||||
|
||||
use non_blank_string_rs::NonBlankString;
|
||||
|
||||
use crate::modules::helm::chart::HelmChartScore;
|
||||
|
||||
pub fn grafana_operator_helm_chart_score(ns: String) -> HelmChartScore {
|
||||
HelmChartScore {
|
||||
namespace: Some(NonBlankString::from_str(&ns).unwrap()),
|
||||
release_name: NonBlankString::from_str("grafana_operator").unwrap(),
|
||||
chart_name: NonBlankString::from_str("oci://ghcr.io/grafana/helm-charts/grafana-operator")
|
||||
.unwrap(),
|
||||
chart_version: None,
|
||||
values_overrides: None,
|
||||
values_yaml: None,
|
||||
create_namespace: true,
|
||||
install_only: true,
|
||||
repository: None,
|
||||
}
|
||||
}
|
||||
11
harmony/src/modules/monitoring/kube_prometheus/crd/mod.rs
Normal file
11
harmony/src/modules/monitoring/kube_prometheus/crd/mod.rs
Normal file
@@ -0,0 +1,11 @@
|
||||
pub mod crd_alertmanager_config;
|
||||
pub mod crd_alertmanagers;
|
||||
pub mod crd_default_rules;
|
||||
pub mod crd_grafana;
|
||||
pub mod crd_prometheus_rules;
|
||||
pub mod crd_prometheuses;
|
||||
pub mod grafana_default_dashboard;
|
||||
pub mod grafana_operator;
|
||||
pub mod prometheus_operator;
|
||||
pub mod role;
|
||||
pub mod service_monitor;
|
||||
@@ -0,0 +1,22 @@
|
||||
use std::str::FromStr;
|
||||
|
||||
use non_blank_string_rs::NonBlankString;
|
||||
|
||||
use crate::modules::helm::chart::HelmChartScore;
|
||||
|
||||
pub fn prometheus_operator_helm_chart_score(ns: String) -> HelmChartScore {
|
||||
HelmChartScore {
|
||||
namespace: Some(NonBlankString::from_str(&ns).unwrap()),
|
||||
release_name: NonBlankString::from_str("prometheus-operator").unwrap(),
|
||||
chart_name: NonBlankString::from_str(
|
||||
"oci://hub.nationtech.io/harmony/nt-prometheus-operator",
|
||||
)
|
||||
.unwrap(),
|
||||
chart_version: None,
|
||||
values_overrides: None,
|
||||
values_yaml: None,
|
||||
create_namespace: true,
|
||||
install_only: true,
|
||||
repository: None,
|
||||
}
|
||||
}
|
||||
62
harmony/src/modules/monitoring/kube_prometheus/crd/role.rs
Normal file
62
harmony/src/modules/monitoring/kube_prometheus/crd/role.rs
Normal file
@@ -0,0 +1,62 @@
|
||||
use k8s_openapi::api::{
|
||||
core::v1::ServiceAccount,
|
||||
rbac::v1::{PolicyRule, Role, RoleBinding, RoleRef, Subject},
|
||||
};
|
||||
use kube::api::ObjectMeta;
|
||||
|
||||
pub fn build_prom_role(role_name: String, namespace: String) -> Role {
|
||||
Role {
|
||||
metadata: ObjectMeta {
|
||||
name: Some(role_name),
|
||||
namespace: Some(namespace),
|
||||
..Default::default()
|
||||
},
|
||||
rules: Some(vec![PolicyRule {
|
||||
api_groups: Some(vec!["".into()]), // core API group
|
||||
resources: Some(vec!["services".into(), "endpoints".into(), "pods".into()]),
|
||||
verbs: vec!["get".into(), "list".into(), "watch".into()],
|
||||
..Default::default()
|
||||
}]),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn build_prom_rolebinding(
|
||||
role_name: String,
|
||||
namespace: String,
|
||||
service_account_name: String,
|
||||
) -> RoleBinding {
|
||||
RoleBinding {
|
||||
metadata: ObjectMeta {
|
||||
name: Some(format!("{}-rolebinding", role_name)),
|
||||
namespace: Some(namespace.clone()),
|
||||
..Default::default()
|
||||
},
|
||||
role_ref: RoleRef {
|
||||
api_group: "rbac.authorization.k8s.io".into(),
|
||||
kind: "Role".into(),
|
||||
name: role_name,
|
||||
},
|
||||
subjects: Some(vec![Subject {
|
||||
kind: "ServiceAccount".into(),
|
||||
name: service_account_name,
|
||||
namespace: Some(namespace.clone()),
|
||||
..Default::default()
|
||||
}]),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn build_prom_service_account(
|
||||
service_account_name: String,
|
||||
namespace: String,
|
||||
) -> ServiceAccount {
|
||||
ServiceAccount {
|
||||
automount_service_account_token: None,
|
||||
image_pull_secrets: None,
|
||||
metadata: ObjectMeta {
|
||||
name: Some(service_account_name),
|
||||
namespace: Some(namespace),
|
||||
..Default::default()
|
||||
},
|
||||
secrets: None,
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,87 @@
|
||||
use std::collections::HashMap;
|
||||
|
||||
use kube::CustomResource;
|
||||
use schemars::JsonSchema;
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
use crate::modules::monitoring::kube_prometheus::types::{
|
||||
HTTPScheme, MatchExpression, NamespaceSelector, Operator, Selector,
|
||||
ServiceMonitor as KubeServiceMonitor, ServiceMonitorEndpoint,
|
||||
};
|
||||
|
||||
/// This is the top-level struct for the ServiceMonitor Custom Resource.
|
||||
/// The `#[derive(CustomResource)]` macro handles all the boilerplate for you,
|
||||
/// including the `impl Resource`.
|
||||
#[derive(CustomResource, Serialize, Deserialize, Debug, Clone, JsonSchema)]
|
||||
#[kube(
|
||||
group = "monitoring.coreos.com",
|
||||
version = "v1",
|
||||
kind = "ServiceMonitor",
|
||||
plural = "servicemonitors",
|
||||
namespaced
|
||||
)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
pub struct ServiceMonitorSpec {
|
||||
/// A label selector to select services to monitor.
|
||||
pub selector: Selector,
|
||||
|
||||
/// A list of endpoints on the selected services to be monitored.
|
||||
pub endpoints: Vec<ServiceMonitorEndpoint>,
|
||||
|
||||
/// Selector to select which namespaces the Kubernetes Endpoints objects
|
||||
/// are discovered from.
|
||||
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||||
pub namespace_selector: Option<NamespaceSelector>,
|
||||
|
||||
/// The label to use to retrieve the job name from.
|
||||
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||||
pub job_label: Option<String>,
|
||||
|
||||
/// Pod-based target labels to transfer from the Kubernetes Pod onto the target.
|
||||
#[serde(default, skip_serializing_if = "Vec::is_empty")]
|
||||
pub pod_target_labels: Vec<String>,
|
||||
|
||||
/// TargetLabels transfers labels on the Kubernetes Service object to the target.
|
||||
#[serde(default, skip_serializing_if = "Vec::is_empty")]
|
||||
pub target_labels: Vec<String>,
|
||||
}
|
||||
|
||||
impl Default for ServiceMonitorSpec {
|
||||
fn default() -> Self {
|
||||
let labels = HashMap::new();
|
||||
Self {
|
||||
selector: Selector {
|
||||
match_labels: { labels },
|
||||
match_expressions: vec![MatchExpression {
|
||||
key: "app.kubernetes.io/name".into(),
|
||||
operator: Operator::Exists,
|
||||
values: vec![],
|
||||
}],
|
||||
},
|
||||
endpoints: vec![ServiceMonitorEndpoint {
|
||||
port: Some("http".to_string()),
|
||||
path: Some("/metrics".into()),
|
||||
interval: Some("30s".into()),
|
||||
scheme: Some(HTTPScheme::HTTP),
|
||||
..Default::default()
|
||||
}],
|
||||
namespace_selector: None, // only the same namespace
|
||||
job_label: Some("app".into()),
|
||||
pod_target_labels: vec![],
|
||||
target_labels: vec![],
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl From<KubeServiceMonitor> for ServiceMonitorSpec {
|
||||
fn from(value: KubeServiceMonitor) -> Self {
|
||||
Self {
|
||||
selector: value.selector,
|
||||
endpoints: value.endpoints,
|
||||
namespace_selector: value.namespace_selector,
|
||||
job_label: value.job_label,
|
||||
pod_target_labels: value.pod_target_labels,
|
||||
target_labels: value.target_labels,
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -27,6 +27,12 @@ pub struct KubePrometheusConfig {
|
||||
pub alert_rules: Vec<AlertManagerAdditionalPromRules>,
|
||||
pub additional_service_monitors: Vec<ServiceMonitor>,
|
||||
}
|
||||
impl Default for KubePrometheusConfig {
|
||||
fn default() -> Self {
|
||||
Self::new()
|
||||
}
|
||||
}
|
||||
|
||||
impl KubePrometheusConfig {
|
||||
pub fn new() -> Self {
|
||||
Self {
|
||||
@@ -35,7 +41,7 @@ impl KubePrometheusConfig {
|
||||
windows_monitoring: false,
|
||||
alert_manager: true,
|
||||
grafana: true,
|
||||
node_exporter: false,
|
||||
node_exporter: true,
|
||||
prometheus: true,
|
||||
kubernetes_service_monitors: true,
|
||||
kubernetes_api_server: true,
|
||||
|
||||
@@ -12,8 +12,8 @@ use crate::modules::{
|
||||
helm::chart::HelmChartScore,
|
||||
monitoring::kube_prometheus::types::{
|
||||
AlertGroup, AlertManager, AlertManagerAdditionalPromRules, AlertManagerConfig,
|
||||
AlertManagerRoute, AlertManagerSpec, AlertManagerValues, ConfigReloader, Limits,
|
||||
PrometheusConfig, Requests, Resources,
|
||||
AlertManagerConfigSelector, AlertManagerRoute, AlertManagerSpec, AlertManagerValues,
|
||||
ConfigReloader, Limits, PrometheusConfig, Requests, Resources,
|
||||
},
|
||||
};
|
||||
|
||||
@@ -35,7 +35,7 @@ pub fn kube_prometheus_helm_chart_score(
|
||||
let kube_proxy = config.kube_proxy.to_string();
|
||||
let kube_state_metrics = config.kube_state_metrics.to_string();
|
||||
let node_exporter = config.node_exporter.to_string();
|
||||
let prometheus_operator = config.prometheus_operator.to_string();
|
||||
let _prometheus_operator = config.prometheus_operator.to_string();
|
||||
let prometheus = config.prometheus.to_string();
|
||||
let resource_limit = Resources {
|
||||
limits: Limits {
|
||||
@@ -64,7 +64,7 @@ pub fn kube_prometheus_helm_chart_score(
|
||||
indent_lines(&yaml, indent_level + 2)
|
||||
)
|
||||
}
|
||||
let resource_section = resource_block(&resource_limit, 2);
|
||||
let _resource_section = resource_block(&resource_limit, 2);
|
||||
|
||||
let mut values = format!(
|
||||
r#"
|
||||
@@ -332,6 +332,11 @@ prometheusOperator:
|
||||
.push(receiver.channel_receiver.clone());
|
||||
}
|
||||
|
||||
let mut labels = BTreeMap::new();
|
||||
labels.insert("alertmanagerConfig".to_string(), "enabled".to_string());
|
||||
let alert_manager_config_selector = AlertManagerConfigSelector {
|
||||
match_labels: labels,
|
||||
};
|
||||
let alert_manager_values = AlertManagerValues {
|
||||
alertmanager: AlertManager {
|
||||
enabled: config.alert_manager,
|
||||
@@ -347,6 +352,8 @@ prometheusOperator:
|
||||
cpu: "100m".to_string(),
|
||||
},
|
||||
},
|
||||
alert_manager_config_selector,
|
||||
replicas: 2,
|
||||
},
|
||||
init_config_reloader: ConfigReloader {
|
||||
resources: Resources {
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
pub mod crd;
|
||||
pub mod helm;
|
||||
pub mod helm_prometheus_alert_score;
|
||||
pub mod prometheus;
|
||||
|
||||
@@ -55,6 +55,12 @@ pub struct KubePrometheus {
|
||||
pub config: Arc<Mutex<KubePrometheusConfig>>,
|
||||
}
|
||||
|
||||
impl Default for KubePrometheus {
|
||||
fn default() -> Self {
|
||||
Self::new()
|
||||
}
|
||||
}
|
||||
|
||||
impl KubePrometheus {
|
||||
pub fn new() -> Self {
|
||||
Self {
|
||||
@@ -113,8 +119,7 @@ impl KubePrometheus {
|
||||
topology: &T,
|
||||
) -> Result<Outcome, InterpretError> {
|
||||
kube_prometheus_helm_chart_score(self.config.clone())
|
||||
.create_interpret()
|
||||
.execute(inventory, topology)
|
||||
.interpret(inventory, topology)
|
||||
.await
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,7 +1,8 @@
|
||||
use std::collections::{BTreeMap, HashMap};
|
||||
|
||||
use async_trait::async_trait;
|
||||
use serde::Serialize;
|
||||
use schemars::JsonSchema;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use serde_yaml::{Mapping, Sequence, Value};
|
||||
|
||||
use crate::modules::monitoring::alert_rule::prometheus_alert_rule::AlertManagerRuleGroup;
|
||||
@@ -55,6 +56,14 @@ pub struct AlertManagerChannelConfig {
|
||||
#[serde(rename_all = "camelCase")]
|
||||
pub struct AlertManagerSpec {
|
||||
pub(crate) resources: Resources,
|
||||
pub replicas: u32,
|
||||
pub alert_manager_config_selector: AlertManagerConfigSelector,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
pub struct AlertManagerConfigSelector {
|
||||
pub match_labels: BTreeMap<String, String>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize)]
|
||||
@@ -86,7 +95,7 @@ pub struct AlertGroup {
|
||||
pub groups: Vec<AlertManagerRuleGroup>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize)]
|
||||
#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
|
||||
pub enum HTTPScheme {
|
||||
#[serde(rename = "http")]
|
||||
HTTP,
|
||||
@@ -94,7 +103,7 @@ pub enum HTTPScheme {
|
||||
HTTPS,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize)]
|
||||
#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
|
||||
pub enum Operator {
|
||||
In,
|
||||
NotIn,
|
||||
@@ -139,74 +148,83 @@ pub struct ServiceMonitorTLSConfig {
|
||||
pub server_name: Option<String>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize)]
|
||||
#[derive(Serialize, Deserialize, Debug, Clone, JsonSchema, Default)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
pub struct ServiceMonitorEndpoint {
|
||||
// ## Name of the endpoint's service port
|
||||
// ## Mutually exclusive with targetPort
|
||||
/// Name of the service port this endpoint refers to.
|
||||
pub port: Option<String>,
|
||||
|
||||
// ## Name or number of the endpoint's target port
|
||||
// ## Mutually exclusive with port
|
||||
pub target_port: Option<String>,
|
||||
|
||||
// ## File containing bearer token to be used when scraping targets
|
||||
// ##
|
||||
pub bearer_token_file: Option<String>,
|
||||
|
||||
// ## Interval at which metrics should be scraped
|
||||
// ##
|
||||
/// Interval at which metrics should be scraped.
|
||||
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||||
pub interval: Option<String>,
|
||||
|
||||
// ## HTTP path to scrape for metrics
|
||||
// ##
|
||||
pub path: String,
|
||||
/// The HTTP path to scrape for metrics.
|
||||
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||||
pub path: Option<String>,
|
||||
|
||||
// ## HTTP scheme to use for scraping
|
||||
// ##
|
||||
pub scheme: HTTPScheme,
|
||||
/// HTTP scheme to use for scraping.
|
||||
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||||
pub scheme: Option<HTTPScheme>,
|
||||
|
||||
// ## TLS configuration to use when scraping the endpoint
|
||||
// ##
|
||||
pub tls_config: Option<ServiceMonitorTLSConfig>,
|
||||
/// Relabelings to apply to samples before scraping.
|
||||
#[serde(default, skip_serializing_if = "Vec::is_empty")]
|
||||
pub relabelings: Vec<RelabelConfig>,
|
||||
|
||||
// ## MetricRelabelConfigs to apply to samples after scraping, but before ingestion.
|
||||
// ## ref: https://github.com/prometheus-operator/prometheus-operator/blob/main/Documentation/api-reference/api.md#relabelconfig
|
||||
// ##
|
||||
// # - action: keep
|
||||
// # regex: 'kube_(daemonset|deployment|pod|namespace|node|statefulset).+'
|
||||
// # sourceLabels: [__name__]
|
||||
pub metric_relabelings: Vec<Mapping>,
|
||||
|
||||
// ## RelabelConfigs to apply to samples before scraping
|
||||
// ## ref: https://github.com/prometheus-operator/prometheus-operator/blob/main/Documentation/api-reference/api.md#relabelconfig
|
||||
// ##
|
||||
// # - sourceLabels: [__meta_kubernetes_pod_node_name]
|
||||
// # separator: ;
|
||||
// # regex: ^(.*)$
|
||||
// # targetLabel: nodename
|
||||
// # replacement: $1
|
||||
// # action: replace
|
||||
pub relabelings: Vec<Mapping>,
|
||||
/// MetricRelabelings to apply to samples after scraping, but before ingestion.
|
||||
#[serde(default, skip_serializing_if = "Vec::is_empty")]
|
||||
pub metric_relabelings: Vec<RelabelConfig>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize)]
|
||||
#[derive(Serialize, Deserialize, Debug, Clone, JsonSchema)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
pub struct RelabelConfig {
|
||||
/// The action to perform based on the regex matching.
|
||||
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||||
pub action: Option<String>,
|
||||
|
||||
/// A list of labels from which to extract values.
|
||||
#[serde(default, skip_serializing_if = "Vec::is_empty")]
|
||||
pub source_labels: Vec<String>,
|
||||
|
||||
/// Separator to be used when concatenating source_labels.
|
||||
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||||
pub separator: Option<String>,
|
||||
|
||||
/// The label to which the resulting value is written.
|
||||
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||||
pub target_label: Option<String>,
|
||||
|
||||
/// A regular expression to match against the concatenated source label values.
|
||||
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||||
pub regex: Option<String>,
|
||||
|
||||
/// The replacement value to use.
|
||||
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||||
pub replacement: Option<String>,
|
||||
}
|
||||
|
||||
#[derive(Serialize, Deserialize, Debug, Clone, JsonSchema)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
pub struct MatchExpression {
|
||||
pub key: String,
|
||||
pub operator: Operator,
|
||||
pub operator: Operator, // "In", "NotIn", "Exists", "DoesNotExist"
|
||||
#[serde(default, skip_serializing_if = "Vec::is_empty")]
|
||||
pub values: Vec<String>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize)]
|
||||
#[derive(Serialize, Deserialize, Debug, Clone, JsonSchema, Default)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
pub struct Selector {
|
||||
// # label selector for services
|
||||
/// A map of key-value pairs to match.
|
||||
#[serde(default, skip_serializing_if = "HashMap::is_empty")]
|
||||
pub match_labels: HashMap<String, String>,
|
||||
|
||||
/// A list of label selector requirements.
|
||||
#[serde(default, skip_serializing_if = "Vec::is_empty")]
|
||||
pub match_expressions: Vec<MatchExpression>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize)]
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
pub struct ServiceMonitor {
|
||||
pub name: String,
|
||||
@@ -250,10 +268,15 @@ pub struct ServiceMonitor {
|
||||
pub fallback_scrape_protocol: Option<String>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize, Clone)]
|
||||
#[derive(Serialize, Deserialize, Debug, Clone, JsonSchema, Default)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
pub struct NamespaceSelector {
|
||||
/// Select all namespaces.
|
||||
#[serde(default, skip_serializing_if = "std::ops::Not::not")]
|
||||
pub any: bool,
|
||||
|
||||
/// List of namespace names to select from.
|
||||
#[serde(default, skip_serializing_if = "Vec::is_empty")]
|
||||
pub match_names: Vec<String>,
|
||||
}
|
||||
|
||||
@@ -275,19 +298,3 @@ impl Default for ServiceMonitor {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Default for ServiceMonitorEndpoint {
|
||||
fn default() -> Self {
|
||||
Self {
|
||||
port: Some("80".to_string()),
|
||||
target_port: Default::default(),
|
||||
bearer_token_file: Default::default(),
|
||||
interval: Default::default(),
|
||||
path: "/metrics".to_string(),
|
||||
scheme: HTTPScheme::HTTP,
|
||||
tls_config: Default::default(),
|
||||
metric_relabelings: Default::default(),
|
||||
relabelings: Default::default(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,9 +1,25 @@
|
||||
use non_blank_string_rs::NonBlankString;
|
||||
use std::str::FromStr;
|
||||
|
||||
use crate::modules::helm::chart::{HelmChartScore, HelmRepository};
|
||||
use crate::{modules::helm::chart::HelmChartScore, topology::DeploymentTarget};
|
||||
|
||||
pub fn ntfy_helm_chart_score(
|
||||
namespace: String,
|
||||
host: String,
|
||||
target: DeploymentTarget,
|
||||
) -> HelmChartScore {
|
||||
    // TODO not actually the correct logic, this should be fixed by using an ingress which is the
|
||||
// correct k8s standard.
|
||||
//
|
||||
// Another option is to delegate to the topology the ingress technology it wants to use Route,
|
||||
// Ingress or other
|
||||
let route_enabled = match target {
|
||||
DeploymentTarget::LocalDev => false,
|
||||
DeploymentTarget::Staging => true,
|
||||
DeploymentTarget::Production => true,
|
||||
};
|
||||
let ingress_enabled = !route_enabled;
|
||||
|
||||
pub fn ntfy_helm_chart_score(namespace: String, host: String) -> HelmChartScore {
|
||||
let values = format!(
|
||||
r#"
|
||||
replicaCount: 1
|
||||
@@ -25,23 +41,14 @@ serviceAccount:
|
||||
|
||||
service:
|
||||
type: ClusterIP
|
||||
port: 80
|
||||
port: 8080
|
||||
|
||||
ingress:
|
||||
enabled: true
|
||||
# annotations:
|
||||
# kubernetes.io/ingress.class: nginx
|
||||
# kubernetes.io/tls-acme: "true"
|
||||
hosts:
|
||||
- host: {host}
|
||||
paths:
|
||||
- path: /
|
||||
pathType: ImplementationSpecific
|
||||
tls: []
|
||||
# - secretName: chart-example-tls
|
||||
# hosts:
|
||||
# - chart-example.local
|
||||
enabled: {ingress_enabled}
|
||||
|
||||
route:
|
||||
enabled: {route_enabled}
|
||||
host: {host}
|
||||
|
||||
autoscaling:
|
||||
enabled: false
|
||||
@@ -49,7 +56,7 @@ autoscaling:
|
||||
config:
|
||||
enabled: true
|
||||
data:
|
||||
# base-url: "https://ntfy.something.com"
|
||||
base-url: "https://{host}"
|
||||
auth-file: "/var/cache/ntfy/user.db"
|
||||
auth-default-access: "deny-all"
|
||||
cache-file: "/var/cache/ntfy/cache.db"
|
||||
@@ -58,6 +65,8 @@ config:
|
||||
# web-root: "disable"
|
||||
enable-signup: false
|
||||
enable-login: "true"
|
||||
enable-metrics: "true"
|
||||
listen-http: ":8080"
|
||||
|
||||
persistence:
|
||||
enabled: true
|
||||
@@ -68,16 +77,12 @@ persistence:
|
||||
HelmChartScore {
|
||||
namespace: Some(NonBlankString::from_str(&namespace).unwrap()),
|
||||
release_name: NonBlankString::from_str("ntfy").unwrap(),
|
||||
chart_name: NonBlankString::from_str("sarab97/ntfy").unwrap(),
|
||||
chart_version: Some(NonBlankString::from_str("0.1.7").unwrap()),
|
||||
chart_name: NonBlankString::from_str("oci://hub.nationtech.io/harmony/ntfy").unwrap(),
|
||||
chart_version: Some(NonBlankString::from_str("0.1.7-nationtech.1").unwrap()),
|
||||
values_overrides: None,
|
||||
values_yaml: Some(values.to_string()),
|
||||
create_namespace: true,
|
||||
install_only: false,
|
||||
repository: Some(HelmRepository::new(
|
||||
"sarab97".to_string(),
|
||||
url::Url::parse("https://charts.sarabsingh.com").unwrap(),
|
||||
true,
|
||||
)),
|
||||
repository: None,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,2 +1,3 @@
|
||||
pub mod helm;
|
||||
#[allow(clippy::module_inception)]
|
||||
pub mod ntfy;
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
use std::sync::Arc;
|
||||
|
||||
use async_trait::async_trait;
|
||||
use log::debug;
|
||||
use log::info;
|
||||
use serde::Serialize;
|
||||
use strum::{Display, EnumString};
|
||||
|
||||
@@ -11,7 +11,7 @@ use crate::{
|
||||
inventory::Inventory,
|
||||
modules::monitoring::ntfy::helm::ntfy_helm_chart::ntfy_helm_chart_score,
|
||||
score::Score,
|
||||
topology::{HelmCommand, K8sclient, Topology, k8s::K8sClient},
|
||||
topology::{HelmCommand, K8sclient, MultiTargetTopology, Topology, k8s::K8sClient},
|
||||
};
|
||||
|
||||
#[derive(Debug, Clone, Serialize)]
|
||||
@@ -20,7 +20,7 @@ pub struct NtfyScore {
|
||||
pub host: String,
|
||||
}
|
||||
|
||||
impl<T: Topology + HelmCommand + K8sclient> Score<T> for NtfyScore {
|
||||
impl<T: Topology + HelmCommand + K8sclient + MultiTargetTopology> Score<T> for NtfyScore {
|
||||
fn create_interpret(&self) -> Box<dyn Interpret<T>> {
|
||||
Box::new(NtfyInterpret {
|
||||
score: self.clone(),
|
||||
@@ -28,7 +28,7 @@ impl<T: Topology + HelmCommand + K8sclient> Score<T> for NtfyScore {
|
||||
}
|
||||
|
||||
fn name(&self) -> String {
|
||||
format!("Ntfy")
|
||||
"alert receiver [NtfyScore]".into()
|
||||
}
|
||||
}
|
||||
|
||||
@@ -39,31 +39,21 @@ pub struct NtfyInterpret {
|
||||
|
||||
#[derive(Debug, EnumString, Display)]
|
||||
enum NtfyAccessMode {
|
||||
#[strum(serialize = "read-write", serialize = "rw", to_string = "read-write")]
|
||||
#[strum(serialize = "read-write", serialize = "rw")]
|
||||
ReadWrite,
|
||||
#[strum(
|
||||
serialize = "read-only",
|
||||
serialize = "ro",
|
||||
serialize = "read",
|
||||
to_string = "read-only"
|
||||
)]
|
||||
#[strum(serialize = "read-only", serialize = "ro", serialize = "read")]
|
||||
ReadOnly,
|
||||
#[strum(
|
||||
serialize = "write-only",
|
||||
serialize = "wo",
|
||||
serialize = "write",
|
||||
to_string = "write-only"
|
||||
)]
|
||||
#[strum(serialize = "write-only", serialize = "wo", serialize = "write")]
|
||||
WriteOnly,
|
||||
#[strum(serialize = "none", to_string = "deny")]
|
||||
#[strum(serialize = "deny", serialize = "none")]
|
||||
Deny,
|
||||
}
|
||||
|
||||
#[derive(Debug, EnumString, Display)]
|
||||
enum NtfyRole {
|
||||
#[strum(serialize = "user", to_string = "user")]
|
||||
#[strum(serialize = "user")]
|
||||
User,
|
||||
#[strum(serialize = "admin", to_string = "admin")]
|
||||
#[strum(serialize = "admin")]
|
||||
Admin,
|
||||
}
|
||||
|
||||
@@ -87,7 +77,7 @@ impl NtfyInterpret {
|
||||
vec![
|
||||
"sh",
|
||||
"-c",
|
||||
format!("NTFY_PASSWORD={password} ntfy user add --role={role} {username}")
|
||||
format!("NTFY_PASSWORD={password} ntfy user add --role={role} --ignore-exists {username}")
|
||||
.as_str(),
|
||||
],
|
||||
)
|
||||
@@ -95,69 +85,52 @@ impl NtfyInterpret {
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn set_access(
|
||||
&self,
|
||||
k8s_client: Arc<K8sClient>,
|
||||
username: &str,
|
||||
topic: &str,
|
||||
mode: NtfyAccessMode,
|
||||
) -> Result<(), String> {
|
||||
k8s_client
|
||||
.exec_app(
|
||||
"ntfy".to_string(),
|
||||
Some(&self.score.namespace),
|
||||
vec![
|
||||
"sh",
|
||||
"-c",
|
||||
format!("ntfy access {username} {topic} {mode}").as_str(),
|
||||
],
|
||||
)
|
||||
.await?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
/// We need a ntfy interpret to wrap the HelmChartScore in order to run the score, and then bootstrap the config inside ntfy
|
||||
#[async_trait]
|
||||
impl<T: Topology + HelmCommand + K8sclient> Interpret<T> for NtfyInterpret {
|
||||
impl<T: Topology + HelmCommand + K8sclient + MultiTargetTopology> Interpret<T> for NtfyInterpret {
|
||||
async fn execute(
|
||||
&self,
|
||||
inventory: &Inventory,
|
||||
topology: &T,
|
||||
) -> Result<Outcome, InterpretError> {
|
||||
ntfy_helm_chart_score(self.score.namespace.clone(), self.score.host.clone())
|
||||
.create_interpret()
|
||||
.execute(inventory, topology)
|
||||
ntfy_helm_chart_score(
|
||||
self.score.namespace.clone(),
|
||||
self.score.host.clone(),
|
||||
topology.current_target(),
|
||||
)
|
||||
.interpret(inventory, topology)
|
||||
.await?;
|
||||
|
||||
debug!("installed ntfy helm chart");
|
||||
info!("installed ntfy helm chart");
|
||||
let client = topology
|
||||
.k8s_client()
|
||||
.await
|
||||
.expect("couldn't get k8s client");
|
||||
|
||||
info!("deploying ntfy...");
|
||||
client
|
||||
.wait_until_deployment_ready(
|
||||
"ntfy".to_string(),
|
||||
Some(&self.score.namespace.as_str()),
|
||||
Some(self.score.namespace.as_str()),
|
||||
None,
|
||||
)
|
||||
.await?;
|
||||
debug!("created k8s client");
|
||||
info!("ntfy deployed");
|
||||
|
||||
info!("adding user harmony");
|
||||
self.add_user(client, "harmony", "harmony", Some(NtfyRole::Admin))
|
||||
.await?;
|
||||
info!("user added");
|
||||
|
||||
debug!("exec into pod done");
|
||||
|
||||
Ok(Outcome::success("installed ntfy".to_string()))
|
||||
Ok(Outcome::success("Ntfy installed".to_string()))
|
||||
}
|
||||
|
||||
fn get_name(&self) -> InterpretName {
|
||||
todo!()
|
||||
InterpretName::Ntfy
|
||||
}
|
||||
|
||||
fn get_version(&self) -> Version {
|
||||
todo!()
|
||||
}
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
pub mod helm;
|
||||
#[allow(clippy::module_inception)]
|
||||
pub mod prometheus;
|
||||
pub mod prometheus_config;
|
||||
|
||||
@@ -37,6 +37,12 @@ impl AlertSender for Prometheus {
|
||||
}
|
||||
}
|
||||
|
||||
impl Default for Prometheus {
|
||||
fn default() -> Self {
|
||||
Self::new()
|
||||
}
|
||||
}
|
||||
|
||||
impl Prometheus {
|
||||
pub fn new() -> Self {
|
||||
Self {
|
||||
@@ -94,8 +100,7 @@ impl Prometheus {
|
||||
topology: &T,
|
||||
) -> Result<Outcome, InterpretError> {
|
||||
prometheus_helm_chart_score(self.config.clone())
|
||||
.create_interpret()
|
||||
.execute(inventory, topology)
|
||||
.interpret(inventory, topology)
|
||||
.await
|
||||
}
|
||||
pub async fn install_grafana<T: Topology + HelmCommand + Send + Sync>(
|
||||
@@ -110,13 +115,12 @@ impl Prometheus {
|
||||
|
||||
if let Some(ns) = namespace.as_deref() {
|
||||
grafana_helm_chart_score(ns)
|
||||
.create_interpret()
|
||||
.execute(inventory, topology)
|
||||
.interpret(inventory, topology)
|
||||
.await
|
||||
} else {
|
||||
Err(InterpretError::new(format!(
|
||||
"could not install grafana, missing namespace",
|
||||
)))
|
||||
Err(InterpretError::new(
|
||||
"could not install grafana, missing namespace".to_string(),
|
||||
))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -16,6 +16,12 @@ pub struct PrometheusConfig {
|
||||
pub additional_service_monitors: Vec<ServiceMonitor>,
|
||||
}
|
||||
|
||||
impl Default for PrometheusConfig {
|
||||
fn default() -> Self {
|
||||
Self::new()
|
||||
}
|
||||
}
|
||||
|
||||
impl PrometheusConfig {
|
||||
pub fn new() -> Self {
|
||||
Self {
|
||||
|
||||
@@ -32,7 +32,7 @@ impl OKDBootstrapDhcpScore {
|
||||
logical_host: topology.bootstrap_host.clone(),
|
||||
physical_host: inventory
|
||||
.worker_host
|
||||
.get(0)
|
||||
.first()
|
||||
.expect("Should have at least one worker to be used as bootstrap node")
|
||||
.clone(),
|
||||
});
|
||||
|
||||
@@ -6,6 +6,12 @@ pub struct OKDUpgradeScore {
|
||||
_target_version: Version,
|
||||
}
|
||||
|
||||
impl Default for OKDUpgradeScore {
|
||||
fn default() -> Self {
|
||||
Self::new()
|
||||
}
|
||||
}
|
||||
|
||||
impl OKDUpgradeScore {
|
||||
pub fn new() -> Self {
|
||||
Self {
|
||||
|
||||
23
harmony/src/modules/prometheus/alerts/k8s/deployment.rs
Normal file
23
harmony/src/modules/prometheus/alerts/k8s/deployment.rs
Normal file
@@ -0,0 +1,23 @@
|
||||
use std::collections::HashMap;
|
||||
|
||||
use crate::modules::monitoring::alert_rule::prometheus_alert_rule::PrometheusAlertRule;
|
||||
|
||||
pub fn alert_deployment_unavailable() -> PrometheusAlertRule {
|
||||
PrometheusAlertRule {
|
||||
alert: "DeploymentUnavailable".into(),
|
||||
expr: "kube_deployment_status_replicas_unavailable > 0".into(),
|
||||
r#for: Some("2m".into()),
|
||||
labels: HashMap::from([("severity".into(), "warning".into())]),
|
||||
annotations: HashMap::from([
|
||||
(
|
||||
"summary".into(),
|
||||
"Deployment has unavailable replicas".into(),
|
||||
),
|
||||
(
|
||||
"description".into(),
|
||||
"A deployment in this namespace has unavailable replicas for over 2 minutes."
|
||||
.into(),
|
||||
),
|
||||
]),
|
||||
}
|
||||
}
|
||||
37
harmony/src/modules/prometheus/alerts/k8s/memory_usage.rs
Normal file
37
harmony/src/modules/prometheus/alerts/k8s/memory_usage.rs
Normal file
@@ -0,0 +1,37 @@
|
||||
use std::collections::HashMap;
|
||||
|
||||
use crate::modules::monitoring::alert_rule::prometheus_alert_rule::PrometheusAlertRule;
|
||||
|
||||
pub fn alert_high_memory_usage() -> PrometheusAlertRule {
|
||||
PrometheusAlertRule {
|
||||
alert: "HighMemoryUsage".into(),
|
||||
expr: "container_memory_working_set_bytes{container!=\"\",namespace!=\"\"} > 500000000"
|
||||
.into(),
|
||||
r#for: Some("2m".into()),
|
||||
labels: HashMap::from([("severity".into(), "warning".into())]),
|
||||
annotations: HashMap::from([
|
||||
("summary".into(), "Pod is using high memory".into()),
|
||||
(
|
||||
"description".into(),
|
||||
"A pod is consuming more than 500Mi of memory.".into(),
|
||||
),
|
||||
]),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn alert_high_cpu_usage() -> PrometheusAlertRule {
|
||||
PrometheusAlertRule {
|
||||
alert: "HighCPUUsage".into(),
|
||||
expr: "rate(container_cpu_usage_seconds_total{container!=\"\",namespace!=\"\"}[1m]) > 0.9"
|
||||
.into(),
|
||||
r#for: Some("1m".into()),
|
||||
labels: HashMap::from([("severity".into(), "warning".into())]),
|
||||
annotations: HashMap::from([
|
||||
("summary".into(), "Pod is using high CPU".into()),
|
||||
(
|
||||
"description".into(),
|
||||
"A pod is using more than 90% of a core over 1 minute.".into(),
|
||||
),
|
||||
]),
|
||||
}
|
||||
}
|
||||
@@ -1 +1,5 @@
|
||||
pub mod deployment;
|
||||
pub mod memory_usage;
|
||||
pub mod pod;
|
||||
pub mod pvc;
|
||||
pub mod service;
|
||||
|
||||
55
harmony/src/modules/prometheus/alerts/k8s/pod.rs
Normal file
55
harmony/src/modules/prometheus/alerts/k8s/pod.rs
Normal file
@@ -0,0 +1,55 @@
|
||||
use std::collections::HashMap;
|
||||
|
||||
use crate::modules::monitoring::alert_rule::prometheus_alert_rule::PrometheusAlertRule;
|
||||
|
||||
pub fn pod_failed() -> PrometheusAlertRule {
|
||||
PrometheusAlertRule {
|
||||
alert: "PodFailed".into(),
|
||||
expr: "kube_pod_status_phase{phase=\"Failed\"} > 2".into(),
|
||||
r#for: Some("2m".into()),
|
||||
labels: HashMap::from([("severity".into(), "critical".into())]),
|
||||
annotations: HashMap::from([
|
||||
("summary".into(), "A pod has failed".into()),
|
||||
(
|
||||
"description".into(),
|
||||
"One or more pods are in Failed phase.".into(),
|
||||
),
|
||||
]),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn alert_container_restarting() -> PrometheusAlertRule {
|
||||
PrometheusAlertRule {
|
||||
alert: "ContainerRestarting".into(),
|
||||
expr: "increase(kube_pod_container_status_restarts_total[5m]) > 3".into(),
|
||||
r#for: Some("5m".into()),
|
||||
labels: HashMap::from([("severity".into(), "warning".into())]),
|
||||
annotations: HashMap::from([
|
||||
(
|
||||
"summary".into(),
|
||||
"Container is restarting frequently".into(),
|
||||
),
|
||||
(
|
||||
"description".into(),
|
||||
"A container in this namespace has restarted more than 3 times in 5 minutes."
|
||||
.into(),
|
||||
),
|
||||
]),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn alert_pod_not_ready() -> PrometheusAlertRule {
|
||||
PrometheusAlertRule {
|
||||
alert: "PodNotReady".into(),
|
||||
expr: "kube_pod_status_ready{condition=\"true\"} == 0".into(),
|
||||
r#for: Some("2m".into()),
|
||||
labels: HashMap::from([("severity".into(), "warning".into())]),
|
||||
annotations: HashMap::from([
|
||||
("summary".into(), "Pod is not ready".into()),
|
||||
(
|
||||
"description".into(),
|
||||
"A pod in the namespace is not reporting Ready status.".into(),
|
||||
),
|
||||
]),
|
||||
}
|
||||
}
|
||||
19
harmony/src/modules/prometheus/alerts/k8s/service.rs
Normal file
19
harmony/src/modules/prometheus/alerts/k8s/service.rs
Normal file
@@ -0,0 +1,19 @@
|
||||
use std::collections::HashMap;
|
||||
|
||||
use crate::modules::monitoring::alert_rule::prometheus_alert_rule::PrometheusAlertRule;
|
||||
|
||||
pub fn alert_service_down() -> PrometheusAlertRule {
|
||||
PrometheusAlertRule {
|
||||
alert: "ServiceDown".into(),
|
||||
expr: "up == 0".into(),
|
||||
r#for: Some("1m".into()),
|
||||
labels: HashMap::from([("severity".into(), "critical".into())]),
|
||||
annotations: HashMap::from([
|
||||
("summary".into(), "Service is down".into()),
|
||||
(
|
||||
"description".into(),
|
||||
"A target service in the namespace is not responding to Prometheus scrapes.".into(),
|
||||
),
|
||||
]),
|
||||
}
|
||||
}
|
||||
570
harmony/src/modules/prometheus/k8s_prometheus_alerting_score.rs
Normal file
570
harmony/src/modules/prometheus/k8s_prometheus_alerting_score.rs
Normal file
@@ -0,0 +1,570 @@
|
||||
use std::fs;
|
||||
use std::{collections::BTreeMap, sync::Arc};
|
||||
use tempfile::tempdir;
|
||||
|
||||
use async_trait::async_trait;
|
||||
use kube::api::ObjectMeta;
|
||||
use log::{debug, info};
|
||||
use serde::Serialize;
|
||||
use std::process::Command;
|
||||
|
||||
use crate::modules::monitoring::kube_prometheus::crd::crd_alertmanager_config::CRDPrometheus;
|
||||
use crate::modules::monitoring::kube_prometheus::crd::crd_default_rules::build_default_application_rules;
|
||||
use crate::modules::monitoring::kube_prometheus::crd::crd_grafana::{
|
||||
Grafana, GrafanaDashboard, GrafanaDashboardSpec, GrafanaDatasource, GrafanaDatasourceConfig,
|
||||
GrafanaDatasourceSpec, GrafanaSpec,
|
||||
};
|
||||
use crate::modules::monitoring::kube_prometheus::crd::crd_prometheus_rules::{
|
||||
PrometheusRule, PrometheusRuleSpec, RuleGroup,
|
||||
};
|
||||
use crate::modules::monitoring::kube_prometheus::crd::grafana_default_dashboard::build_default_dashboard;
|
||||
use crate::modules::monitoring::kube_prometheus::crd::service_monitor::{
|
||||
ServiceMonitor, ServiceMonitorSpec,
|
||||
};
|
||||
use crate::topology::oberservability::monitoring::AlertReceiver;
|
||||
use crate::topology::{K8sclient, Topology, k8s::K8sClient};
|
||||
use crate::{
|
||||
data::{Id, Version},
|
||||
interpret::{Interpret, InterpretError, InterpretName, InterpretStatus, Outcome},
|
||||
inventory::Inventory,
|
||||
modules::monitoring::kube_prometheus::crd::{
|
||||
crd_alertmanagers::{Alertmanager, AlertmanagerSpec},
|
||||
crd_prometheuses::{
|
||||
AlertmanagerEndpoints, LabelSelector, Prometheus, PrometheusSpec,
|
||||
PrometheusSpecAlerting,
|
||||
},
|
||||
role::{build_prom_role, build_prom_rolebinding, build_prom_service_account},
|
||||
},
|
||||
score::Score,
|
||||
};
|
||||
|
||||
use super::prometheus::PrometheusApplicationMonitoring;
|
||||
|
||||
#[derive(Clone, Debug, Serialize)]
|
||||
pub struct K8sPrometheusCRDAlertingScore {
|
||||
pub sender: CRDPrometheus,
|
||||
pub receivers: Vec<Box<dyn AlertReceiver<CRDPrometheus>>>,
|
||||
pub service_monitors: Vec<ServiceMonitor>,
|
||||
pub prometheus_rules: Vec<RuleGroup>,
|
||||
}
|
||||
|
||||
impl<T: Topology + K8sclient + PrometheusApplicationMonitoring<CRDPrometheus>> Score<T>
|
||||
for K8sPrometheusCRDAlertingScore
|
||||
{
|
||||
fn create_interpret(&self) -> Box<dyn crate::interpret::Interpret<T>> {
|
||||
Box::new(K8sPrometheusCRDAlertingInterpret {
|
||||
sender: self.sender.clone(),
|
||||
receivers: self.receivers.clone(),
|
||||
service_monitors: self.service_monitors.clone(),
|
||||
prometheus_rules: self.prometheus_rules.clone(),
|
||||
})
|
||||
}
|
||||
|
||||
fn name(&self) -> String {
|
||||
"prometheus alerting [CRDAlertingScore]".into()
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct K8sPrometheusCRDAlertingInterpret {
|
||||
pub sender: CRDPrometheus,
|
||||
pub receivers: Vec<Box<dyn AlertReceiver<CRDPrometheus>>>,
|
||||
pub service_monitors: Vec<ServiceMonitor>,
|
||||
pub prometheus_rules: Vec<RuleGroup>,
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl<T: Topology + K8sclient + PrometheusApplicationMonitoring<CRDPrometheus>> Interpret<T>
|
||||
for K8sPrometheusCRDAlertingInterpret
|
||||
{
|
||||
async fn execute(
|
||||
&self,
|
||||
_inventory: &Inventory,
|
||||
topology: &T,
|
||||
) -> Result<Outcome, InterpretError> {
|
||||
let client = topology.k8s_client().await.unwrap();
|
||||
self.ensure_grafana_operator().await?;
|
||||
self.install_prometheus(&client).await?;
|
||||
self.install_alert_manager(&client).await?;
|
||||
self.install_client_kube_metrics().await?;
|
||||
self.install_grafana(&client).await?;
|
||||
self.install_receivers(&self.sender, &self.receivers)
|
||||
.await?;
|
||||
self.install_rules(&self.prometheus_rules, &client).await?;
|
||||
self.install_monitors(self.service_monitors.clone(), &client)
|
||||
.await?;
|
||||
Ok(Outcome::success(
|
||||
"K8s monitoring components installed".to_string(),
|
||||
))
|
||||
}
|
||||
|
||||
fn get_name(&self) -> InterpretName {
|
||||
InterpretName::K8sPrometheusCrdAlerting
|
||||
}
|
||||
|
||||
fn get_version(&self) -> Version {
|
||||
todo!()
|
||||
}
|
||||
|
||||
fn get_status(&self) -> InterpretStatus {
|
||||
todo!()
|
||||
}
|
||||
|
||||
fn get_children(&self) -> Vec<Id> {
|
||||
todo!()
|
||||
}
|
||||
}
|
||||
|
||||
impl K8sPrometheusCRDAlertingInterpret {
|
||||
async fn crd_exists(&self, crd: &str) -> bool {
|
||||
let status = Command::new("sh")
|
||||
.args(["-c", &format!("kubectl get crd -A | grep -i {crd}")])
|
||||
.status()
|
||||
.map_err(|e| InterpretError::new(format!("could not connect to cluster: {}", e)))
|
||||
.unwrap();
|
||||
|
||||
status.success()
|
||||
}
|
||||
|
||||
async fn install_chart(
|
||||
&self,
|
||||
chart_path: String,
|
||||
chart_name: String,
|
||||
) -> Result<(), InterpretError> {
|
||||
let temp_dir =
|
||||
tempdir().map_err(|e| InterpretError::new(format!("Tempdir error: {}", e)))?;
|
||||
let temp_path = temp_dir.path().to_path_buf();
|
||||
debug!("Using temp directory: {}", temp_path.display());
|
||||
let chart = format!("{}/{}", chart_path, chart_name);
|
||||
let pull_output = Command::new("helm")
|
||||
.args(["pull", &chart, "--destination", temp_path.to_str().unwrap()])
|
||||
.output()
|
||||
.map_err(|e| InterpretError::new(format!("Helm pull error: {}", e)))?;
|
||||
|
||||
if !pull_output.status.success() {
|
||||
return Err(InterpretError::new(format!(
|
||||
"Helm pull failed: {}",
|
||||
String::from_utf8_lossy(&pull_output.stderr)
|
||||
)));
|
||||
}
|
||||
|
||||
let tgz_path = fs::read_dir(&temp_path)
|
||||
.unwrap()
|
||||
.filter_map(|entry| {
|
||||
let entry = entry.ok()?;
|
||||
let path = entry.path();
|
||||
if path.extension()? == "tgz" {
|
||||
Some(path)
|
||||
} else {
|
||||
None
|
||||
}
|
||||
})
|
||||
.next()
|
||||
.ok_or_else(|| InterpretError::new("Could not find pulled Helm chart".into()))?;
|
||||
|
||||
debug!("Installing chart from: {}", tgz_path.display());
|
||||
|
||||
let install_output = Command::new("helm")
|
||||
.args([
|
||||
"upgrade",
|
||||
"--install",
|
||||
&chart_name,
|
||||
tgz_path.to_str().unwrap(),
|
||||
"--namespace",
|
||||
&self.sender.namespace.clone(),
|
||||
"--create-namespace",
|
||||
"--wait",
|
||||
"--atomic",
|
||||
])
|
||||
.output()
|
||||
.map_err(|e| InterpretError::new(format!("Helm install error: {}", e)))?;
|
||||
|
||||
if !install_output.status.success() {
|
||||
return Err(InterpretError::new(format!(
|
||||
"Helm install failed: {}",
|
||||
String::from_utf8_lossy(&install_output.stderr)
|
||||
)));
|
||||
}
|
||||
|
||||
debug!(
|
||||
"Installed chart {}/{} in namespace: {}",
|
||||
&chart_path,
|
||||
&chart_name,
|
||||
self.sender.namespace.clone()
|
||||
);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn ensure_grafana_operator(&self) -> Result<Outcome, InterpretError> {
|
||||
if self.crd_exists("grafanas.grafana.integreatly.org").await {
|
||||
debug!("grafana CRDs already exist — skipping install.");
|
||||
return Ok(Outcome::success("Grafana CRDs already exist".to_string()));
|
||||
}
|
||||
|
||||
let _ = Command::new("helm")
|
||||
.args([
|
||||
"repo",
|
||||
"add",
|
||||
"grafana-operator",
|
||||
"https://grafana.github.io/helm-charts",
|
||||
])
|
||||
.output()
|
||||
.unwrap();
|
||||
|
||||
let _ = Command::new("helm")
|
||||
.args(["repo", "update"])
|
||||
.output()
|
||||
.unwrap();
|
||||
|
||||
let output = Command::new("helm")
|
||||
.args([
|
||||
"install",
|
||||
"grafana-operator",
|
||||
"grafana-operator/grafana-operator",
|
||||
"--namespace",
|
||||
&self.sender.namespace.clone(),
|
||||
"--create-namespace",
|
||||
"--set",
|
||||
"namespaceScope=true",
|
||||
])
|
||||
.output()
|
||||
.unwrap();
|
||||
|
||||
if !output.status.success() {
|
||||
return Err(InterpretError::new(format!(
|
||||
"helm install failed:\nstdout: {}\nstderr: {}",
|
||||
String::from_utf8_lossy(&output.stdout),
|
||||
String::from_utf8_lossy(&output.stderr)
|
||||
)));
|
||||
}
|
||||
|
||||
Ok(Outcome::success(format!(
|
||||
"installed grafana operator in ns {}",
|
||||
self.sender.namespace.clone()
|
||||
)))
|
||||
}
|
||||
|
||||
async fn install_prometheus(&self, client: &Arc<K8sClient>) -> Result<Outcome, InterpretError> {
|
||||
debug!(
|
||||
"installing crd-prometheuses in namespace {}",
|
||||
self.sender.namespace.clone()
|
||||
);
|
||||
debug!("building role/rolebinding/serviceaccount for crd-prometheus");
|
||||
let rolename = format!("{}-prom", self.sender.namespace.clone());
|
||||
let sa_name = format!("{}-prom-sa", self.sender.namespace.clone());
|
||||
let role = build_prom_role(rolename.clone(), self.sender.namespace.clone());
|
||||
let rolebinding = build_prom_rolebinding(
|
||||
rolename.clone(),
|
||||
self.sender.namespace.clone(),
|
||||
sa_name.clone(),
|
||||
);
|
||||
let sa = build_prom_service_account(sa_name.clone(), self.sender.namespace.clone());
|
||||
let prom_spec = PrometheusSpec {
|
||||
alerting: Some(PrometheusSpecAlerting {
|
||||
alertmanagers: Some(vec![AlertmanagerEndpoints {
|
||||
name: Some("alertmanager-operated".into()),
|
||||
namespace: Some(self.sender.namespace.clone()),
|
||||
port: Some("web".into()),
|
||||
scheme: Some("http".into()),
|
||||
}]),
|
||||
}),
|
||||
service_account_name: sa_name.clone(),
|
||||
service_monitor_namespace_selector: Some(LabelSelector {
|
||||
match_labels: BTreeMap::from([(
|
||||
"kubernetes.io/metadata.name".to_string(),
|
||||
self.sender.namespace.clone(),
|
||||
)]),
|
||||
match_expressions: vec![],
|
||||
}),
|
||||
service_monitor_selector: Some(LabelSelector {
|
||||
match_labels: BTreeMap::from([("client".to_string(), "prometheus".to_string())]),
|
||||
..Default::default()
|
||||
}),
|
||||
|
||||
service_discovery_role: Some("Endpoints".into()),
|
||||
|
||||
pod_monitor_selector: Some(LabelSelector {
|
||||
match_labels: BTreeMap::from([("client".to_string(), "prometheus".to_string())]),
|
||||
..Default::default()
|
||||
}),
|
||||
|
||||
rule_selector: Some(LabelSelector {
|
||||
match_labels: BTreeMap::from([("role".to_string(), "prometheus-rule".to_string())]),
|
||||
..Default::default()
|
||||
}),
|
||||
|
||||
rule_namespace_selector: Some(LabelSelector {
|
||||
match_labels: BTreeMap::from([(
|
||||
"kubernetes.io/metadata.name".to_string(),
|
||||
self.sender.namespace.clone(),
|
||||
)]),
|
||||
match_expressions: vec![],
|
||||
}),
|
||||
};
|
||||
let prom = Prometheus {
|
||||
metadata: ObjectMeta {
|
||||
name: Some(self.sender.namespace.clone()),
|
||||
labels: Some(std::collections::BTreeMap::from([
|
||||
("alertmanagerConfig".to_string(), "enabled".to_string()),
|
||||
("client".to_string(), "prometheus".to_string()),
|
||||
])),
|
||||
namespace: Some(self.sender.namespace.clone()),
|
||||
..Default::default()
|
||||
},
|
||||
spec: prom_spec,
|
||||
};
|
||||
client
|
||||
.apply(&role, Some(&self.sender.namespace.clone()))
|
||||
.await
|
||||
.map_err(|e| InterpretError::new(e.to_string()))?;
|
||||
info!(
|
||||
"installed prometheus role: {:#?} in ns {:#?}",
|
||||
role.metadata.name.unwrap(),
|
||||
role.metadata.namespace.unwrap()
|
||||
);
|
||||
client
|
||||
.apply(&rolebinding, Some(&self.sender.namespace.clone()))
|
||||
.await
|
||||
.map_err(|e| InterpretError::new(e.to_string()))?;
|
||||
info!(
|
||||
"installed prometheus rolebinding: {:#?} in ns {:#?}",
|
||||
rolebinding.metadata.name.unwrap(),
|
||||
rolebinding.metadata.namespace.unwrap()
|
||||
);
|
||||
client
|
||||
.apply(&sa, Some(&self.sender.namespace.clone()))
|
||||
.await
|
||||
.map_err(|e| InterpretError::new(e.to_string()))?;
|
||||
info!(
|
||||
"installed prometheus service account: {:#?} in ns {:#?}",
|
||||
sa.metadata.name.unwrap(),
|
||||
sa.metadata.namespace.unwrap()
|
||||
);
|
||||
client
|
||||
.apply(&prom, Some(&self.sender.namespace.clone()))
|
||||
.await
|
||||
.map_err(|e| InterpretError::new(e.to_string()))?;
|
||||
info!(
|
||||
"installed prometheus: {:#?} in ns {:#?}",
|
||||
&prom.metadata.name.clone().unwrap(),
|
||||
&prom.metadata.namespace.clone().unwrap()
|
||||
);
|
||||
|
||||
Ok(Outcome::success(format!(
|
||||
"successfully deployed crd-prometheus {:#?}",
|
||||
prom
|
||||
)))
|
||||
}
|
||||
|
||||
async fn install_alert_manager(
|
||||
&self,
|
||||
client: &Arc<K8sClient>,
|
||||
) -> Result<Outcome, InterpretError> {
|
||||
let am = Alertmanager {
|
||||
metadata: ObjectMeta {
|
||||
name: Some(self.sender.namespace.clone()),
|
||||
labels: Some(std::collections::BTreeMap::from([(
|
||||
"alertmanagerConfig".to_string(),
|
||||
"enabled".to_string(),
|
||||
)])),
|
||||
namespace: Some(self.sender.namespace.clone()),
|
||||
..Default::default()
|
||||
},
|
||||
spec: AlertmanagerSpec::default(),
|
||||
};
|
||||
client
|
||||
.apply(&am, Some(&self.sender.namespace.clone()))
|
||||
.await
|
||||
.map_err(|e| InterpretError::new(e.to_string()))?;
|
||||
Ok(Outcome::success(format!(
|
||||
"successfully deployed service monitor {:#?}",
|
||||
am.metadata.name
|
||||
)))
|
||||
}
|
||||
async fn install_monitors(
|
||||
&self,
|
||||
mut monitors: Vec<ServiceMonitor>,
|
||||
client: &Arc<K8sClient>,
|
||||
) -> Result<Outcome, InterpretError> {
|
||||
let default_service_monitor = ServiceMonitor {
|
||||
metadata: ObjectMeta {
|
||||
name: Some(self.sender.namespace.clone()),
|
||||
labels: Some(std::collections::BTreeMap::from([
|
||||
("alertmanagerConfig".to_string(), "enabled".to_string()),
|
||||
("client".to_string(), "prometheus".to_string()),
|
||||
(
|
||||
"app.kubernetes.io/name".to_string(),
|
||||
"kube-state-metrics".to_string(),
|
||||
),
|
||||
])),
|
||||
namespace: Some(self.sender.namespace.clone()),
|
||||
..Default::default()
|
||||
},
|
||||
spec: ServiceMonitorSpec::default(),
|
||||
};
|
||||
monitors.push(default_service_monitor);
|
||||
for monitor in monitors.iter() {
|
||||
client
|
||||
.apply(monitor, Some(&self.sender.namespace.clone()))
|
||||
.await
|
||||
.map_err(|e| InterpretError::new(e.to_string()))?;
|
||||
}
|
||||
Ok(Outcome::success(
|
||||
"succesfully deployed service monitors".to_string(),
|
||||
))
|
||||
}
|
||||
|
||||
async fn install_rules(
|
||||
&self,
|
||||
#[allow(clippy::ptr_arg)] rules: &Vec<RuleGroup>,
|
||||
client: &Arc<K8sClient>,
|
||||
) -> Result<Outcome, InterpretError> {
|
||||
let mut prom_rule_spec = PrometheusRuleSpec {
|
||||
groups: rules.clone(),
|
||||
};
|
||||
|
||||
let default_rules_group = RuleGroup {
|
||||
name: "default-rules".to_string(),
|
||||
rules: build_default_application_rules(),
|
||||
};
|
||||
|
||||
prom_rule_spec.groups.push(default_rules_group);
|
||||
let prom_rules = PrometheusRule {
|
||||
metadata: ObjectMeta {
|
||||
name: Some(self.sender.namespace.clone()),
|
||||
labels: Some(std::collections::BTreeMap::from([
|
||||
("alertmanagerConfig".to_string(), "enabled".to_string()),
|
||||
("role".to_string(), "prometheus-rule".to_string()),
|
||||
])),
|
||||
namespace: Some(self.sender.namespace.clone()),
|
||||
..Default::default()
|
||||
},
|
||||
spec: prom_rule_spec,
|
||||
};
|
||||
client
|
||||
.apply(&prom_rules, Some(&self.sender.namespace.clone()))
|
||||
.await
|
||||
.map_err(|e| InterpretError::new(e.to_string()))?;
|
||||
Ok(Outcome::success(format!(
|
||||
"successfully deployed rules {:#?}",
|
||||
prom_rules.metadata.name
|
||||
)))
|
||||
}
|
||||
|
||||
async fn install_client_kube_metrics(&self) -> Result<Outcome, InterpretError> {
|
||||
self.install_chart(
|
||||
"oci://hub.nationtech.io/harmony".to_string(),
|
||||
"nt-kube-metrics".to_string(),
|
||||
)
|
||||
.await?;
|
||||
Ok(Outcome::success(format!(
|
||||
"Installed client kube metrics in ns {}",
|
||||
&self.sender.namespace.clone()
|
||||
)))
|
||||
}
|
||||
|
||||
async fn install_grafana(&self, client: &Arc<K8sClient>) -> Result<Outcome, InterpretError> {
|
||||
let mut label = BTreeMap::new();
|
||||
label.insert("dashboards".to_string(), "grafana".to_string());
|
||||
let labels = LabelSelector {
|
||||
match_labels: label.clone(),
|
||||
match_expressions: vec![],
|
||||
};
|
||||
let mut json_data = BTreeMap::new();
|
||||
json_data.insert("timeInterval".to_string(), "5s".to_string());
|
||||
let namespace = self.sender.namespace.clone();
|
||||
|
||||
let json = build_default_dashboard(&namespace);
|
||||
|
||||
let graf_data_source = GrafanaDatasource {
|
||||
metadata: ObjectMeta {
|
||||
name: Some(format!(
|
||||
"grafana-datasource-{}",
|
||||
self.sender.namespace.clone()
|
||||
)),
|
||||
namespace: Some(self.sender.namespace.clone()),
|
||||
..Default::default()
|
||||
},
|
||||
spec: GrafanaDatasourceSpec {
|
||||
instance_selector: labels.clone(),
|
||||
allow_cross_namespace_import: Some(false),
|
||||
datasource: GrafanaDatasourceConfig {
|
||||
access: "proxy".to_string(),
|
||||
database: Some("prometheus".to_string()),
|
||||
json_data: Some(json_data),
|
||||
//this is fragile
|
||||
name: format!("prometheus-{}-0", self.sender.namespace.clone()),
|
||||
r#type: "prometheus".to_string(),
|
||||
url: format!(
|
||||
"http://prometheus-operated.{}.svc.cluster.local:9090",
|
||||
self.sender.namespace.clone()
|
||||
),
|
||||
},
|
||||
},
|
||||
};
|
||||
|
||||
client
|
||||
.apply(&graf_data_source, Some(&self.sender.namespace.clone()))
|
||||
.await
|
||||
.map_err(|e| InterpretError::new(e.to_string()))?;
|
||||
|
||||
let graf_dashboard = GrafanaDashboard {
|
||||
metadata: ObjectMeta {
|
||||
name: Some(format!(
|
||||
"grafana-dashboard-{}",
|
||||
self.sender.namespace.clone()
|
||||
)),
|
||||
namespace: Some(self.sender.namespace.clone()),
|
||||
..Default::default()
|
||||
},
|
||||
spec: GrafanaDashboardSpec {
|
||||
resync_period: Some("30s".to_string()),
|
||||
instance_selector: labels.clone(),
|
||||
json,
|
||||
},
|
||||
};
|
||||
|
||||
client
|
||||
.apply(&graf_dashboard, Some(&self.sender.namespace.clone()))
|
||||
.await
|
||||
.map_err(|e| InterpretError::new(e.to_string()))?;
|
||||
|
||||
let grafana = Grafana {
|
||||
metadata: ObjectMeta {
|
||||
name: Some(format!("grafana-{}", self.sender.namespace.clone())),
|
||||
namespace: Some(self.sender.namespace.clone()),
|
||||
labels: Some(label.clone()),
|
||||
..Default::default()
|
||||
},
|
||||
spec: GrafanaSpec {
|
||||
config: None,
|
||||
admin_user: None,
|
||||
admin_password: None,
|
||||
ingress: None,
|
||||
persistence: None,
|
||||
resources: None,
|
||||
},
|
||||
};
|
||||
client
|
||||
.apply(&grafana, Some(&self.sender.namespace.clone()))
|
||||
.await
|
||||
.map_err(|e| InterpretError::new(e.to_string()))?;
|
||||
Ok(Outcome::success(format!(
|
||||
"successfully deployed grafana instance {:#?}",
|
||||
grafana.metadata.name
|
||||
)))
|
||||
}
|
||||
|
||||
async fn install_receivers(
|
||||
&self,
|
||||
sender: &CRDPrometheus,
|
||||
receivers: &Vec<Box<dyn AlertReceiver<CRDPrometheus>>>,
|
||||
) -> Result<Outcome, InterpretError> {
|
||||
for receiver in receivers.iter() {
|
||||
receiver.install(sender).await.map_err(|err| {
|
||||
InterpretError::new(format!("failed to install receiver: {}", err))
|
||||
})?;
|
||||
}
|
||||
Ok(Outcome::success("successfully deployed receivers".into()))
|
||||
}
|
||||
}
|
||||
@@ -1 +1,4 @@
|
||||
pub mod alerts;
|
||||
pub mod k8s_prometheus_alerting_score;
|
||||
#[allow(clippy::module_inception)]
|
||||
pub mod prometheus;
|
||||
|
||||
19
harmony/src/modules/prometheus/prometheus.rs
Normal file
19
harmony/src/modules/prometheus/prometheus.rs
Normal file
@@ -0,0 +1,19 @@
|
||||
use async_trait::async_trait;
|
||||
|
||||
use crate::{
|
||||
inventory::Inventory,
|
||||
topology::{
|
||||
PreparationError, PreparationOutcome,
|
||||
oberservability::monitoring::{AlertReceiver, AlertSender},
|
||||
},
|
||||
};
|
||||
|
||||
/// Contract for something that can install Prometheus application monitoring
/// for an alert sender of type `S`.
#[async_trait]
pub trait PrometheusApplicationMonitoring<S: AlertSender> {
    /// Installs Prometheus for `sender`.
    ///
    /// * `sender` - the alert sender the installation is performed for.
    /// * `inventory` - inventory made available to the implementation.
    /// * `receivers` - optional alert receivers for `S`; `None` means none were supplied.
    ///
    /// Returns a `PreparationOutcome` on success and a `PreparationError` on failure.
    async fn install_prometheus(
        &self,
        sender: &S,
        inventory: &Inventory,
        receivers: Option<Vec<Box<dyn AlertReceiver<S>>>>,
    ) -> Result<PreparationOutcome, PreparationError>;
}
|
||||
419
harmony/src/modules/storage/ceph/ceph_remove_osd_score.rs
Normal file
419
harmony/src/modules/storage/ceph/ceph_remove_osd_score.rs
Normal file
@@ -0,0 +1,419 @@
|
||||
use std::{
|
||||
process::Command,
|
||||
sync::Arc,
|
||||
time::{Duration, Instant},
|
||||
};
|
||||
|
||||
use async_trait::async_trait;
|
||||
use log::{info, warn};
|
||||
use serde::{Deserialize, Serialize};
|
||||
use tokio::time::sleep;
|
||||
|
||||
use crate::{
|
||||
data::{Id, Version},
|
||||
interpret::{Interpret, InterpretError, InterpretName, InterpretStatus, Outcome},
|
||||
inventory::Inventory,
|
||||
score::Score,
|
||||
topology::{K8sclient, Topology, k8s::K8sClient},
|
||||
};
|
||||
|
||||
/// Score that removes one OSD from a rook-ceph cluster by scaling down and deleting
/// its deployment, then purging the OSD from Ceph.
#[derive(Debug, Clone, Serialize)]
pub struct CephRemoveOsd {
    /// Name of the OSD deployment to remove. The OSD id is read from the fourth
    /// `-`-separated segment of this name.
    pub osd_deployment_name: String,
    /// Namespace in which the OSD deployment and the `rook-ceph-tools` deployment
    /// are looked up.
    pub rook_ceph_namespace: String,
}
|
||||
|
||||
impl<T: Topology + K8sclient> Score<T> for CephRemoveOsd {
|
||||
fn name(&self) -> String {
|
||||
format!("CephRemoveOsdScore")
|
||||
}
|
||||
|
||||
#[doc(hidden)]
|
||||
fn create_interpret(&self) -> Box<dyn Interpret<T>> {
|
||||
Box::new(CephRemoveOsdInterpret {
|
||||
score: self.clone(),
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
/// Interpret that carries out a [`CephRemoveOsd`] score.
#[derive(Debug, Clone)]
pub struct CephRemoveOsdInterpret {
    /// The score being executed; supplies the deployment name and namespace.
    score: CephRemoveOsd,
}
|
||||
|
||||
#[async_trait]
|
||||
impl<T: Topology + K8sclient> Interpret<T> for CephRemoveOsdInterpret {
|
||||
async fn execute(
|
||||
&self,
|
||||
_inventory: &Inventory,
|
||||
topology: &T,
|
||||
) -> Result<Outcome, InterpretError> {
|
||||
let client = topology.k8s_client().await.unwrap();
|
||||
self.verify_ceph_toolbox_exists(client.clone()).await?;
|
||||
self.scale_deployment(client.clone()).await?;
|
||||
self.verify_deployment_scaled(client.clone()).await?;
|
||||
self.delete_deployment(client.clone()).await?;
|
||||
self.verify_deployment_deleted(client.clone()).await?;
|
||||
let osd_id_full = self.get_ceph_osd_id().unwrap();
|
||||
self.purge_ceph_osd(client.clone(), &osd_id_full).await?;
|
||||
self.verify_ceph_osd_removal(client.clone(), &osd_id_full)
|
||||
.await?;
|
||||
|
||||
Ok(Outcome::success(format!(
|
||||
"Successfully removed OSD {} from rook-ceph cluster by deleting deployment {}",
|
||||
osd_id_full, self.score.osd_deployment_name
|
||||
)))
|
||||
}
|
||||
fn get_name(&self) -> InterpretName {
|
||||
todo!()
|
||||
}
|
||||
|
||||
fn get_version(&self) -> Version {
|
||||
todo!()
|
||||
}
|
||||
|
||||
fn get_status(&self) -> InterpretStatus {
|
||||
todo!()
|
||||
}
|
||||
|
||||
fn get_children(&self) -> Vec<Id> {
|
||||
todo!()
|
||||
}
|
||||
}
|
||||
|
||||
impl CephRemoveOsdInterpret {
|
||||
pub fn get_ceph_osd_id(&self) -> Result<String, InterpretError> {
|
||||
let osd_id_numeric = self
|
||||
.score
|
||||
.osd_deployment_name
|
||||
.split('-')
|
||||
.nth(3)
|
||||
.ok_or_else(|| {
|
||||
InterpretError::new(format!(
|
||||
"Could not parse OSD id from deployment name {}",
|
||||
self.score.osd_deployment_name
|
||||
))
|
||||
})?;
|
||||
let osd_id_full = format!("osd.{}", osd_id_numeric);
|
||||
|
||||
info!(
|
||||
"Targeting Ceph OSD: {} (parsed from deployment {})",
|
||||
osd_id_full, self.score.osd_deployment_name
|
||||
);
|
||||
|
||||
Ok(osd_id_full)
|
||||
}
|
||||
|
||||
pub async fn verify_ceph_toolbox_exists(
|
||||
&self,
|
||||
client: Arc<K8sClient>,
|
||||
) -> Result<Outcome, InterpretError> {
|
||||
let toolbox_dep = "rook-ceph-tools".to_string();
|
||||
|
||||
match client
|
||||
.get_deployment(&toolbox_dep, Some(&self.score.rook_ceph_namespace))
|
||||
.await
|
||||
{
|
||||
Ok(Some(deployment)) => {
|
||||
if let Some(status) = deployment.status {
|
||||
let ready_count = status.ready_replicas.unwrap_or(0);
|
||||
if ready_count >= 1 {
|
||||
return Ok(Outcome::success(format!(
|
||||
"'{}' is ready with {} replica(s).",
|
||||
&toolbox_dep, ready_count
|
||||
)));
|
||||
} else {
|
||||
return Err(InterpretError::new(
|
||||
"ceph-tool-box not ready in cluster".to_string(),
|
||||
));
|
||||
}
|
||||
} else {
|
||||
Err(InterpretError::new(format!(
|
||||
"failed to get deployment status {}",
|
||||
&toolbox_dep
|
||||
)))
|
||||
}
|
||||
}
|
||||
Ok(None) => Err(InterpretError::new(format!(
|
||||
"Deployment '{}' not found in namespace '{}'.",
|
||||
&toolbox_dep, self.score.rook_ceph_namespace
|
||||
))),
|
||||
Err(e) => Err(InterpretError::new(format!(
|
||||
"Failed to query for deployment '{}': {}",
|
||||
&toolbox_dep, e
|
||||
))),
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn scale_deployment(
|
||||
&self,
|
||||
client: Arc<K8sClient>,
|
||||
) -> Result<Outcome, InterpretError> {
|
||||
info!(
|
||||
"Scaling down OSD deployment: {}",
|
||||
self.score.osd_deployment_name
|
||||
);
|
||||
client
|
||||
.scale_deployment(
|
||||
&self.score.osd_deployment_name,
|
||||
Some(&self.score.rook_ceph_namespace),
|
||||
0,
|
||||
)
|
||||
.await?;
|
||||
Ok(Outcome::success(format!(
|
||||
"Scaled down deployment {}",
|
||||
self.score.osd_deployment_name
|
||||
)))
|
||||
}
|
||||
|
||||
pub async fn verify_deployment_scaled(
|
||||
&self,
|
||||
client: Arc<K8sClient>,
|
||||
) -> Result<Outcome, InterpretError> {
|
||||
let (timeout, interval, start) = self.build_timer();
|
||||
|
||||
info!("Waiting for OSD deployment to scale down to 0 replicas");
|
||||
loop {
|
||||
let dep = client
|
||||
.get_deployment(
|
||||
&self.score.osd_deployment_name,
|
||||
Some(&self.score.rook_ceph_namespace),
|
||||
)
|
||||
.await?;
|
||||
|
||||
if let Some(deployment) = dep {
|
||||
if let Some(status) = deployment.status {
|
||||
if status.replicas.unwrap_or(1) == 0 && status.ready_replicas.unwrap_or(1) == 0
|
||||
{
|
||||
return Ok(Outcome::success(
|
||||
"Deployment successfully scaled down.".to_string(),
|
||||
));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if start.elapsed() > timeout {
|
||||
return Err(InterpretError::new(format!(
|
||||
"Timed out waiting for deployment {} to scale down",
|
||||
self.score.osd_deployment_name
|
||||
)));
|
||||
}
|
||||
sleep(interval).await;
|
||||
}
|
||||
}
|
||||
|
||||
fn build_timer(&self) -> (Duration, Duration, Instant) {
|
||||
let timeout = Duration::from_secs(120);
|
||||
let interval = Duration::from_secs(5);
|
||||
let start = Instant::now();
|
||||
(timeout, interval, start)
|
||||
}
|
||||
pub async fn delete_deployment(
|
||||
&self,
|
||||
client: Arc<K8sClient>,
|
||||
) -> Result<Outcome, InterpretError> {
|
||||
info!(
|
||||
"Deleting OSD deployment: {}",
|
||||
self.score.osd_deployment_name
|
||||
);
|
||||
client
|
||||
.delete_deployment(
|
||||
&self.score.osd_deployment_name,
|
||||
Some(&self.score.rook_ceph_namespace),
|
||||
)
|
||||
.await?;
|
||||
Ok(Outcome::success(format!(
|
||||
"deployment {} deleted",
|
||||
self.score.osd_deployment_name
|
||||
)))
|
||||
}
|
||||
|
||||
pub async fn verify_deployment_deleted(
|
||||
&self,
|
||||
client: Arc<K8sClient>,
|
||||
) -> Result<Outcome, InterpretError> {
|
||||
let (timeout, interval, start) = self.build_timer();
|
||||
|
||||
info!("Waiting for OSD deployment to scale down to 0 replicas");
|
||||
loop {
|
||||
let dep = client
|
||||
.get_deployment(
|
||||
&self.score.osd_deployment_name,
|
||||
Some(&self.score.rook_ceph_namespace),
|
||||
)
|
||||
.await?;
|
||||
|
||||
if dep.is_none() {
|
||||
info!(
|
||||
"Deployment {} successfully deleted.",
|
||||
self.score.osd_deployment_name
|
||||
);
|
||||
return Ok(Outcome::success(format!(
|
||||
"Deployment {} deleted.",
|
||||
self.score.osd_deployment_name
|
||||
)));
|
||||
}
|
||||
|
||||
if start.elapsed() > timeout {
|
||||
return Err(InterpretError::new(format!(
|
||||
"Timed out waiting for deployment {} to be deleted",
|
||||
self.score.osd_deployment_name
|
||||
)));
|
||||
}
|
||||
sleep(interval).await;
|
||||
}
|
||||
}
|
||||
|
||||
fn get_osd_tree(&self, json: serde_json::Value) -> Result<CephOsdTree, InterpretError> {
|
||||
let nodes = json.get("nodes").ok_or_else(|| {
|
||||
InterpretError::new("Missing 'nodes' field in ceph osd tree JSON".to_string())
|
||||
})?;
|
||||
let tree: CephOsdTree = CephOsdTree {
|
||||
nodes: serde_json::from_value(nodes.clone()).map_err(|e| {
|
||||
InterpretError::new(format!("Failed to parse ceph osd tree JSON: {}", e))
|
||||
})?,
|
||||
};
|
||||
Ok(tree)
|
||||
}
|
||||
|
||||
pub async fn purge_ceph_osd(
|
||||
&self,
|
||||
client: Arc<K8sClient>,
|
||||
osd_id_full: &str,
|
||||
) -> Result<Outcome, InterpretError> {
|
||||
info!(
|
||||
"Purging OSD {} from Ceph cluster and removing its auth key",
|
||||
osd_id_full
|
||||
);
|
||||
client
|
||||
.exec_app_capture_output(
|
||||
"rook-ceph-tools".to_string(),
|
||||
"app".to_string(),
|
||||
Some(&self.score.rook_ceph_namespace),
|
||||
vec![
|
||||
format!("ceph osd purge {osd_id_full} --yes-i-really-mean-it").as_str(),
|
||||
format!("ceph auth del osd.{osd_id_full}").as_str(),
|
||||
],
|
||||
)
|
||||
.await?;
|
||||
Ok(Outcome::success(format!(
|
||||
"osd id {} removed from osd tree",
|
||||
osd_id_full
|
||||
)))
|
||||
}
|
||||
|
||||
pub async fn verify_ceph_osd_removal(
|
||||
&self,
|
||||
client: Arc<K8sClient>,
|
||||
osd_id_full: &str,
|
||||
) -> Result<Outcome, InterpretError> {
|
||||
let (timeout, interval, start) = self.build_timer();
|
||||
info!(
|
||||
"Verifying OSD {} has been removed from the Ceph tree...",
|
||||
osd_id_full
|
||||
);
|
||||
loop {
|
||||
let output = client
|
||||
.exec_app_capture_output(
|
||||
"rook-ceph-tools".to_string(),
|
||||
"app".to_string(),
|
||||
Some(&self.score.rook_ceph_namespace),
|
||||
vec!["ceph osd tree -f json"],
|
||||
)
|
||||
.await?;
|
||||
let tree =
|
||||
self.get_osd_tree(serde_json::from_str(&output).expect("could not extract json"));
|
||||
|
||||
let osd_found = tree
|
||||
.unwrap()
|
||||
.nodes
|
||||
.iter()
|
||||
.any(|node| node.name == osd_id_full);
|
||||
|
||||
if !osd_found {
|
||||
return Ok(Outcome::success(format!(
|
||||
"Successfully verified that OSD {} is removed from the Ceph cluster.",
|
||||
osd_id_full,
|
||||
)));
|
||||
}
|
||||
|
||||
if start.elapsed() > timeout {
|
||||
return Err(InterpretError::new(format!(
|
||||
"Timed out waiting for OSD {} to be removed from Ceph tree",
|
||||
osd_id_full
|
||||
)));
|
||||
}
|
||||
|
||||
warn!(
|
||||
"OSD {} still found in Ceph tree, retrying in {:?}...",
|
||||
osd_id_full, interval
|
||||
);
|
||||
sleep(interval).await;
|
||||
}
|
||||
}
|
||||
}
|
||||
/// Parsed form of the `nodes` array taken from `ceph osd tree -f json` output.
#[derive(Debug, Deserialize, PartialEq)]
pub struct CephOsdTree {
    /// Every node listed in the tree, in the order it appears in the JSON.
    pub nodes: Vec<CephNode>,
}
|
||||
|
||||
/// One entry of the `nodes` array in the OSD tree JSON.
///
/// `id`, `name` and `type` are required; the remaining fields are optional and
/// deserialize to `None` when absent.
#[derive(Debug, Deserialize, PartialEq)]
pub struct CephNode {
    /// Numeric node id.
    pub id: i32,
    /// Node name; compared against the `osd.<n>` identifier when verifying removal.
    pub name: String,
    /// Value of the JSON `type` key (renamed because `type` is a Rust keyword).
    #[serde(rename = "type")]
    pub node_type: String,
    /// Value of the JSON `type_id` key, if present.
    pub type_id: Option<i32>,
    /// Value of the JSON `children` key, if present.
    pub children: Option<Vec<i32>>,
    /// Value of the JSON `exists` key, if present.
    pub exists: Option<i32>,
    /// Value of the JSON `status` key, if present.
    pub status: Option<String>,
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use serde_json::json;

    use super::*;

    /// Builds an `osd`-typed node named `osd.<id>` with every optional field unset.
    fn bare_osd_node(id: i32) -> CephNode {
        CephNode {
            id,
            name: format!("osd.{}", id),
            node_type: "osd".to_string(),
            type_id: None,
            children: None,
            exists: None,
            status: None,
        }
    }

    /// Keys that `CephNode` does not declare are ignored, and absent optional
    /// fields come back as `None`.
    #[test]
    fn test_get_osd_tree() {
        let interpret = CephRemoveOsdInterpret {
            score: CephRemoveOsd {
                osd_deployment_name: "osd-1".to_string(),
                rook_ceph_namespace: "dummy_ns".to_string(),
            },
        };
        let raw_tree = json!({
            "nodes": [
                {"id": 1, "name": "osd.1", "type": "osd", "primary_affinity":"1"},
                {"id": 2, "name": "osd.2", "type": "osd", "crush_weight": 1.22344}
            ]
        });

        let parsed = interpret.get_osd_tree(raw_tree).unwrap();

        let expected = CephOsdTree {
            nodes: vec![bare_osd_node(1), bare_osd_node(2)],
        };
        assert_eq!(parsed, expected);
    }
}
|
||||
1
harmony/src/modules/storage/ceph/mod.rs
Normal file
1
harmony/src/modules/storage/ceph/mod.rs
Normal file
@@ -0,0 +1 @@
|
||||
pub mod ceph_remove_osd_score;
|
||||
1
harmony/src/modules/storage/mod.rs
Normal file
1
harmony/src/modules/storage/mod.rs
Normal file
@@ -0,0 +1 @@
|
||||
pub mod ceph;
|
||||
@@ -17,7 +17,7 @@ impl<T: Topology + TenantCredentialManager> Score<T> for TenantCredentialScore {
|
||||
}
|
||||
|
||||
fn name(&self) -> String {
|
||||
todo!()
|
||||
"TenantCredentialScore".into()
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -28,7 +28,7 @@ impl<T: Topology + TenantManager> Score<T> for TenantScore {
|
||||
}
|
||||
|
||||
fn name(&self) -> String {
|
||||
format!("{} TenantScore", self.config.name)
|
||||
format!("{} [TenantScore]", self.config.name)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -47,8 +47,8 @@ impl<T: Topology + TenantManager> Interpret<T> for TenantInterpret {
|
||||
topology.provision_tenant(&self.tenant_config).await?;
|
||||
|
||||
Ok(Outcome::success(format!(
|
||||
"Successfully provisioned tenant {} with id {}",
|
||||
self.tenant_config.name, self.tenant_config.id
|
||||
"Tenant provisioned with id '{}'",
|
||||
self.tenant_config.id
|
||||
)))
|
||||
}
|
||||
|
||||
|
||||
@@ -5,6 +5,10 @@ version.workspace = true
|
||||
readme.workspace = true
|
||||
license.workspace = true
|
||||
|
||||
[features]
|
||||
default = ["tui"]
|
||||
tui = ["dep:harmony_tui"]
|
||||
|
||||
[dependencies]
|
||||
assert_cmd = "2.0.17"
|
||||
clap = { version = "4.5.35", features = ["derive"] }
|
||||
@@ -19,7 +23,5 @@ lazy_static = "1.5.0"
|
||||
log.workspace = true
|
||||
indicatif-log-bridge = "0.2.3"
|
||||
|
||||
|
||||
[features]
|
||||
default = ["tui"]
|
||||
tui = ["dep:harmony_tui"]
|
||||
[dev-dependencies]
|
||||
harmony = { path = "../harmony", features = ["testing"] }
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user