Compare commits
15 Commits
feat/chang
...
fix/monito
| Author | SHA1 | Date | |
|---|---|---|---|
| c4dd0b0cf2 | |||
| b14b41d172 | |||
| 5e861cfc6d | |||
| 4fad077eb4 | |||
| d80561e326 | |||
| 621aed4903 | |||
| e68426cc3d | |||
| 0c1c8daf13 | |||
| 4b5e3a52a1 | |||
| c54936d19f | |||
| 699822af74 | |||
| 554c94f5a9 | |||
| 836db9e6b1 | |||
| bc6a41d40c | |||
| 8d446ec2e4 |
@@ -1,37 +1,45 @@
|
||||
use std::collections::HashMap;
|
||||
use std::{
|
||||
collections::HashMap,
|
||||
sync::{Arc, Mutex},
|
||||
};
|
||||
|
||||
use harmony::{
|
||||
inventory::Inventory,
|
||||
modules::{
|
||||
monitoring::{
|
||||
alert_channel::discord_alert_channel::DiscordWebhook,
|
||||
alert_rule::prometheus_alert_rule::AlertManagerRuleGroup,
|
||||
kube_prometheus::{
|
||||
helm_prometheus_alert_score::HelmPrometheusAlertingScore,
|
||||
types::{
|
||||
HTTPScheme, MatchExpression, Operator, Selector, ServiceMonitor,
|
||||
ServiceMonitorEndpoint,
|
||||
modules::monitoring::{
|
||||
alert_channel::discord_alert_channel::DiscordReceiver,
|
||||
alert_rule::{
|
||||
alerts::{
|
||||
infra::dell_server::{
|
||||
alert_global_storage_status_critical,
|
||||
alert_global_storage_status_non_recoverable,
|
||||
global_storage_status_degraded_non_critical,
|
||||
},
|
||||
k8s::pvc::high_pvc_fill_rate_over_two_days,
|
||||
},
|
||||
prometheus_alert_rule::AlertManagerRuleGroup,
|
||||
},
|
||||
prometheus::alerts::{
|
||||
infra::dell_server::{
|
||||
alert_global_storage_status_critical, alert_global_storage_status_non_recoverable,
|
||||
global_storage_status_degraded_non_critical,
|
||||
kube_prometheus::{
|
||||
helm::config::KubePrometheusConfig,
|
||||
kube_prometheus_alerting_score::KubePrometheusAlertingScore,
|
||||
types::{
|
||||
HTTPScheme, MatchExpression, Operator, Selector, ServiceMonitor,
|
||||
ServiceMonitorEndpoint,
|
||||
},
|
||||
k8s::pvc::high_pvc_fill_rate_over_two_days,
|
||||
},
|
||||
},
|
||||
topology::K8sAnywhereTopology,
|
||||
topology::{K8sAnywhereTopology, monitoring::AlertRoute},
|
||||
};
|
||||
use harmony_types::{k8s_name::K8sName, net::Url};
|
||||
|
||||
#[tokio::main]
|
||||
async fn main() {
|
||||
let discord_receiver = DiscordWebhook {
|
||||
name: K8sName("test-discord".to_string()),
|
||||
let receiver_name = "test-discord".to_string();
|
||||
let discord_receiver = DiscordReceiver {
|
||||
name: receiver_name.clone(),
|
||||
url: Url::Url(url::Url::parse("https://discord.doesnt.exist.com").unwrap()),
|
||||
selectors: vec![],
|
||||
route: AlertRoute {
|
||||
..AlertRoute::default(receiver_name)
|
||||
},
|
||||
};
|
||||
|
||||
let high_pvc_fill_rate_over_two_days_alert = high_pvc_fill_rate_over_two_days();
|
||||
@@ -70,10 +78,15 @@ async fn main() {
|
||||
endpoints: vec![service_monitor_endpoint],
|
||||
..Default::default()
|
||||
};
|
||||
let alerting_score = HelmPrometheusAlertingScore {
|
||||
|
||||
let config = Arc::new(Mutex::new(KubePrometheusConfig::new()));
|
||||
|
||||
let alerting_score = KubePrometheusAlertingScore {
|
||||
receivers: vec![Box::new(discord_receiver)],
|
||||
rules: vec![Box::new(additional_rules), Box::new(additional_rules2)],
|
||||
service_monitors: vec![service_monitor],
|
||||
scrape_targets: None,
|
||||
config,
|
||||
};
|
||||
|
||||
harmony_cli::run(
|
||||
|
||||
@@ -1,24 +1,32 @@
|
||||
use std::{collections::HashMap, str::FromStr};
|
||||
use std::{
|
||||
collections::HashMap,
|
||||
str::FromStr,
|
||||
sync::{Arc, Mutex},
|
||||
};
|
||||
|
||||
use harmony::{
|
||||
inventory::Inventory,
|
||||
modules::{
|
||||
monitoring::{
|
||||
alert_channel::discord_alert_channel::DiscordWebhook,
|
||||
alert_rule::prometheus_alert_rule::AlertManagerRuleGroup,
|
||||
alert_channel::discord_alert_channel::DiscordReceiver,
|
||||
alert_rule::{
|
||||
alerts::k8s::pvc::high_pvc_fill_rate_over_two_days,
|
||||
prometheus_alert_rule::AlertManagerRuleGroup,
|
||||
},
|
||||
kube_prometheus::{
|
||||
helm_prometheus_alert_score::HelmPrometheusAlertingScore,
|
||||
helm::config::KubePrometheusConfig,
|
||||
kube_prometheus_alerting_score::KubePrometheusAlertingScore,
|
||||
types::{
|
||||
HTTPScheme, MatchExpression, Operator, Selector, ServiceMonitor,
|
||||
ServiceMonitorEndpoint,
|
||||
},
|
||||
},
|
||||
},
|
||||
prometheus::alerts::k8s::pvc::high_pvc_fill_rate_over_two_days,
|
||||
tenant::TenantScore,
|
||||
},
|
||||
topology::{
|
||||
K8sAnywhereTopology,
|
||||
monitoring::AlertRoute,
|
||||
tenant::{ResourceLimits, TenantConfig, TenantNetworkPolicy},
|
||||
},
|
||||
};
|
||||
@@ -42,10 +50,13 @@ async fn main() {
|
||||
},
|
||||
};
|
||||
|
||||
let discord_receiver = DiscordWebhook {
|
||||
name: K8sName("test-discord".to_string()),
|
||||
let receiver_name = "test-discord".to_string();
|
||||
let discord_receiver = DiscordReceiver {
|
||||
name: receiver_name.clone(),
|
||||
url: Url::Url(url::Url::parse("https://discord.doesnt.exist.com").unwrap()),
|
||||
selectors: vec![],
|
||||
route: AlertRoute {
|
||||
..AlertRoute::default(receiver_name)
|
||||
},
|
||||
};
|
||||
|
||||
let high_pvc_fill_rate_over_two_days_alert = high_pvc_fill_rate_over_two_days();
|
||||
@@ -74,10 +85,14 @@ async fn main() {
|
||||
..Default::default()
|
||||
};
|
||||
|
||||
let alerting_score = HelmPrometheusAlertingScore {
|
||||
let config = Arc::new(Mutex::new(KubePrometheusConfig::new()));
|
||||
|
||||
let alerting_score = KubePrometheusAlertingScore {
|
||||
receivers: vec![Box::new(discord_receiver)],
|
||||
rules: vec![Box::new(additional_rules)],
|
||||
service_monitors: vec![service_monitor],
|
||||
scrape_targets: None,
|
||||
config,
|
||||
};
|
||||
|
||||
harmony_cli::run(
|
||||
|
||||
@@ -1,35 +1,64 @@
|
||||
use std::collections::HashMap;
|
||||
|
||||
use harmony::{
|
||||
inventory::Inventory,
|
||||
modules::monitoring::{
|
||||
alert_channel::discord_alert_channel::DiscordWebhook,
|
||||
okd::cluster_monitoring::OpenshiftClusterAlertScore,
|
||||
alert_channel::discord_alert_channel::DiscordReceiver,
|
||||
alert_rule::{
|
||||
alerts::{
|
||||
infra::opnsense::high_http_error_rate, k8s::pvc::high_pvc_fill_rate_over_two_days,
|
||||
},
|
||||
prometheus_alert_rule::AlertManagerRuleGroup,
|
||||
},
|
||||
okd::openshift_cluster_alerting_score::OpenshiftClusterAlertScore,
|
||||
scrape_target::prometheus_node_exporter::PrometheusNodeExporter,
|
||||
},
|
||||
topology::{
|
||||
K8sAnywhereTopology,
|
||||
monitoring::{AlertMatcher, AlertRoute, MatchOp},
|
||||
},
|
||||
topology::K8sAnywhereTopology,
|
||||
};
|
||||
use harmony_macros::hurl;
|
||||
use harmony_types::k8s_name::K8sName;
|
||||
|
||||
use harmony_macros::{hurl, ip};
|
||||
|
||||
#[tokio::main]
|
||||
async fn main() {
|
||||
let mut sel = HashMap::new();
|
||||
sel.insert(
|
||||
"openshift_io_alert_source".to_string(),
|
||||
"platform".to_string(),
|
||||
);
|
||||
let mut sel2 = HashMap::new();
|
||||
sel2.insert("openshift_io_alert_source".to_string(), "".to_string());
|
||||
let selectors = vec![sel, sel2];
|
||||
let platform_matcher = AlertMatcher {
|
||||
label: "prometheus".to_string(),
|
||||
operator: MatchOp::Eq,
|
||||
value: "openshift-monitoring/k8s".to_string(),
|
||||
};
|
||||
let severity = AlertMatcher {
|
||||
label: "severity".to_string(),
|
||||
operator: MatchOp::Eq,
|
||||
value: "critical".to_string(),
|
||||
};
|
||||
|
||||
let high_http_error_rate = high_http_error_rate();
|
||||
|
||||
let additional_rules = AlertManagerRuleGroup::new("", vec![high_http_error_rate]);
|
||||
|
||||
let scrape_target = PrometheusNodeExporter {
|
||||
job_name: "firewall".to_string(),
|
||||
metrics_path: "/metrics".to_string(),
|
||||
listen_address: ip!("127.0.0.1"),
|
||||
port: 9100,
|
||||
..Default::default()
|
||||
};
|
||||
|
||||
harmony_cli::run(
|
||||
Inventory::autoload(),
|
||||
K8sAnywhereTopology::from_env(),
|
||||
vec![Box::new(OpenshiftClusterAlertScore {
|
||||
receivers: vec![Box::new(DiscordWebhook {
|
||||
name: K8sName("wills-discord-webhook-example".to_string()),
|
||||
url: hurl!("https://something.io"),
|
||||
selectors: selectors,
|
||||
receivers: vec![Box::new(DiscordReceiver {
|
||||
name: "crit-wills-discord-channel-example".to_string(),
|
||||
url: hurl!("https://test.io"),
|
||||
route: AlertRoute {
|
||||
matchers: vec![severity],
|
||||
..AlertRoute::default("crit-wills-discord-channel-example".to_string())
|
||||
},
|
||||
})],
|
||||
sender: harmony::modules::monitoring::okd::OpenshiftClusterAlertSender,
|
||||
rules: vec![Box::new(additional_rules)],
|
||||
scrape_targets: Some(vec![Box::new(scrape_target)]),
|
||||
})],
|
||||
None,
|
||||
)
|
||||
|
||||
@@ -6,9 +6,9 @@ use harmony::{
|
||||
application::{
|
||||
ApplicationScore, RustWebFramework, RustWebapp, features::rhob_monitoring::Monitoring,
|
||||
},
|
||||
monitoring::alert_channel::discord_alert_channel::DiscordWebhook,
|
||||
monitoring::alert_channel::discord_alert_channel::DiscordReceiver,
|
||||
},
|
||||
topology::K8sAnywhereTopology,
|
||||
topology::{K8sAnywhereTopology, monitoring::AlertRoute},
|
||||
};
|
||||
use harmony_types::{k8s_name::K8sName, net::Url};
|
||||
|
||||
@@ -22,18 +22,21 @@ async fn main() {
|
||||
service_port: 3000,
|
||||
});
|
||||
|
||||
let discord_receiver = DiscordWebhook {
|
||||
name: K8sName("test-discord".to_string()),
|
||||
let receiver_name = "test-discord".to_string();
|
||||
let discord_receiver = DiscordReceiver {
|
||||
name: receiver_name.clone(),
|
||||
url: Url::Url(url::Url::parse("https://discord.doesnt.exist.com").unwrap()),
|
||||
selectors: vec![],
|
||||
route: AlertRoute {
|
||||
..AlertRoute::default(receiver_name)
|
||||
},
|
||||
};
|
||||
|
||||
let app = ApplicationScore {
|
||||
features: vec![
|
||||
Box::new(Monitoring {
|
||||
application: application.clone(),
|
||||
alert_receiver: vec![Box::new(discord_receiver)],
|
||||
}),
|
||||
// Box::new(Monitoring {
|
||||
// application: application.clone(),
|
||||
// alert_receiver: vec![Box::new(discord_receiver)],
|
||||
// }),
|
||||
// TODO add backups, multisite ha, etc
|
||||
],
|
||||
application,
|
||||
|
||||
@@ -8,13 +8,13 @@ use harmony::{
|
||||
features::{Monitoring, PackagingDeployment},
|
||||
},
|
||||
monitoring::alert_channel::{
|
||||
discord_alert_channel::DiscordWebhook, webhook_receiver::WebhookReceiver,
|
||||
discord_alert_channel::DiscordReceiver, webhook_receiver::WebhookReceiver,
|
||||
},
|
||||
},
|
||||
topology::K8sAnywhereTopology,
|
||||
topology::{K8sAnywhereTopology, monitoring::AlertRoute},
|
||||
};
|
||||
use harmony_macros::hurl;
|
||||
use harmony_types::k8s_name::K8sName;
|
||||
use harmony_types::{k8s_name::K8sName, net::Url};
|
||||
|
||||
#[tokio::main]
|
||||
async fn main() {
|
||||
@@ -26,10 +26,13 @@ async fn main() {
|
||||
service_port: 3000,
|
||||
});
|
||||
|
||||
let discord_receiver = DiscordWebhook {
|
||||
name: K8sName("test-discord".to_string()),
|
||||
url: hurl!("https://discord.doesnt.exist.com"),
|
||||
selectors: vec![],
|
||||
let receiver_name = "test-discord".to_string();
|
||||
let discord_receiver = DiscordReceiver {
|
||||
name: receiver_name.clone(),
|
||||
url: Url::Url(url::Url::parse("https://discord.doesnt.exist.com").unwrap()),
|
||||
route: AlertRoute {
|
||||
..AlertRoute::default(receiver_name)
|
||||
},
|
||||
};
|
||||
|
||||
let webhook_receiver = WebhookReceiver {
|
||||
@@ -42,10 +45,10 @@ async fn main() {
|
||||
Box::new(PackagingDeployment {
|
||||
application: application.clone(),
|
||||
}),
|
||||
Box::new(Monitoring {
|
||||
application: application.clone(),
|
||||
alert_receiver: vec![Box::new(discord_receiver), Box::new(webhook_receiver)],
|
||||
}),
|
||||
// Box::new(Monitoring {
|
||||
// application: application.clone(),
|
||||
// alert_receiver: vec![Box::new(discord_receiver), Box::new(webhook_receiver)],
|
||||
// }),
|
||||
// TODO add backups, multisite ha, etc
|
||||
],
|
||||
application,
|
||||
|
||||
@@ -1,11 +1,8 @@
|
||||
use harmony::{
|
||||
inventory::Inventory,
|
||||
modules::{
|
||||
application::{
|
||||
ApplicationScore, RustWebFramework, RustWebapp,
|
||||
features::{Monitoring, PackagingDeployment},
|
||||
},
|
||||
monitoring::alert_channel::discord_alert_channel::DiscordWebhook,
|
||||
modules::application::{
|
||||
ApplicationScore, RustWebFramework, RustWebapp,
|
||||
features::{Monitoring, PackagingDeployment},
|
||||
},
|
||||
topology::K8sAnywhereTopology,
|
||||
};
|
||||
@@ -30,14 +27,14 @@ async fn main() {
|
||||
Box::new(PackagingDeployment {
|
||||
application: application.clone(),
|
||||
}),
|
||||
Box::new(Monitoring {
|
||||
application: application.clone(),
|
||||
alert_receiver: vec![Box::new(DiscordWebhook {
|
||||
name: K8sName("test-discord".to_string()),
|
||||
url: hurl!("https://discord.doesnt.exist.com"),
|
||||
selectors: vec![],
|
||||
})],
|
||||
}),
|
||||
// Box::new(Monitoring {
|
||||
// application: application.clone(),
|
||||
// alert_receiver: vec![Box::new(DiscordWebhook {
|
||||
// name: K8sName("test-discord".to_string()),
|
||||
// url: hurl!("https://discord.doesnt.exist.com"),
|
||||
// selectors: vec![],
|
||||
// })],
|
||||
// }),
|
||||
],
|
||||
application,
|
||||
};
|
||||
|
||||
@@ -1,13 +1,12 @@
|
||||
use std::{collections::BTreeMap, process::Command, sync::Arc, time::Duration};
|
||||
use std::{collections::BTreeMap, process::Command, sync::Arc};
|
||||
|
||||
use async_trait::async_trait;
|
||||
use base64::{Engine, engine::general_purpose};
|
||||
use harmony_types::rfc1123::Rfc1123Name;
|
||||
use k8s_openapi::api::{
|
||||
core::v1::{Pod, Secret},
|
||||
rbac::v1::{ClusterRoleBinding, RoleRef, Subject},
|
||||
};
|
||||
use kube::api::{DynamicObject, GroupVersionKind, ObjectMeta};
|
||||
use kube::api::{GroupVersionKind, ObjectMeta};
|
||||
use log::{debug, info, trace, warn};
|
||||
use serde::Serialize;
|
||||
use tokio::sync::OnceCell;
|
||||
@@ -28,28 +27,7 @@ use crate::{
|
||||
score_cert_management::CertificateManagementScore,
|
||||
},
|
||||
k3d::K3DInstallationScore,
|
||||
k8s::ingress::{K8sIngressScore, PathType},
|
||||
monitoring::{
|
||||
grafana::{grafana::Grafana, helm::helm_grafana::grafana_helm_chart_score},
|
||||
kube_prometheus::crd::{
|
||||
crd_alertmanager_config::CRDPrometheus,
|
||||
crd_grafana::{
|
||||
Grafana as GrafanaCRD, GrafanaCom, GrafanaDashboard,
|
||||
GrafanaDashboardDatasource, GrafanaDashboardSpec, GrafanaDatasource,
|
||||
GrafanaDatasourceConfig, GrafanaDatasourceJsonData,
|
||||
GrafanaDatasourceSecureJsonData, GrafanaDatasourceSpec, GrafanaSpec,
|
||||
},
|
||||
crd_prometheuses::LabelSelector,
|
||||
prometheus_operator::prometheus_operator_helm_chart_score,
|
||||
rhob_alertmanager_config::RHOBObservability,
|
||||
service_monitor::ServiceMonitor,
|
||||
},
|
||||
},
|
||||
okd::{crd::ingresses_config::Ingress as IngressResource, route::OKDTlsPassthroughScore},
|
||||
prometheus::{
|
||||
k8s_prometheus_alerting_score::K8sPrometheusCRDAlertingScore,
|
||||
prometheus::PrometheusMonitoring, rhob_alerting_score::RHOBAlertingScore,
|
||||
},
|
||||
},
|
||||
score::Score,
|
||||
topology::{TlsRoute, TlsRouter, ingress::Ingress},
|
||||
@@ -59,7 +37,6 @@ use super::super::{
|
||||
DeploymentTarget, HelmCommand, K8sclient, MultiTargetTopology, PreparationError,
|
||||
PreparationOutcome, Topology,
|
||||
k8s::K8sClient,
|
||||
oberservability::monitoring::AlertReceiver,
|
||||
tenant::{
|
||||
TenantConfig, TenantManager,
|
||||
k8s::K8sTenantManager,
|
||||
@@ -173,216 +150,6 @@ impl TlsRouter for K8sAnywhereTopology {
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl Grafana for K8sAnywhereTopology {
|
||||
async fn ensure_grafana_operator(
|
||||
&self,
|
||||
inventory: &Inventory,
|
||||
) -> Result<PreparationOutcome, PreparationError> {
|
||||
debug!("ensure grafana operator");
|
||||
let client = self.k8s_client().await.unwrap();
|
||||
let grafana_gvk = GroupVersionKind {
|
||||
group: "grafana.integreatly.org".to_string(),
|
||||
version: "v1beta1".to_string(),
|
||||
kind: "Grafana".to_string(),
|
||||
};
|
||||
let name = "grafanas.grafana.integreatly.org";
|
||||
let ns = "grafana";
|
||||
|
||||
let grafana_crd = client
|
||||
.get_resource_json_value(name, Some(ns), &grafana_gvk)
|
||||
.await;
|
||||
match grafana_crd {
|
||||
Ok(_) => {
|
||||
return Ok(PreparationOutcome::Success {
|
||||
details: "Found grafana CRDs in cluster".to_string(),
|
||||
});
|
||||
}
|
||||
|
||||
Err(_) => {
|
||||
return self
|
||||
.install_grafana_operator(inventory, Some("grafana"))
|
||||
.await;
|
||||
}
|
||||
};
|
||||
}
|
||||
async fn install_grafana(&self) -> Result<PreparationOutcome, PreparationError> {
|
||||
let ns = "grafana";
|
||||
|
||||
let mut label = BTreeMap::new();
|
||||
|
||||
label.insert("dashboards".to_string(), "grafana".to_string());
|
||||
|
||||
let label_selector = LabelSelector {
|
||||
match_labels: label.clone(),
|
||||
match_expressions: vec![],
|
||||
};
|
||||
|
||||
let client = self.k8s_client().await?;
|
||||
|
||||
let grafana = self.build_grafana(ns, &label);
|
||||
|
||||
client.apply(&grafana, Some(ns)).await?;
|
||||
//TODO change this to a ensure ready or something better than just a timeout
|
||||
client
|
||||
.wait_until_deployment_ready(
|
||||
"grafana-grafana-deployment",
|
||||
Some("grafana"),
|
||||
Some(Duration::from_secs(30)),
|
||||
)
|
||||
.await?;
|
||||
|
||||
let sa_name = "grafana-grafana-sa";
|
||||
let token_secret_name = "grafana-sa-token-secret";
|
||||
|
||||
let sa_token_secret = self.build_sa_token_secret(token_secret_name, sa_name, ns);
|
||||
|
||||
client.apply(&sa_token_secret, Some(ns)).await?;
|
||||
let secret_gvk = GroupVersionKind {
|
||||
group: "".to_string(),
|
||||
version: "v1".to_string(),
|
||||
kind: "Secret".to_string(),
|
||||
};
|
||||
|
||||
let secret = client
|
||||
.get_resource_json_value(token_secret_name, Some(ns), &secret_gvk)
|
||||
.await?;
|
||||
|
||||
let token = format!(
|
||||
"Bearer {}",
|
||||
self.extract_and_normalize_token(&secret).unwrap()
|
||||
);
|
||||
|
||||
debug!("creating grafana clusterrole binding");
|
||||
|
||||
let clusterrolebinding =
|
||||
self.build_cluster_rolebinding(sa_name, "cluster-monitoring-view", ns);
|
||||
|
||||
client.apply(&clusterrolebinding, Some(ns)).await?;
|
||||
|
||||
debug!("creating grafana datasource crd");
|
||||
|
||||
let thanos_url = format!(
|
||||
"https://{}",
|
||||
self.get_domain("thanos-querier-openshift-monitoring")
|
||||
.await
|
||||
.unwrap()
|
||||
);
|
||||
|
||||
let thanos_openshift_datasource = self.build_grafana_datasource(
|
||||
"thanos-openshift-monitoring",
|
||||
ns,
|
||||
&label_selector,
|
||||
&thanos_url,
|
||||
&token,
|
||||
);
|
||||
|
||||
client.apply(&thanos_openshift_datasource, Some(ns)).await?;
|
||||
|
||||
debug!("creating grafana dashboard crd");
|
||||
let dashboard = self.build_grafana_dashboard(ns, &label_selector);
|
||||
|
||||
client.apply(&dashboard, Some(ns)).await?;
|
||||
debug!("creating grafana ingress");
|
||||
let grafana_ingress = self.build_grafana_ingress(ns).await;
|
||||
|
||||
grafana_ingress
|
||||
.interpret(&Inventory::empty(), self)
|
||||
.await
|
||||
.map_err(|e| PreparationError::new(e.to_string()))?;
|
||||
|
||||
Ok(PreparationOutcome::Success {
|
||||
details: "Installed grafana composants".to_string(),
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl PrometheusMonitoring<CRDPrometheus> for K8sAnywhereTopology {
|
||||
async fn install_prometheus(
|
||||
&self,
|
||||
sender: &CRDPrometheus,
|
||||
_inventory: &Inventory,
|
||||
_receivers: Option<Vec<Box<dyn AlertReceiver<CRDPrometheus>>>>,
|
||||
) -> Result<PreparationOutcome, PreparationError> {
|
||||
let client = self.k8s_client().await?;
|
||||
|
||||
for monitor in sender.service_monitor.iter() {
|
||||
client
|
||||
.apply(monitor, Some(&sender.namespace))
|
||||
.await
|
||||
.map_err(|e| PreparationError::new(e.to_string()))?;
|
||||
}
|
||||
Ok(PreparationOutcome::Success {
|
||||
details: "successfuly installed prometheus components".to_string(),
|
||||
})
|
||||
}
|
||||
|
||||
async fn ensure_prometheus_operator(
|
||||
&self,
|
||||
sender: &CRDPrometheus,
|
||||
_inventory: &Inventory,
|
||||
) -> Result<PreparationOutcome, PreparationError> {
|
||||
let po_result = self.ensure_prometheus_operator(sender).await?;
|
||||
|
||||
match po_result {
|
||||
PreparationOutcome::Success { details: _ } => {
|
||||
debug!("Detected prometheus crds operator present in cluster.");
|
||||
return Ok(po_result);
|
||||
}
|
||||
PreparationOutcome::Noop => {
|
||||
debug!("Skipping Prometheus CR installation due to missing operator.");
|
||||
return Ok(po_result);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl PrometheusMonitoring<RHOBObservability> for K8sAnywhereTopology {
|
||||
async fn install_prometheus(
|
||||
&self,
|
||||
sender: &RHOBObservability,
|
||||
inventory: &Inventory,
|
||||
receivers: Option<Vec<Box<dyn AlertReceiver<RHOBObservability>>>>,
|
||||
) -> Result<PreparationOutcome, PreparationError> {
|
||||
let po_result = self.ensure_cluster_observability_operator(sender).await?;
|
||||
|
||||
if po_result == PreparationOutcome::Noop {
|
||||
debug!("Skipping Prometheus CR installation due to missing operator.");
|
||||
return Ok(po_result);
|
||||
}
|
||||
|
||||
let result = self
|
||||
.get_cluster_observability_operator_prometheus_application_score(
|
||||
sender.clone(),
|
||||
receivers,
|
||||
)
|
||||
.await
|
||||
.interpret(inventory, self)
|
||||
.await;
|
||||
|
||||
match result {
|
||||
Ok(outcome) => match outcome.status {
|
||||
InterpretStatus::SUCCESS => Ok(PreparationOutcome::Success {
|
||||
details: outcome.message,
|
||||
}),
|
||||
InterpretStatus::NOOP => Ok(PreparationOutcome::Noop),
|
||||
_ => Err(PreparationError::new(outcome.message)),
|
||||
},
|
||||
Err(err) => Err(PreparationError::new(err.to_string())),
|
||||
}
|
||||
}
|
||||
|
||||
async fn ensure_prometheus_operator(
|
||||
&self,
|
||||
sender: &RHOBObservability,
|
||||
inventory: &Inventory,
|
||||
) -> Result<PreparationOutcome, PreparationError> {
|
||||
todo!()
|
||||
}
|
||||
}
|
||||
|
||||
impl Serialize for K8sAnywhereTopology {
|
||||
fn serialize<S>(&self, _serializer: S) -> Result<S::Ok, S::Error>
|
||||
where
|
||||
@@ -587,23 +354,6 @@ impl K8sAnywhereTopology {
|
||||
}
|
||||
}
|
||||
|
||||
fn extract_and_normalize_token(&self, secret: &DynamicObject) -> Option<String> {
|
||||
let token_b64 = secret
|
||||
.data
|
||||
.get("token")
|
||||
.or_else(|| secret.data.get("data").and_then(|d| d.get("token")))
|
||||
.and_then(|v| v.as_str())?;
|
||||
|
||||
let bytes = general_purpose::STANDARD.decode(token_b64).ok()?;
|
||||
|
||||
let s = String::from_utf8(bytes).ok()?;
|
||||
|
||||
let cleaned = s
|
||||
.trim_matches(|c: char| c.is_whitespace() || c == '\0')
|
||||
.to_string();
|
||||
Some(cleaned)
|
||||
}
|
||||
|
||||
pub async fn get_k8s_distribution(&self) -> Result<KubernetesDistribution, PreparationError> {
|
||||
self.k8s_client()
|
||||
.await?
|
||||
@@ -663,141 +413,6 @@ impl K8sAnywhereTopology {
|
||||
}
|
||||
}
|
||||
|
||||
fn build_grafana_datasource(
|
||||
&self,
|
||||
name: &str,
|
||||
ns: &str,
|
||||
label_selector: &LabelSelector,
|
||||
url: &str,
|
||||
token: &str,
|
||||
) -> GrafanaDatasource {
|
||||
let mut json_data = BTreeMap::new();
|
||||
json_data.insert("timeInterval".to_string(), "5s".to_string());
|
||||
|
||||
GrafanaDatasource {
|
||||
metadata: ObjectMeta {
|
||||
name: Some(name.to_string()),
|
||||
namespace: Some(ns.to_string()),
|
||||
..Default::default()
|
||||
},
|
||||
spec: GrafanaDatasourceSpec {
|
||||
instance_selector: label_selector.clone(),
|
||||
allow_cross_namespace_import: Some(true),
|
||||
values_from: None,
|
||||
datasource: GrafanaDatasourceConfig {
|
||||
access: "proxy".to_string(),
|
||||
name: name.to_string(),
|
||||
r#type: "prometheus".to_string(),
|
||||
url: url.to_string(),
|
||||
database: None,
|
||||
json_data: Some(GrafanaDatasourceJsonData {
|
||||
time_interval: Some("60s".to_string()),
|
||||
http_header_name1: Some("Authorization".to_string()),
|
||||
tls_skip_verify: Some(true),
|
||||
oauth_pass_thru: Some(true),
|
||||
}),
|
||||
secure_json_data: Some(GrafanaDatasourceSecureJsonData {
|
||||
http_header_value1: Some(format!("Bearer {token}")),
|
||||
}),
|
||||
is_default: Some(false),
|
||||
editable: Some(true),
|
||||
},
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
fn build_grafana_dashboard(
|
||||
&self,
|
||||
ns: &str,
|
||||
label_selector: &LabelSelector,
|
||||
) -> GrafanaDashboard {
|
||||
let graf_dashboard = GrafanaDashboard {
|
||||
metadata: ObjectMeta {
|
||||
name: Some(format!("grafana-dashboard-{}", ns)),
|
||||
namespace: Some(ns.to_string()),
|
||||
..Default::default()
|
||||
},
|
||||
spec: GrafanaDashboardSpec {
|
||||
resync_period: Some("30s".to_string()),
|
||||
instance_selector: label_selector.clone(),
|
||||
datasources: Some(vec![GrafanaDashboardDatasource {
|
||||
input_name: "DS_PROMETHEUS".to_string(),
|
||||
datasource_name: "thanos-openshift-monitoring".to_string(),
|
||||
}]),
|
||||
json: None,
|
||||
grafana_com: Some(GrafanaCom {
|
||||
id: 17406,
|
||||
revision: None,
|
||||
}),
|
||||
},
|
||||
};
|
||||
graf_dashboard
|
||||
}
|
||||
|
||||
fn build_grafana(&self, ns: &str, labels: &BTreeMap<String, String>) -> GrafanaCRD {
|
||||
let grafana = GrafanaCRD {
|
||||
metadata: ObjectMeta {
|
||||
name: Some(format!("grafana-{}", ns)),
|
||||
namespace: Some(ns.to_string()),
|
||||
labels: Some(labels.clone()),
|
||||
..Default::default()
|
||||
},
|
||||
spec: GrafanaSpec {
|
||||
config: None,
|
||||
admin_user: None,
|
||||
admin_password: None,
|
||||
ingress: None,
|
||||
persistence: None,
|
||||
resources: None,
|
||||
},
|
||||
};
|
||||
grafana
|
||||
}
|
||||
|
||||
async fn build_grafana_ingress(&self, ns: &str) -> K8sIngressScore {
|
||||
let domain = self.get_domain(&format!("grafana-{}", ns)).await.unwrap();
|
||||
let name = format!("{}-grafana", ns);
|
||||
let backend_service = format!("grafana-{}-service", ns);
|
||||
|
||||
K8sIngressScore {
|
||||
name: fqdn::fqdn!(&name),
|
||||
host: fqdn::fqdn!(&domain),
|
||||
backend_service: fqdn::fqdn!(&backend_service),
|
||||
port: 3000,
|
||||
path: Some("/".to_string()),
|
||||
path_type: Some(PathType::Prefix),
|
||||
namespace: Some(fqdn::fqdn!(&ns)),
|
||||
ingress_class_name: Some("openshift-default".to_string()),
|
||||
}
|
||||
}
|
||||
|
||||
async fn get_cluster_observability_operator_prometheus_application_score(
|
||||
&self,
|
||||
sender: RHOBObservability,
|
||||
receivers: Option<Vec<Box<dyn AlertReceiver<RHOBObservability>>>>,
|
||||
) -> RHOBAlertingScore {
|
||||
RHOBAlertingScore {
|
||||
sender,
|
||||
receivers: receivers.unwrap_or_default(),
|
||||
service_monitors: vec![],
|
||||
prometheus_rules: vec![],
|
||||
}
|
||||
}
|
||||
|
||||
async fn get_k8s_prometheus_application_score(
|
||||
&self,
|
||||
sender: CRDPrometheus,
|
||||
receivers: Option<Vec<Box<dyn AlertReceiver<CRDPrometheus>>>>,
|
||||
service_monitors: Option<Vec<ServiceMonitor>>,
|
||||
) -> K8sPrometheusCRDAlertingScore {
|
||||
return K8sPrometheusCRDAlertingScore {
|
||||
sender,
|
||||
receivers: receivers.unwrap_or_default(),
|
||||
service_monitors: service_monitors.unwrap_or_default(),
|
||||
prometheus_rules: vec![],
|
||||
};
|
||||
}
|
||||
|
||||
async fn openshift_ingress_operator_available(&self) -> Result<(), PreparationError> {
|
||||
let client = self.k8s_client().await?;
|
||||
let gvk = GroupVersionKind {
|
||||
@@ -963,137 +578,6 @@ impl K8sAnywhereTopology {
|
||||
)),
|
||||
}
|
||||
}
|
||||
|
||||
async fn ensure_cluster_observability_operator(
|
||||
&self,
|
||||
sender: &RHOBObservability,
|
||||
) -> Result<PreparationOutcome, PreparationError> {
|
||||
let status = Command::new("sh")
|
||||
.args(["-c", "kubectl get crd -A | grep -i rhobs"])
|
||||
.status()
|
||||
.map_err(|e| PreparationError::new(format!("could not connect to cluster: {}", e)))?;
|
||||
|
||||
if !status.success() {
|
||||
if let Some(Some(k8s_state)) = self.k8s_state.get() {
|
||||
match k8s_state.source {
|
||||
K8sSource::LocalK3d => {
|
||||
warn!(
|
||||
"Installing observability operator is not supported on LocalK3d source"
|
||||
);
|
||||
return Ok(PreparationOutcome::Noop);
|
||||
debug!("installing cluster observability operator");
|
||||
todo!();
|
||||
let op_score =
|
||||
prometheus_operator_helm_chart_score(sender.namespace.clone());
|
||||
let result = op_score.interpret(&Inventory::empty(), self).await;
|
||||
|
||||
return match result {
|
||||
Ok(outcome) => match outcome.status {
|
||||
InterpretStatus::SUCCESS => Ok(PreparationOutcome::Success {
|
||||
details: "installed cluster observability operator".into(),
|
||||
}),
|
||||
InterpretStatus::NOOP => Ok(PreparationOutcome::Noop),
|
||||
_ => Err(PreparationError::new(
|
||||
"failed to install cluster observability operator (unknown error)".into(),
|
||||
)),
|
||||
},
|
||||
Err(err) => Err(PreparationError::new(err.to_string())),
|
||||
};
|
||||
}
|
||||
K8sSource::Kubeconfig => {
|
||||
debug!(
|
||||
"unable to install cluster observability operator, contact cluster admin"
|
||||
);
|
||||
return Ok(PreparationOutcome::Noop);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
warn!(
|
||||
"Unable to detect k8s_state. Skipping Cluster Observability Operator install."
|
||||
);
|
||||
return Ok(PreparationOutcome::Noop);
|
||||
}
|
||||
}
|
||||
|
||||
debug!("Cluster Observability Operator is already present, skipping install");
|
||||
|
||||
Ok(PreparationOutcome::Success {
|
||||
details: "cluster observability operator present in cluster".into(),
|
||||
})
|
||||
}
|
||||
|
||||
async fn ensure_prometheus_operator(
|
||||
&self,
|
||||
sender: &CRDPrometheus,
|
||||
) -> Result<PreparationOutcome, PreparationError> {
|
||||
let status = Command::new("sh")
|
||||
.args(["-c", "kubectl get crd -A | grep -i prometheuses"])
|
||||
.status()
|
||||
.map_err(|e| PreparationError::new(format!("could not connect to cluster: {}", e)))?;
|
||||
|
||||
if !status.success() {
|
||||
if let Some(Some(k8s_state)) = self.k8s_state.get() {
|
||||
match k8s_state.source {
|
||||
K8sSource::LocalK3d => {
|
||||
debug!("installing prometheus operator");
|
||||
let op_score =
|
||||
prometheus_operator_helm_chart_score(sender.namespace.clone());
|
||||
let result = op_score.interpret(&Inventory::empty(), self).await;
|
||||
|
||||
return match result {
|
||||
Ok(outcome) => match outcome.status {
|
||||
InterpretStatus::SUCCESS => Ok(PreparationOutcome::Success {
|
||||
details: "installed prometheus operator".into(),
|
||||
}),
|
||||
InterpretStatus::NOOP => Ok(PreparationOutcome::Noop),
|
||||
_ => Err(PreparationError::new(
|
||||
"failed to install prometheus operator (unknown error)".into(),
|
||||
)),
|
||||
},
|
||||
Err(err) => Err(PreparationError::new(err.to_string())),
|
||||
};
|
||||
}
|
||||
K8sSource::Kubeconfig => {
|
||||
debug!("unable to install prometheus operator, contact cluster admin");
|
||||
return Ok(PreparationOutcome::Noop);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
warn!("Unable to detect k8s_state. Skipping Prometheus Operator install.");
|
||||
return Ok(PreparationOutcome::Noop);
|
||||
}
|
||||
}
|
||||
|
||||
debug!("Prometheus operator is already present, skipping install");
|
||||
|
||||
Ok(PreparationOutcome::Success {
|
||||
details: "prometheus operator present in cluster".into(),
|
||||
})
|
||||
}
|
||||
|
||||
async fn install_grafana_operator(
|
||||
&self,
|
||||
inventory: &Inventory,
|
||||
ns: Option<&str>,
|
||||
) -> Result<PreparationOutcome, PreparationError> {
|
||||
let namespace = ns.unwrap_or("grafana");
|
||||
info!("installing grafana operator in ns {namespace}");
|
||||
let tenant = self.get_k8s_tenant_manager()?.get_tenant_config().await;
|
||||
let mut namespace_scope = false;
|
||||
if tenant.is_some() {
|
||||
namespace_scope = true;
|
||||
}
|
||||
let _grafana_operator_score = grafana_helm_chart_score(namespace, namespace_scope)
|
||||
.interpret(inventory, self)
|
||||
.await
|
||||
.map_err(|e| PreparationError::new(e.to_string()));
|
||||
Ok(PreparationOutcome::Success {
|
||||
details: format!(
|
||||
"Successfully installed grafana operator in ns {}",
|
||||
ns.unwrap()
|
||||
),
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug)]
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
mod k8s_anywhere;
|
||||
pub mod nats;
|
||||
pub mod observability;
|
||||
mod postgres;
|
||||
pub use k8s_anywhere::*;
|
||||
|
||||
@@ -0,0 +1,147 @@
|
||||
use async_trait::async_trait;
|
||||
|
||||
use crate::{
|
||||
inventory::Inventory,
|
||||
modules::monitoring::grafana::{
|
||||
grafana::Grafana,
|
||||
k8s::{
|
||||
score_ensure_grafana_ready::GrafanaK8sEnsureReadyScore,
|
||||
score_grafana_alert_receiver::GrafanaK8sReceiverScore,
|
||||
score_grafana_datasource::GrafanaK8sDatasourceScore,
|
||||
score_grafana_rule::GrafanaK8sRuleScore, score_install_grafana::GrafanaK8sInstallScore,
|
||||
},
|
||||
},
|
||||
score::Score,
|
||||
topology::{
|
||||
K8sAnywhereTopology, PreparationError, PreparationOutcome,
|
||||
monitoring::{AlertReceiver, AlertRule, Observability, ScrapeTarget},
|
||||
},
|
||||
};
|
||||
|
||||
#[async_trait]
|
||||
impl Observability<Grafana> for K8sAnywhereTopology {
|
||||
async fn install_alert_sender(
|
||||
&self,
|
||||
sender: &Grafana,
|
||||
inventory: &Inventory,
|
||||
) -> Result<PreparationOutcome, PreparationError> {
|
||||
let score = GrafanaK8sInstallScore {
|
||||
sender: sender.clone(),
|
||||
};
|
||||
|
||||
score
|
||||
.create_interpret()
|
||||
.execute(inventory, self)
|
||||
.await
|
||||
.map_err(|e| PreparationError::new(format!("Grafana not installed {}", e)))?;
|
||||
Ok(PreparationOutcome::Success {
|
||||
details: "Successfully installed grafana alert sender".to_string(),
|
||||
})
|
||||
}
|
||||
|
||||
async fn install_receivers(
|
||||
&self,
|
||||
sender: &Grafana,
|
||||
inventory: &Inventory,
|
||||
receivers: Option<Vec<Box<dyn AlertReceiver<Grafana>>>>,
|
||||
) -> Result<PreparationOutcome, PreparationError> {
|
||||
let receivers = match receivers {
|
||||
Some(r) if !r.is_empty() => r,
|
||||
_ => return Ok(PreparationOutcome::Noop),
|
||||
};
|
||||
|
||||
for receiver in receivers {
|
||||
let score = GrafanaK8sReceiverScore {
|
||||
receiver,
|
||||
sender: sender.clone(),
|
||||
};
|
||||
|
||||
score
|
||||
.create_interpret()
|
||||
.execute(inventory, self)
|
||||
.await
|
||||
.map_err(|e| PreparationError::new(format!("Failed to install receiver: {}", e)))?;
|
||||
}
|
||||
|
||||
Ok(PreparationOutcome::Success {
|
||||
details: "All alert receivers installed successfully".to_string(),
|
||||
})
|
||||
}
|
||||
|
||||
async fn install_rules(
|
||||
&self,
|
||||
sender: &Grafana,
|
||||
inventory: &Inventory,
|
||||
rules: Option<Vec<Box<dyn AlertRule<Grafana>>>>,
|
||||
) -> Result<PreparationOutcome, PreparationError> {
|
||||
let rules = match rules {
|
||||
Some(r) if !r.is_empty() => r,
|
||||
_ => return Ok(PreparationOutcome::Noop),
|
||||
};
|
||||
|
||||
for rule in rules {
|
||||
let score = GrafanaK8sRuleScore {
|
||||
sender: sender.clone(),
|
||||
rule,
|
||||
};
|
||||
|
||||
score
|
||||
.create_interpret()
|
||||
.execute(inventory, self)
|
||||
.await
|
||||
.map_err(|e| PreparationError::new(format!("Failed to install rule: {}", e)))?;
|
||||
}
|
||||
|
||||
Ok(PreparationOutcome::Success {
|
||||
details: "All alert rules installed successfully".to_string(),
|
||||
})
|
||||
}
|
||||
|
||||
async fn add_scrape_targets(
|
||||
&self,
|
||||
sender: &Grafana,
|
||||
inventory: &Inventory,
|
||||
scrape_targets: Option<Vec<Box<dyn ScrapeTarget<Grafana>>>>,
|
||||
) -> Result<PreparationOutcome, PreparationError> {
|
||||
let scrape_targets = match scrape_targets {
|
||||
Some(r) if !r.is_empty() => r,
|
||||
_ => return Ok(PreparationOutcome::Noop),
|
||||
};
|
||||
|
||||
for scrape_target in scrape_targets {
|
||||
let score = GrafanaK8sDatasourceScore {
|
||||
scrape_target,
|
||||
sender: sender.clone(),
|
||||
};
|
||||
|
||||
score
|
||||
.create_interpret()
|
||||
.execute(inventory, self)
|
||||
.await
|
||||
.map_err(|e| PreparationError::new(format!("Failed to add DataSource: {}", e)))?;
|
||||
}
|
||||
|
||||
Ok(PreparationOutcome::Success {
|
||||
details: "All datasources installed successfully".to_string(),
|
||||
})
|
||||
}
|
||||
|
||||
async fn ensure_monitoring_installed(
|
||||
&self,
|
||||
sender: &Grafana,
|
||||
inventory: &Inventory,
|
||||
) -> Result<PreparationOutcome, PreparationError> {
|
||||
let score = GrafanaK8sEnsureReadyScore {
|
||||
sender: sender.clone(),
|
||||
};
|
||||
|
||||
score
|
||||
.create_interpret()
|
||||
.execute(inventory, self)
|
||||
.await
|
||||
.map_err(|e| PreparationError::new(format!("Grafana not ready {}", e)))?;
|
||||
Ok(PreparationOutcome::Success {
|
||||
details: "Grafana Ready".to_string(),
|
||||
})
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,142 @@
|
||||
use async_trait::async_trait;
|
||||
|
||||
use crate::{
|
||||
inventory::Inventory,
|
||||
modules::monitoring::kube_prometheus::{
|
||||
KubePrometheus, helm::kube_prometheus_helm_chart::kube_prometheus_helm_chart_score,
|
||||
score_kube_prometheus_alert_receivers::KubePrometheusReceiverScore,
|
||||
score_kube_prometheus_ensure_ready::KubePrometheusEnsureReadyScore,
|
||||
score_kube_prometheus_rule::KubePrometheusRuleScore,
|
||||
score_kube_prometheus_scrape_target::KubePrometheusScrapeTargetScore,
|
||||
},
|
||||
score::Score,
|
||||
topology::{
|
||||
K8sAnywhereTopology, PreparationError, PreparationOutcome,
|
||||
monitoring::{AlertReceiver, AlertRule, Observability, ScrapeTarget},
|
||||
},
|
||||
};
|
||||
|
||||
#[async_trait]
|
||||
impl Observability<KubePrometheus> for K8sAnywhereTopology {
|
||||
async fn install_alert_sender(
|
||||
&self,
|
||||
sender: &KubePrometheus,
|
||||
inventory: &Inventory,
|
||||
) -> Result<PreparationOutcome, PreparationError> {
|
||||
kube_prometheus_helm_chart_score(sender.config.clone())
|
||||
.create_interpret()
|
||||
.execute(inventory, self)
|
||||
.await
|
||||
.map_err(|e| PreparationError::new(e.to_string()))?;
|
||||
|
||||
Ok(PreparationOutcome::Success {
|
||||
details: "Successfully installed kubeprometheus alert sender".to_string(),
|
||||
})
|
||||
}
|
||||
|
||||
async fn install_receivers(
|
||||
&self,
|
||||
sender: &KubePrometheus,
|
||||
inventory: &Inventory,
|
||||
receivers: Option<Vec<Box<dyn AlertReceiver<KubePrometheus>>>>,
|
||||
) -> Result<PreparationOutcome, PreparationError> {
|
||||
let receivers = match receivers {
|
||||
Some(r) if !r.is_empty() => r,
|
||||
_ => return Ok(PreparationOutcome::Noop),
|
||||
};
|
||||
|
||||
for receiver in receivers {
|
||||
let score = KubePrometheusReceiverScore {
|
||||
receiver,
|
||||
sender: sender.clone(),
|
||||
};
|
||||
|
||||
score
|
||||
.create_interpret()
|
||||
.execute(inventory, self)
|
||||
.await
|
||||
.map_err(|e| PreparationError::new(format!("Failed to install receiver: {}", e)))?;
|
||||
}
|
||||
|
||||
Ok(PreparationOutcome::Success {
|
||||
details: "All alert receivers installed successfully".to_string(),
|
||||
})
|
||||
}
|
||||
|
||||
async fn install_rules(
|
||||
&self,
|
||||
sender: &KubePrometheus,
|
||||
inventory: &Inventory,
|
||||
rules: Option<Vec<Box<dyn AlertRule<KubePrometheus>>>>,
|
||||
) -> Result<PreparationOutcome, PreparationError> {
|
||||
let rules = match rules {
|
||||
Some(r) if !r.is_empty() => r,
|
||||
_ => return Ok(PreparationOutcome::Noop),
|
||||
};
|
||||
|
||||
for rule in rules {
|
||||
let score = KubePrometheusRuleScore {
|
||||
sender: sender.clone(),
|
||||
rule,
|
||||
};
|
||||
|
||||
score
|
||||
.create_interpret()
|
||||
.execute(inventory, self)
|
||||
.await
|
||||
.map_err(|e| PreparationError::new(format!("Failed to install rule: {}", e)))?;
|
||||
}
|
||||
|
||||
Ok(PreparationOutcome::Success {
|
||||
details: "All alert rules installed successfully".to_string(),
|
||||
})
|
||||
}
|
||||
|
||||
async fn add_scrape_targets(
|
||||
&self,
|
||||
sender: &KubePrometheus,
|
||||
inventory: &Inventory,
|
||||
scrape_targets: Option<Vec<Box<dyn ScrapeTarget<KubePrometheus>>>>,
|
||||
) -> Result<PreparationOutcome, PreparationError> {
|
||||
let scrape_targets = match scrape_targets {
|
||||
Some(r) if !r.is_empty() => r,
|
||||
_ => return Ok(PreparationOutcome::Noop),
|
||||
};
|
||||
|
||||
for scrape_target in scrape_targets {
|
||||
let score = KubePrometheusScrapeTargetScore {
|
||||
scrape_target,
|
||||
sender: sender.clone(),
|
||||
};
|
||||
|
||||
score
|
||||
.create_interpret()
|
||||
.execute(inventory, self)
|
||||
.await
|
||||
.map_err(|e| PreparationError::new(format!("Failed to install rule: {}", e)))?;
|
||||
}
|
||||
|
||||
Ok(PreparationOutcome::Success {
|
||||
details: "All scrap targets installed successfully".to_string(),
|
||||
})
|
||||
}
|
||||
|
||||
async fn ensure_monitoring_installed(
|
||||
&self,
|
||||
sender: &KubePrometheus,
|
||||
inventory: &Inventory,
|
||||
) -> Result<PreparationOutcome, PreparationError> {
|
||||
let score = KubePrometheusEnsureReadyScore {
|
||||
sender: sender.clone(),
|
||||
};
|
||||
|
||||
score
|
||||
.create_interpret()
|
||||
.execute(inventory, self)
|
||||
.await
|
||||
.map_err(|e| PreparationError::new(format!("KubePrometheus not ready {}", e)))?;
|
||||
Ok(PreparationOutcome::Success {
|
||||
details: "KubePrometheus Ready".to_string(),
|
||||
})
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,5 @@
|
||||
pub mod grafana;
|
||||
pub mod kube_prometheus;
|
||||
pub mod openshift_monitoring;
|
||||
pub mod prometheus;
|
||||
pub mod redhat_cluster_observability;
|
||||
@@ -0,0 +1,142 @@
|
||||
use async_trait::async_trait;
|
||||
use log::info;
|
||||
|
||||
use crate::score::Score;
|
||||
use crate::{
|
||||
inventory::Inventory,
|
||||
modules::monitoring::okd::{
|
||||
OpenshiftClusterAlertSender,
|
||||
score_enable_cluster_monitoring::OpenshiftEnableClusterMonitoringScore,
|
||||
score_openshift_alert_rule::OpenshiftAlertRuleScore,
|
||||
score_openshift_receiver::OpenshiftReceiverScore,
|
||||
score_openshift_scrape_target::OpenshiftScrapeTargetScore,
|
||||
score_user_workload::OpenshiftUserWorkloadMonitoring,
|
||||
score_verify_user_workload_monitoring::VerifyUserWorkload,
|
||||
},
|
||||
topology::{
|
||||
K8sAnywhereTopology, PreparationError, PreparationOutcome,
|
||||
monitoring::{AlertReceiver, AlertRule, Observability, ScrapeTarget},
|
||||
},
|
||||
};
|
||||
|
||||
#[async_trait]
|
||||
impl Observability<OpenshiftClusterAlertSender> for K8sAnywhereTopology {
|
||||
async fn install_alert_sender(
|
||||
&self,
|
||||
_sender: &OpenshiftClusterAlertSender,
|
||||
inventory: &Inventory,
|
||||
) -> Result<PreparationOutcome, PreparationError> {
|
||||
info!("enabling cluster monitoring");
|
||||
let cluster_monitoring_score = OpenshiftEnableClusterMonitoringScore {};
|
||||
cluster_monitoring_score
|
||||
.create_interpret()
|
||||
.execute(inventory, self)
|
||||
.await
|
||||
.map_err(|e| PreparationError { msg: e.to_string() })?;
|
||||
|
||||
info!("enabling user workload monitoring");
|
||||
let user_workload_score = OpenshiftUserWorkloadMonitoring {};
|
||||
user_workload_score
|
||||
.create_interpret()
|
||||
.execute(inventory, self)
|
||||
.await
|
||||
.map_err(|e| PreparationError { msg: e.to_string() })?;
|
||||
|
||||
Ok(PreparationOutcome::Success {
|
||||
details: "Successfully configured cluster monitoring".to_string(),
|
||||
})
|
||||
}
|
||||
|
||||
async fn install_receivers(
|
||||
&self,
|
||||
_sender: &OpenshiftClusterAlertSender,
|
||||
inventory: &Inventory,
|
||||
receivers: Option<Vec<Box<dyn AlertReceiver<OpenshiftClusterAlertSender>>>>,
|
||||
) -> Result<PreparationOutcome, PreparationError> {
|
||||
if let Some(receivers) = receivers {
|
||||
for receiver in receivers {
|
||||
info!("Installing receiver {}", receiver.name());
|
||||
let receiver_score = OpenshiftReceiverScore { receiver };
|
||||
receiver_score
|
||||
.create_interpret()
|
||||
.execute(inventory, self)
|
||||
.await
|
||||
.map_err(|e| PreparationError { msg: e.to_string() })?;
|
||||
}
|
||||
Ok(PreparationOutcome::Success {
|
||||
details: "Successfully installed receivers for OpenshiftClusterMonitoring"
|
||||
.to_string(),
|
||||
})
|
||||
} else {
|
||||
Ok(PreparationOutcome::Noop)
|
||||
}
|
||||
}
|
||||
|
||||
async fn install_rules(
|
||||
&self,
|
||||
_sender: &OpenshiftClusterAlertSender,
|
||||
inventory: &Inventory,
|
||||
rules: Option<Vec<Box<dyn AlertRule<OpenshiftClusterAlertSender>>>>,
|
||||
) -> Result<PreparationOutcome, PreparationError> {
|
||||
if let Some(rules) = rules {
|
||||
for rule in rules {
|
||||
info!("Installing rule ");
|
||||
let rule_score = OpenshiftAlertRuleScore { rule: rule };
|
||||
rule_score
|
||||
.create_interpret()
|
||||
.execute(inventory, self)
|
||||
.await
|
||||
.map_err(|e| PreparationError { msg: e.to_string() })?;
|
||||
}
|
||||
Ok(PreparationOutcome::Success {
|
||||
details: "Successfully installed rules for OpenshiftClusterMonitoring".to_string(),
|
||||
})
|
||||
} else {
|
||||
Ok(PreparationOutcome::Noop)
|
||||
}
|
||||
}
|
||||
|
||||
async fn add_scrape_targets(
|
||||
&self,
|
||||
_sender: &OpenshiftClusterAlertSender,
|
||||
inventory: &Inventory,
|
||||
scrape_targets: Option<Vec<Box<dyn ScrapeTarget<OpenshiftClusterAlertSender>>>>,
|
||||
) -> Result<PreparationOutcome, PreparationError> {
|
||||
if let Some(scrape_targets) = scrape_targets {
|
||||
for scrape_target in scrape_targets {
|
||||
info!("Installing scrape target");
|
||||
let scrape_target_score = OpenshiftScrapeTargetScore {
|
||||
scrape_target: scrape_target,
|
||||
};
|
||||
scrape_target_score
|
||||
.create_interpret()
|
||||
.execute(inventory, self)
|
||||
.await
|
||||
.map_err(|e| PreparationError { msg: e.to_string() })?;
|
||||
}
|
||||
Ok(PreparationOutcome::Success {
|
||||
details: "Successfully added scrape targets for OpenshiftClusterMonitoring"
|
||||
.to_string(),
|
||||
})
|
||||
} else {
|
||||
Ok(PreparationOutcome::Noop)
|
||||
}
|
||||
}
|
||||
|
||||
async fn ensure_monitoring_installed(
|
||||
&self,
|
||||
_sender: &OpenshiftClusterAlertSender,
|
||||
inventory: &Inventory,
|
||||
) -> Result<PreparationOutcome, PreparationError> {
|
||||
let verify_monitoring_score = VerifyUserWorkload {};
|
||||
info!("Verifying user workload and cluster monitoring installed");
|
||||
verify_monitoring_score
|
||||
.create_interpret()
|
||||
.execute(inventory, self)
|
||||
.await
|
||||
.map_err(|e| PreparationError { msg: e.to_string() })?;
|
||||
Ok(PreparationOutcome::Success {
|
||||
details: "OpenshiftClusterMonitoring ready".to_string(),
|
||||
})
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,147 @@
|
||||
use async_trait::async_trait;
|
||||
|
||||
use crate::{
|
||||
inventory::Inventory,
|
||||
modules::monitoring::prometheus::{
|
||||
Prometheus, score_prometheus_alert_receivers::PrometheusReceiverScore,
|
||||
score_prometheus_ensure_ready::PrometheusEnsureReadyScore,
|
||||
score_prometheus_install::PrometheusInstallScore,
|
||||
score_prometheus_rule::PrometheusRuleScore,
|
||||
score_prometheus_scrape_target::PrometheusScrapeTargetScore,
|
||||
},
|
||||
score::Score,
|
||||
topology::{
|
||||
K8sAnywhereTopology, PreparationError, PreparationOutcome,
|
||||
monitoring::{AlertReceiver, AlertRule, Observability, ScrapeTarget},
|
||||
},
|
||||
};
|
||||
|
||||
#[async_trait]
|
||||
impl Observability<Prometheus> for K8sAnywhereTopology {
|
||||
async fn install_alert_sender(
|
||||
&self,
|
||||
sender: &Prometheus,
|
||||
inventory: &Inventory,
|
||||
) -> Result<PreparationOutcome, PreparationError> {
|
||||
let score = PrometheusInstallScore {
|
||||
sender: sender.clone(),
|
||||
};
|
||||
|
||||
score
|
||||
.create_interpret()
|
||||
.execute(inventory, self)
|
||||
.await
|
||||
.map_err(|e| PreparationError::new(format!("Prometheus not installed {}", e)))?;
|
||||
|
||||
Ok(PreparationOutcome::Success {
|
||||
details: "Successfully installed kubeprometheus alert sender".to_string(),
|
||||
})
|
||||
}
|
||||
|
||||
async fn install_receivers(
|
||||
&self,
|
||||
sender: &Prometheus,
|
||||
inventory: &Inventory,
|
||||
receivers: Option<Vec<Box<dyn AlertReceiver<Prometheus>>>>,
|
||||
) -> Result<PreparationOutcome, PreparationError> {
|
||||
let receivers = match receivers {
|
||||
Some(r) if !r.is_empty() => r,
|
||||
_ => return Ok(PreparationOutcome::Noop),
|
||||
};
|
||||
|
||||
for receiver in receivers {
|
||||
let score = PrometheusReceiverScore {
|
||||
receiver,
|
||||
sender: sender.clone(),
|
||||
};
|
||||
|
||||
score
|
||||
.create_interpret()
|
||||
.execute(inventory, self)
|
||||
.await
|
||||
.map_err(|e| PreparationError::new(format!("Failed to install receiver: {}", e)))?;
|
||||
}
|
||||
|
||||
Ok(PreparationOutcome::Success {
|
||||
details: "All alert receivers installed successfully".to_string(),
|
||||
})
|
||||
}
|
||||
|
||||
async fn install_rules(
|
||||
&self,
|
||||
sender: &Prometheus,
|
||||
inventory: &Inventory,
|
||||
rules: Option<Vec<Box<dyn AlertRule<Prometheus>>>>,
|
||||
) -> Result<PreparationOutcome, PreparationError> {
|
||||
let rules = match rules {
|
||||
Some(r) if !r.is_empty() => r,
|
||||
_ => return Ok(PreparationOutcome::Noop),
|
||||
};
|
||||
|
||||
for rule in rules {
|
||||
let score = PrometheusRuleScore {
|
||||
sender: sender.clone(),
|
||||
rule,
|
||||
};
|
||||
|
||||
score
|
||||
.create_interpret()
|
||||
.execute(inventory, self)
|
||||
.await
|
||||
.map_err(|e| PreparationError::new(format!("Failed to install rule: {}", e)))?;
|
||||
}
|
||||
|
||||
Ok(PreparationOutcome::Success {
|
||||
details: "All alert rules installed successfully".to_string(),
|
||||
})
|
||||
}
|
||||
|
||||
async fn add_scrape_targets(
|
||||
&self,
|
||||
sender: &Prometheus,
|
||||
inventory: &Inventory,
|
||||
scrape_targets: Option<Vec<Box<dyn ScrapeTarget<Prometheus>>>>,
|
||||
) -> Result<PreparationOutcome, PreparationError> {
|
||||
let scrape_targets = match scrape_targets {
|
||||
Some(r) if !r.is_empty() => r,
|
||||
_ => return Ok(PreparationOutcome::Noop),
|
||||
};
|
||||
|
||||
for scrape_target in scrape_targets {
|
||||
let score = PrometheusScrapeTargetScore {
|
||||
scrape_target,
|
||||
sender: sender.clone(),
|
||||
};
|
||||
|
||||
score
|
||||
.create_interpret()
|
||||
.execute(inventory, self)
|
||||
.await
|
||||
.map_err(|e| PreparationError::new(format!("Failed to install rule: {}", e)))?;
|
||||
}
|
||||
|
||||
Ok(PreparationOutcome::Success {
|
||||
details: "All scrap targets installed successfully".to_string(),
|
||||
})
|
||||
}
|
||||
|
||||
async fn ensure_monitoring_installed(
|
||||
&self,
|
||||
sender: &Prometheus,
|
||||
inventory: &Inventory,
|
||||
) -> Result<PreparationOutcome, PreparationError> {
|
||||
let score = PrometheusEnsureReadyScore {
|
||||
sender: sender.clone(),
|
||||
};
|
||||
|
||||
score
|
||||
.create_interpret()
|
||||
.execute(inventory, self)
|
||||
.await
|
||||
.map_err(|e| PreparationError::new(format!("Prometheus not ready {}", e)))?;
|
||||
|
||||
Ok(PreparationOutcome::Success {
|
||||
details: "Prometheus Ready".to_string(),
|
||||
})
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,114 @@
|
||||
use crate::{
|
||||
modules::monitoring::red_hat_cluster_observability::{
|
||||
score_alert_receiver::RedHatClusterObservabilityReceiverScore,
|
||||
score_coo_monitoring_stack::RedHatClusterObservabilityMonitoringStackScore,
|
||||
},
|
||||
score::Score,
|
||||
};
|
||||
use async_trait::async_trait;
|
||||
use log::info;
|
||||
|
||||
use crate::{
|
||||
inventory::Inventory,
|
||||
modules::monitoring::red_hat_cluster_observability::{
|
||||
RedHatClusterObservability,
|
||||
score_redhat_cluster_observability_operator::RedHatClusterObservabilityOperatorScore,
|
||||
},
|
||||
topology::{
|
||||
K8sAnywhereTopology, PreparationError, PreparationOutcome,
|
||||
monitoring::{AlertReceiver, AlertRule, Observability, ScrapeTarget},
|
||||
},
|
||||
};
|
||||
|
||||
#[async_trait]
|
||||
impl Observability<RedHatClusterObservability> for K8sAnywhereTopology {
|
||||
async fn install_alert_sender(
|
||||
&self,
|
||||
sender: &RedHatClusterObservability,
|
||||
inventory: &Inventory,
|
||||
) -> Result<PreparationOutcome, PreparationError> {
|
||||
info!("Verifying Redhat Cluster Observability Operator");
|
||||
|
||||
let coo_score = RedHatClusterObservabilityOperatorScore::default();
|
||||
|
||||
coo_score
|
||||
.create_interpret()
|
||||
.execute(inventory, self)
|
||||
.await
|
||||
.map_err(|e| PreparationError::new(e.to_string()))?;
|
||||
|
||||
info!(
|
||||
"Installing Cluster Observability Operator Monitoring Stack in ns {}",
|
||||
sender.namespace.clone()
|
||||
);
|
||||
|
||||
let coo_monitoring_stack_score = RedHatClusterObservabilityMonitoringStackScore {
|
||||
namespace: sender.namespace.clone(),
|
||||
resource_selector: sender.resource_selector.clone(),
|
||||
};
|
||||
|
||||
coo_monitoring_stack_score
|
||||
.create_interpret()
|
||||
.execute(inventory, self)
|
||||
.await
|
||||
.map_err(|e| PreparationError::new(e.to_string()))?;
|
||||
|
||||
Ok(PreparationOutcome::Success {
|
||||
details: "Successfully installed RedHatClusterObservability Operator".to_string(),
|
||||
})
|
||||
}
|
||||
|
||||
async fn install_receivers(
|
||||
&self,
|
||||
sender: &RedHatClusterObservability,
|
||||
inventory: &Inventory,
|
||||
receivers: Option<Vec<Box<dyn AlertReceiver<RedHatClusterObservability>>>>,
|
||||
) -> Result<PreparationOutcome, PreparationError> {
|
||||
if let Some(receivers) = receivers {
|
||||
for receiver in receivers {
|
||||
info!("Installing receiver {}", receiver.name());
|
||||
let receiver_score = RedHatClusterObservabilityReceiverScore {
|
||||
receiver,
|
||||
sender: sender.clone(),
|
||||
};
|
||||
receiver_score
|
||||
.create_interpret()
|
||||
.execute(inventory, self)
|
||||
.await
|
||||
.map_err(|e| PreparationError::new(e.to_string()))?;
|
||||
}
|
||||
Ok(PreparationOutcome::Success {
|
||||
details: "Successfully installed receivers for OpenshiftClusterMonitoring"
|
||||
.to_string(),
|
||||
})
|
||||
} else {
|
||||
Ok(PreparationOutcome::Noop)
|
||||
}
|
||||
}
|
||||
|
||||
async fn install_rules(
|
||||
&self,
|
||||
_sender: &RedHatClusterObservability,
|
||||
_inventory: &Inventory,
|
||||
_rules: Option<Vec<Box<dyn AlertRule<RedHatClusterObservability>>>>,
|
||||
) -> Result<PreparationOutcome, PreparationError> {
|
||||
todo!()
|
||||
}
|
||||
|
||||
async fn add_scrape_targets(
|
||||
&self,
|
||||
_sender: &RedHatClusterObservability,
|
||||
_inventory: &Inventory,
|
||||
_scrape_targets: Option<Vec<Box<dyn ScrapeTarget<RedHatClusterObservability>>>>,
|
||||
) -> Result<PreparationOutcome, PreparationError> {
|
||||
todo!()
|
||||
}
|
||||
|
||||
async fn ensure_monitoring_installed(
|
||||
&self,
|
||||
_sender: &RedHatClusterObservability,
|
||||
_inventory: &Inventory,
|
||||
) -> Result<PreparationOutcome, PreparationError> {
|
||||
todo!()
|
||||
}
|
||||
}
|
||||
@@ -2,6 +2,7 @@ pub mod decentralized;
|
||||
mod failover;
|
||||
mod ha_cluster;
|
||||
pub mod ingress;
|
||||
pub mod monitoring;
|
||||
pub mod node_exporter;
|
||||
pub mod opnsense;
|
||||
pub use failover::*;
|
||||
@@ -11,7 +12,6 @@ mod http;
|
||||
pub mod installable;
|
||||
mod k8s_anywhere;
|
||||
mod localhost;
|
||||
pub mod oberservability;
|
||||
pub mod tenant;
|
||||
use derive_new::new;
|
||||
pub use k8s_anywhere::*;
|
||||
|
||||
234
harmony/src/domain/topology/monitoring.rs
Normal file
234
harmony/src/domain/topology/monitoring.rs
Normal file
@@ -0,0 +1,234 @@
|
||||
use std::{
|
||||
any::Any,
|
||||
collections::{BTreeMap, HashMap},
|
||||
net::IpAddr,
|
||||
};
|
||||
|
||||
use async_trait::async_trait;
|
||||
use kube::api::DynamicObject;
|
||||
use log::{debug, info};
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
use crate::{
|
||||
data::Version,
|
||||
interpret::{Interpret, InterpretError, InterpretName, InterpretStatus, Outcome},
|
||||
inventory::Inventory,
|
||||
topology::{PreparationError, PreparationOutcome, Topology, installable::Installable},
|
||||
};
|
||||
use harmony_types::id::Id;
|
||||
|
||||
/// Defines the application that sends the alerts to a receivers
|
||||
/// for example prometheus
|
||||
#[async_trait]
|
||||
pub trait AlertSender: Send + Sync + std::fmt::Debug {
|
||||
fn name(&self) -> String;
|
||||
}
|
||||
|
||||
/// Trait which defines how an alert sender is impleneted for a specific topology
|
||||
#[async_trait]
|
||||
pub trait Observability<S: AlertSender> {
|
||||
async fn install_alert_sender(
|
||||
&self,
|
||||
sender: &S,
|
||||
inventory: &Inventory,
|
||||
) -> Result<PreparationOutcome, PreparationError>;
|
||||
|
||||
async fn install_receivers(
|
||||
&self,
|
||||
sender: &S,
|
||||
inventory: &Inventory,
|
||||
receivers: Option<Vec<Box<dyn AlertReceiver<S>>>>,
|
||||
) -> Result<PreparationOutcome, PreparationError>;
|
||||
|
||||
async fn install_rules(
|
||||
&self,
|
||||
sender: &S,
|
||||
inventory: &Inventory,
|
||||
rules: Option<Vec<Box<dyn AlertRule<S>>>>,
|
||||
) -> Result<PreparationOutcome, PreparationError>;
|
||||
|
||||
async fn add_scrape_targets(
|
||||
&self,
|
||||
sender: &S,
|
||||
inventory: &Inventory,
|
||||
scrape_targets: Option<Vec<Box<dyn ScrapeTarget<S>>>>,
|
||||
) -> Result<PreparationOutcome, PreparationError>;
|
||||
|
||||
async fn ensure_monitoring_installed(
|
||||
&self,
|
||||
sender: &S,
|
||||
inventory: &Inventory,
|
||||
) -> Result<PreparationOutcome, PreparationError>;
|
||||
}
|
||||
|
||||
/// Defines the entity that receives the alerts from a sender. For example Discord, Slack, etc
|
||||
///
|
||||
pub trait AlertReceiver<S: AlertSender>: std::fmt::Debug + Send + Sync {
|
||||
fn build_route(&self) -> Result<serde_yaml::Value, InterpretError>;
|
||||
fn build_receiver(&self) -> Result<serde_yaml::Value, InterpretError>;
|
||||
fn name(&self) -> String;
|
||||
fn clone_box(&self) -> Box<dyn AlertReceiver<S>>;
|
||||
}
|
||||
|
||||
/// Defines a generic rule that can be applied to a sender, such as aprometheus alert rule
|
||||
pub trait AlertRule<S: AlertSender>: std::fmt::Debug + Send + Sync {
|
||||
fn build_rule(&self) -> Result<serde_json::Value, InterpretError>;
|
||||
fn name(&self) -> String;
|
||||
fn clone_box(&self) -> Box<dyn AlertRule<S>>;
|
||||
}
|
||||
|
||||
/// A generic scrape target that can be added to a sender to scrape metrics from, for example a
|
||||
/// server outside of the cluster
|
||||
pub trait ScrapeTarget<S: AlertSender>: std::fmt::Debug + Send + Sync {
|
||||
fn build_scrape_target(&self) -> Result<ExternalScrapeTarget, InterpretError>;
|
||||
fn name(&self) -> String;
|
||||
fn clone_box(&self) -> Box<dyn ScrapeTarget<S>>;
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct ExternalScrapeTarget {
|
||||
pub ip: IpAddr,
|
||||
pub port: i32,
|
||||
pub interval: Option<String>,
|
||||
pub path: Option<String>,
|
||||
pub labels: Option<BTreeMap<String, String>>,
|
||||
}
|
||||
|
||||
/// Alerting interpret to install an alert sender on a given topology
|
||||
#[derive(Debug)]
|
||||
pub struct AlertingInterpret<S: AlertSender> {
|
||||
pub sender: S,
|
||||
pub receivers: Vec<Box<dyn AlertReceiver<S>>>,
|
||||
pub rules: Vec<Box<dyn AlertRule<S>>>,
|
||||
pub scrape_targets: Option<Vec<Box<dyn ScrapeTarget<S>>>>,
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl<S: AlertSender, T: Topology + Observability<S>> Interpret<T> for AlertingInterpret<S> {
|
||||
async fn execute(
|
||||
&self,
|
||||
inventory: &Inventory,
|
||||
topology: &T,
|
||||
) -> Result<Outcome, InterpretError> {
|
||||
info!("Configuring alert sender {}", self.sender.name());
|
||||
topology
|
||||
.install_alert_sender(&self.sender, inventory)
|
||||
.await?;
|
||||
|
||||
info!("Installing receivers");
|
||||
topology
|
||||
.install_receivers(&self.sender, inventory, Some(self.receivers.clone()))
|
||||
.await?;
|
||||
|
||||
info!("Installing rules");
|
||||
topology
|
||||
.install_rules(&self.sender, inventory, Some(self.rules.clone()))
|
||||
.await?;
|
||||
|
||||
info!("Adding extra scrape targets");
|
||||
topology
|
||||
.add_scrape_targets(&self.sender, inventory, self.scrape_targets.clone())
|
||||
.await?;
|
||||
|
||||
info!("Ensuring alert sender {} is ready", self.sender.name());
|
||||
topology
|
||||
.ensure_monitoring_installed(&self.sender, inventory)
|
||||
.await?;
|
||||
|
||||
Ok(Outcome::success(format!(
|
||||
"successfully installed alert sender {}",
|
||||
self.sender.name()
|
||||
)))
|
||||
}
|
||||
|
||||
fn get_name(&self) -> InterpretName {
|
||||
InterpretName::Alerting
|
||||
}
|
||||
|
||||
fn get_version(&self) -> Version {
|
||||
todo!()
|
||||
}
|
||||
|
||||
fn get_status(&self) -> InterpretStatus {
|
||||
todo!()
|
||||
}
|
||||
|
||||
fn get_children(&self) -> Vec<Id> {
|
||||
todo!()
|
||||
}
|
||||
}
|
||||
|
||||
impl<S: AlertSender> Clone for Box<dyn AlertReceiver<S>> {
|
||||
fn clone(&self) -> Self {
|
||||
self.clone_box()
|
||||
}
|
||||
}
|
||||
|
||||
impl<S: AlertSender> Clone for Box<dyn AlertRule<S>> {
|
||||
fn clone(&self) -> Self {
|
||||
self.clone_box()
|
||||
}
|
||||
}
|
||||
|
||||
impl<S: AlertSender> Clone for Box<dyn ScrapeTarget<S>> {
|
||||
fn clone(&self) -> Self {
|
||||
self.clone_box()
|
||||
}
|
||||
}
|
||||
|
||||
///Generic routing that can map to various alert sender backends
|
||||
#[derive(Debug, Clone, Serialize)]
|
||||
pub struct AlertRoute {
|
||||
pub receiver: String,
|
||||
#[serde(skip_serializing_if = "Vec::is_empty")]
|
||||
pub matchers: Vec<AlertMatcher>,
|
||||
#[serde(skip_serializing_if = "Vec::is_empty")]
|
||||
pub group_by: Vec<String>,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub repeat_interval: Option<String>,
|
||||
#[serde(rename = "continue")]
|
||||
pub continue_matching: bool,
|
||||
#[serde(skip_serializing_if = "Vec::is_empty")]
|
||||
pub children: Vec<AlertRoute>,
|
||||
}
|
||||
|
||||
impl AlertRoute {
|
||||
pub fn default(name: String) -> Self {
|
||||
Self {
|
||||
receiver: name,
|
||||
matchers: vec![],
|
||||
group_by: vec![],
|
||||
repeat_interval: Some("30s".to_string()),
|
||||
continue_matching: true,
|
||||
children: vec![],
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize)]
|
||||
pub struct AlertMatcher {
|
||||
pub label: String,
|
||||
pub operator: MatchOp,
|
||||
pub value: String,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub enum MatchOp {
|
||||
Eq,
|
||||
NotEq,
|
||||
Regex,
|
||||
}
|
||||
|
||||
impl Serialize for MatchOp {
|
||||
fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
|
||||
where
|
||||
S: serde::Serializer,
|
||||
{
|
||||
let op = match self {
|
||||
MatchOp::Eq => "=",
|
||||
MatchOp::NotEq => "!=",
|
||||
MatchOp::Regex => "=~",
|
||||
};
|
||||
serializer.serialize_str(op)
|
||||
}
|
||||
}
|
||||
@@ -1 +0,0 @@
|
||||
pub mod monitoring;
|
||||
@@ -1,101 +0,0 @@
|
||||
use std::{any::Any, collections::HashMap};
|
||||
|
||||
use async_trait::async_trait;
|
||||
use kube::api::DynamicObject;
|
||||
use log::debug;
|
||||
|
||||
use crate::{
|
||||
data::Version,
|
||||
interpret::{Interpret, InterpretError, InterpretName, InterpretStatus, Outcome},
|
||||
inventory::Inventory,
|
||||
topology::{Topology, installable::Installable},
|
||||
};
|
||||
use harmony_types::id::Id;
|
||||
|
||||
#[async_trait]
|
||||
pub trait AlertSender: Send + Sync + std::fmt::Debug {
|
||||
fn name(&self) -> String;
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct AlertingInterpret<S: AlertSender> {
|
||||
pub sender: S,
|
||||
pub receivers: Vec<Box<dyn AlertReceiver<S>>>,
|
||||
pub rules: Vec<Box<dyn AlertRule<S>>>,
|
||||
pub scrape_targets: Option<Vec<Box<dyn ScrapeTarget<S>>>>,
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl<S: AlertSender + Installable<T>, T: Topology> Interpret<T> for AlertingInterpret<S> {
|
||||
async fn execute(
|
||||
&self,
|
||||
inventory: &Inventory,
|
||||
topology: &T,
|
||||
) -> Result<Outcome, InterpretError> {
|
||||
debug!("hit sender configure for AlertingInterpret");
|
||||
self.sender.configure(inventory, topology).await?;
|
||||
for receiver in self.receivers.iter() {
|
||||
receiver.install(&self.sender).await?;
|
||||
}
|
||||
for rule in self.rules.iter() {
|
||||
debug!("installing rule: {:#?}", rule);
|
||||
rule.install(&self.sender).await?;
|
||||
}
|
||||
if let Some(targets) = &self.scrape_targets {
|
||||
for target in targets.iter() {
|
||||
debug!("installing scrape_target: {:#?}", target);
|
||||
target.install(&self.sender).await?;
|
||||
}
|
||||
}
|
||||
self.sender.ensure_installed(inventory, topology).await?;
|
||||
Ok(Outcome::success(format!(
|
||||
"successfully installed alert sender {}",
|
||||
self.sender.name()
|
||||
)))
|
||||
}
|
||||
|
||||
fn get_name(&self) -> InterpretName {
|
||||
InterpretName::Alerting
|
||||
}
|
||||
|
||||
fn get_version(&self) -> Version {
|
||||
todo!()
|
||||
}
|
||||
|
||||
fn get_status(&self) -> InterpretStatus {
|
||||
todo!()
|
||||
}
|
||||
|
||||
fn get_children(&self) -> Vec<Id> {
|
||||
todo!()
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
pub trait AlertReceiver<S: AlertSender>: std::fmt::Debug + Send + Sync {
|
||||
async fn install(&self, sender: &S) -> Result<Outcome, InterpretError>;
|
||||
fn name(&self) -> String;
|
||||
fn clone_box(&self) -> Box<dyn AlertReceiver<S>>;
|
||||
fn as_any(&self) -> &dyn Any;
|
||||
fn as_alertmanager_receiver(&self) -> Result<AlertManagerReceiver, String>;
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct AlertManagerReceiver {
|
||||
pub receiver_config: serde_json::Value,
|
||||
// FIXME we should not leak k8s here. DynamicObject is k8s specific
|
||||
pub additional_ressources: Vec<DynamicObject>,
|
||||
pub route_config: serde_json::Value,
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
pub trait AlertRule<S: AlertSender>: std::fmt::Debug + Send + Sync {
|
||||
async fn install(&self, sender: &S) -> Result<Outcome, InterpretError>;
|
||||
fn clone_box(&self) -> Box<dyn AlertRule<S>>;
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
pub trait ScrapeTarget<S: AlertSender>: std::fmt::Debug + Send + Sync {
|
||||
async fn install(&self, sender: &S) -> Result<Outcome, InterpretError>;
|
||||
fn clone_box(&self) -> Box<dyn ScrapeTarget<S>>;
|
||||
}
|
||||
@@ -2,13 +2,15 @@ use crate::modules::application::{
|
||||
Application, ApplicationFeature, InstallationError, InstallationOutcome,
|
||||
};
|
||||
use crate::modules::monitoring::application_monitoring::application_monitoring_score::ApplicationMonitoringScore;
|
||||
use crate::modules::monitoring::grafana::grafana::Grafana;
|
||||
use crate::modules::monitoring::kube_prometheus::crd::crd_alertmanager_config::CRDPrometheus;
|
||||
use crate::modules::monitoring::kube_prometheus::crd::service_monitor::{
|
||||
ServiceMonitor, ServiceMonitorSpec,
|
||||
};
|
||||
use crate::modules::monitoring::prometheus::Prometheus;
|
||||
use crate::modules::monitoring::prometheus::helm::prometheus_config::PrometheusConfig;
|
||||
use crate::topology::MultiTargetTopology;
|
||||
use crate::topology::ingress::Ingress;
|
||||
use crate::topology::monitoring::AlertReceiver;
|
||||
use crate::topology::monitoring::Observability;
|
||||
use crate::{
|
||||
inventory::Inventory,
|
||||
modules::monitoring::{
|
||||
@@ -17,10 +19,6 @@ use crate::{
|
||||
score::Score,
|
||||
topology::{HelmCommand, K8sclient, Topology, tenant::TenantManager},
|
||||
};
|
||||
use crate::{
|
||||
modules::prometheus::prometheus::PrometheusMonitoring,
|
||||
topology::oberservability::monitoring::AlertReceiver,
|
||||
};
|
||||
use async_trait::async_trait;
|
||||
use base64::{Engine as _, engine::general_purpose};
|
||||
use harmony_secret::SecretManager;
|
||||
@@ -30,12 +28,12 @@ use kube::api::ObjectMeta;
|
||||
use log::{debug, info};
|
||||
use schemars::JsonSchema;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use std::sync::Arc;
|
||||
use std::sync::{Arc, Mutex};
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct Monitoring {
|
||||
pub application: Arc<dyn Application>,
|
||||
pub alert_receiver: Vec<Box<dyn AlertReceiver<CRDPrometheus>>>,
|
||||
pub alert_receiver: Vec<Box<dyn AlertReceiver<Prometheus>>>,
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
@@ -46,8 +44,7 @@ impl<
|
||||
+ TenantManager
|
||||
+ K8sclient
|
||||
+ MultiTargetTopology
|
||||
+ PrometheusMonitoring<CRDPrometheus>
|
||||
+ Grafana
|
||||
+ Observability<Prometheus>
|
||||
+ Ingress
|
||||
+ std::fmt::Debug,
|
||||
> ApplicationFeature<T> for Monitoring
|
||||
@@ -74,10 +71,8 @@ impl<
|
||||
};
|
||||
|
||||
let mut alerting_score = ApplicationMonitoringScore {
|
||||
sender: CRDPrometheus {
|
||||
namespace: namespace.clone(),
|
||||
client: topology.k8s_client().await.unwrap(),
|
||||
service_monitor: vec![app_service_monitor],
|
||||
sender: Prometheus {
|
||||
config: Arc::new(Mutex::new(PrometheusConfig::new())),
|
||||
},
|
||||
application: self.application.clone(),
|
||||
receivers: self.alert_receiver.clone(),
|
||||
@@ -119,11 +114,12 @@ impl<
|
||||
),
|
||||
};
|
||||
|
||||
alerting_score.receivers.push(Box::new(ntfy_receiver));
|
||||
alerting_score
|
||||
.interpret(&Inventory::empty(), topology)
|
||||
.await
|
||||
.map_err(|e| e.to_string())?;
|
||||
todo!();
|
||||
// alerting_score.receivers.push(Box::new(ntfy_receiver));
|
||||
// alerting_score
|
||||
// .interpret(&Inventory::empty(), topology)
|
||||
// .await
|
||||
// .map_err(|e| e.to_string())?;
|
||||
|
||||
Ok(InstallationOutcome::success())
|
||||
}
|
||||
|
||||
@@ -3,11 +3,13 @@ use std::sync::Arc;
|
||||
use crate::modules::application::{
|
||||
Application, ApplicationFeature, InstallationError, InstallationOutcome,
|
||||
};
|
||||
use crate::modules::monitoring::application_monitoring::rhobs_application_monitoring_score::ApplicationRHOBMonitoringScore;
|
||||
|
||||
use crate::modules::monitoring::kube_prometheus::crd::rhob_alertmanager_config::RHOBObservability;
|
||||
use crate::modules::monitoring::red_hat_cluster_observability::RedHatClusterObservability;
|
||||
use crate::modules::monitoring::red_hat_cluster_observability::redhat_cluster_observability::RedHatClusterObservabilityScore;
|
||||
use crate::topology::MultiTargetTopology;
|
||||
use crate::topology::ingress::Ingress;
|
||||
use crate::topology::monitoring::AlertReceiver;
|
||||
use crate::topology::monitoring::Observability;
|
||||
use crate::{
|
||||
inventory::Inventory,
|
||||
modules::monitoring::{
|
||||
@@ -16,10 +18,6 @@ use crate::{
|
||||
score::Score,
|
||||
topology::{HelmCommand, K8sclient, Topology, tenant::TenantManager},
|
||||
};
|
||||
use crate::{
|
||||
modules::prometheus::prometheus::PrometheusMonitoring,
|
||||
topology::oberservability::monitoring::AlertReceiver,
|
||||
};
|
||||
use async_trait::async_trait;
|
||||
use base64::{Engine as _, engine::general_purpose};
|
||||
use harmony_types::net::Url;
|
||||
@@ -28,9 +26,10 @@ use log::{debug, info};
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct Monitoring {
|
||||
pub application: Arc<dyn Application>,
|
||||
pub alert_receiver: Vec<Box<dyn AlertReceiver<RHOBObservability>>>,
|
||||
pub alert_receiver: Vec<Box<dyn AlertReceiver<RedHatClusterObservability>>>,
|
||||
}
|
||||
|
||||
///TODO TEST this
|
||||
#[async_trait]
|
||||
impl<
|
||||
T: Topology
|
||||
@@ -41,7 +40,7 @@ impl<
|
||||
+ MultiTargetTopology
|
||||
+ Ingress
|
||||
+ std::fmt::Debug
|
||||
+ PrometheusMonitoring<RHOBObservability>,
|
||||
+ Observability<RedHatClusterObservability>,
|
||||
> ApplicationFeature<T> for Monitoring
|
||||
{
|
||||
async fn ensure_installed(
|
||||
@@ -55,13 +54,14 @@ impl<
|
||||
.map(|ns| ns.name.clone())
|
||||
.unwrap_or_else(|| self.application.name());
|
||||
|
||||
let mut alerting_score = ApplicationRHOBMonitoringScore {
|
||||
sender: RHOBObservability {
|
||||
let mut alerting_score = RedHatClusterObservabilityScore {
|
||||
sender: RedHatClusterObservability {
|
||||
namespace: namespace.clone(),
|
||||
client: topology.k8s_client().await.unwrap(),
|
||||
resource_selector: todo!(),
|
||||
},
|
||||
application: self.application.clone(),
|
||||
receivers: self.alert_receiver.clone(),
|
||||
rules: vec![],
|
||||
scrape_targets: None,
|
||||
};
|
||||
let domain = topology
|
||||
.get_domain("ntfy")
|
||||
|
||||
@@ -9,7 +9,7 @@ use crate::{
|
||||
interpret::{Interpret, InterpretError, InterpretName, InterpretStatus, Outcome},
|
||||
inventory::Inventory,
|
||||
score::Score,
|
||||
topology::{K8sclient, Topology},
|
||||
topology::{K8sclient, Topology, k8s::ApplyStrategy},
|
||||
};
|
||||
use harmony_types::id::Id;
|
||||
|
||||
@@ -29,7 +29,7 @@ impl<K: Resource + std::fmt::Debug> K8sResourceScore<K> {
|
||||
}
|
||||
|
||||
impl<
|
||||
K: Resource<Scope = NamespaceResourceScope>
|
||||
K: Resource
|
||||
+ std::fmt::Debug
|
||||
+ Sync
|
||||
+ DeserializeOwned
|
||||
@@ -42,6 +42,7 @@ impl<
|
||||
> Score<T> for K8sResourceScore<K>
|
||||
where
|
||||
<K as kube::Resource>::DynamicType: Default,
|
||||
<K as kube::Resource>::Scope: ApplyStrategy<K>,
|
||||
{
|
||||
fn create_interpret(&self) -> Box<dyn Interpret<T>> {
|
||||
Box::new(K8sResourceInterpret {
|
||||
@@ -61,7 +62,7 @@ pub struct K8sResourceInterpret<K: Resource + std::fmt::Debug + Sync + Send> {
|
||||
|
||||
#[async_trait]
|
||||
impl<
|
||||
K: Resource<Scope = NamespaceResourceScope>
|
||||
K: Resource
|
||||
+ Clone
|
||||
+ std::fmt::Debug
|
||||
+ DeserializeOwned
|
||||
@@ -73,6 +74,7 @@ impl<
|
||||
> Interpret<T> for K8sResourceInterpret<K>
|
||||
where
|
||||
<K as kube::Resource>::DynamicType: Default,
|
||||
<K as kube::Resource>::Scope: ApplyStrategy<K>,
|
||||
{
|
||||
async fn execute(
|
||||
&self,
|
||||
|
||||
@@ -18,7 +18,6 @@ pub mod network;
|
||||
pub mod okd;
|
||||
pub mod opnsense;
|
||||
pub mod postgresql;
|
||||
pub mod prometheus;
|
||||
pub mod storage;
|
||||
pub mod tenant;
|
||||
pub mod tftp;
|
||||
|
||||
@@ -1,98 +1,54 @@
|
||||
use std::any::Any;
|
||||
use std::collections::{BTreeMap, HashMap};
|
||||
use std::collections::BTreeMap;
|
||||
|
||||
use async_trait::async_trait;
|
||||
use harmony_types::k8s_name::K8sName;
|
||||
use crate::modules::monitoring::kube_prometheus::KubePrometheus;
|
||||
use crate::modules::monitoring::okd::OpenshiftClusterAlertSender;
|
||||
use crate::modules::monitoring::red_hat_cluster_observability::RedHatClusterObservability;
|
||||
use crate::topology::monitoring::{AlertRoute, MatchOp};
|
||||
use crate::{interpret::InterpretError, topology::monitoring::AlertReceiver};
|
||||
use harmony_types::net::Url;
|
||||
use k8s_openapi::api::core::v1::Secret;
|
||||
use kube::Resource;
|
||||
use kube::api::{DynamicObject, ObjectMeta};
|
||||
use log::{debug, trace};
|
||||
use serde::Serialize;
|
||||
use serde_json::json;
|
||||
use serde_yaml::{Mapping, Value};
|
||||
|
||||
use crate::infra::kube::kube_resource_to_dynamic;
|
||||
use crate::modules::monitoring::kube_prometheus::crd::crd_alertmanager_config::{
|
||||
AlertmanagerConfig, AlertmanagerConfigSpec, CRDPrometheus,
|
||||
};
|
||||
use crate::modules::monitoring::kube_prometheus::crd::rhob_alertmanager_config::RHOBObservability;
|
||||
use crate::modules::monitoring::okd::OpenshiftClusterAlertSender;
|
||||
use crate::topology::oberservability::monitoring::AlertManagerReceiver;
|
||||
use crate::{
|
||||
interpret::{InterpretError, Outcome},
|
||||
modules::monitoring::{
|
||||
kube_prometheus::{
|
||||
prometheus::{KubePrometheus, KubePrometheusReceiver},
|
||||
types::{AlertChannelConfig, AlertManagerChannelConfig},
|
||||
},
|
||||
prometheus::prometheus::{Prometheus, PrometheusReceiver},
|
||||
},
|
||||
topology::oberservability::monitoring::AlertReceiver,
|
||||
};
|
||||
use harmony_types::net::Url;
|
||||
|
||||
#[derive(Debug, Clone, Serialize)]
|
||||
pub struct DiscordWebhook {
|
||||
pub name: K8sName,
|
||||
pub struct DiscordReceiver {
|
||||
pub name: String,
|
||||
pub url: Url,
|
||||
pub selectors: Vec<HashMap<String, String>>,
|
||||
pub route: AlertRoute,
|
||||
}
|
||||
|
||||
impl DiscordWebhook {
|
||||
fn get_receiver_config(&self) -> Result<AlertManagerReceiver, String> {
|
||||
let secret_name = format!("{}-secret", self.name.clone());
|
||||
let webhook_key = format!("{}", self.url.clone());
|
||||
impl AlertReceiver<OpenshiftClusterAlertSender> for DiscordReceiver {
|
||||
fn build_route(&self) -> Result<serde_yaml::Value, InterpretError> {
|
||||
let matchers: Vec<String> = self
|
||||
.route
|
||||
.matchers
|
||||
.iter()
|
||||
.map(|m| match m.operator {
|
||||
MatchOp::Eq => format!("{} = {}", m.label, m.value),
|
||||
MatchOp::NotEq => format!("{} != {}", m.label, m.value),
|
||||
MatchOp::Regex => format!("{} =~ {}", m.label, m.value),
|
||||
})
|
||||
.collect();
|
||||
|
||||
let mut string_data = BTreeMap::new();
|
||||
string_data.insert("webhook-url".to_string(), webhook_key.clone());
|
||||
|
||||
let secret = Secret {
|
||||
metadata: kube::core::ObjectMeta {
|
||||
name: Some(secret_name.clone()),
|
||||
..Default::default()
|
||||
},
|
||||
string_data: Some(string_data),
|
||||
type_: Some("Opaque".to_string()),
|
||||
..Default::default()
|
||||
};
|
||||
|
||||
let mut matchers: Vec<String> = Vec::new();
|
||||
for selector in &self.selectors {
|
||||
trace!("selector: {:#?}", selector);
|
||||
for (k, v) in selector {
|
||||
matchers.push(format!("{} = {}", k, v));
|
||||
}
|
||||
}
|
||||
|
||||
Ok(AlertManagerReceiver {
|
||||
additional_ressources: vec![kube_resource_to_dynamic(&secret)?],
|
||||
|
||||
receiver_config: json!({
|
||||
"name": self.name,
|
||||
"discord_configs": [
|
||||
{
|
||||
"webhook_url": self.url.clone(),
|
||||
"title": "{{ template \"discord.default.title\" . }}",
|
||||
"message": "{{ template \"discord.default.message\" . }}"
|
||||
}
|
||||
]
|
||||
}),
|
||||
route_config: json!({
|
||||
"receiver": self.name,
|
||||
"matchers": matchers,
|
||||
|
||||
}),
|
||||
})
|
||||
let route_block = serde_yaml::to_value(json!({
|
||||
"receiver": self.name,
|
||||
"matchers": matchers,
|
||||
}))
|
||||
.unwrap();
|
||||
Ok(route_block)
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl AlertReceiver<OpenshiftClusterAlertSender> for DiscordWebhook {
|
||||
async fn install(
|
||||
&self,
|
||||
sender: &OpenshiftClusterAlertSender,
|
||||
) -> Result<Outcome, InterpretError> {
|
||||
todo!()
|
||||
fn build_receiver(&self) -> Result<serde_yaml::Value, InterpretError> {
|
||||
let receiver_block = serde_yaml::to_value(json!({
|
||||
"name": self.name,
|
||||
"discord_configs": [{
|
||||
"webhook_url": format!("{}", self.url),
|
||||
"title": "{{ template \"discord.default.title\" . }}",
|
||||
"message": "{{ template \"discord.default.message\" . }}"
|
||||
}]
|
||||
}))
|
||||
.map_err(|e| InterpretError::new(e.to_string()))?;
|
||||
Ok(receiver_block)
|
||||
}
|
||||
|
||||
fn name(&self) -> String {
|
||||
@@ -102,93 +58,16 @@ impl AlertReceiver<OpenshiftClusterAlertSender> for DiscordWebhook {
|
||||
fn clone_box(&self) -> Box<dyn AlertReceiver<OpenshiftClusterAlertSender>> {
|
||||
Box::new(self.clone())
|
||||
}
|
||||
|
||||
fn as_any(&self) -> &dyn Any {
|
||||
todo!()
|
||||
}
|
||||
|
||||
fn as_alertmanager_receiver(&self) -> Result<AlertManagerReceiver, String> {
|
||||
self.get_receiver_config()
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl AlertReceiver<RHOBObservability> for DiscordWebhook {
|
||||
fn as_alertmanager_receiver(&self) -> Result<AlertManagerReceiver, String> {
|
||||
todo!()
|
||||
impl AlertReceiver<RedHatClusterObservability> for DiscordReceiver {
|
||||
fn build_route(&self) -> Result<serde_yaml::Value, InterpretError> {
|
||||
serde_yaml::to_value(&self.route).map_err(|e| InterpretError::new(e.to_string()))
|
||||
}
|
||||
|
||||
async fn install(&self, sender: &RHOBObservability) -> Result<Outcome, InterpretError> {
|
||||
let ns = sender.namespace.clone();
|
||||
|
||||
let config = self.get_receiver_config()?;
|
||||
for resource in config.additional_ressources.iter() {
|
||||
todo!("can I apply a dynamicresource");
|
||||
// sender.client.apply(resource, Some(&ns)).await;
|
||||
}
|
||||
|
||||
let spec = crate::modules::monitoring::kube_prometheus::crd::rhob_alertmanager_config::AlertmanagerConfigSpec {
|
||||
data: json!({
|
||||
"route": {
|
||||
"receiver": self.name,
|
||||
},
|
||||
"receivers": [
|
||||
config.receiver_config
|
||||
]
|
||||
}),
|
||||
};
|
||||
|
||||
let alertmanager_configs = crate::modules::monitoring::kube_prometheus::crd::rhob_alertmanager_config::AlertmanagerConfig {
|
||||
metadata: ObjectMeta {
|
||||
name: Some(self.name.clone().to_string()),
|
||||
labels: Some(std::collections::BTreeMap::from([(
|
||||
"alertmanagerConfig".to_string(),
|
||||
"enabled".to_string(),
|
||||
)])),
|
||||
namespace: Some(sender.namespace.clone()),
|
||||
..Default::default()
|
||||
},
|
||||
spec,
|
||||
};
|
||||
debug!(
|
||||
"alertmanager_configs yaml:\n{:#?}",
|
||||
serde_yaml::to_string(&alertmanager_configs)
|
||||
);
|
||||
debug!(
|
||||
"alert manager configs: \n{:#?}",
|
||||
alertmanager_configs.clone()
|
||||
);
|
||||
|
||||
sender
|
||||
.client
|
||||
.apply(&alertmanager_configs, Some(&sender.namespace))
|
||||
.await?;
|
||||
Ok(Outcome::success(format!(
|
||||
"installed rhob-alertmanagerconfigs for {}",
|
||||
self.name
|
||||
)))
|
||||
}
|
||||
|
||||
fn name(&self) -> String {
|
||||
"webhook-receiver".to_string()
|
||||
}
|
||||
|
||||
fn clone_box(&self) -> Box<dyn AlertReceiver<RHOBObservability>> {
|
||||
Box::new(self.clone())
|
||||
}
|
||||
|
||||
fn as_any(&self) -> &dyn Any {
|
||||
self
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl AlertReceiver<CRDPrometheus> for DiscordWebhook {
|
||||
fn as_alertmanager_receiver(&self) -> Result<AlertManagerReceiver, String> {
|
||||
todo!()
|
||||
}
|
||||
async fn install(&self, sender: &CRDPrometheus) -> Result<Outcome, InterpretError> {
|
||||
let ns = sender.namespace.clone();
|
||||
fn build_receiver(&self) -> Result<serde_yaml::Value, InterpretError> {
|
||||
//FIXME this secret needs to be applied so that the discord Configs for RedHatCO
|
||||
//CRD AlertmanagerConfigs can access the URL
|
||||
let secret_name = format!("{}-secret", self.name.clone());
|
||||
let webhook_key = format!("{}", self.url.clone());
|
||||
|
||||
@@ -205,206 +84,54 @@ impl AlertReceiver<CRDPrometheus> for DiscordWebhook {
|
||||
..Default::default()
|
||||
};
|
||||
|
||||
let _ = sender.client.apply(&secret, Some(&ns)).await;
|
||||
|
||||
let spec = AlertmanagerConfigSpec {
|
||||
data: json!({
|
||||
"route": {
|
||||
"receiver": self.name,
|
||||
},
|
||||
"receivers": [
|
||||
{
|
||||
"name": self.name,
|
||||
"discordConfigs": [
|
||||
{
|
||||
"apiURL": {
|
||||
"name": secret_name,
|
||||
"key": "webhook-url",
|
||||
},
|
||||
"title": "{{ template \"discord.default.title\" . }}",
|
||||
"message": "{{ template \"discord.default.message\" . }}"
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
}),
|
||||
};
|
||||
|
||||
let alertmanager_configs = AlertmanagerConfig {
|
||||
metadata: ObjectMeta {
|
||||
name: Some(self.name.clone().to_string()),
|
||||
labels: Some(std::collections::BTreeMap::from([(
|
||||
"alertmanagerConfig".to_string(),
|
||||
"enabled".to_string(),
|
||||
)])),
|
||||
namespace: Some(ns),
|
||||
..Default::default()
|
||||
},
|
||||
spec,
|
||||
};
|
||||
|
||||
sender
|
||||
.client
|
||||
.apply(&alertmanager_configs, Some(&sender.namespace))
|
||||
.await?;
|
||||
Ok(Outcome::success(format!(
|
||||
"installed crd-alertmanagerconfigs for {}",
|
||||
self.name
|
||||
)))
|
||||
let receiver_config = json!({
|
||||
"name": self.name,
|
||||
"discordConfigs": [
|
||||
{
|
||||
"apiURL": {
|
||||
"key": "webhook-url",
|
||||
"name": format!("{}-secret", self.name)
|
||||
},
|
||||
"title": "{{ template \"discord.default.title\" . }}",
|
||||
"message": "{{ template \"discord.default.message\" . }}"
|
||||
}
|
||||
]
|
||||
});
|
||||
serde_yaml::to_value(receiver_config).map_err(|e| InterpretError::new(e.to_string()))
|
||||
}
|
||||
|
||||
fn name(&self) -> String {
|
||||
"discord-webhook".to_string()
|
||||
self.name.clone()
|
||||
}
|
||||
fn clone_box(&self) -> Box<dyn AlertReceiver<CRDPrometheus>> {
|
||||
|
||||
fn clone_box(&self) -> Box<dyn AlertReceiver<RedHatClusterObservability>> {
|
||||
Box::new(self.clone())
|
||||
}
|
||||
fn as_any(&self) -> &dyn Any {
|
||||
self
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl AlertReceiver<Prometheus> for DiscordWebhook {
|
||||
fn as_alertmanager_receiver(&self) -> Result<AlertManagerReceiver, String> {
|
||||
todo!()
|
||||
impl AlertReceiver<KubePrometheus> for DiscordReceiver {
|
||||
fn build_route(&self) -> Result<serde_yaml::Value, InterpretError> {
|
||||
serde_yaml::to_value(self.route.clone()).map_err(|e| InterpretError::new(e.to_string()))
|
||||
}
|
||||
async fn install(&self, sender: &Prometheus) -> Result<Outcome, InterpretError> {
|
||||
sender.install_receiver(self).await
|
||||
|
||||
fn build_receiver(&self) -> Result<serde_yaml::Value, InterpretError> {
|
||||
let receiver_block = serde_yaml::to_value(json!({
|
||||
"name": self.name,
|
||||
"discord_configs": [{
|
||||
"webhook_url": format!("{}", self.url),
|
||||
"title": "{{ template \"discord.default.title\" . }}",
|
||||
"message": "{{ template \"discord.default.message\" . }}"
|
||||
}]
|
||||
}))
|
||||
.map_err(|e| InterpretError::new(e.to_string()))?;
|
||||
Ok(receiver_block)
|
||||
}
|
||||
|
||||
fn name(&self) -> String {
|
||||
"discord-webhook".to_string()
|
||||
self.name.clone()
|
||||
}
|
||||
fn clone_box(&self) -> Box<dyn AlertReceiver<Prometheus>> {
|
||||
Box::new(self.clone())
|
||||
}
|
||||
fn as_any(&self) -> &dyn Any {
|
||||
self
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl PrometheusReceiver for DiscordWebhook {
|
||||
fn name(&self) -> String {
|
||||
self.name.clone().to_string()
|
||||
}
|
||||
async fn configure_receiver(&self) -> AlertManagerChannelConfig {
|
||||
self.get_config().await
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl AlertReceiver<KubePrometheus> for DiscordWebhook {
|
||||
fn as_alertmanager_receiver(&self) -> Result<AlertManagerReceiver, String> {
|
||||
todo!()
|
||||
}
|
||||
async fn install(&self, sender: &KubePrometheus) -> Result<Outcome, InterpretError> {
|
||||
sender.install_receiver(self).await
|
||||
}
|
||||
fn clone_box(&self) -> Box<dyn AlertReceiver<KubePrometheus>> {
|
||||
Box::new(self.clone())
|
||||
}
|
||||
fn name(&self) -> String {
|
||||
"discord-webhook".to_string()
|
||||
}
|
||||
fn as_any(&self) -> &dyn Any {
|
||||
self
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl KubePrometheusReceiver for DiscordWebhook {
|
||||
fn name(&self) -> String {
|
||||
self.name.clone().to_string()
|
||||
}
|
||||
async fn configure_receiver(&self) -> AlertManagerChannelConfig {
|
||||
self.get_config().await
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl AlertChannelConfig for DiscordWebhook {
|
||||
async fn get_config(&self) -> AlertManagerChannelConfig {
|
||||
let channel_global_config = None;
|
||||
let channel_receiver = self.alert_channel_receiver().await;
|
||||
let channel_route = self.alert_channel_route().await;
|
||||
|
||||
AlertManagerChannelConfig {
|
||||
channel_global_config,
|
||||
channel_receiver,
|
||||
channel_route,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl DiscordWebhook {
|
||||
async fn alert_channel_route(&self) -> serde_yaml::Value {
|
||||
let mut route = Mapping::new();
|
||||
route.insert(
|
||||
Value::String("receiver".to_string()),
|
||||
Value::String(self.name.clone().to_string()),
|
||||
);
|
||||
route.insert(
|
||||
Value::String("matchers".to_string()),
|
||||
Value::Sequence(vec![Value::String("alertname!=Watchdog".to_string())]),
|
||||
);
|
||||
route.insert(Value::String("continue".to_string()), Value::Bool(true));
|
||||
Value::Mapping(route)
|
||||
}
|
||||
|
||||
async fn alert_channel_receiver(&self) -> serde_yaml::Value {
|
||||
let mut receiver = Mapping::new();
|
||||
receiver.insert(
|
||||
Value::String("name".to_string()),
|
||||
Value::String(self.name.clone().to_string()),
|
||||
);
|
||||
|
||||
let mut discord_config = Mapping::new();
|
||||
discord_config.insert(
|
||||
Value::String("webhook_url".to_string()),
|
||||
Value::String(self.url.to_string()),
|
||||
);
|
||||
|
||||
receiver.insert(
|
||||
Value::String("discord_configs".to_string()),
|
||||
Value::Sequence(vec![Value::Mapping(discord_config)]),
|
||||
);
|
||||
|
||||
Value::Mapping(receiver)
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
|
||||
#[tokio::test]
|
||||
async fn discord_serialize_should_match() {
|
||||
let discord_receiver = DiscordWebhook {
|
||||
name: K8sName("test-discord".to_string()),
|
||||
url: Url::Url(url::Url::parse("https://discord.i.dont.exist.com").unwrap()),
|
||||
selectors: vec![],
|
||||
};
|
||||
|
||||
let discord_receiver_receiver =
|
||||
serde_yaml::to_string(&discord_receiver.alert_channel_receiver().await).unwrap();
|
||||
println!("receiver \n{:#}", discord_receiver_receiver);
|
||||
let discord_receiver_receiver_yaml = r#"name: test-discord
|
||||
discord_configs:
|
||||
- webhook_url: https://discord.i.dont.exist.com/
|
||||
"#
|
||||
.to_string();
|
||||
|
||||
let discord_receiver_route =
|
||||
serde_yaml::to_string(&discord_receiver.alert_channel_route().await).unwrap();
|
||||
println!("route \n{:#}", discord_receiver_route);
|
||||
let discord_receiver_route_yaml = r#"receiver: test-discord
|
||||
matchers:
|
||||
- alertname!=Watchdog
|
||||
continue: true
|
||||
"#
|
||||
.to_string();
|
||||
|
||||
assert_eq!(discord_receiver_receiver, discord_receiver_receiver_yaml);
|
||||
assert_eq!(discord_receiver_route, discord_receiver_route_yaml);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,25 +1,13 @@
|
||||
use std::any::Any;
|
||||
|
||||
use async_trait::async_trait;
|
||||
use kube::api::ObjectMeta;
|
||||
use log::debug;
|
||||
use serde::Serialize;
|
||||
use serde_json::json;
|
||||
use serde_yaml::{Mapping, Value};
|
||||
|
||||
use crate::{
|
||||
interpret::{InterpretError, Outcome},
|
||||
interpret::InterpretError,
|
||||
modules::monitoring::{
|
||||
kube_prometheus::{
|
||||
crd::{
|
||||
crd_alertmanager_config::CRDPrometheus, rhob_alertmanager_config::RHOBObservability,
|
||||
},
|
||||
prometheus::{KubePrometheus, KubePrometheusReceiver},
|
||||
types::{AlertChannelConfig, AlertManagerChannelConfig},
|
||||
},
|
||||
prometheus::prometheus::{Prometheus, PrometheusReceiver},
|
||||
kube_prometheus::KubePrometheus, okd::OpenshiftClusterAlertSender, prometheus::Prometheus,
|
||||
red_hat_cluster_observability::RedHatClusterObservability,
|
||||
},
|
||||
topology::oberservability::monitoring::{AlertManagerReceiver, AlertReceiver},
|
||||
topology::monitoring::AlertReceiver,
|
||||
};
|
||||
use harmony_types::net::Url;
|
||||
|
||||
@@ -29,279 +17,104 @@ pub struct WebhookReceiver {
|
||||
pub url: Url,
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl AlertReceiver<RHOBObservability> for WebhookReceiver {
|
||||
fn as_alertmanager_receiver(&self) -> Result<AlertManagerReceiver, String> {
|
||||
todo!()
|
||||
}
|
||||
async fn install(&self, sender: &RHOBObservability) -> Result<Outcome, InterpretError> {
|
||||
let spec = crate::modules::monitoring::kube_prometheus::crd::rhob_alertmanager_config::AlertmanagerConfigSpec {
|
||||
data: json!({
|
||||
"route": {
|
||||
"receiver": self.name,
|
||||
},
|
||||
"receivers": [
|
||||
{
|
||||
"name": self.name,
|
||||
"webhookConfigs": [
|
||||
{
|
||||
"url": self.url,
|
||||
"httpConfig": {
|
||||
"tlsConfig": {
|
||||
"insecureSkipVerify": true
|
||||
}
|
||||
}
|
||||
}
|
||||
]
|
||||
impl WebhookReceiver {
|
||||
fn build_receiver(&self) -> serde_json::Value {
|
||||
json!({
|
||||
"name": self.name,
|
||||
"webhookConfigs": [
|
||||
{
|
||||
"url": self.url,
|
||||
"httpConfig": {
|
||||
"tlsConfig": {
|
||||
"insecureSkipVerify": true
|
||||
}
|
||||
]
|
||||
}),
|
||||
};
|
||||
|
||||
let alertmanager_configs = crate::modules::monitoring::kube_prometheus::crd::rhob_alertmanager_config::AlertmanagerConfig {
|
||||
metadata: ObjectMeta {
|
||||
name: Some(self.name.clone()),
|
||||
labels: Some(std::collections::BTreeMap::from([(
|
||||
"alertmanagerConfig".to_string(),
|
||||
"enabled".to_string(),
|
||||
)])),
|
||||
namespace: Some(sender.namespace.clone()),
|
||||
..Default::default()
|
||||
},
|
||||
spec,
|
||||
};
|
||||
debug!(
|
||||
"alert manager configs: \n{:#?}",
|
||||
alertmanager_configs.clone()
|
||||
);
|
||||
|
||||
sender
|
||||
.client
|
||||
.apply(&alertmanager_configs, Some(&sender.namespace))
|
||||
.await?;
|
||||
Ok(Outcome::success(format!(
|
||||
"installed rhob-alertmanagerconfigs for {}",
|
||||
self.name
|
||||
)))
|
||||
}
|
||||
}
|
||||
]})
|
||||
}
|
||||
|
||||
fn name(&self) -> String {
|
||||
"webhook-receiver".to_string()
|
||||
}
|
||||
|
||||
fn clone_box(&self) -> Box<dyn AlertReceiver<RHOBObservability>> {
|
||||
Box::new(self.clone())
|
||||
}
|
||||
|
||||
fn as_any(&self) -> &dyn Any {
|
||||
self
|
||||
fn build_route(&self) -> serde_json::Value {
|
||||
json!({
|
||||
"name": self.name})
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl AlertReceiver<CRDPrometheus> for WebhookReceiver {
|
||||
fn as_alertmanager_receiver(&self) -> Result<AlertManagerReceiver, String> {
|
||||
todo!()
|
||||
}
|
||||
async fn install(&self, sender: &CRDPrometheus) -> Result<Outcome, InterpretError> {
|
||||
let spec = crate::modules::monitoring::kube_prometheus::crd::crd_alertmanager_config::AlertmanagerConfigSpec {
|
||||
data: json!({
|
||||
"route": {
|
||||
"receiver": self.name,
|
||||
},
|
||||
"receivers": [
|
||||
{
|
||||
"name": self.name,
|
||||
"webhookConfigs": [
|
||||
{
|
||||
"url": self.url,
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
}),
|
||||
};
|
||||
|
||||
let alertmanager_configs = crate::modules::monitoring::kube_prometheus::crd::crd_alertmanager_config::AlertmanagerConfig {
|
||||
metadata: ObjectMeta {
|
||||
name: Some(self.name.clone()),
|
||||
labels: Some(std::collections::BTreeMap::from([(
|
||||
"alertmanagerConfig".to_string(),
|
||||
"enabled".to_string(),
|
||||
)])),
|
||||
namespace: Some(sender.namespace.clone()),
|
||||
..Default::default()
|
||||
},
|
||||
spec,
|
||||
};
|
||||
debug!(
|
||||
"alert manager configs: \n{:#?}",
|
||||
alertmanager_configs.clone()
|
||||
);
|
||||
|
||||
sender
|
||||
.client
|
||||
.apply(&alertmanager_configs, Some(&sender.namespace))
|
||||
.await?;
|
||||
Ok(Outcome::success(format!(
|
||||
"installed crd-alertmanagerconfigs for {}",
|
||||
self.name
|
||||
)))
|
||||
impl AlertReceiver<OpenshiftClusterAlertSender> for WebhookReceiver {
|
||||
fn build_receiver(&self) -> Result<serde_yaml::Value, InterpretError> {
|
||||
let receiver = self.build_receiver();
|
||||
serde_yaml::to_value(receiver).map_err(|e| InterpretError::new(e.to_string()))
|
||||
}
|
||||
|
||||
fn name(&self) -> String {
|
||||
"webhook-receiver".to_string()
|
||||
fn build_route(&self) -> Result<serde_yaml::Value, InterpretError> {
|
||||
let route = self.build_route();
|
||||
serde_yaml::to_value(route).map_err(|e| InterpretError::new(e.to_string()))
|
||||
}
|
||||
|
||||
fn clone_box(&self) -> Box<dyn AlertReceiver<CRDPrometheus>> {
|
||||
Box::new(self.clone())
|
||||
}
|
||||
|
||||
fn as_any(&self) -> &dyn Any {
|
||||
self
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl AlertReceiver<Prometheus> for WebhookReceiver {
|
||||
fn as_alertmanager_receiver(&self) -> Result<AlertManagerReceiver, String> {
|
||||
todo!()
|
||||
}
|
||||
async fn install(&self, sender: &Prometheus) -> Result<Outcome, InterpretError> {
|
||||
sender.install_receiver(self).await
|
||||
}
|
||||
fn name(&self) -> String {
|
||||
"webhook-receiver".to_string()
|
||||
}
|
||||
fn clone_box(&self) -> Box<dyn AlertReceiver<Prometheus>> {
|
||||
Box::new(self.clone())
|
||||
}
|
||||
fn as_any(&self) -> &dyn Any {
|
||||
self
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl PrometheusReceiver for WebhookReceiver {
|
||||
fn name(&self) -> String {
|
||||
self.name.clone()
|
||||
}
|
||||
async fn configure_receiver(&self) -> AlertManagerChannelConfig {
|
||||
self.get_config().await
|
||||
|
||||
fn clone_box(&self) -> Box<dyn AlertReceiver<OpenshiftClusterAlertSender>> {
|
||||
Box::new(self.clone())
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl AlertReceiver<KubePrometheus> for WebhookReceiver {
|
||||
fn as_alertmanager_receiver(&self) -> Result<AlertManagerReceiver, String> {
|
||||
todo!()
|
||||
impl AlertReceiver<RedHatClusterObservability> for WebhookReceiver {
|
||||
fn build_receiver(&self) -> Result<serde_yaml::Value, InterpretError> {
|
||||
let receiver = self.build_receiver();
|
||||
serde_yaml::to_value(receiver).map_err(|e| InterpretError::new(e.to_string()))
|
||||
}
|
||||
async fn install(&self, sender: &KubePrometheus) -> Result<Outcome, InterpretError> {
|
||||
sender.install_receiver(self).await
|
||||
|
||||
fn build_route(&self) -> Result<serde_yaml::Value, InterpretError> {
|
||||
let route = self.build_route();
|
||||
serde_yaml::to_value(route).map_err(|e| InterpretError::new(e.to_string()))
|
||||
}
|
||||
|
||||
fn name(&self) -> String {
|
||||
"webhook-receiver".to_string()
|
||||
self.name.clone()
|
||||
}
|
||||
|
||||
fn clone_box(&self) -> Box<dyn AlertReceiver<RedHatClusterObservability>> {
|
||||
Box::new(self.clone())
|
||||
}
|
||||
}
|
||||
|
||||
impl AlertReceiver<KubePrometheus> for WebhookReceiver {
|
||||
fn build_receiver(&self) -> Result<serde_yaml::Value, InterpretError> {
|
||||
let receiver = self.build_receiver();
|
||||
serde_yaml::to_value(receiver).map_err(|e| InterpretError::new(e.to_string()))
|
||||
}
|
||||
|
||||
fn build_route(&self) -> Result<serde_yaml::Value, InterpretError> {
|
||||
let route = self.build_route();
|
||||
serde_yaml::to_value(route).map_err(|e| InterpretError::new(e.to_string()))
|
||||
}
|
||||
|
||||
fn name(&self) -> String {
|
||||
self.name.clone()
|
||||
}
|
||||
|
||||
fn clone_box(&self) -> Box<dyn AlertReceiver<KubePrometheus>> {
|
||||
Box::new(self.clone())
|
||||
}
|
||||
fn as_any(&self) -> &dyn Any {
|
||||
self
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl KubePrometheusReceiver for WebhookReceiver {
|
||||
impl AlertReceiver<Prometheus> for WebhookReceiver {
|
||||
fn build_receiver(&self) -> Result<serde_yaml::Value, InterpretError> {
|
||||
let receiver = self.build_receiver();
|
||||
serde_yaml::to_value(receiver).map_err(|e| InterpretError::new(e.to_string()))
|
||||
}
|
||||
|
||||
fn build_route(&self) -> Result<serde_yaml::Value, InterpretError> {
|
||||
let route = self.build_route();
|
||||
serde_yaml::to_value(route).map_err(|e| InterpretError::new(e.to_string()))
|
||||
}
|
||||
|
||||
fn name(&self) -> String {
|
||||
self.name.clone()
|
||||
}
|
||||
async fn configure_receiver(&self) -> AlertManagerChannelConfig {
|
||||
self.get_config().await
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl AlertChannelConfig for WebhookReceiver {
|
||||
async fn get_config(&self) -> AlertManagerChannelConfig {
|
||||
let channel_global_config = None;
|
||||
let channel_receiver = self.alert_channel_receiver().await;
|
||||
let channel_route = self.alert_channel_route().await;
|
||||
|
||||
AlertManagerChannelConfig {
|
||||
channel_global_config,
|
||||
channel_receiver,
|
||||
channel_route,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl WebhookReceiver {
|
||||
async fn alert_channel_route(&self) -> serde_yaml::Value {
|
||||
let mut route = Mapping::new();
|
||||
route.insert(
|
||||
Value::String("receiver".to_string()),
|
||||
Value::String(self.name.clone()),
|
||||
);
|
||||
route.insert(
|
||||
Value::String("matchers".to_string()),
|
||||
Value::Sequence(vec![Value::String("alertname!=Watchdog".to_string())]),
|
||||
);
|
||||
route.insert(Value::String("continue".to_string()), Value::Bool(true));
|
||||
Value::Mapping(route)
|
||||
}
|
||||
|
||||
async fn alert_channel_receiver(&self) -> serde_yaml::Value {
|
||||
let mut receiver = Mapping::new();
|
||||
receiver.insert(
|
||||
Value::String("name".to_string()),
|
||||
Value::String(self.name.clone()),
|
||||
);
|
||||
|
||||
let mut webhook_config = Mapping::new();
|
||||
webhook_config.insert(
|
||||
Value::String("url".to_string()),
|
||||
Value::String(self.url.to_string()),
|
||||
);
|
||||
|
||||
receiver.insert(
|
||||
Value::String("webhook_configs".to_string()),
|
||||
Value::Sequence(vec![Value::Mapping(webhook_config)]),
|
||||
);
|
||||
|
||||
Value::Mapping(receiver)
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
#[tokio::test]
|
||||
async fn webhook_serialize_should_match() {
|
||||
let webhook_receiver = WebhookReceiver {
|
||||
name: "test-webhook".to_string(),
|
||||
url: Url::Url(url::Url::parse("https://webhook.i.dont.exist.com").unwrap()),
|
||||
};
|
||||
|
||||
let webhook_receiver_receiver =
|
||||
serde_yaml::to_string(&webhook_receiver.alert_channel_receiver().await).unwrap();
|
||||
println!("receiver \n{:#}", webhook_receiver_receiver);
|
||||
let webhook_receiver_receiver_yaml = r#"name: test-webhook
|
||||
webhook_configs:
|
||||
- url: https://webhook.i.dont.exist.com/
|
||||
"#
|
||||
.to_string();
|
||||
|
||||
let webhook_receiver_route =
|
||||
serde_yaml::to_string(&webhook_receiver.alert_channel_route().await).unwrap();
|
||||
println!("route \n{:#}", webhook_receiver_route);
|
||||
let webhook_receiver_route_yaml = r#"receiver: test-webhook
|
||||
matchers:
|
||||
- alertname!=Watchdog
|
||||
continue: true
|
||||
"#
|
||||
.to_string();
|
||||
|
||||
assert_eq!(webhook_receiver_receiver, webhook_receiver_receiver_yaml);
|
||||
assert_eq!(webhook_receiver_route, webhook_receiver_route_yaml);
|
||||
|
||||
fn clone_box(&self) -> Box<dyn AlertReceiver<Prometheus>> {
|
||||
Box::new(self.clone())
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1 +1,2 @@
|
||||
pub mod dell_server;
|
||||
pub mod opnsense;
|
||||
@@ -0,0 +1,15 @@
|
||||
use crate::modules::monitoring::alert_rule::prometheus_alert_rule::PrometheusAlertRule;
|
||||
|
||||
pub fn high_http_error_rate() -> PrometheusAlertRule {
|
||||
let expression = r#"(
|
||||
sum(rate(http_requests_total{status=~"5.."}[5m])) by (job, route, service)
|
||||
/
|
||||
sum(rate(http_requests_total[5m])) by (job, route, service)
|
||||
) > 0.05 and sum(rate(http_requests_total[5m])) by (job, route, service) > 10"#;
|
||||
|
||||
PrometheusAlertRule::new("HighApplicationErrorRate", expression)
|
||||
.for_duration("10m")
|
||||
.label("severity", "warning")
|
||||
.annotation("summary", "High HTTP error rate on {{ $labels.job }}")
|
||||
.annotation("description", "Job {{ $labels.job }} (route {{ $labels.route }}) has an error rate > 5% over the last 10m.")
|
||||
}
|
||||
@@ -1 +1,2 @@
|
||||
pub mod alerts;
|
||||
pub mod prometheus_alert_rule;
|
||||
|
||||
@@ -1,79 +1,13 @@
|
||||
use std::collections::{BTreeMap, HashMap};
|
||||
use std::collections::HashMap;
|
||||
|
||||
use async_trait::async_trait;
|
||||
use serde::Serialize;
|
||||
|
||||
use crate::{
|
||||
interpret::{InterpretError, Outcome},
|
||||
modules::monitoring::{
|
||||
kube_prometheus::{
|
||||
prometheus::{KubePrometheus, KubePrometheusRule},
|
||||
types::{AlertGroup, AlertManagerAdditionalPromRules},
|
||||
},
|
||||
prometheus::prometheus::{Prometheus, PrometheusRule},
|
||||
},
|
||||
topology::oberservability::monitoring::AlertRule,
|
||||
interpret::InterpretError,
|
||||
modules::monitoring::{kube_prometheus::KubePrometheus, okd::OpenshiftClusterAlertSender},
|
||||
topology::monitoring::AlertRule,
|
||||
};
|
||||
|
||||
#[async_trait]
|
||||
impl AlertRule<KubePrometheus> for AlertManagerRuleGroup {
|
||||
async fn install(&self, sender: &KubePrometheus) -> Result<Outcome, InterpretError> {
|
||||
sender.install_rule(self).await
|
||||
}
|
||||
fn clone_box(&self) -> Box<dyn AlertRule<KubePrometheus>> {
|
||||
Box::new(self.clone())
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl AlertRule<Prometheus> for AlertManagerRuleGroup {
|
||||
async fn install(&self, sender: &Prometheus) -> Result<Outcome, InterpretError> {
|
||||
sender.install_rule(self).await
|
||||
}
|
||||
fn clone_box(&self) -> Box<dyn AlertRule<Prometheus>> {
|
||||
Box::new(self.clone())
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl PrometheusRule for AlertManagerRuleGroup {
|
||||
fn name(&self) -> String {
|
||||
self.name.clone()
|
||||
}
|
||||
async fn configure_rule(&self) -> AlertManagerAdditionalPromRules {
|
||||
let mut additional_prom_rules = BTreeMap::new();
|
||||
|
||||
additional_prom_rules.insert(
|
||||
self.name.clone(),
|
||||
AlertGroup {
|
||||
groups: vec![self.clone()],
|
||||
},
|
||||
);
|
||||
AlertManagerAdditionalPromRules {
|
||||
rules: additional_prom_rules,
|
||||
}
|
||||
}
|
||||
}
|
||||
#[async_trait]
|
||||
impl KubePrometheusRule for AlertManagerRuleGroup {
|
||||
fn name(&self) -> String {
|
||||
self.name.clone()
|
||||
}
|
||||
async fn configure_rule(&self) -> AlertManagerAdditionalPromRules {
|
||||
let mut additional_prom_rules = BTreeMap::new();
|
||||
|
||||
additional_prom_rules.insert(
|
||||
self.name.clone(),
|
||||
AlertGroup {
|
||||
groups: vec![self.clone()],
|
||||
},
|
||||
);
|
||||
AlertManagerAdditionalPromRules {
|
||||
rules: additional_prom_rules,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl AlertManagerRuleGroup {
|
||||
pub fn new(name: &str, rules: Vec<PrometheusAlertRule>) -> AlertManagerRuleGroup {
|
||||
AlertManagerRuleGroup {
|
||||
@@ -129,3 +63,55 @@ impl PrometheusAlertRule {
|
||||
self
|
||||
}
|
||||
}
|
||||
|
||||
impl AlertRule<OpenshiftClusterAlertSender> for AlertManagerRuleGroup {
|
||||
fn build_rule(&self) -> Result<serde_json::Value, InterpretError> {
|
||||
let name = self.name.clone();
|
||||
let mut rules: Vec<crate::modules::monitoring::okd::crd::alerting_rules::Rule> = vec![];
|
||||
for rule in self.rules.clone() {
|
||||
rules.push(rule.into())
|
||||
}
|
||||
|
||||
let rule_groups =
|
||||
vec![crate::modules::monitoring::okd::crd::alerting_rules::RuleGroup { name, rules }];
|
||||
|
||||
Ok(serde_json::to_value(rule_groups).map_err(|e| InterpretError::new(e.to_string()))?)
|
||||
}
|
||||
|
||||
fn name(&self) -> String {
|
||||
self.name.clone()
|
||||
}
|
||||
|
||||
fn clone_box(&self) -> Box<dyn AlertRule<OpenshiftClusterAlertSender>> {
|
||||
Box::new(self.clone())
|
||||
}
|
||||
}
|
||||
|
||||
impl AlertRule<KubePrometheus> for AlertManagerRuleGroup {
|
||||
fn build_rule(&self) -> Result<serde_json::Value, InterpretError> {
|
||||
let name = self.name.clone();
|
||||
let mut rules: Vec<
|
||||
crate::modules::monitoring::kube_prometheus::crd::crd_prometheus_rules::Rule,
|
||||
> = vec![];
|
||||
for rule in self.rules.clone() {
|
||||
rules.push(rule.into())
|
||||
}
|
||||
|
||||
let rule_groups = vec![
|
||||
crate::modules::monitoring::kube_prometheus::crd::crd_prometheus_rules::RuleGroup {
|
||||
name,
|
||||
rules,
|
||||
},
|
||||
];
|
||||
|
||||
Ok(serde_json::to_value(rule_groups).map_err(|e| InterpretError::new(e.to_string()))?)
|
||||
}
|
||||
|
||||
fn name(&self) -> String {
|
||||
self.name.clone()
|
||||
}
|
||||
|
||||
fn clone_box(&self) -> Box<dyn AlertRule<KubePrometheus>> {
|
||||
Box::new(self.clone())
|
||||
}
|
||||
}
|
||||
|
||||
@@ -5,32 +5,26 @@ use serde::Serialize;
|
||||
|
||||
use crate::{
|
||||
interpret::Interpret,
|
||||
modules::{
|
||||
application::Application,
|
||||
monitoring::{
|
||||
grafana::grafana::Grafana, kube_prometheus::crd::crd_alertmanager_config::CRDPrometheus,
|
||||
},
|
||||
prometheus::prometheus::PrometheusMonitoring,
|
||||
},
|
||||
modules::{application::Application, monitoring::prometheus::Prometheus},
|
||||
score::Score,
|
||||
topology::{
|
||||
K8sclient, Topology,
|
||||
oberservability::monitoring::{AlertReceiver, AlertingInterpret, ScrapeTarget},
|
||||
monitoring::{AlertReceiver, AlertingInterpret, Observability, ScrapeTarget},
|
||||
},
|
||||
};
|
||||
|
||||
#[derive(Debug, Clone, Serialize)]
|
||||
pub struct ApplicationMonitoringScore {
|
||||
pub sender: CRDPrometheus,
|
||||
pub sender: Prometheus,
|
||||
pub application: Arc<dyn Application>,
|
||||
pub receivers: Vec<Box<dyn AlertReceiver<CRDPrometheus>>>,
|
||||
pub receivers: Vec<Box<dyn AlertReceiver<Prometheus>>>,
|
||||
}
|
||||
|
||||
impl<T: Topology + PrometheusMonitoring<CRDPrometheus> + K8sclient + Grafana> Score<T>
|
||||
for ApplicationMonitoringScore
|
||||
{
|
||||
impl<T: Topology + Observability<Prometheus> + K8sclient> Score<T> for ApplicationMonitoringScore {
|
||||
fn create_interpret(&self) -> Box<dyn Interpret<T>> {
|
||||
debug!("creating alerting interpret");
|
||||
//TODO will need to use k8sclient to apply service monitors or find a way to pass
|
||||
//them to the AlertingInterpret potentially via Sender Prometheus
|
||||
Box::new(AlertingInterpret {
|
||||
sender: self.sender.clone(),
|
||||
receivers: self.receivers.clone(),
|
||||
|
||||
@@ -9,28 +9,27 @@ use crate::{
|
||||
inventory::Inventory,
|
||||
modules::{
|
||||
application::Application,
|
||||
monitoring::kube_prometheus::crd::{
|
||||
crd_alertmanager_config::CRDPrometheus, rhob_alertmanager_config::RHOBObservability,
|
||||
},
|
||||
prometheus::prometheus::PrometheusMonitoring,
|
||||
monitoring::red_hat_cluster_observability::RedHatClusterObservability,
|
||||
},
|
||||
score::Score,
|
||||
topology::{PreparationOutcome, Topology, oberservability::monitoring::AlertReceiver},
|
||||
topology::{
|
||||
Topology,
|
||||
monitoring::{AlertReceiver, AlertingInterpret, Observability},
|
||||
},
|
||||
};
|
||||
use harmony_types::id::Id;
|
||||
|
||||
#[derive(Debug, Clone, Serialize)]
|
||||
pub struct ApplicationRHOBMonitoringScore {
|
||||
pub sender: RHOBObservability,
|
||||
pub struct ApplicationRedHatClusterMonitoringScore {
|
||||
pub sender: RedHatClusterObservability,
|
||||
pub application: Arc<dyn Application>,
|
||||
pub receivers: Vec<Box<dyn AlertReceiver<RHOBObservability>>>,
|
||||
pub receivers: Vec<Box<dyn AlertReceiver<RedHatClusterObservability>>>,
|
||||
}
|
||||
|
||||
impl<T: Topology + PrometheusMonitoring<RHOBObservability>> Score<T>
|
||||
for ApplicationRHOBMonitoringScore
|
||||
impl<T: Topology + Observability<RedHatClusterObservability>> Score<T>
|
||||
for ApplicationRedHatClusterMonitoringScore
|
||||
{
|
||||
fn create_interpret(&self) -> Box<dyn Interpret<T>> {
|
||||
Box::new(ApplicationRHOBMonitoringInterpret {
|
||||
Box::new(ApplicationRedHatClusterMonitoringInterpret {
|
||||
score: self.clone(),
|
||||
})
|
||||
}
|
||||
@@ -44,38 +43,28 @@ impl<T: Topology + PrometheusMonitoring<RHOBObservability>> Score<T>
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct ApplicationRHOBMonitoringInterpret {
|
||||
score: ApplicationRHOBMonitoringScore,
|
||||
pub struct ApplicationRedHatClusterMonitoringInterpret {
|
||||
score: ApplicationRedHatClusterMonitoringScore,
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl<T: Topology + PrometheusMonitoring<RHOBObservability>> Interpret<T>
|
||||
for ApplicationRHOBMonitoringInterpret
|
||||
impl<T: Topology + Observability<RedHatClusterObservability>> Interpret<T>
|
||||
for ApplicationRedHatClusterMonitoringInterpret
|
||||
{
|
||||
async fn execute(
|
||||
&self,
|
||||
inventory: &Inventory,
|
||||
topology: &T,
|
||||
) -> Result<Outcome, InterpretError> {
|
||||
let result = topology
|
||||
.install_prometheus(
|
||||
&self.score.sender,
|
||||
inventory,
|
||||
Some(self.score.receivers.clone()),
|
||||
)
|
||||
.await;
|
||||
|
||||
match result {
|
||||
Ok(outcome) => match outcome {
|
||||
PreparationOutcome::Success { details: _ } => {
|
||||
Ok(Outcome::success("Prometheus installed".into()))
|
||||
}
|
||||
PreparationOutcome::Noop => {
|
||||
Ok(Outcome::noop("Prometheus installation skipped".into()))
|
||||
}
|
||||
},
|
||||
Err(err) => Err(InterpretError::from(err)),
|
||||
}
|
||||
//TODO will need to use k8sclient to apply crd ServiceMonitor or find a way to pass
|
||||
//them to the AlertingInterpret potentially via Sender RedHatClusterObservability
|
||||
let alerting_interpret = AlertingInterpret {
|
||||
sender: self.score.sender.clone(),
|
||||
receivers: self.score.receivers.clone(),
|
||||
rules: vec![],
|
||||
scrape_targets: None,
|
||||
};
|
||||
alerting_interpret.execute(inventory, topology).await
|
||||
}
|
||||
|
||||
fn get_name(&self) -> InterpretName {
|
||||
|
||||
@@ -1,17 +1,48 @@
|
||||
use async_trait::async_trait;
|
||||
use k8s_openapi::Resource;
|
||||
use serde::Serialize;
|
||||
|
||||
use crate::{
|
||||
inventory::Inventory,
|
||||
topology::{PreparationError, PreparationOutcome},
|
||||
topology::{
|
||||
PreparationError, PreparationOutcome,
|
||||
monitoring::{AlertReceiver, AlertRule, AlertSender, ScrapeTarget},
|
||||
},
|
||||
};
|
||||
|
||||
#[async_trait]
|
||||
pub trait Grafana {
|
||||
async fn ensure_grafana_operator(
|
||||
&self,
|
||||
inventory: &Inventory,
|
||||
) -> Result<PreparationOutcome, PreparationError>;
|
||||
|
||||
async fn install_grafana(&self) -> Result<PreparationOutcome, PreparationError>;
|
||||
#[derive(Debug, Clone, Serialize)]
|
||||
pub struct Grafana {
|
||||
pub namespace: String,
|
||||
}
|
||||
|
||||
impl AlertSender for Grafana {
|
||||
fn name(&self) -> String {
|
||||
"grafana".to_string()
|
||||
}
|
||||
}
|
||||
|
||||
impl Serialize for Box<dyn AlertReceiver<Grafana>> {
|
||||
fn serialize<S>(&self, _serializer: S) -> Result<S::Ok, S::Error>
|
||||
where
|
||||
S: serde::Serializer,
|
||||
{
|
||||
todo!()
|
||||
}
|
||||
}
|
||||
|
||||
impl Serialize for Box<dyn AlertRule<Grafana>> {
|
||||
fn serialize<S>(&self, _serializer: S) -> Result<S::Ok, S::Error>
|
||||
where
|
||||
S: serde::Serializer,
|
||||
{
|
||||
todo!()
|
||||
}
|
||||
}
|
||||
|
||||
impl Serialize for Box<dyn ScrapeTarget<Grafana>> {
|
||||
fn serialize<S>(&self, _serializer: S) -> Result<S::Ok, S::Error>
|
||||
where
|
||||
S: serde::Serializer,
|
||||
{
|
||||
todo!()
|
||||
}
|
||||
}
|
||||
|
||||
@@ -0,0 +1,32 @@
|
||||
use serde::Serialize;
|
||||
|
||||
use crate::{
|
||||
modules::monitoring::grafana::grafana::Grafana,
|
||||
score::Score,
|
||||
topology::{
|
||||
HelmCommand, Topology,
|
||||
monitoring::{AlertReceiver, AlertRule, AlertingInterpret, Observability, ScrapeTarget},
|
||||
},
|
||||
};
|
||||
|
||||
#[derive(Clone, Debug, Serialize)]
|
||||
pub struct GrafanaAlertingScore {
|
||||
pub receivers: Vec<Box<dyn AlertReceiver<Grafana>>>,
|
||||
pub rules: Vec<Box<dyn AlertRule<Grafana>>>,
|
||||
pub scrape_targets: Option<Vec<Box<dyn ScrapeTarget<Grafana>>>>,
|
||||
pub sender: Grafana,
|
||||
}
|
||||
|
||||
impl<T: Topology + Observability<Grafana>> Score<T> for GrafanaAlertingScore {
|
||||
fn create_interpret(&self) -> Box<dyn crate::interpret::Interpret<T>> {
|
||||
Box::new(AlertingInterpret {
|
||||
sender: self.sender.clone(),
|
||||
receivers: self.receivers.clone(),
|
||||
rules: self.rules.clone(),
|
||||
scrape_targets: self.scrape_targets.clone(),
|
||||
})
|
||||
}
|
||||
fn name(&self) -> String {
|
||||
"HelmPrometheusAlertingScore".to_string()
|
||||
}
|
||||
}
|
||||
@@ -1,28 +0,0 @@
|
||||
use harmony_macros::hurl;
|
||||
use non_blank_string_rs::NonBlankString;
|
||||
use std::{collections::HashMap, str::FromStr};
|
||||
|
||||
use crate::modules::helm::chart::{HelmChartScore, HelmRepository};
|
||||
|
||||
pub fn grafana_helm_chart_score(ns: &str, namespace_scope: bool) -> HelmChartScore {
|
||||
let mut values_overrides = HashMap::new();
|
||||
values_overrides.insert(
|
||||
NonBlankString::from_str("namespaceScope").unwrap(),
|
||||
namespace_scope.to_string(),
|
||||
);
|
||||
HelmChartScore {
|
||||
namespace: Some(NonBlankString::from_str(ns).unwrap()),
|
||||
release_name: NonBlankString::from_str("grafana-operator").unwrap(),
|
||||
chart_name: NonBlankString::from_str("grafana/grafana-operator").unwrap(),
|
||||
chart_version: None,
|
||||
values_overrides: Some(values_overrides),
|
||||
values_yaml: None,
|
||||
create_namespace: true,
|
||||
install_only: true,
|
||||
repository: Some(HelmRepository::new(
|
||||
"grafana".to_string(),
|
||||
hurl!("https://grafana.github.io/helm-charts"),
|
||||
true,
|
||||
)),
|
||||
}
|
||||
}
|
||||
@@ -1 +0,0 @@
|
||||
pub mod helm_grafana;
|
||||
@@ -4,7 +4,7 @@ use kube::CustomResource;
|
||||
use schemars::JsonSchema;
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
use super::crd_prometheuses::LabelSelector;
|
||||
use crate::modules::monitoring::kube_prometheus::crd::crd_prometheuses::LabelSelector;
|
||||
|
||||
#[derive(CustomResource, Serialize, Deserialize, Debug, Clone, JsonSchema)]
|
||||
#[kube(
|
||||
3
harmony/src/modules/monitoring/grafana/k8s/crd/mod.rs
Normal file
3
harmony/src/modules/monitoring/grafana/k8s/crd/mod.rs
Normal file
@@ -0,0 +1,3 @@
|
||||
pub mod crd_grafana;
|
||||
pub mod grafana_default_dashboard;
|
||||
pub mod rhob_grafana;
|
||||
@@ -4,7 +4,7 @@ use kube::CustomResource;
|
||||
use schemars::JsonSchema;
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
use crate::modules::monitoring::kube_prometheus::crd::rhob_prometheuses::LabelSelector;
|
||||
use crate::modules::monitoring::red_hat_cluster_observability::crd::rhob_prometheuses::LabelSelector;
|
||||
|
||||
#[derive(CustomResource, Serialize, Deserialize, Debug, Clone, JsonSchema)]
|
||||
#[kube(
|
||||
1
harmony/src/modules/monitoring/grafana/k8s/helm/mod.rs
Normal file
1
harmony/src/modules/monitoring/grafana/k8s/helm/mod.rs
Normal file
@@ -0,0 +1 @@
|
||||
pub mod grafana_operator;
|
||||
7
harmony/src/modules/monitoring/grafana/k8s/mod.rs
Normal file
7
harmony/src/modules/monitoring/grafana/k8s/mod.rs
Normal file
@@ -0,0 +1,7 @@
|
||||
pub mod crd;
|
||||
pub mod helm;
|
||||
pub mod score_ensure_grafana_ready;
|
||||
pub mod score_grafana_alert_receiver;
|
||||
pub mod score_grafana_datasource;
|
||||
pub mod score_grafana_rule;
|
||||
pub mod score_install_grafana;
|
||||
@@ -0,0 +1,54 @@
|
||||
use serde::Serialize;
|
||||
|
||||
use crate::{
|
||||
interpret::Interpret,
|
||||
modules::monitoring::grafana::grafana::Grafana,
|
||||
score::Score,
|
||||
topology::{K8sclient, Topology},
|
||||
};
|
||||
|
||||
#[derive(Debug, Clone, Serialize)]
|
||||
pub struct GrafanaK8sEnsureReadyScore {
|
||||
pub sender: Grafana,
|
||||
}
|
||||
|
||||
impl<T: Topology + K8sclient> Score<T> for GrafanaK8sEnsureReadyScore {
|
||||
fn name(&self) -> String {
|
||||
"GrafanaK8sEnsureReadyScore".to_string()
|
||||
}
|
||||
|
||||
fn create_interpret(&self) -> Box<dyn Interpret<T>> {
|
||||
todo!()
|
||||
}
|
||||
}
|
||||
// async fn ensure_ready(
|
||||
// &self,
|
||||
// inventory: &Inventory,
|
||||
// ) -> Result<PreparationOutcome, PreparationError> {
|
||||
// debug!("ensure grafana operator");
|
||||
// let client = self.k8s_client().await.unwrap();
|
||||
// let grafana_gvk = GroupVersionKind {
|
||||
// group: "grafana.integreatly.org".to_string(),
|
||||
// version: "v1beta1".to_string(),
|
||||
// kind: "Grafana".to_string(),
|
||||
// };
|
||||
// let name = "grafanas.grafana.integreatly.org";
|
||||
// let ns = "grafana";
|
||||
//
|
||||
// let grafana_crd = client
|
||||
// .get_resource_json_value(name, Some(ns), &grafana_gvk)
|
||||
// .await;
|
||||
// match grafana_crd {
|
||||
// Ok(_) => {
|
||||
// return Ok(PreparationOutcome::Success {
|
||||
// details: "Found grafana CRDs in cluster".to_string(),
|
||||
// });
|
||||
// }
|
||||
//
|
||||
// Err(_) => {
|
||||
// return self
|
||||
// .install_grafana_operator(inventory, Some("grafana"))
|
||||
// .await;
|
||||
// }
|
||||
// };
|
||||
// }
|
||||
@@ -0,0 +1,24 @@
|
||||
use serde::Serialize;
|
||||
|
||||
use crate::{
|
||||
interpret::Interpret,
|
||||
modules::monitoring::grafana::grafana::Grafana,
|
||||
score::Score,
|
||||
topology::{K8sclient, Topology, monitoring::AlertReceiver},
|
||||
};
|
||||
|
||||
#[derive(Debug, Clone, Serialize)]
|
||||
pub struct GrafanaK8sReceiverScore {
|
||||
pub sender: Grafana,
|
||||
pub receiver: Box<dyn AlertReceiver<Grafana>>,
|
||||
}
|
||||
|
||||
impl<T: Topology + K8sclient> Score<T> for GrafanaK8sReceiverScore {
|
||||
fn name(&self) -> String {
|
||||
"GrafanaK8sReceiverScore".to_string()
|
||||
}
|
||||
|
||||
fn create_interpret(&self) -> Box<dyn Interpret<T>> {
|
||||
todo!()
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,83 @@
|
||||
use serde::Serialize;
|
||||
|
||||
use crate::{
|
||||
interpret::Interpret,
|
||||
modules::monitoring::grafana::grafana::Grafana,
|
||||
score::Score,
|
||||
topology::{K8sclient, Topology, monitoring::ScrapeTarget},
|
||||
};
|
||||
|
||||
#[derive(Debug, Clone, Serialize)]
|
||||
pub struct GrafanaK8sDatasourceScore {
|
||||
pub sender: Grafana,
|
||||
pub scrape_target: Box<dyn ScrapeTarget<Grafana>>,
|
||||
}
|
||||
|
||||
impl<T: Topology + K8sclient> Score<T> for GrafanaK8sDatasourceScore {
|
||||
fn name(&self) -> String {
|
||||
"GrafanaK8sDatasourceScore".to_string()
|
||||
}
|
||||
|
||||
fn create_interpret(&self) -> Box<dyn Interpret<T>> {
|
||||
todo!()
|
||||
}
|
||||
}
|
||||
|
||||
// fn extract_and_normalize_token(&self, secret: &DynamicObject) -> Option<String> {
|
||||
// let token_b64 = secret
|
||||
// .data
|
||||
// .get("token")
|
||||
// .or_else(|| secret.data.get("data").and_then(|d| d.get("token")))
|
||||
// .and_then(|v| v.as_str())?;
|
||||
//
|
||||
// let bytes = general_purpose::STANDARD.decode(token_b64).ok()?;
|
||||
//
|
||||
// let s = String::from_utf8(bytes).ok()?;
|
||||
//
|
||||
// let cleaned = s
|
||||
// .trim_matches(|c: char| c.is_whitespace() || c == '\0')
|
||||
// .to_string();
|
||||
// Some(cleaned)
|
||||
// }
|
||||
// fn build_grafana_datasource(
|
||||
// &self,
|
||||
// name: &str,
|
||||
// ns: &str,
|
||||
// label_selector: &LabelSelector,
|
||||
// url: &str,
|
||||
// token: &str,
|
||||
// ) -> GrafanaDatasource {
|
||||
// let mut json_data = BTreeMap::new();
|
||||
// json_data.insert("timeInterval".to_string(), "5s".to_string());
|
||||
//
|
||||
// GrafanaDatasource {
|
||||
// metadata: ObjectMeta {
|
||||
// name: Some(name.to_string()),
|
||||
// namespace: Some(ns.to_string()),
|
||||
// ..Default::default()
|
||||
// },
|
||||
// spec: GrafanaDatasourceSpec {
|
||||
// instance_selector: label_selector.clone(),
|
||||
// allow_cross_namespace_import: Some(true),
|
||||
// values_from: None,
|
||||
// datasource: GrafanaDatasourceConfig {
|
||||
// access: "proxy".to_string(),
|
||||
// name: name.to_string(),
|
||||
// rype: "prometheus".to_string(),
|
||||
// url: url.to_string(),
|
||||
// database: None,
|
||||
// json_data: Some(GrafanaDatasourceJsonData {
|
||||
// time_interval: Some("60s".to_string()),
|
||||
// http_header_name1: Some("Authorization".to_string()),
|
||||
// tls_skip_verify: Some(true),
|
||||
// oauth_pass_thru: Some(true),
|
||||
// }),
|
||||
// secure_json_data: Some(GrafanaDatasourceSecureJsonData {
|
||||
// http_header_value1: Some(format!("Bearer {token}")),
|
||||
// }),
|
||||
// is_default: Some(false),
|
||||
// editable: Some(true),
|
||||
// },
|
||||
// },
|
||||
// }
|
||||
// }
|
||||
@@ -0,0 +1,67 @@
|
||||
use serde::Serialize;
|
||||
|
||||
use crate::{
|
||||
interpret::Interpret,
|
||||
modules::monitoring::grafana::grafana::Grafana,
|
||||
score::Score,
|
||||
topology::{K8sclient, Topology, monitoring::AlertRule},
|
||||
};
|
||||
|
||||
#[derive(Debug, Clone, Serialize)]
|
||||
pub struct GrafanaK8sRuleScore {
|
||||
pub sender: Grafana,
|
||||
pub rule: Box<dyn AlertRule<Grafana>>,
|
||||
}
|
||||
|
||||
impl<T: Topology + K8sclient> Score<T> for GrafanaK8sRuleScore {
|
||||
fn name(&self) -> String {
|
||||
"GrafanaK8sRuleScore".to_string()
|
||||
}
|
||||
|
||||
fn create_interpret(&self) -> Box<dyn Interpret<T>> {
|
||||
todo!()
|
||||
}
|
||||
}
|
||||
|
||||
// kind: Secret
|
||||
// apiVersion: v1
|
||||
// metadata:
|
||||
// name: credentials
|
||||
// namespace: grafana
|
||||
// stringData:
|
||||
// PROMETHEUS_USERNAME: root
|
||||
// PROMETHEUS_PASSWORD: secret
|
||||
// type: Opaque
|
||||
// ---
|
||||
// apiVersion: grafana.integreatly.org/v1beta1
|
||||
// kind: GrafanaDatasource
|
||||
// metadata:
|
||||
// name: grafanadatasource-sample
|
||||
// spec:
|
||||
// valuesFrom:
|
||||
// - targetPath: "basicAuthUser"
|
||||
// valueFrom:
|
||||
// secretKeyRef:
|
||||
// name: "credentials"
|
||||
// key: "PROMETHEUS_USERNAME"
|
||||
// - targetPath: "secureJsonData.basicAuthPassword"
|
||||
// valueFrom:
|
||||
// secretKeyRef:
|
||||
// name: "credentials"
|
||||
// key: "PROMETHEUS_PASSWORD"
|
||||
// instanceSelector:
|
||||
// matchLabels:
|
||||
// dashboards: "grafana"
|
||||
// datasource:
|
||||
// name: prometheus
|
||||
// type: prometheus
|
||||
// access: proxy
|
||||
// basicAuth: true
|
||||
// url: http://prometheus-service:9090
|
||||
// isDefault: true
|
||||
// basicAuthUser: ${PROMETHEUS_USERNAME}
|
||||
// jsonData:
|
||||
// "tlsSkipVerify": true
|
||||
// "timeInterval": "5s"
|
||||
// secureJsonData:
|
||||
// "basicAuthPassword": ${PROMETHEUS_PASSWORD} #
|
||||
@@ -0,0 +1,189 @@
|
||||
use serde::Serialize;
|
||||
|
||||
use crate::{
|
||||
interpret::Interpret,
|
||||
modules::monitoring::grafana::grafana::Grafana,
|
||||
score::Score,
|
||||
topology::{K8sclient, Topology},
|
||||
};
|
||||
|
||||
#[derive(Debug, Clone, Serialize)]
|
||||
pub struct GrafanaK8sInstallScore {
|
||||
pub sender: Grafana,
|
||||
}
|
||||
|
||||
impl<T: Topology + K8sclient> Score<T> for GrafanaK8sInstallScore {
|
||||
fn name(&self) -> String {
|
||||
"GrafanaK8sEnsureReadyScore".to_string()
|
||||
}
|
||||
|
||||
fn create_interpret(&self) -> Box<dyn Interpret<T>> {
|
||||
todo!()
|
||||
}
|
||||
}
|
||||
// let score = grafana_operator_helm_chart_score(sender.namespace.clone());
|
||||
//
|
||||
// score
|
||||
// .create_interpret()
|
||||
// .execute(inventory, self)
|
||||
// .await
|
||||
// .map_err(|e| PreparationError::new(e.to_string()))?;
|
||||
//
|
||||
|
||||
//
|
||||
// fn build_grafana_dashboard(
|
||||
// &self,
|
||||
// ns: &str,
|
||||
// label_selector: &LabelSelector,
|
||||
// ) -> GrafanaDashboard {
|
||||
// let graf_dashboard = GrafanaDashboard {
|
||||
// metadata: ObjectMeta {
|
||||
// name: Some(format!("grafana-dashboard-{}", ns)),
|
||||
// namespace: Some(ns.to_string()),
|
||||
// ..Default::default()
|
||||
// },
|
||||
// spec: GrafanaDashboardSpec {
|
||||
// resync_period: Some("30s".to_string()),
|
||||
// instance_selector: label_selector.clone(),
|
||||
// datasources: Some(vec![GrafanaDashboardDatasource {
|
||||
// input_name: "DS_PROMETHEUS".to_string(),
|
||||
// datasource_name: "thanos-openshift-monitoring".to_string(),
|
||||
// }]),
|
||||
// json: None,
|
||||
// grafana_com: Some(GrafanaCom {
|
||||
// id: 17406,
|
||||
// revision: None,
|
||||
// }),
|
||||
// },
|
||||
// };
|
||||
// graf_dashboard
|
||||
// }
|
||||
//
|
||||
// fn build_grafana(&self, ns: &str, labels: &BTreeMap<String, String>) -> GrafanaCRD {
|
||||
// let grafana = GrafanaCRD {
|
||||
// metadata: ObjectMeta {
|
||||
// name: Some(format!("grafana-{}", ns)),
|
||||
// namespace: Some(ns.to_string()),
|
||||
// labels: Some(labels.clone()),
|
||||
// ..Default::default()
|
||||
// },
|
||||
// spec: GrafanaSpec {
|
||||
// config: None,
|
||||
// admin_user: None,
|
||||
// admin_password: None,
|
||||
// ingress: None,
|
||||
// persistence: None,
|
||||
// resources: None,
|
||||
// },
|
||||
// };
|
||||
// grafana
|
||||
// }
|
||||
//
|
||||
// async fn build_grafana_ingress(&self, ns: &str) -> K8sIngressScore {
|
||||
// let domain = self.get_domain(&format!("grafana-{}", ns)).await.unwrap();
|
||||
// let name = format!("{}-grafana", ns);
|
||||
// let backend_service = format!("grafana-{}-service", ns);
|
||||
//
|
||||
// K8sIngressScore {
|
||||
// name: fqdn::fqdn!(&name),
|
||||
// host: fqdn::fqdn!(&domain),
|
||||
// backend_service: fqdn::fqdn!(&backend_service),
|
||||
// port: 3000,
|
||||
// path: Some("/".to_string()),
|
||||
// path_type: Some(PathType::Prefix),
|
||||
// namespace: Some(fqdn::fqdn!(&ns)),
|
||||
// ingress_class_name: Some("openshift-default".to_string()),
|
||||
// }
|
||||
// }
|
||||
// #[async_trait]
|
||||
// impl Grafana for K8sAnywhereTopology {
|
||||
// async fn install_grafana(&self) -> Result<PreparationOutcome, PreparationError> {
|
||||
// let ns = "grafana";
|
||||
//
|
||||
// let mut label = BTreeMap::new();
|
||||
//
|
||||
// label.insert("dashboards".to_string(), "grafana".to_string());
|
||||
//
|
||||
// let label_selector = LabelSelector {
|
||||
// match_labels: label.clone(),
|
||||
// match_expressions: vec![],
|
||||
// };
|
||||
//
|
||||
// let client = self.k8s_client().await?;
|
||||
//
|
||||
// let grafana = self.build_grafana(ns, &label);
|
||||
//
|
||||
// client.apply(&grafana, Some(ns)).await?;
|
||||
// //TODO change this to a ensure ready or something better than just a timeout
|
||||
// client
|
||||
// .wait_until_deployment_ready(
|
||||
// "grafana-grafana-deployment",
|
||||
// Some("grafana"),
|
||||
// Some(Duration::from_secs(30)),
|
||||
// )
|
||||
// .await?;
|
||||
//
|
||||
// let sa_name = "grafana-grafana-sa";
|
||||
// let token_secret_name = "grafana-sa-token-secret";
|
||||
//
|
||||
// let sa_token_secret = self.build_sa_token_secret(token_secret_name, sa_name, ns);
|
||||
//
|
||||
// client.apply(&sa_token_secret, Some(ns)).await?;
|
||||
// let secret_gvk = GroupVersionKind {
|
||||
// group: "".to_string(),
|
||||
// version: "v1".to_string(),
|
||||
// kind: "Secret".to_string(),
|
||||
// };
|
||||
//
|
||||
// let secret = client
|
||||
// .get_resource_json_value(token_secret_name, Some(ns), &secret_gvk)
|
||||
// .await?;
|
||||
//
|
||||
// let token = format!(
|
||||
// "Bearer {}",
|
||||
// self.extract_and_normalize_token(&secret).unwrap()
|
||||
// );
|
||||
//
|
||||
// debug!("creating grafana clusterrole binding");
|
||||
//
|
||||
// let clusterrolebinding =
|
||||
// self.build_cluster_rolebinding(sa_name, "cluster-monitoring-view", ns);
|
||||
//
|
||||
// client.apply(&clusterrolebinding, Some(ns)).await?;
|
||||
//
|
||||
// debug!("creating grafana datasource crd");
|
||||
//
|
||||
// let thanos_url = format!(
|
||||
// "https://{}",
|
||||
// self.get_domain("thanos-querier-openshift-monitoring")
|
||||
// .await
|
||||
// .unwrap()
|
||||
// );
|
||||
//
|
||||
// let thanos_openshift_datasource = self.build_grafana_datasource(
|
||||
// "thanos-openshift-monitoring",
|
||||
// ns,
|
||||
// &label_selector,
|
||||
// &thanos_url,
|
||||
// &token,
|
||||
// );
|
||||
//
|
||||
// client.apply(&thanos_openshift_datasource, Some(ns)).await?;
|
||||
//
|
||||
// debug!("creating grafana dashboard crd");
|
||||
// let dashboard = self.build_grafana_dashboard(ns, &label_selector);
|
||||
//
|
||||
// client.apply(&dashboard, Some(ns)).await?;
|
||||
// debug!("creating grafana ingress");
|
||||
// let grafana_ingress = self.build_grafana_ingress(ns).await;
|
||||
//
|
||||
// grafana_ingress
|
||||
// .interpret(&Inventory::empty(), self)
|
||||
// .await
|
||||
// .map_err(|e| PreparationError::new(e.to_string()))?;
|
||||
//
|
||||
// Ok(PreparationOutcome::Success {
|
||||
// details: "Installed grafana composants".to_string(),
|
||||
// })
|
||||
// }
|
||||
// }
|
||||
@@ -1,2 +1,3 @@
|
||||
pub mod grafana;
|
||||
pub mod helm;
|
||||
pub mod grafana_alerting_score;
|
||||
pub mod k8s;
|
||||
|
||||
@@ -1,91 +1,17 @@
|
||||
use std::sync::Arc;
|
||||
|
||||
use async_trait::async_trait;
|
||||
use kube::CustomResource;
|
||||
use schemars::JsonSchema;
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
use crate::{
|
||||
interpret::{InterpretError, Outcome},
|
||||
inventory::Inventory,
|
||||
modules::{
|
||||
monitoring::{
|
||||
grafana::grafana::Grafana, kube_prometheus::crd::service_monitor::ServiceMonitor,
|
||||
},
|
||||
prometheus::prometheus::PrometheusMonitoring,
|
||||
},
|
||||
topology::{
|
||||
K8sclient, Topology,
|
||||
installable::Installable,
|
||||
k8s::K8sClient,
|
||||
oberservability::monitoring::{AlertReceiver, AlertSender, ScrapeTarget},
|
||||
},
|
||||
};
|
||||
|
||||
#[derive(CustomResource, Serialize, Deserialize, Debug, Clone, JsonSchema)]
|
||||
#[derive(CustomResource, Serialize, Deserialize, Default, Debug, Clone, JsonSchema)]
|
||||
#[kube(
|
||||
group = "monitoring.coreos.com",
|
||||
version = "v1alpha1",
|
||||
version = "v1",
|
||||
kind = "AlertmanagerConfig",
|
||||
plural = "alertmanagerconfigs",
|
||||
namespaced
|
||||
namespaced,
|
||||
derive = "Default"
|
||||
)]
|
||||
pub struct AlertmanagerConfigSpec {
|
||||
#[serde(flatten)]
|
||||
pub data: serde_json::Value,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize)]
|
||||
pub struct CRDPrometheus {
|
||||
pub namespace: String,
|
||||
pub client: Arc<K8sClient>,
|
||||
pub service_monitor: Vec<ServiceMonitor>,
|
||||
}
|
||||
|
||||
impl AlertSender for CRDPrometheus {
|
||||
fn name(&self) -> String {
|
||||
"CRDAlertManager".to_string()
|
||||
}
|
||||
}
|
||||
|
||||
impl Clone for Box<dyn AlertReceiver<CRDPrometheus>> {
|
||||
fn clone(&self) -> Self {
|
||||
self.clone_box()
|
||||
}
|
||||
}
|
||||
|
||||
impl Clone for Box<dyn ScrapeTarget<CRDPrometheus>> {
|
||||
fn clone(&self) -> Self {
|
||||
self.clone_box()
|
||||
}
|
||||
}
|
||||
|
||||
impl Serialize for Box<dyn AlertReceiver<CRDPrometheus>> {
|
||||
fn serialize<S>(&self, _serializer: S) -> Result<S::Ok, S::Error>
|
||||
where
|
||||
S: serde::Serializer,
|
||||
{
|
||||
todo!()
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl<T: Topology + K8sclient + PrometheusMonitoring<CRDPrometheus> + Grafana> Installable<T>
|
||||
for CRDPrometheus
|
||||
{
|
||||
async fn configure(&self, inventory: &Inventory, topology: &T) -> Result<(), InterpretError> {
|
||||
topology.ensure_grafana_operator(inventory).await?;
|
||||
topology.ensure_prometheus_operator(self, inventory).await?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn ensure_installed(
|
||||
&self,
|
||||
inventory: &Inventory,
|
||||
topology: &T,
|
||||
) -> Result<(), InterpretError> {
|
||||
topology.install_grafana().await?;
|
||||
topology.install_prometheus(&self, inventory, None).await?;
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
use crate::modules::prometheus::alerts::k8s::{
|
||||
use crate::modules::monitoring::alert_rule::alerts::k8s::{
|
||||
deployment::alert_deployment_unavailable,
|
||||
pod::{alert_container_restarting, alert_pod_not_ready, pod_failed},
|
||||
pvc::high_pvc_fill_rate_over_two_days,
|
||||
|
||||
@@ -6,13 +6,14 @@ use serde::{Deserialize, Serialize};
|
||||
|
||||
use crate::modules::monitoring::alert_rule::prometheus_alert_rule::PrometheusAlertRule;
|
||||
|
||||
#[derive(CustomResource, Debug, Serialize, Deserialize, Clone, JsonSchema)]
|
||||
#[derive(CustomResource, Default, Debug, Serialize, Deserialize, Clone, JsonSchema)]
|
||||
#[kube(
|
||||
group = "monitoring.coreos.com",
|
||||
version = "v1",
|
||||
kind = "PrometheusRule",
|
||||
plural = "prometheusrules",
|
||||
namespaced
|
||||
namespaced,
|
||||
derive = "Default"
|
||||
)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
pub struct PrometheusRuleSpec {
|
||||
|
||||
@@ -1,23 +1,18 @@
|
||||
use std::net::IpAddr;
|
||||
use std::collections::BTreeMap;
|
||||
|
||||
use async_trait::async_trait;
|
||||
use kube::CustomResource;
|
||||
use schemars::JsonSchema;
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
use crate::{
|
||||
modules::monitoring::kube_prometheus::crd::{
|
||||
crd_alertmanager_config::CRDPrometheus, crd_prometheuses::LabelSelector,
|
||||
},
|
||||
topology::oberservability::monitoring::ScrapeTarget,
|
||||
};
|
||||
use crate::modules::monitoring::kube_prometheus::crd::crd_prometheuses::LabelSelector;
|
||||
|
||||
#[derive(CustomResource, Serialize, Deserialize, Debug, Clone, JsonSchema)]
|
||||
#[derive(CustomResource, Default, Serialize, Deserialize, Debug, Clone, JsonSchema)]
|
||||
#[kube(
|
||||
group = "monitoring.coreos.com",
|
||||
version = "v1alpha1",
|
||||
kind = "ScrapeConfig",
|
||||
plural = "scrapeconfigs",
|
||||
derive = "Default",
|
||||
namespaced
|
||||
)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
@@ -70,8 +65,8 @@ pub struct ScrapeConfigSpec {
|
||||
#[serde(rename_all = "camelCase")]
|
||||
pub struct StaticConfig {
|
||||
pub targets: Vec<String>,
|
||||
|
||||
pub labels: Option<LabelSelector>,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub labels: Option<BTreeMap<String, String>>,
|
||||
}
|
||||
|
||||
/// Relabeling configuration for target or metric relabeling.
|
||||
|
||||
@@ -1,22 +1,9 @@
|
||||
pub mod crd_alertmanager_config;
|
||||
pub mod crd_alertmanagers;
|
||||
pub mod crd_default_rules;
|
||||
pub mod crd_grafana;
|
||||
pub mod crd_prometheus_rules;
|
||||
pub mod crd_prometheuses;
|
||||
pub mod crd_scrape_config;
|
||||
pub mod grafana_default_dashboard;
|
||||
pub mod grafana_operator;
|
||||
pub mod prometheus_operator;
|
||||
pub mod rhob_alertmanager_config;
|
||||
pub mod rhob_alertmanagers;
|
||||
pub mod rhob_cluster_observability_operator;
|
||||
pub mod rhob_default_rules;
|
||||
pub mod rhob_grafana;
|
||||
pub mod rhob_monitoring_stack;
|
||||
pub mod rhob_prometheus_rules;
|
||||
pub mod rhob_prometheuses;
|
||||
pub mod rhob_role;
|
||||
pub mod rhob_service_monitor;
|
||||
pub mod role;
|
||||
pub mod service_monitor;
|
||||
|
||||
@@ -1,50 +0,0 @@
|
||||
use std::sync::Arc;
|
||||
|
||||
use kube::CustomResource;
|
||||
use schemars::JsonSchema;
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
use crate::topology::{
|
||||
k8s::K8sClient,
|
||||
oberservability::monitoring::{AlertReceiver, AlertSender},
|
||||
};
|
||||
|
||||
#[derive(CustomResource, Serialize, Deserialize, Debug, Clone, JsonSchema)]
|
||||
#[kube(
|
||||
group = "monitoring.rhobs",
|
||||
version = "v1alpha1",
|
||||
kind = "AlertmanagerConfig",
|
||||
plural = "alertmanagerconfigs",
|
||||
namespaced
|
||||
)]
|
||||
pub struct AlertmanagerConfigSpec {
|
||||
#[serde(flatten)]
|
||||
pub data: serde_json::Value,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize)]
|
||||
pub struct RHOBObservability {
|
||||
pub namespace: String,
|
||||
pub client: Arc<K8sClient>,
|
||||
}
|
||||
|
||||
impl AlertSender for RHOBObservability {
|
||||
fn name(&self) -> String {
|
||||
"RHOBAlertManager".to_string()
|
||||
}
|
||||
}
|
||||
|
||||
impl Clone for Box<dyn AlertReceiver<RHOBObservability>> {
|
||||
fn clone(&self) -> Self {
|
||||
self.clone_box()
|
||||
}
|
||||
}
|
||||
|
||||
impl Serialize for Box<dyn AlertReceiver<RHOBObservability>> {
|
||||
fn serialize<S>(&self, _serializer: S) -> Result<S::Ok, S::Error>
|
||||
where
|
||||
S: serde::Serializer,
|
||||
{
|
||||
todo!()
|
||||
}
|
||||
}
|
||||
@@ -1,22 +0,0 @@
|
||||
use std::str::FromStr;
|
||||
|
||||
use non_blank_string_rs::NonBlankString;
|
||||
|
||||
use crate::modules::helm::chart::HelmChartScore;
|
||||
//TODO package chart or something for COO okd
|
||||
pub fn rhob_cluster_observability_operator() -> HelmChartScore {
|
||||
HelmChartScore {
|
||||
namespace: None,
|
||||
release_name: NonBlankString::from_str("").unwrap(),
|
||||
chart_name: NonBlankString::from_str(
|
||||
"oci://hub.nationtech.io/harmony/nt-prometheus-operator",
|
||||
)
|
||||
.unwrap(),
|
||||
chart_version: None,
|
||||
values_overrides: None,
|
||||
values_yaml: None,
|
||||
create_namespace: true,
|
||||
install_only: true,
|
||||
repository: None,
|
||||
}
|
||||
}
|
||||
@@ -1,20 +1,13 @@
|
||||
use super::config::KubePrometheusConfig;
|
||||
use log::debug;
|
||||
use non_blank_string_rs::NonBlankString;
|
||||
use serde_yaml::{Mapping, Value};
|
||||
use std::{
|
||||
collections::BTreeMap,
|
||||
str::FromStr,
|
||||
sync::{Arc, Mutex},
|
||||
};
|
||||
|
||||
use crate::modules::{
|
||||
helm::chart::HelmChartScore,
|
||||
monitoring::kube_prometheus::types::{
|
||||
AlertGroup, AlertManager, AlertManagerAdditionalPromRules, AlertManagerConfig,
|
||||
AlertManagerConfigSelector, AlertManagerRoute, AlertManagerSpec, AlertManagerValues,
|
||||
ConfigReloader, Limits, PrometheusConfig, Requests, Resources,
|
||||
},
|
||||
monitoring::kube_prometheus::types::{Limits, Requests, Resources},
|
||||
};
|
||||
|
||||
pub fn kube_prometheus_helm_chart_score(
|
||||
@@ -66,7 +59,7 @@ pub fn kube_prometheus_helm_chart_score(
|
||||
}
|
||||
let _resource_section = resource_block(&resource_limit, 2);
|
||||
|
||||
let mut values = format!(
|
||||
let values = format!(
|
||||
r#"
|
||||
global:
|
||||
rbac:
|
||||
@@ -281,131 +274,6 @@ prometheusOperator:
|
||||
"#,
|
||||
);
|
||||
|
||||
let prometheus_config =
|
||||
crate::modules::monitoring::kube_prometheus::types::PrometheusConfigValues {
|
||||
prometheus: PrometheusConfig {
|
||||
prometheus: bool::from_str(prometheus.as_str()).expect("couldn't parse bool"),
|
||||
additional_service_monitors: config.additional_service_monitors.clone(),
|
||||
},
|
||||
};
|
||||
let prometheus_config_yaml =
|
||||
serde_yaml::to_string(&prometheus_config).expect("Failed to serialize YAML");
|
||||
|
||||
debug!(
|
||||
"serialized prometheus config: \n {:#}",
|
||||
prometheus_config_yaml
|
||||
);
|
||||
values.push_str(&prometheus_config_yaml);
|
||||
|
||||
// add required null receiver for prometheus alert manager
|
||||
let mut null_receiver = Mapping::new();
|
||||
null_receiver.insert(
|
||||
Value::String("receiver".to_string()),
|
||||
Value::String("null".to_string()),
|
||||
);
|
||||
null_receiver.insert(
|
||||
Value::String("matchers".to_string()),
|
||||
Value::Sequence(vec![Value::String("alertname!=Watchdog".to_string())]),
|
||||
);
|
||||
null_receiver.insert(Value::String("continue".to_string()), Value::Bool(true));
|
||||
|
||||
//add alert channels
|
||||
let mut alert_manager_channel_config = AlertManagerConfig {
|
||||
global: Mapping::new(),
|
||||
route: AlertManagerRoute {
|
||||
routes: vec![Value::Mapping(null_receiver)],
|
||||
},
|
||||
receivers: vec![serde_yaml::from_str("name: 'null'").unwrap()],
|
||||
};
|
||||
for receiver in config.alert_receiver_configs.iter() {
|
||||
if let Some(global) = receiver.channel_global_config.clone() {
|
||||
alert_manager_channel_config
|
||||
.global
|
||||
.insert(global.0, global.1);
|
||||
}
|
||||
alert_manager_channel_config
|
||||
.route
|
||||
.routes
|
||||
.push(receiver.channel_route.clone());
|
||||
alert_manager_channel_config
|
||||
.receivers
|
||||
.push(receiver.channel_receiver.clone());
|
||||
}
|
||||
|
||||
let mut labels = BTreeMap::new();
|
||||
labels.insert("alertmanagerConfig".to_string(), "enabled".to_string());
|
||||
let alert_manager_config_selector = AlertManagerConfigSelector {
|
||||
match_labels: labels,
|
||||
};
|
||||
let alert_manager_values = AlertManagerValues {
|
||||
alertmanager: AlertManager {
|
||||
enabled: config.alert_manager,
|
||||
config: alert_manager_channel_config,
|
||||
alertmanager_spec: AlertManagerSpec {
|
||||
resources: Resources {
|
||||
limits: Limits {
|
||||
memory: "100Mi".to_string(),
|
||||
cpu: "100m".to_string(),
|
||||
},
|
||||
requests: Requests {
|
||||
memory: "100Mi".to_string(),
|
||||
cpu: "100m".to_string(),
|
||||
},
|
||||
},
|
||||
alert_manager_config_selector,
|
||||
replicas: 2,
|
||||
},
|
||||
init_config_reloader: ConfigReloader {
|
||||
resources: Resources {
|
||||
limits: Limits {
|
||||
memory: "100Mi".to_string(),
|
||||
cpu: "100m".to_string(),
|
||||
},
|
||||
requests: Requests {
|
||||
memory: "100Mi".to_string(),
|
||||
cpu: "100m".to_string(),
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
};
|
||||
|
||||
let alert_manager_yaml =
|
||||
serde_yaml::to_string(&alert_manager_values).expect("Failed to serialize YAML");
|
||||
debug!("serialized alert manager: \n {:#}", alert_manager_yaml);
|
||||
values.push_str(&alert_manager_yaml);
|
||||
|
||||
//format alert manager additional rules for helm chart
|
||||
let mut merged_rules: BTreeMap<String, AlertGroup> = BTreeMap::new();
|
||||
|
||||
for additional_rule in config.alert_rules.clone() {
|
||||
for (key, group) in additional_rule.rules {
|
||||
merged_rules.insert(key, group);
|
||||
}
|
||||
}
|
||||
|
||||
let merged_rules = AlertManagerAdditionalPromRules {
|
||||
rules: merged_rules,
|
||||
};
|
||||
|
||||
let mut alert_manager_additional_rules = serde_yaml::Mapping::new();
|
||||
let rules_value = serde_yaml::to_value(merged_rules).unwrap();
|
||||
|
||||
alert_manager_additional_rules.insert(
|
||||
serde_yaml::Value::String("additionalPrometheusRulesMap".to_string()),
|
||||
rules_value,
|
||||
);
|
||||
|
||||
let alert_manager_additional_rules_yaml =
|
||||
serde_yaml::to_string(&alert_manager_additional_rules).expect("Failed to serialize YAML");
|
||||
debug!(
|
||||
"alert_rules_yaml:\n{:#}",
|
||||
alert_manager_additional_rules_yaml
|
||||
);
|
||||
|
||||
values.push_str(&alert_manager_additional_rules_yaml);
|
||||
debug!("full values.yaml: \n {:#}", values);
|
||||
|
||||
HelmChartScore {
|
||||
namespace: Some(NonBlankString::from_str(&config.namespace.clone().unwrap()).unwrap()),
|
||||
release_name: NonBlankString::from_str("kube-prometheus").unwrap(),
|
||||
|
||||
@@ -2,36 +2,41 @@ use std::sync::{Arc, Mutex};
|
||||
|
||||
use serde::Serialize;
|
||||
|
||||
use super::{helm::config::KubePrometheusConfig, prometheus::KubePrometheus};
|
||||
use super::helm::config::KubePrometheusConfig;
|
||||
use crate::{
|
||||
modules::monitoring::kube_prometheus::types::ServiceMonitor,
|
||||
modules::monitoring::kube_prometheus::{KubePrometheus, types::ServiceMonitor},
|
||||
score::Score,
|
||||
topology::{
|
||||
HelmCommand, Topology,
|
||||
oberservability::monitoring::{AlertReceiver, AlertRule, AlertingInterpret},
|
||||
tenant::TenantManager,
|
||||
Topology,
|
||||
monitoring::{AlertReceiver, AlertRule, AlertingInterpret, Observability, ScrapeTarget},
|
||||
},
|
||||
};
|
||||
|
||||
//TODO untested
|
||||
#[derive(Clone, Debug, Serialize)]
|
||||
pub struct HelmPrometheusAlertingScore {
|
||||
pub struct KubePrometheusAlertingScore {
|
||||
pub receivers: Vec<Box<dyn AlertReceiver<KubePrometheus>>>,
|
||||
pub rules: Vec<Box<dyn AlertRule<KubePrometheus>>>,
|
||||
pub scrape_targets: Option<Vec<Box<dyn ScrapeTarget<KubePrometheus>>>>,
|
||||
pub service_monitors: Vec<ServiceMonitor>,
|
||||
pub config: Arc<Mutex<KubePrometheusConfig>>,
|
||||
}
|
||||
|
||||
impl<T: Topology + HelmCommand + TenantManager> Score<T> for HelmPrometheusAlertingScore {
|
||||
impl<T: Topology + Observability<KubePrometheus>> Score<T> for KubePrometheusAlertingScore {
|
||||
fn create_interpret(&self) -> Box<dyn crate::interpret::Interpret<T>> {
|
||||
let config = Arc::new(Mutex::new(KubePrometheusConfig::new()));
|
||||
config
|
||||
//TODO test that additional service monitor is added
|
||||
self.config
|
||||
.try_lock()
|
||||
.expect("couldn't lock config")
|
||||
.additional_service_monitors = self.service_monitors.clone();
|
||||
|
||||
Box::new(AlertingInterpret {
|
||||
sender: KubePrometheus { config },
|
||||
sender: KubePrometheus {
|
||||
config: self.config.clone(),
|
||||
},
|
||||
receivers: self.receivers.clone(),
|
||||
rules: self.rules.clone(),
|
||||
scrape_targets: None,
|
||||
scrape_targets: self.scrape_targets.clone(),
|
||||
})
|
||||
}
|
||||
fn name(&self) -> String {
|
||||
@@ -1,5 +1,71 @@
|
||||
use std::sync::{Arc, Mutex};
|
||||
|
||||
use async_trait::async_trait;
|
||||
use serde::Serialize;
|
||||
|
||||
use crate::{
|
||||
modules::monitoring::kube_prometheus::helm::config::KubePrometheusConfig,
|
||||
topology::monitoring::{AlertReceiver, AlertRule, AlertSender, ScrapeTarget},
|
||||
};
|
||||
|
||||
pub mod crd;
|
||||
pub mod helm;
|
||||
pub mod helm_prometheus_alert_score;
|
||||
pub mod prometheus;
|
||||
pub mod kube_prometheus_alerting_score;
|
||||
pub mod score_kube_prometheus_alert_receivers;
|
||||
pub mod score_kube_prometheus_ensure_ready;
|
||||
pub mod score_kube_prometheus_rule;
|
||||
pub mod score_kube_prometheus_scrape_target;
|
||||
pub mod types;
|
||||
|
||||
impl Serialize for Box<dyn ScrapeTarget<KubePrometheus>> {
|
||||
fn serialize<S>(&self, _serializer: S) -> Result<S::Ok, S::Error>
|
||||
where
|
||||
S: serde::Serializer,
|
||||
{
|
||||
todo!()
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl AlertSender for KubePrometheus {
|
||||
fn name(&self) -> String {
|
||||
"HelmKubePrometheus".to_string()
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug, Serialize)]
|
||||
pub struct KubePrometheus {
|
||||
pub config: Arc<Mutex<KubePrometheusConfig>>,
|
||||
}
|
||||
|
||||
impl Default for KubePrometheus {
|
||||
fn default() -> Self {
|
||||
Self::new()
|
||||
}
|
||||
}
|
||||
|
||||
impl KubePrometheus {
|
||||
pub fn new() -> Self {
|
||||
Self {
|
||||
config: Arc::new(Mutex::new(KubePrometheusConfig::new())),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Serialize for Box<dyn AlertReceiver<KubePrometheus>> {
|
||||
fn serialize<S>(&self, _serializer: S) -> Result<S::Ok, S::Error>
|
||||
where
|
||||
S: serde::Serializer,
|
||||
{
|
||||
todo!()
|
||||
}
|
||||
}
|
||||
|
||||
impl Serialize for Box<dyn AlertRule<KubePrometheus>> {
|
||||
fn serialize<S>(&self, _serializer: S) -> Result<S::Ok, S::Error>
|
||||
where
|
||||
S: serde::Serializer,
|
||||
{
|
||||
todo!()
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,167 +0,0 @@
|
||||
use std::sync::{Arc, Mutex};
|
||||
|
||||
use async_trait::async_trait;
|
||||
use log::{debug, error};
|
||||
use serde::Serialize;
|
||||
|
||||
use crate::{
|
||||
interpret::{InterpretError, Outcome},
|
||||
inventory::Inventory,
|
||||
modules::monitoring::alert_rule::prometheus_alert_rule::AlertManagerRuleGroup,
|
||||
score,
|
||||
topology::{
|
||||
HelmCommand, Topology,
|
||||
installable::Installable,
|
||||
oberservability::monitoring::{AlertReceiver, AlertRule, AlertSender},
|
||||
tenant::TenantManager,
|
||||
},
|
||||
};
|
||||
|
||||
use score::Score;
|
||||
|
||||
use super::{
|
||||
helm::{
|
||||
config::KubePrometheusConfig, kube_prometheus_helm_chart::kube_prometheus_helm_chart_score,
|
||||
},
|
||||
types::{AlertManagerAdditionalPromRules, AlertManagerChannelConfig},
|
||||
};
|
||||
|
||||
#[async_trait]
|
||||
impl AlertSender for KubePrometheus {
|
||||
fn name(&self) -> String {
|
||||
"HelmKubePrometheus".to_string()
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl<T: Topology + HelmCommand + TenantManager> Installable<T> for KubePrometheus {
|
||||
async fn configure(&self, _inventory: &Inventory, topology: &T) -> Result<(), InterpretError> {
|
||||
self.configure_with_topology(topology).await;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn ensure_installed(
|
||||
&self,
|
||||
inventory: &Inventory,
|
||||
topology: &T,
|
||||
) -> Result<(), InterpretError> {
|
||||
self.install_prometheus(inventory, topology).await?;
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct KubePrometheus {
|
||||
pub config: Arc<Mutex<KubePrometheusConfig>>,
|
||||
}
|
||||
|
||||
impl Default for KubePrometheus {
|
||||
fn default() -> Self {
|
||||
Self::new()
|
||||
}
|
||||
}
|
||||
|
||||
impl KubePrometheus {
|
||||
pub fn new() -> Self {
|
||||
Self {
|
||||
config: Arc::new(Mutex::new(KubePrometheusConfig::new())),
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn configure_with_topology<T: TenantManager>(&self, topology: &T) {
|
||||
let ns = topology
|
||||
.get_tenant_config()
|
||||
.await
|
||||
.map(|cfg| cfg.name.clone())
|
||||
.unwrap_or_else(|| "monitoring".to_string());
|
||||
error!("This must be refactored, see comments in pr #74");
|
||||
debug!("NS: {}", ns);
|
||||
self.config.lock().unwrap().namespace = Some(ns);
|
||||
}
|
||||
|
||||
pub async fn install_receiver(
|
||||
&self,
|
||||
prometheus_receiver: &dyn KubePrometheusReceiver,
|
||||
) -> Result<Outcome, InterpretError> {
|
||||
let prom_receiver = prometheus_receiver.configure_receiver().await;
|
||||
debug!(
|
||||
"adding alert receiver to prometheus config: {:#?}",
|
||||
&prom_receiver
|
||||
);
|
||||
let mut config = self.config.lock().unwrap();
|
||||
|
||||
config.alert_receiver_configs.push(prom_receiver);
|
||||
let prom_receiver_name = prometheus_receiver.name();
|
||||
debug!("installed alert receiver {}", &prom_receiver_name);
|
||||
Ok(Outcome::success(format!(
|
||||
"Sucessfully installed receiver {}",
|
||||
prom_receiver_name
|
||||
)))
|
||||
}
|
||||
|
||||
pub async fn install_rule(
|
||||
&self,
|
||||
prometheus_rule: &AlertManagerRuleGroup,
|
||||
) -> Result<Outcome, InterpretError> {
|
||||
let prometheus_rule = prometheus_rule.configure_rule().await;
|
||||
let mut config = self.config.lock().unwrap();
|
||||
|
||||
config.alert_rules.push(prometheus_rule.clone());
|
||||
Ok(Outcome::success(format!(
|
||||
"Successfully installed alert rule: {:#?},",
|
||||
prometheus_rule
|
||||
)))
|
||||
}
|
||||
|
||||
pub async fn install_prometheus<T: Topology + HelmCommand + Send + Sync>(
|
||||
&self,
|
||||
inventory: &Inventory,
|
||||
topology: &T,
|
||||
) -> Result<Outcome, InterpretError> {
|
||||
kube_prometheus_helm_chart_score(self.config.clone())
|
||||
.interpret(inventory, topology)
|
||||
.await
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
pub trait KubePrometheusReceiver: Send + Sync + std::fmt::Debug {
|
||||
fn name(&self) -> String;
|
||||
async fn configure_receiver(&self) -> AlertManagerChannelConfig;
|
||||
}
|
||||
|
||||
impl Serialize for Box<dyn AlertReceiver<KubePrometheus>> {
|
||||
fn serialize<S>(&self, _serializer: S) -> Result<S::Ok, S::Error>
|
||||
where
|
||||
S: serde::Serializer,
|
||||
{
|
||||
todo!()
|
||||
}
|
||||
}
|
||||
|
||||
impl Clone for Box<dyn AlertReceiver<KubePrometheus>> {
|
||||
fn clone(&self) -> Self {
|
||||
self.clone_box()
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
pub trait KubePrometheusRule: Send + Sync + std::fmt::Debug {
|
||||
fn name(&self) -> String;
|
||||
async fn configure_rule(&self) -> AlertManagerAdditionalPromRules;
|
||||
}
|
||||
|
||||
impl Serialize for Box<dyn AlertRule<KubePrometheus>> {
|
||||
fn serialize<S>(&self, _serializer: S) -> Result<S::Ok, S::Error>
|
||||
where
|
||||
S: serde::Serializer,
|
||||
{
|
||||
todo!()
|
||||
}
|
||||
}
|
||||
|
||||
impl Clone for Box<dyn AlertRule<KubePrometheus>> {
|
||||
fn clone(&self) -> Self {
|
||||
self.clone_box()
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,56 @@
|
||||
use kube::api::ObjectMeta;
|
||||
use serde::Serialize;
|
||||
|
||||
use crate::{
|
||||
interpret::Interpret,
|
||||
modules::{
|
||||
k8s::resource::K8sResourceScore,
|
||||
monitoring::kube_prometheus::{
|
||||
KubePrometheus,
|
||||
crd::crd_alertmanager_config::{AlertmanagerConfig, AlertmanagerConfigSpec},
|
||||
},
|
||||
},
|
||||
score::Score,
|
||||
topology::{K8sclient, Topology, monitoring::AlertReceiver},
|
||||
};
|
||||
|
||||
#[derive(Debug, Clone, Serialize)]
|
||||
pub struct KubePrometheusReceiverScore {
|
||||
pub sender: KubePrometheus,
|
||||
pub receiver: Box<dyn AlertReceiver<KubePrometheus>>,
|
||||
}
|
||||
|
||||
impl<T: Topology + K8sclient> Score<T> for KubePrometheusReceiverScore {
|
||||
fn name(&self) -> String {
|
||||
"KubePrometheusReceiverScore".to_string()
|
||||
}
|
||||
|
||||
fn create_interpret(&self) -> Box<dyn Interpret<T>> {
|
||||
let name = self.receiver.name();
|
||||
let namespace = self.sender.config.lock().unwrap().namespace.clone();
|
||||
let route = self.receiver.build_route().expect(&format!(
|
||||
"failed to build route for receveiver {}",
|
||||
name.clone()
|
||||
));
|
||||
let receiver = self.receiver.build_receiver().expect(&format!(
|
||||
"failed to build receiver path for receiver {}",
|
||||
name.clone()
|
||||
));
|
||||
|
||||
let data = serde_json::json!({
|
||||
"route": route,
|
||||
"receivers": [receiver]
|
||||
});
|
||||
|
||||
let alertmanager_config = AlertmanagerConfig {
|
||||
metadata: ObjectMeta {
|
||||
name: Some(name),
|
||||
namespace: namespace.clone(),
|
||||
..Default::default()
|
||||
},
|
||||
spec: AlertmanagerConfigSpec { data: data },
|
||||
};
|
||||
|
||||
K8sResourceScore::single(alertmanager_config, namespace).create_interpret()
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,80 @@
|
||||
use async_trait::async_trait;
|
||||
use harmony_types::id::Id;
|
||||
use serde::Serialize;
|
||||
|
||||
use crate::{
|
||||
data::Version,
|
||||
interpret::{Interpret, InterpretError, InterpretName, InterpretStatus, Outcome},
|
||||
inventory::Inventory,
|
||||
modules::monitoring::kube_prometheus::KubePrometheus,
|
||||
score::Score,
|
||||
topology::{K8sclient, Topology},
|
||||
};
|
||||
|
||||
#[derive(Clone, Debug, Serialize)]
|
||||
pub struct KubePrometheusEnsureReadyScore {
|
||||
pub sender: KubePrometheus,
|
||||
}
|
||||
|
||||
impl<T: Topology + K8sclient> Score<T> for KubePrometheusEnsureReadyScore {
|
||||
fn name(&self) -> String {
|
||||
"KubePrometheusEnsureReadyScore".to_string()
|
||||
}
|
||||
|
||||
fn create_interpret(&self) -> Box<dyn Interpret<T>> {
|
||||
Box::new(KubePrometheusEnsureReadyInterpret {
|
||||
sender: self.sender.clone(),
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug, Serialize)]
|
||||
pub struct KubePrometheusEnsureReadyInterpret {
|
||||
pub sender: KubePrometheus,
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl<T: Topology + K8sclient> Interpret<T> for KubePrometheusEnsureReadyInterpret {
|
||||
async fn execute(
|
||||
&self,
|
||||
_inventory: &Inventory,
|
||||
topology: &T,
|
||||
) -> Result<Outcome, InterpretError> {
|
||||
let client = topology.k8s_client().await?;
|
||||
let namespace = self
|
||||
.sender
|
||||
.config
|
||||
.lock()
|
||||
.unwrap()
|
||||
.namespace
|
||||
.clone()
|
||||
.unwrap_or("default".to_string());
|
||||
|
||||
let prometheus_name = "kube-prometheues-kube-prometheus-operator";
|
||||
|
||||
client
|
||||
.wait_until_deployment_ready(prometheus_name, Some(&namespace), None)
|
||||
.await?;
|
||||
|
||||
Ok(Outcome::success(format!(
|
||||
"deployment: {} ready in ns: {}",
|
||||
prometheus_name, namespace
|
||||
)))
|
||||
}
|
||||
|
||||
fn get_name(&self) -> InterpretName {
|
||||
todo!()
|
||||
}
|
||||
|
||||
fn get_version(&self) -> Version {
|
||||
todo!()
|
||||
}
|
||||
|
||||
fn get_status(&self) -> InterpretStatus {
|
||||
todo!()
|
||||
}
|
||||
|
||||
fn get_children(&self) -> Vec<Id> {
|
||||
todo!()
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,46 @@
|
||||
use kube::api::ObjectMeta;
|
||||
use serde::Serialize;
|
||||
|
||||
use crate::{
|
||||
interpret::Interpret,
|
||||
modules::{
|
||||
k8s::resource::K8sResourceScore,
|
||||
monitoring::kube_prometheus::{
|
||||
KubePrometheus,
|
||||
crd::crd_prometheus_rules::{PrometheusRule, PrometheusRuleSpec, RuleGroup},
|
||||
},
|
||||
},
|
||||
score::Score,
|
||||
topology::{K8sclient, Topology, monitoring::AlertRule},
|
||||
};
|
||||
|
||||
#[derive(Debug, Clone, Serialize)]
|
||||
pub struct KubePrometheusRuleScore {
|
||||
pub sender: KubePrometheus,
|
||||
pub rule: Box<dyn AlertRule<KubePrometheus>>,
|
||||
}
|
||||
|
||||
impl<T: Topology + K8sclient> Score<T> for KubePrometheusRuleScore {
|
||||
fn name(&self) -> String {
|
||||
"KubePrometheusRuleScore".to_string()
|
||||
}
|
||||
|
||||
fn create_interpret(&self) -> Box<dyn Interpret<T>> {
|
||||
let name = self.rule.name();
|
||||
let namespace = self.sender.config.lock().unwrap().namespace.clone();
|
||||
let groups: Vec<RuleGroup> =
|
||||
serde_json::from_value(self.rule.build_rule().expect("failed to build alert rule"))
|
||||
.expect("failed to serialize rule group");
|
||||
|
||||
let prometheus_rule = PrometheusRule {
|
||||
metadata: ObjectMeta {
|
||||
name: Some(name.clone()),
|
||||
namespace: namespace.clone(),
|
||||
..Default::default()
|
||||
},
|
||||
|
||||
spec: PrometheusRuleSpec { groups },
|
||||
};
|
||||
K8sResourceScore::single(prometheus_rule, namespace).create_interpret()
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,61 @@
|
||||
use kube::api::ObjectMeta;
|
||||
use serde::Serialize;
|
||||
|
||||
use crate::{
|
||||
interpret::Interpret,
|
||||
modules::{
|
||||
k8s::resource::K8sResourceScore,
|
||||
monitoring::kube_prometheus::{
|
||||
KubePrometheus,
|
||||
crd::crd_scrape_config::{ScrapeConfig, ScrapeConfigSpec, StaticConfig},
|
||||
},
|
||||
},
|
||||
score::Score,
|
||||
topology::{K8sclient, Topology, monitoring::ScrapeTarget},
|
||||
};
|
||||
|
||||
#[derive(Debug, Clone, Serialize)]
|
||||
pub struct KubePrometheusScrapeTargetScore {
|
||||
pub sender: KubePrometheus,
|
||||
pub scrape_target: Box<dyn ScrapeTarget<KubePrometheus>>,
|
||||
}
|
||||
|
||||
impl<T: Topology + K8sclient> Score<T> for KubePrometheusScrapeTargetScore {
|
||||
fn name(&self) -> String {
|
||||
"KubePrometheusScrapeTargetScore".to_string()
|
||||
}
|
||||
|
||||
fn create_interpret(&self) -> Box<dyn Interpret<T>> {
|
||||
let name = self.scrape_target.name();
|
||||
let namespace = self.sender.config.lock().unwrap().namespace.clone();
|
||||
|
||||
let external_target = self
|
||||
.scrape_target
|
||||
.build_scrape_target()
|
||||
.expect("failed to build external scrape target");
|
||||
|
||||
//TODO this may need to modified to include a scrapeConfigSelector label from the
|
||||
//prometheus operator
|
||||
let labels = external_target.labels;
|
||||
|
||||
let scrape_target = ScrapeConfig {
|
||||
metadata: ObjectMeta {
|
||||
name: Some(name.clone()),
|
||||
namespace: namespace.clone(),
|
||||
..Default::default()
|
||||
},
|
||||
spec: ScrapeConfigSpec {
|
||||
static_configs: Some(vec![StaticConfig {
|
||||
targets: vec![format!("{}:{}", external_target.ip, external_target.port)],
|
||||
labels,
|
||||
}]),
|
||||
metrics_path: external_target.path,
|
||||
scrape_interval: external_target.interval,
|
||||
job_name: Some(name),
|
||||
..Default::default()
|
||||
},
|
||||
};
|
||||
|
||||
K8sResourceScore::single(scrape_target, namespace).create_interpret()
|
||||
}
|
||||
}
|
||||
@@ -12,36 +12,6 @@ pub trait AlertChannelConfig {
|
||||
async fn get_config(&self) -> AlertManagerChannelConfig;
|
||||
}
|
||||
|
||||
/// Top-level Helm values wrapper for the Alertmanager section of the chart.
#[derive(Debug, Clone, Serialize)]
pub struct AlertManagerValues {
    pub alertmanager: AlertManager,
}

/// Alertmanager Helm values: enables the component and carries its
/// configuration, pod spec and init config-reloader settings.
#[derive(Debug, Clone, Serialize)]
#[serde(rename_all = "camelCase")]
pub struct AlertManager {
    pub enabled: bool,
    pub config: AlertManagerConfig,
    pub alertmanager_spec: AlertManagerSpec,
    pub init_config_reloader: ConfigReloader,
}

/// Resource settings for the config-reloader container.
#[derive(Debug, Clone, Serialize)]
pub struct ConfigReloader {
    pub resources: Resources,
}

/// Alertmanager configuration document (`global`, `route`, `receivers`).
/// `global` and `receivers` are raw YAML values so channel-specific settings
/// can be injected without typing every possible receiver.
#[derive(Debug, Clone, Serialize)]
pub struct AlertManagerConfig {
    pub global: Mapping,
    pub route: AlertManagerRoute,
    pub receivers: Sequence,
}

/// Root of the routing tree; child routes are kept as raw YAML entries.
#[derive(Debug, Clone, Serialize)]
pub struct AlertManagerRoute {
    pub routes: Sequence,
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize)]
|
||||
pub struct AlertManagerChannelConfig {
|
||||
///expecting an option that contains two values
|
||||
@@ -52,20 +22,6 @@ pub struct AlertManagerChannelConfig {
|
||||
pub channel_receiver: Value,
|
||||
}
|
||||
|
||||
/// Pod-level spec for the Alertmanager workload: resources, replica count,
/// and the selector used to discover AlertmanagerConfig objects.
#[derive(Debug, Clone, Serialize)]
#[serde(rename_all = "camelCase")]
pub struct AlertManagerSpec {
    pub(crate) resources: Resources,
    pub replicas: u32,
    pub alert_manager_config_selector: AlertManagerConfigSelector,
}

/// Label selector matching the AlertmanagerConfig resources to load.
#[derive(Debug, Clone, Serialize)]
#[serde(rename_all = "camelCase")]
pub struct AlertManagerConfigSelector {
    pub match_labels: BTreeMap<String, String>,
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize)]
|
||||
pub struct Resources {
|
||||
pub limits: Limits,
|
||||
|
||||
@@ -6,4 +6,5 @@ pub mod kube_prometheus;
|
||||
pub mod ntfy;
|
||||
pub mod okd;
|
||||
pub mod prometheus;
|
||||
pub mod red_hat_cluster_observability;
|
||||
pub mod scrape_target;
|
||||
|
||||
@@ -1,270 +0,0 @@
|
||||
use base64::prelude::*;
|
||||
|
||||
use async_trait::async_trait;
|
||||
use harmony_types::id::Id;
|
||||
use kube::api::DynamicObject;
|
||||
use log::{debug, info, trace};
|
||||
use serde::Serialize;
|
||||
|
||||
use crate::{
|
||||
data::Version,
|
||||
interpret::{Interpret, InterpretError, InterpretName, InterpretStatus, Outcome},
|
||||
inventory::Inventory,
|
||||
modules::monitoring::okd::OpenshiftClusterAlertSender,
|
||||
score::Score,
|
||||
topology::{K8sclient, Topology, oberservability::monitoring::AlertReceiver},
|
||||
};
|
||||
|
||||
impl Clone for Box<dyn AlertReceiver<OpenshiftClusterAlertSender>> {
|
||||
fn clone(&self) -> Self {
|
||||
self.clone_box()
|
||||
}
|
||||
}
|
||||
|
||||
impl Serialize for Box<dyn AlertReceiver<OpenshiftClusterAlertSender>> {
|
||||
fn serialize<S>(&self, _serializer: S) -> Result<S::Ok, S::Error>
|
||||
where
|
||||
S: serde::Serializer,
|
||||
{
|
||||
todo!()
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize)]
|
||||
pub struct OpenshiftClusterAlertScore {
|
||||
pub receivers: Vec<Box<dyn AlertReceiver<OpenshiftClusterAlertSender>>>,
|
||||
}
|
||||
|
||||
impl<T: Topology + K8sclient> Score<T> for OpenshiftClusterAlertScore {
|
||||
fn name(&self) -> String {
|
||||
"ClusterAlertScore".to_string()
|
||||
}
|
||||
|
||||
#[doc(hidden)]
|
||||
fn create_interpret(&self) -> Box<dyn Interpret<T>> {
|
||||
Box::new(OpenshiftClusterAlertInterpret {
|
||||
receivers: self.receivers.clone(),
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct OpenshiftClusterAlertInterpret {
|
||||
receivers: Vec<Box<dyn AlertReceiver<OpenshiftClusterAlertSender>>>,
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl<T: Topology + K8sclient> Interpret<T> for OpenshiftClusterAlertInterpret {
|
||||
async fn execute(
|
||||
&self,
|
||||
_inventory: &Inventory,
|
||||
topology: &T,
|
||||
) -> Result<Outcome, InterpretError> {
|
||||
let client = topology.k8s_client().await?;
|
||||
let openshift_monitoring_namespace = "openshift-monitoring";
|
||||
|
||||
let mut alertmanager_main_secret: DynamicObject = client
|
||||
.get_secret_json_value("alertmanager-main", Some(openshift_monitoring_namespace))
|
||||
.await?;
|
||||
trace!("Got secret {alertmanager_main_secret:#?}");
|
||||
|
||||
let data: &mut serde_json::Value = &mut alertmanager_main_secret.data;
|
||||
trace!("Alertmanager-main secret data {data:#?}");
|
||||
let data_obj = data
|
||||
.get_mut("data")
|
||||
.ok_or(InterpretError::new(
|
||||
"Missing 'data' field in alertmanager-main secret.".to_string(),
|
||||
))?
|
||||
.as_object_mut()
|
||||
.ok_or(InterpretError::new(
|
||||
"'data' field in alertmanager-main secret is expected to be an object ."
|
||||
.to_string(),
|
||||
))?;
|
||||
|
||||
let config_b64 = data_obj
|
||||
.get("alertmanager.yaml")
|
||||
.ok_or(InterpretError::new(
|
||||
"Missing 'alertmanager.yaml' in alertmanager-main secret data".to_string(),
|
||||
))?
|
||||
.as_str()
|
||||
.unwrap_or("");
|
||||
trace!("Config base64 {config_b64}");
|
||||
|
||||
let config_bytes = BASE64_STANDARD.decode(config_b64).unwrap_or_default();
|
||||
|
||||
let mut am_config: serde_yaml::Value =
|
||||
serde_yaml::from_str(&String::from_utf8(config_bytes).unwrap_or_default())
|
||||
.unwrap_or_default();
|
||||
|
||||
debug!("Current alertmanager config {am_config:#?}");
|
||||
|
||||
let existing_receivers_sequence = if let Some(receivers) = am_config.get_mut("receivers") {
|
||||
match receivers.as_sequence_mut() {
|
||||
Some(seq) => seq,
|
||||
None => {
|
||||
return Err(InterpretError::new(format!(
|
||||
"Expected alertmanager config receivers to be a sequence, got {:?}",
|
||||
receivers
|
||||
)));
|
||||
}
|
||||
}
|
||||
} else {
|
||||
&mut serde_yaml::Sequence::default()
|
||||
};
|
||||
|
||||
let mut additional_resources = vec![];
|
||||
|
||||
for custom_receiver in &self.receivers {
|
||||
let name = custom_receiver.name();
|
||||
let alertmanager_receiver = custom_receiver.as_alertmanager_receiver()?;
|
||||
|
||||
let receiver_json_value = alertmanager_receiver.receiver_config;
|
||||
|
||||
let receiver_yaml_string =
|
||||
serde_json::to_string(&receiver_json_value).map_err(|e| {
|
||||
InterpretError::new(format!("Failed to serialize receiver config: {}", e))
|
||||
})?;
|
||||
|
||||
let receiver_yaml_value: serde_yaml::Value =
|
||||
serde_yaml::from_str(&receiver_yaml_string).map_err(|e| {
|
||||
InterpretError::new(format!("Failed to parse receiver config as YAML: {}", e))
|
||||
})?;
|
||||
|
||||
if let Some(idx) = existing_receivers_sequence.iter().position(|r| {
|
||||
r.get("name")
|
||||
.and_then(|n| n.as_str())
|
||||
.map_or(false, |n| n == name)
|
||||
}) {
|
||||
info!("Replacing existing AlertManager receiver: {}", name);
|
||||
existing_receivers_sequence[idx] = receiver_yaml_value;
|
||||
} else {
|
||||
debug!("Adding new AlertManager receiver: {}", name);
|
||||
existing_receivers_sequence.push(receiver_yaml_value);
|
||||
}
|
||||
|
||||
additional_resources.push(alertmanager_receiver.additional_ressources);
|
||||
}
|
||||
|
||||
let existing_route_mapping = if let Some(route) = am_config.get_mut("route") {
|
||||
match route.as_mapping_mut() {
|
||||
Some(map) => map,
|
||||
None => {
|
||||
return Err(InterpretError::new(format!(
|
||||
"Expected alertmanager config route to be a mapping, got {:?}",
|
||||
route
|
||||
)));
|
||||
}
|
||||
}
|
||||
} else {
|
||||
&mut serde_yaml::Mapping::default()
|
||||
};
|
||||
|
||||
let existing_route_sequence = if let Some(routes) = existing_route_mapping.get_mut("routes")
|
||||
{
|
||||
match routes.as_sequence_mut() {
|
||||
Some(seq) => seq,
|
||||
None => {
|
||||
return Err(InterpretError::new(format!(
|
||||
"Expected alertmanager config routes to be a sequence, got {:?}",
|
||||
routes
|
||||
)));
|
||||
}
|
||||
}
|
||||
} else {
|
||||
&mut serde_yaml::Sequence::default()
|
||||
};
|
||||
|
||||
for custom_receiver in &self.receivers {
|
||||
let name = custom_receiver.name();
|
||||
let alertmanager_receiver = custom_receiver.as_alertmanager_receiver()?;
|
||||
|
||||
let route_json_value = alertmanager_receiver.route_config;
|
||||
let route_yaml_string = serde_json::to_string(&route_json_value).map_err(|e| {
|
||||
InterpretError::new(format!("Failed to serialize route config: {}", e))
|
||||
})?;
|
||||
|
||||
let route_yaml_value: serde_yaml::Value = serde_yaml::from_str(&route_yaml_string)
|
||||
.map_err(|e| {
|
||||
InterpretError::new(format!("Failed to parse route config as YAML: {}", e))
|
||||
})?;
|
||||
|
||||
if let Some(idy) = existing_route_sequence.iter().position(|r| {
|
||||
r.get("receiver")
|
||||
.and_then(|n| n.as_str())
|
||||
.map_or(false, |n| n == name)
|
||||
}) {
|
||||
info!("Replacing existing AlertManager receiver: {}", name);
|
||||
existing_route_sequence[idy] = route_yaml_value;
|
||||
} else {
|
||||
debug!("Adding new AlertManager receiver: {}", name);
|
||||
existing_route_sequence.push(route_yaml_value);
|
||||
}
|
||||
}
|
||||
debug!("Current alertmanager config {am_config:#?}");
|
||||
// TODO
|
||||
// - save new version of alertmanager config
|
||||
// - write additional ressources to the cluster
|
||||
let am_config = serde_yaml::to_string(&am_config).map_err(|e| {
|
||||
InterpretError::new(format!(
|
||||
"Failed to serialize new alertmanager config to string : {e}"
|
||||
))
|
||||
})?;
|
||||
|
||||
let mut am_config_b64 = String::new();
|
||||
BASE64_STANDARD.encode_string(am_config, &mut am_config_b64);
|
||||
|
||||
// TODO put update configmap value and save new value
|
||||
data_obj.insert(
|
||||
"alertmanager.yaml".to_string(),
|
||||
serde_json::Value::String(am_config_b64),
|
||||
);
|
||||
|
||||
// https://kubernetes.io/docs/reference/using-api/server-side-apply/#field-management
|
||||
alertmanager_main_secret.metadata.managed_fields = None;
|
||||
|
||||
trace!("Applying new alertmanager_main_secret {alertmanager_main_secret:#?}");
|
||||
client
|
||||
.apply_dynamic(
|
||||
&alertmanager_main_secret,
|
||||
Some(openshift_monitoring_namespace),
|
||||
true,
|
||||
)
|
||||
.await?;
|
||||
|
||||
let additional_resources = additional_resources.concat();
|
||||
trace!("Applying additional ressources for alert receivers {additional_resources:#?}");
|
||||
client
|
||||
.apply_dynamic_many(
|
||||
&additional_resources,
|
||||
Some(openshift_monitoring_namespace),
|
||||
true,
|
||||
)
|
||||
.await?;
|
||||
|
||||
Ok(Outcome::success(format!(
|
||||
"Successfully configured {} cluster alert receivers: {}",
|
||||
self.receivers.len(),
|
||||
self.receivers
|
||||
.iter()
|
||||
.map(|r| r.name())
|
||||
.collect::<Vec<_>>()
|
||||
.join(", ")
|
||||
)))
|
||||
}
|
||||
|
||||
fn get_name(&self) -> InterpretName {
|
||||
InterpretName::Custom("OpenshiftClusterAlertInterpret")
|
||||
}
|
||||
|
||||
fn get_version(&self) -> Version {
|
||||
todo!()
|
||||
}
|
||||
|
||||
fn get_status(&self) -> InterpretStatus {
|
||||
todo!()
|
||||
}
|
||||
|
||||
fn get_children(&self) -> Vec<Id> {
|
||||
todo!()
|
||||
}
|
||||
}
|
||||
@@ -1,90 +0,0 @@
|
||||
use std::{collections::BTreeMap, sync::Arc};
|
||||
|
||||
use crate::{
|
||||
interpret::{InterpretError, Outcome},
|
||||
topology::k8s::K8sClient,
|
||||
};
|
||||
use k8s_openapi::api::core::v1::ConfigMap;
|
||||
use kube::api::ObjectMeta;
|
||||
|
||||
pub(crate) struct Config;
|
||||
|
||||
impl Config {
|
||||
pub async fn create_cluster_monitoring_config_cm(
|
||||
client: &Arc<K8sClient>,
|
||||
) -> Result<Outcome, InterpretError> {
|
||||
let mut data = BTreeMap::new();
|
||||
data.insert(
|
||||
"config.yaml".to_string(),
|
||||
r#"
|
||||
enableUserWorkload: true
|
||||
alertmanagerMain:
|
||||
enableUserAlertmanagerConfig: true
|
||||
"#
|
||||
.to_string(),
|
||||
);
|
||||
|
||||
let cm = ConfigMap {
|
||||
metadata: ObjectMeta {
|
||||
name: Some("cluster-monitoring-config".to_string()),
|
||||
namespace: Some("openshift-monitoring".to_string()),
|
||||
..Default::default()
|
||||
},
|
||||
data: Some(data),
|
||||
..Default::default()
|
||||
};
|
||||
client.apply(&cm, Some("openshift-monitoring")).await?;
|
||||
|
||||
Ok(Outcome::success(
|
||||
"updated cluster-monitoring-config-map".to_string(),
|
||||
))
|
||||
}
|
||||
|
||||
pub async fn create_user_workload_monitoring_config_cm(
|
||||
client: &Arc<K8sClient>,
|
||||
) -> Result<Outcome, InterpretError> {
|
||||
let mut data = BTreeMap::new();
|
||||
data.insert(
|
||||
"config.yaml".to_string(),
|
||||
r#"
|
||||
alertmanager:
|
||||
enabled: true
|
||||
enableAlertmanagerConfig: true
|
||||
"#
|
||||
.to_string(),
|
||||
);
|
||||
let cm = ConfigMap {
|
||||
metadata: ObjectMeta {
|
||||
name: Some("user-workload-monitoring-config".to_string()),
|
||||
namespace: Some("openshift-user-workload-monitoring".to_string()),
|
||||
..Default::default()
|
||||
},
|
||||
data: Some(data),
|
||||
..Default::default()
|
||||
};
|
||||
client
|
||||
.apply(&cm, Some("openshift-user-workload-monitoring"))
|
||||
.await?;
|
||||
|
||||
Ok(Outcome::success(
|
||||
"updated openshift-user-monitoring-config-map".to_string(),
|
||||
))
|
||||
}
|
||||
|
||||
pub async fn verify_user_workload(client: &Arc<K8sClient>) -> Result<Outcome, InterpretError> {
|
||||
let namespace = "openshift-user-workload-monitoring";
|
||||
let alertmanager_name = "alertmanager-user-workload-0";
|
||||
let prometheus_name = "prometheus-user-workload-0";
|
||||
client
|
||||
.wait_for_pod_ready(alertmanager_name, Some(namespace))
|
||||
.await?;
|
||||
client
|
||||
.wait_for_pod_ready(prometheus_name, Some(namespace))
|
||||
.await?;
|
||||
|
||||
Ok(Outcome::success(format!(
|
||||
"pods: {}, {} ready in ns: {}",
|
||||
alertmanager_name, prometheus_name, namespace
|
||||
)))
|
||||
}
|
||||
}
|
||||
58
harmony/src/modules/monitoring/okd/crd/alerting_rules.rs
Normal file
58
harmony/src/modules/monitoring/okd/crd/alerting_rules.rs
Normal file
@@ -0,0 +1,58 @@
|
||||
use std::collections::BTreeMap;
|
||||
|
||||
use kube::CustomResource;
|
||||
use schemars::JsonSchema;
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
use crate::modules::monitoring::alert_rule::prometheus_alert_rule::PrometheusAlertRule;
|
||||
|
||||
/// Spec for the OpenShift `AlertingRule` CRD (monitoring.openshift.io/v1).
///
/// `groups` is kept as raw JSON so rule groups built elsewhere in the crate
/// can be passed through without re-typing them here.
#[derive(CustomResource, Debug, Serialize, Deserialize, Clone, JsonSchema, Default)]
#[kube(
    group = "monitoring.openshift.io",
    version = "v1",
    kind = "AlertingRule",
    plural = "alertingrules",
    namespaced,
    derive = "Default"
)]
#[serde(rename_all = "camelCase")]
pub struct AlertingRuleSpec {
    pub groups: serde_json::Value,
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
|
||||
pub struct RuleGroup {
|
||||
pub name: String,
|
||||
pub rules: Vec<Rule>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
pub struct Rule {
|
||||
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||||
pub alert: Option<String>,
|
||||
|
||||
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||||
pub expr: Option<String>,
|
||||
|
||||
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||||
pub for_: Option<String>,
|
||||
|
||||
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||||
pub labels: Option<std::collections::BTreeMap<String, String>>,
|
||||
|
||||
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||||
pub annotations: Option<std::collections::BTreeMap<String, String>>,
|
||||
}
|
||||
|
||||
impl From<PrometheusAlertRule> for Rule {
|
||||
fn from(value: PrometheusAlertRule) -> Self {
|
||||
Rule {
|
||||
alert: Some(value.alert),
|
||||
expr: Some(value.expr),
|
||||
for_: value.r#for,
|
||||
labels: Some(value.labels.into_iter().collect::<BTreeMap<_, _>>()),
|
||||
annotations: Some(value.annotations.into_iter().collect::<BTreeMap<_, _>>()),
|
||||
}
|
||||
}
|
||||
}
|
||||
3
harmony/src/modules/monitoring/okd/crd/mod.rs
Normal file
3
harmony/src/modules/monitoring/okd/crd/mod.rs
Normal file
@@ -0,0 +1,3 @@
|
||||
pub mod alerting_rules;
|
||||
pub mod scrape_target;
|
||||
pub mod service_monitor;
|
||||
72
harmony/src/modules/monitoring/okd/crd/scrape_target.rs
Normal file
72
harmony/src/modules/monitoring/okd/crd/scrape_target.rs
Normal file
@@ -0,0 +1,72 @@
|
||||
use kube::CustomResource;
|
||||
use schemars::JsonSchema;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use std::collections::BTreeMap;
|
||||
|
||||
/// Spec for the Prometheus Operator `ScrapeConfig` CRD
/// (monitoring.coreos.com/v1alpha1). Only the subset of fields used by this
/// crate is modelled; every field is optional and omitted when unset.
#[derive(CustomResource, Debug, Serialize, Deserialize, Clone, JsonSchema, Default)]
#[kube(
    group = "monitoring.coreos.com",
    version = "v1alpha1",
    kind = "ScrapeConfig",
    plural = "scrapeconfigs",
    namespaced,
    derive = "Default"
)]
#[serde(rename_all = "camelCase")]
pub struct ScrapeConfigSpec {
    /// Job name attached to the scraped metrics.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub job_name: Option<String>,

    /// HTTP path to scrape for metrics.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub metrics_path: Option<String>,

    /// How often targets are scraped (e.g. "30s").
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub scrape_interval: Option<String>,

    /// Per-scrape timeout.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub scrape_timeout: Option<String>,

    /// Scheme used for scraping ("http" or "https").
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub scheme: Option<String>,

    /// Statically listed scrape targets.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub static_configs: Option<Vec<StaticConfig>>,

    /// Relabeling rules applied to targets before scraping.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub relabelings: Option<Vec<RelabelConfig>>,

    /// Relabeling rules applied to samples after scraping.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub metric_relabelings: Option<Vec<RelabelConfig>>,
}
|
||||
|
||||
/// A static list of scrape targets plus labels applied to every sample
/// scraped from them.
#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema, Default)]
#[serde(rename_all = "camelCase")]
pub struct StaticConfig {
    /// targets: ["host:port"]
    pub targets: Vec<String>,

    /// Labels added to all metrics from these targets.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub labels: Option<BTreeMap<String, String>>,
}

/// One Prometheus relabeling step (mirrors the operator's RelabelConfig).
#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema, Default)]
#[serde(rename_all = "camelCase")]
pub struct RelabelConfig {
    /// Source labels whose values are concatenated for matching.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub source_labels: Option<Vec<String>>,

    /// Separator placed between concatenated source label values.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub separator: Option<String>,

    /// Label the resulting value is written to.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub target_label: Option<String>,

    /// Replacement value (may reference regex capture groups).
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub replacement: Option<String>,

    /// Action to perform (replace, keep, drop, ...).
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub action: Option<String>,

    /// Regex the concatenated source values are matched against.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub regex: Option<String>,
}
|
||||
97
harmony/src/modules/monitoring/okd/crd/service_monitor.rs
Normal file
97
harmony/src/modules/monitoring/okd/crd/service_monitor.rs
Normal file
@@ -0,0 +1,97 @@
|
||||
use kube::CustomResource;
|
||||
use schemars::JsonSchema;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use std::collections::BTreeMap;
|
||||
|
||||
/// Spec for the Prometheus Operator `ServiceMonitor` CRD
/// (monitoring.coreos.com/v1): selects Services and describes how to scrape
/// their endpoints.
#[derive(CustomResource, Debug, Serialize, Deserialize, Clone, JsonSchema, Default)]
#[kube(
    group = "monitoring.coreos.com",
    version = "v1",
    kind = "ServiceMonitor",
    plural = "servicemonitors",
    namespaced,
    derive = "Default"
)]
#[serde(rename_all = "camelCase")]
pub struct ServiceMonitorSpec {
    /// The label to use to retrieve the job name from.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub job_label: Option<String>,

    /// TargetLabels transfers labels on the Kubernetes Service onto the target.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub target_labels: Option<Vec<String>>,

    /// PodTargetLabels transfers labels on the Kubernetes Pod onto the target.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub pod_target_labels: Option<Vec<String>>,

    /// A list of endpoints allowed as part of this ServiceMonitor.
    pub endpoints: Vec<Endpoint>,

    /// Selector to select Endpoints objects.
    pub selector: LabelSelector,

    /// Selector to select which namespaces the Kubernetes Endpoints objects are discovered from.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub namespace_selector: Option<NamespaceSelector>,
}
|
||||
|
||||
/// One scrapeable endpoint of a monitored Service.
#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema, Default)]
#[serde(rename_all = "camelCase")]
pub struct Endpoint {
    /// Name of the service port this endpoint refers to.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub port: Option<String>,

    /// HTTP path to scrape for metrics.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub path: Option<String>,

    /// HTTP scheme to use for scraping.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub scheme: Option<String>,

    /// Interval at which metrics should be scraped.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub interval: Option<String>,

    /// Timeout after which the scrape is ended.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub scrape_timeout: Option<String>,

    /// HonorLabels chooses the metric's labels on collisions with target labels.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub honor_labels: Option<bool>,
}

/// Kubernetes-style label selector (match_labels and/or match_expressions).
#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema, Default)]
#[serde(rename_all = "camelCase")]
pub struct LabelSelector {
    /// Exact label key/value matches; all must hold.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub match_labels: Option<BTreeMap<String, String>>,

    /// Set-based match requirements; all must hold.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub match_expressions: Option<Vec<LabelSelectorRequirement>>,
}

/// One set-based selector requirement: key, operator (In/NotIn/Exists/...),
/// and the values the operator applies to.
#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema, Default)]
#[serde(rename_all = "camelCase")]
pub struct LabelSelectorRequirement {
    pub key: String,
    pub operator: String,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub values: Option<Vec<String>>,
}

/// Selects which namespaces endpoints are discovered from.
#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema, Default)]
#[serde(rename_all = "camelCase")]
pub struct NamespaceSelector {
    /// Boolean describing whether all namespaces are selected in contrast to a list restricting them.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub any: Option<bool>,

    /// List of namespace names.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub match_names: Option<Vec<String>>,
}
|
||||
@@ -1,60 +0,0 @@
|
||||
use crate::{
|
||||
data::Version,
|
||||
interpret::{Interpret, InterpretError, InterpretName, InterpretStatus, Outcome},
|
||||
inventory::Inventory,
|
||||
modules::monitoring::okd::config::Config,
|
||||
score::Score,
|
||||
topology::{K8sclient, Topology},
|
||||
};
|
||||
use async_trait::async_trait;
|
||||
use harmony_types::id::Id;
|
||||
use serde::Serialize;
|
||||
|
||||
#[derive(Clone, Debug, Serialize)]
|
||||
pub struct OpenshiftUserWorkloadMonitoring {}
|
||||
|
||||
impl<T: Topology + K8sclient> Score<T> for OpenshiftUserWorkloadMonitoring {
|
||||
fn name(&self) -> String {
|
||||
"OpenshiftUserWorkloadMonitoringScore".to_string()
|
||||
}
|
||||
|
||||
fn create_interpret(&self) -> Box<dyn Interpret<T>> {
|
||||
Box::new(OpenshiftUserWorkloadMonitoringInterpret {})
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug, Serialize)]
|
||||
pub struct OpenshiftUserWorkloadMonitoringInterpret {}
|
||||
|
||||
#[async_trait]
|
||||
impl<T: Topology + K8sclient> Interpret<T> for OpenshiftUserWorkloadMonitoringInterpret {
|
||||
async fn execute(
|
||||
&self,
|
||||
_inventory: &Inventory,
|
||||
topology: &T,
|
||||
) -> Result<Outcome, InterpretError> {
|
||||
let client = topology.k8s_client().await.unwrap();
|
||||
Config::create_cluster_monitoring_config_cm(&client).await?;
|
||||
Config::create_user_workload_monitoring_config_cm(&client).await?;
|
||||
Config::verify_user_workload(&client).await?;
|
||||
Ok(Outcome::success(
|
||||
"successfully enabled user-workload-monitoring".to_string(),
|
||||
))
|
||||
}
|
||||
|
||||
fn get_name(&self) -> InterpretName {
|
||||
InterpretName::Custom("OpenshiftUserWorkloadMonitoring")
|
||||
}
|
||||
|
||||
fn get_version(&self) -> Version {
|
||||
todo!()
|
||||
}
|
||||
|
||||
fn get_status(&self) -> InterpretStatus {
|
||||
todo!()
|
||||
}
|
||||
|
||||
fn get_children(&self) -> Vec<Id> {
|
||||
todo!()
|
||||
}
|
||||
}
|
||||
@@ -1,10 +1,17 @@
|
||||
use crate::topology::oberservability::monitoring::AlertSender;
|
||||
use serde::Serialize;
|
||||
|
||||
pub mod cluster_monitoring;
|
||||
pub(crate) mod config;
|
||||
pub mod enable_user_workload;
|
||||
use crate::topology::monitoring::{AlertReceiver, AlertRule, AlertSender, ScrapeTarget};
|
||||
|
||||
#[derive(Debug)]
|
||||
pub mod crd;
|
||||
pub mod openshift_cluster_alerting_score;
|
||||
pub mod score_enable_cluster_monitoring;
|
||||
pub mod score_openshift_alert_rule;
|
||||
pub mod score_openshift_receiver;
|
||||
pub mod score_openshift_scrape_target;
|
||||
pub mod score_user_workload;
|
||||
pub mod score_verify_user_workload_monitoring;
|
||||
|
||||
#[derive(Debug, Clone, Serialize)]
|
||||
pub struct OpenshiftClusterAlertSender;
|
||||
|
||||
impl AlertSender for OpenshiftClusterAlertSender {
|
||||
@@ -12,3 +19,30 @@ impl AlertSender for OpenshiftClusterAlertSender {
|
||||
"OpenshiftClusterAlertSender".to_string()
|
||||
}
|
||||
}
|
||||
|
||||
// NOTE(review): these Serialize impls are placeholders — serializing any of
// the boxed trait objects below currently panics via todo!(). They exist only
// so that #[derive(Serialize)] compiles on scores embedding these boxes;
// implement or remove before relying on score serialization.
impl Serialize for Box<dyn AlertReceiver<OpenshiftClusterAlertSender>> {
    fn serialize<S>(&self, _serializer: S) -> Result<S::Ok, S::Error>
    where
        S: serde::Serializer,
    {
        todo!()
    }
}

impl Serialize for Box<dyn AlertRule<OpenshiftClusterAlertSender>> {
    fn serialize<S>(&self, _serializer: S) -> Result<S::Ok, S::Error>
    where
        S: serde::Serializer,
    {
        todo!()
    }
}

impl Serialize for Box<dyn ScrapeTarget<OpenshiftClusterAlertSender>> {
    fn serialize<S>(&self, _serializer: S) -> Result<S::Ok, S::Error>
    where
        S: serde::Serializer,
    {
        todo!()
    }
}
|
||||
|
||||
@@ -0,0 +1,36 @@
|
||||
use serde::Serialize;
|
||||
|
||||
use crate::{
|
||||
interpret::Interpret,
|
||||
modules::monitoring::okd::OpenshiftClusterAlertSender,
|
||||
score::Score,
|
||||
topology::{
|
||||
Topology,
|
||||
monitoring::{AlertReceiver, AlertRule, AlertingInterpret, Observability, ScrapeTarget},
|
||||
},
|
||||
};
|
||||
|
||||
#[derive(Debug, Clone, Serialize)]
|
||||
pub struct OpenshiftClusterAlertScore {
|
||||
pub sender: OpenshiftClusterAlertSender,
|
||||
pub receivers: Vec<Box<dyn AlertReceiver<OpenshiftClusterAlertSender>>>,
|
||||
pub rules: Vec<Box<dyn AlertRule<OpenshiftClusterAlertSender>>>,
|
||||
pub scrape_targets: Option<Vec<Box<dyn ScrapeTarget<OpenshiftClusterAlertSender>>>>,
|
||||
}
|
||||
|
||||
impl<T: Topology + Observability<OpenshiftClusterAlertSender>> Score<T>
|
||||
for OpenshiftClusterAlertScore
|
||||
{
|
||||
fn name(&self) -> String {
|
||||
"OpenshiftClusterAlertScore".to_string()
|
||||
}
|
||||
|
||||
fn create_interpret(&self) -> Box<dyn Interpret<T>> {
|
||||
Box::new(AlertingInterpret {
|
||||
sender: OpenshiftClusterAlertSender,
|
||||
receivers: self.receivers.clone(),
|
||||
rules: self.rules.clone(),
|
||||
scrape_targets: self.scrape_targets.clone(),
|
||||
})
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,149 @@
|
||||
use std::{collections::BTreeMap, sync::Arc};
|
||||
|
||||
use async_trait::async_trait;
|
||||
use harmony_types::id::Id;
|
||||
use k8s_openapi::api::core::v1::ConfigMap;
|
||||
use kube::api::{GroupVersionKind, ObjectMeta};
|
||||
use log::debug;
|
||||
use serde::Serialize;
|
||||
|
||||
use crate::{
|
||||
data::Version,
|
||||
interpret::{Interpret, InterpretError, InterpretName, InterpretStatus, Outcome},
|
||||
inventory::Inventory,
|
||||
modules::k8s::resource::K8sResourceScore,
|
||||
score::Score,
|
||||
topology::{K8sclient, Topology, k8s::K8sClient},
|
||||
};
|
||||
|
||||
#[derive(Clone, Debug, Serialize)]
|
||||
pub struct OpenshiftEnableClusterMonitoringScore {}
|
||||
|
||||
impl<T: Topology + K8sclient> Score<T> for OpenshiftEnableClusterMonitoringScore {
|
||||
fn name(&self) -> String {
|
||||
"OpenshiftClusterMonitoringScore".to_string()
|
||||
}
|
||||
|
||||
fn create_interpret(&self) -> Box<dyn Interpret<T>> {
|
||||
Box::new(OpenshiftEnableClusterMonitoringInterpret {})
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug, Serialize)]
|
||||
pub struct OpenshiftEnableClusterMonitoringInterpret {}
|
||||
|
||||
#[async_trait]
|
||||
impl<T: Topology + K8sclient> Interpret<T> for OpenshiftEnableClusterMonitoringInterpret {
|
||||
async fn execute(
|
||||
&self,
|
||||
inventory: &Inventory,
|
||||
topology: &T,
|
||||
) -> Result<Outcome, InterpretError> {
|
||||
let namespace = "openshift-monitoring".to_string();
|
||||
let name = "cluster-monitoring-config".to_string();
|
||||
let client = topology.k8s_client().await?;
|
||||
let enabled = self
|
||||
.check_cluster_monitoring_enabled(client, &name, &namespace)
|
||||
.await
|
||||
.map_err(|e| InterpretError::new(e))?;
|
||||
|
||||
debug!("enabled {:#?}", enabled);
|
||||
|
||||
match enabled {
|
||||
true => Ok(Outcome::success(
|
||||
"Openshift Cluster Monitoring already enabled".to_string(),
|
||||
)),
|
||||
false => {
|
||||
let mut data = BTreeMap::new();
|
||||
data.insert(
|
||||
"config.yaml".to_string(),
|
||||
r#"
|
||||
enableUserWorkload: true
|
||||
alertmanagerMain:
|
||||
enableUserAlertmanagerConfig: true
|
||||
"#
|
||||
.to_string(),
|
||||
);
|
||||
|
||||
let cm = ConfigMap {
|
||||
metadata: ObjectMeta {
|
||||
name: Some(name),
|
||||
namespace: Some(namespace.clone()),
|
||||
..Default::default()
|
||||
},
|
||||
data: Some(data),
|
||||
..Default::default()
|
||||
};
|
||||
K8sResourceScore::single(cm, Some(namespace))
|
||||
.create_interpret()
|
||||
.execute(inventory, topology)
|
||||
.await?;
|
||||
|
||||
Ok(Outcome::success(
|
||||
"Successfully enabled Openshift Cluster Monitoring".to_string(),
|
||||
))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn get_name(&self) -> InterpretName {
|
||||
InterpretName::Custom("OpenshiftEnableClusterMonitoringInterpret")
|
||||
}
|
||||
|
||||
fn get_version(&self) -> Version {
|
||||
todo!()
|
||||
}
|
||||
|
||||
fn get_status(&self) -> InterpretStatus {
|
||||
todo!()
|
||||
}
|
||||
|
||||
fn get_children(&self) -> Vec<Id> {
|
||||
todo!()
|
||||
}
|
||||
}
|
||||
|
||||
impl OpenshiftEnableClusterMonitoringInterpret {
|
||||
async fn check_cluster_monitoring_enabled(
|
||||
&self,
|
||||
client: Arc<K8sClient>,
|
||||
name: &str,
|
||||
namespace: &str,
|
||||
) -> Result<bool, String> {
|
||||
let gvk = GroupVersionKind {
|
||||
group: "".to_string(),
|
||||
version: "v1".to_string(),
|
||||
kind: "ConfigMap".to_string(),
|
||||
};
|
||||
|
||||
let cm = match client
|
||||
.get_resource_json_value(name, Some(namespace), &gvk)
|
||||
.await
|
||||
{
|
||||
Ok(obj) => obj,
|
||||
Err(_) => return Ok(false),
|
||||
};
|
||||
|
||||
debug!("{:#?}", cm.data.pointer("/data/config.yaml"));
|
||||
let config_yaml_str = match cm
|
||||
.data
|
||||
.pointer("/data/config.yaml")
|
||||
.and_then(|v| v.as_str())
|
||||
{
|
||||
Some(s) => s,
|
||||
None => return Ok(false),
|
||||
};
|
||||
|
||||
debug!("{:#?}", config_yaml_str);
|
||||
let parsed_config: serde_yaml::Value = serde_yaml::from_str(config_yaml_str)
|
||||
.map_err(|e| format!("Failed to parse nested YAML: {}", e))?;
|
||||
|
||||
let enabled = parsed_config
|
||||
.get("enableUserWorkload")
|
||||
.and_then(|v| v.as_bool())
|
||||
.unwrap_or(false);
|
||||
|
||||
debug!("{:#?}", enabled);
|
||||
Ok(enabled)
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,42 @@
|
||||
use kube::api::ObjectMeta;
|
||||
use serde::Serialize;
|
||||
|
||||
use crate::{
|
||||
interpret::Interpret,
|
||||
modules::{
|
||||
k8s::resource::K8sResourceScore,
|
||||
monitoring::okd::{
|
||||
OpenshiftClusterAlertSender,
|
||||
crd::alerting_rules::{AlertingRule, AlertingRuleSpec},
|
||||
},
|
||||
},
|
||||
score::Score,
|
||||
topology::{K8sclient, Topology, monitoring::AlertRule},
|
||||
};
|
||||
|
||||
/// Score wrapping a single alert rule destined for OpenShift's platform
/// Alertmanager, rendered as an `AlertingRule` custom resource.
#[derive(Debug, Clone, Serialize)]
pub struct OpenshiftAlertRuleScore {
    // Rule to render; `build_rule()` produces the CR's rule groups.
    pub rule: Box<dyn AlertRule<OpenshiftClusterAlertSender>>,
}
|
||||
|
||||
impl<T: Topology + K8sclient> Score<T> for OpenshiftAlertRuleScore {
|
||||
fn name(&self) -> String {
|
||||
"OpenshiftAlertingRuleScore".to_string()
|
||||
}
|
||||
|
||||
fn create_interpret(&self) -> Box<dyn Interpret<T>> {
|
||||
let namespace = "openshift-monitoring".to_string();
|
||||
let alerting_rule = AlertingRule {
|
||||
metadata: ObjectMeta {
|
||||
name: Some(self.rule.name()),
|
||||
namespace: Some(namespace.clone()),
|
||||
..Default::default()
|
||||
},
|
||||
spec: AlertingRuleSpec {
|
||||
groups: self.rule.build_rule().unwrap(),
|
||||
},
|
||||
};
|
||||
|
||||
K8sResourceScore::single(alerting_rule, Some(namespace)).create_interpret()
|
||||
}
|
||||
}
|
||||
144
harmony/src/modules/monitoring/okd/score_openshift_receiver.rs
Normal file
144
harmony/src/modules/monitoring/okd/score_openshift_receiver.rs
Normal file
@@ -0,0 +1,144 @@
|
||||
use async_trait::async_trait;
|
||||
use base64::{Engine as _, prelude::BASE64_STANDARD};
|
||||
use harmony_types::id::Id;
|
||||
use kube::api::DynamicObject;
|
||||
use serde::Serialize;
|
||||
|
||||
use crate::{
|
||||
data::Version,
|
||||
interpret::{Interpret, InterpretError, InterpretName, InterpretStatus, Outcome},
|
||||
inventory::Inventory,
|
||||
modules::monitoring::okd::OpenshiftClusterAlertSender,
|
||||
score::Score,
|
||||
topology::{K8sclient, Topology, monitoring::AlertReceiver},
|
||||
};
|
||||
|
||||
/// Score that installs one alert receiver into the OpenShift platform
/// Alertmanager (the `alertmanager-main` secret).
#[derive(Debug, Clone, Serialize)]
pub struct OpenshiftReceiverScore {
    // Receiver to merge; must provide both a receiver config and a route.
    pub receiver: Box<dyn AlertReceiver<OpenshiftClusterAlertSender>>,
}
|
||||
|
||||
impl<T: Topology + K8sclient> Score<T> for OpenshiftReceiverScore {
|
||||
fn name(&self) -> String {
|
||||
"OpenshiftAlertReceiverScore".to_string()
|
||||
}
|
||||
|
||||
fn create_interpret(&self) -> Box<dyn Interpret<T>> {
|
||||
Box::new(OpenshiftReceiverInterpret {
|
||||
receiver: self.receiver.clone(),
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
/// Interpret that merges a single alert receiver (and its route) into the
/// cluster `alertmanager-main` secret.
#[derive(Debug)]
pub struct OpenshiftReceiverInterpret {
    receiver: Box<dyn AlertReceiver<OpenshiftClusterAlertSender>>,
}
|
||||
|
||||
#[async_trait]
|
||||
impl<T: Topology + K8sclient> Interpret<T> for OpenshiftReceiverInterpret {
|
||||
async fn execute(
|
||||
&self,
|
||||
_inventory: &Inventory,
|
||||
topology: &T,
|
||||
) -> Result<Outcome, InterpretError> {
|
||||
let client = topology.k8s_client().await?;
|
||||
let ns = "openshift-monitoring";
|
||||
|
||||
let mut am_secret: DynamicObject = client
|
||||
.get_secret_json_value("alertmanager-main", Some(ns))
|
||||
.await?;
|
||||
|
||||
let data = am_secret
|
||||
.data
|
||||
.get_mut("data")
|
||||
.ok_or_else(|| {
|
||||
InterpretError::new("Missing 'data' field in alertmanager-main secret".into())
|
||||
})?
|
||||
.as_object_mut()
|
||||
.ok_or_else(|| InterpretError::new("'data' field must be a JSON object".into()))?;
|
||||
|
||||
let config_b64 = data
|
||||
.get("alertmanager.yaml")
|
||||
.and_then(|v| v.as_str())
|
||||
.unwrap_or_default();
|
||||
|
||||
let config_bytes = BASE64_STANDARD.decode(config_b64).unwrap_or_default();
|
||||
|
||||
let mut am_config: serde_yaml::Value = serde_yaml::from_slice(&config_bytes)
|
||||
.unwrap_or_else(|_| serde_yaml::Value::Mapping(serde_yaml::Mapping::new()));
|
||||
|
||||
let name = self.receiver.name();
|
||||
let receiver = self.receiver.build_receiver()?;
|
||||
let route = self.receiver.build_route().unwrap();
|
||||
|
||||
if am_config.get("receivers").is_none() {
|
||||
am_config["receivers"] = serde_yaml::Value::Sequence(vec![]);
|
||||
}
|
||||
if am_config.get("route").is_none() {
|
||||
am_config["route"] = serde_yaml::Value::Mapping(serde_yaml::Mapping::new());
|
||||
}
|
||||
if am_config["route"].get("routes").is_none() {
|
||||
am_config["route"]["routes"] = serde_yaml::Value::Sequence(vec![]);
|
||||
}
|
||||
|
||||
{
|
||||
let receivers_seq = am_config["receivers"].as_sequence_mut().unwrap();
|
||||
if let Some(idx) = receivers_seq
|
||||
.iter()
|
||||
.position(|r| r.get("name").and_then(|n| n.as_str()) == Some(&name))
|
||||
{
|
||||
receivers_seq[idx] = receiver;
|
||||
} else {
|
||||
receivers_seq.push(receiver);
|
||||
}
|
||||
}
|
||||
|
||||
{
|
||||
let route_seq = am_config["route"]["routes"].as_sequence_mut().unwrap();
|
||||
if let Some(idx) = route_seq
|
||||
.iter()
|
||||
.position(|r| r.get("receiver").and_then(|n| n.as_str()) == Some(&name))
|
||||
{
|
||||
route_seq[idx] = route;
|
||||
} else {
|
||||
route_seq.push(route);
|
||||
}
|
||||
}
|
||||
|
||||
let yaml_str =
|
||||
serde_yaml::to_string(&am_config).map_err(|e| InterpretError::new(e.to_string()))?;
|
||||
|
||||
let mut yaml_b64 = String::new();
|
||||
|
||||
BASE64_STANDARD.encode_string(yaml_str, &mut yaml_b64);
|
||||
data.insert(
|
||||
"alertmanager.yaml".to_string(),
|
||||
serde_json::Value::String(yaml_b64),
|
||||
);
|
||||
am_secret.metadata.managed_fields = None;
|
||||
|
||||
client.apply_dynamic(&am_secret, Some(ns), true).await?;
|
||||
|
||||
Ok(Outcome::success(format!(
|
||||
"Configured OpenShift cluster alert receiver: {}",
|
||||
name
|
||||
)))
|
||||
}
|
||||
|
||||
fn get_name(&self) -> InterpretName {
|
||||
InterpretName::Custom("OpenshiftAlertReceiverInterpret")
|
||||
}
|
||||
|
||||
fn get_version(&self) -> Version {
|
||||
todo!()
|
||||
}
|
||||
|
||||
fn get_status(&self) -> InterpretStatus {
|
||||
todo!()
|
||||
}
|
||||
|
||||
fn get_children(&self) -> Vec<Id> {
|
||||
todo!()
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,188 @@
|
||||
use std::collections::BTreeMap;
|
||||
|
||||
use async_trait::async_trait;
|
||||
use harmony_types::id::Id;
|
||||
use k8s_openapi::{
|
||||
api::core::v1::{
|
||||
EndpointAddress, EndpointPort, EndpointSubset, Endpoints, Service, ServicePort, ServiceSpec,
|
||||
},
|
||||
apimachinery::pkg::util::intstr::IntOrString,
|
||||
};
|
||||
use kube::api::ObjectMeta;
|
||||
use serde::Serialize;
|
||||
|
||||
use crate::{
|
||||
data::Version,
|
||||
interpret::{Interpret, InterpretError, InterpretName, InterpretStatus, Outcome},
|
||||
inventory::Inventory,
|
||||
modules::{
|
||||
k8s::resource::K8sResourceScore,
|
||||
monitoring::okd::{
|
||||
OpenshiftClusterAlertSender,
|
||||
crd::service_monitor::{Endpoint, LabelSelector, ServiceMonitor, ServiceMonitorSpec},
|
||||
},
|
||||
},
|
||||
score::Score,
|
||||
topology::{
|
||||
K8sclient, Topology,
|
||||
monitoring::{ExternalScrapeTarget, ScrapeTarget},
|
||||
},
|
||||
};
|
||||
|
||||
/// Score that exposes an external (off-cluster) endpoint to the OpenShift
/// monitoring stack as a Service + Endpoints + ServiceMonitor trio.
#[derive(Debug, Clone, Serialize)]
pub struct OpenshiftScrapeTargetScore {
    // Target to expose; `build_scrape_target()` yields the external address.
    pub scrape_target: Box<dyn ScrapeTarget<OpenshiftClusterAlertSender>>,
}
|
||||
|
||||
impl<T: Topology + K8sclient> Score<T> for OpenshiftScrapeTargetScore {
|
||||
fn name(&self) -> String {
|
||||
"OpenshiftAlertingRuleScore".to_string()
|
||||
}
|
||||
|
||||
fn create_interpret(&self) -> Box<dyn Interpret<T>> {
|
||||
Box::new(OpenshiftScrapeTargetInterpret {
|
||||
scrape_target: self.scrape_target.clone(),
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
/// Interpret that creates the Service, Endpoints and ServiceMonitor backing
/// one external scrape target.
#[derive(Debug, Clone, Serialize)]
pub struct OpenshiftScrapeTargetInterpret {
    scrape_target: Box<dyn ScrapeTarget<OpenshiftClusterAlertSender>>,
}
|
||||
|
||||
#[async_trait]
|
||||
impl<T: Topology + K8sclient> Interpret<T> for OpenshiftScrapeTargetInterpret {
|
||||
async fn execute(
|
||||
&self,
|
||||
inventory: &Inventory,
|
||||
topology: &T,
|
||||
) -> Result<Outcome, InterpretError> {
|
||||
let namespace = "openshift-monitoring".to_string();
|
||||
let name = self.scrape_target.name();
|
||||
let external_target = self
|
||||
.scrape_target
|
||||
.build_scrape_target()
|
||||
.expect("failed to build scrape target ExternalScrapeTarget");
|
||||
|
||||
let (service, endpoints, service_monitor) =
|
||||
self.to_k8s_resources(&name, &namespace, external_target);
|
||||
|
||||
K8sResourceScore::single(service, Some(namespace.clone()))
|
||||
.create_interpret()
|
||||
.execute(inventory, topology)
|
||||
.await?;
|
||||
|
||||
K8sResourceScore::single(endpoints, Some(namespace.clone()))
|
||||
.create_interpret()
|
||||
.execute(inventory, topology)
|
||||
.await?;
|
||||
|
||||
K8sResourceScore::single(service_monitor, Some(namespace.clone()))
|
||||
.create_interpret()
|
||||
.execute(inventory, topology)
|
||||
.await?;
|
||||
|
||||
Ok(Outcome::success(
|
||||
"Installed scrape target of Openshift".to_string(),
|
||||
))
|
||||
}
|
||||
|
||||
fn get_name(&self) -> InterpretName {
|
||||
InterpretName::Custom("OpenshiftScrapeTargetInterpret")
|
||||
}
|
||||
|
||||
fn get_version(&self) -> Version {
|
||||
todo!()
|
||||
}
|
||||
|
||||
fn get_status(&self) -> InterpretStatus {
|
||||
todo!()
|
||||
}
|
||||
|
||||
fn get_children(&self) -> Vec<Id> {
|
||||
todo!()
|
||||
}
|
||||
}
|
||||
|
||||
impl OpenshiftScrapeTargetInterpret {
|
||||
/// Maps the generic intent into the 3 required Kubernetes objects
|
||||
pub fn to_k8s_resources(
|
||||
&self,
|
||||
name: &str,
|
||||
namespace: &str,
|
||||
external_target: ExternalScrapeTarget,
|
||||
) -> (Service, Endpoints, ServiceMonitor) {
|
||||
let mut labels = external_target.labels.clone().unwrap_or(BTreeMap::new());
|
||||
|
||||
labels.insert("harmony/target-name".to_string(), name.to_string().clone());
|
||||
|
||||
let service = Service {
|
||||
metadata: ObjectMeta {
|
||||
name: Some(name.to_string().clone()),
|
||||
namespace: Some(namespace.to_string()),
|
||||
labels: Some(labels.clone()),
|
||||
..Default::default()
|
||||
},
|
||||
spec: Some(ServiceSpec {
|
||||
cluster_ip: Some("None".to_string()), // Headless
|
||||
ports: Some(vec![ServicePort {
|
||||
name: Some("metrics".to_string()),
|
||||
port: external_target.port.clone(),
|
||||
target_port: Some(IntOrString::Int(external_target.port)),
|
||||
..Default::default()
|
||||
}]),
|
||||
..Default::default()
|
||||
}),
|
||||
..Default::default()
|
||||
};
|
||||
|
||||
let endpoints = Endpoints {
|
||||
metadata: ObjectMeta {
|
||||
name: Some(name.to_string().clone()),
|
||||
namespace: Some(namespace.to_string()),
|
||||
labels: Some(labels.clone()),
|
||||
..Default::default()
|
||||
},
|
||||
subsets: Some(vec![EndpointSubset {
|
||||
addresses: Some(vec![EndpointAddress {
|
||||
ip: external_target.ip.to_string().clone(),
|
||||
..Default::default()
|
||||
}]),
|
||||
ports: Some(vec![EndpointPort {
|
||||
name: Some("metrics".to_string()),
|
||||
port: external_target.port,
|
||||
..Default::default()
|
||||
}]),
|
||||
..Default::default()
|
||||
}]),
|
||||
};
|
||||
|
||||
let service_monitor = ServiceMonitor {
|
||||
metadata: ObjectMeta {
|
||||
name: Some(name.to_string().clone()),
|
||||
namespace: Some(namespace.to_string()),
|
||||
..Default::default()
|
||||
},
|
||||
spec: ServiceMonitorSpec {
|
||||
job_label: Some("harmony/target-name".to_string()),
|
||||
endpoints: vec![Endpoint {
|
||||
port: Some("metrics".to_string()),
|
||||
interval: external_target.interval.clone(),
|
||||
path: external_target.path.clone(),
|
||||
..Default::default()
|
||||
}],
|
||||
selector: LabelSelector {
|
||||
match_labels: Some(BTreeMap::from([(
|
||||
"harmony/target-name".to_string(),
|
||||
name.to_string().clone(),
|
||||
)])),
|
||||
..Default::default()
|
||||
},
|
||||
..Default::default()
|
||||
},
|
||||
};
|
||||
|
||||
(service, endpoints, service_monitor)
|
||||
}
|
||||
}
|
||||
157
harmony/src/modules/monitoring/okd/score_user_workload.rs
Normal file
157
harmony/src/modules/monitoring/okd/score_user_workload.rs
Normal file
@@ -0,0 +1,157 @@
|
||||
use std::{collections::BTreeMap, sync::Arc};
|
||||
|
||||
use crate::{
|
||||
data::Version,
|
||||
interpret::{Interpret, InterpretError, InterpretName, InterpretStatus, Outcome},
|
||||
inventory::Inventory,
|
||||
modules::k8s::resource::K8sResourceScore,
|
||||
score::Score,
|
||||
topology::{K8sclient, Topology, k8s::K8sClient},
|
||||
};
|
||||
use async_trait::async_trait;
|
||||
use harmony_types::id::Id;
|
||||
use k8s_openapi::api::core::v1::ConfigMap;
|
||||
use kube::api::{GroupVersionKind, ObjectMeta};
|
||||
use log::debug;
|
||||
use serde::Serialize;
|
||||
|
||||
/// Score that enables the user-workload Alertmanager by writing the
/// `user-workload-monitoring-config` ConfigMap into
/// `openshift-user-workload-monitoring`.
#[derive(Clone, Debug, Serialize)]
pub struct OpenshiftUserWorkloadMonitoring {}
|
||||
|
||||
impl<T: Topology + K8sclient> Score<T> for OpenshiftUserWorkloadMonitoring {
|
||||
fn name(&self) -> String {
|
||||
"OpenshiftUserWorkloadMonitoringScore".to_string()
|
||||
}
|
||||
|
||||
fn create_interpret(&self) -> Box<dyn Interpret<T>> {
|
||||
Box::new(OpenshiftUserWorkloadMonitoringInterpret {})
|
||||
}
|
||||
}
|
||||
|
||||
/// Interpret that checks whether user-workload monitoring is enabled and, if
/// not, applies the enabling ConfigMap via a `K8sResourceScore`.
#[derive(Clone, Debug, Serialize)]
pub struct OpenshiftUserWorkloadMonitoringInterpret {}
|
||||
|
||||
#[async_trait]
|
||||
impl<T: Topology + K8sclient> Interpret<T> for OpenshiftUserWorkloadMonitoringInterpret {
|
||||
async fn execute(
|
||||
&self,
|
||||
inventory: &Inventory,
|
||||
topology: &T,
|
||||
) -> Result<Outcome, InterpretError> {
|
||||
let namespace = "openshift-user-workload-monitoring".to_string();
|
||||
let cm_name = "user-workload-monitoring-config".to_string();
|
||||
let client = topology.k8s_client().await?;
|
||||
|
||||
let cm_enabled = self
|
||||
.check_cluster_user_workload_monitoring_enabled(client, &cm_name, &namespace)
|
||||
.await?;
|
||||
|
||||
match cm_enabled {
|
||||
true => Ok(Outcome::success(
|
||||
"OpenshiftUserWorkloadMonitoringEnabled".to_string(),
|
||||
)),
|
||||
false => {
|
||||
let mut data = BTreeMap::new();
|
||||
data.insert(
|
||||
"config.yaml".to_string(),
|
||||
r#"
|
||||
alertmanager:
|
||||
enabled: true
|
||||
enableAlertmanagerConfig: true
|
||||
"#
|
||||
.to_string(),
|
||||
);
|
||||
|
||||
let cm = ConfigMap {
|
||||
metadata: ObjectMeta {
|
||||
name: Some("user-workload-monitoring-config".to_string()),
|
||||
namespace: Some(namespace.clone()),
|
||||
..Default::default()
|
||||
},
|
||||
data: Some(data),
|
||||
..Default::default()
|
||||
};
|
||||
|
||||
K8sResourceScore::single(cm, Some(namespace))
|
||||
.create_interpret()
|
||||
.execute(inventory, topology)
|
||||
.await
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn get_name(&self) -> InterpretName {
|
||||
InterpretName::Custom("OpenshiftUserWorkloadMonitoringInterpret")
|
||||
}
|
||||
|
||||
fn get_version(&self) -> Version {
|
||||
todo!()
|
||||
}
|
||||
|
||||
fn get_status(&self) -> InterpretStatus {
|
||||
todo!()
|
||||
}
|
||||
|
||||
fn get_children(&self) -> Vec<Id> {
|
||||
todo!()
|
||||
}
|
||||
}
|
||||
|
||||
impl OpenshiftUserWorkloadMonitoringInterpret {
|
||||
async fn check_cluster_user_workload_monitoring_enabled(
|
||||
&self,
|
||||
client: Arc<K8sClient>,
|
||||
name: &str,
|
||||
namespace: &str,
|
||||
) -> Result<bool, String> {
|
||||
let gvk = GroupVersionKind {
|
||||
group: "".to_string(),
|
||||
version: "v1".to_string(),
|
||||
kind: "ConfigMap".to_string(),
|
||||
};
|
||||
|
||||
let cm = match client
|
||||
.get_resource_json_value(name, Some(namespace), &gvk)
|
||||
.await
|
||||
{
|
||||
Ok(obj) => obj,
|
||||
Err(_) => return Ok(false), // CM doesn't exist? Treat as disabled.
|
||||
};
|
||||
|
||||
debug!("{:#?}", cm.data.pointer("/data/config.yaml"));
|
||||
let config_yaml_str = match cm
|
||||
.data
|
||||
.pointer("/data/config.yaml")
|
||||
.and_then(|v| v.as_str())
|
||||
{
|
||||
Some(s) => s,
|
||||
None => return Ok(false), // Key missing? Treat as disabled.
|
||||
};
|
||||
|
||||
debug!("{:#?}", config_yaml_str);
|
||||
let parsed_config: serde_yaml::Value = serde_yaml::from_str(config_yaml_str)
|
||||
.map_err(|e| format!("Failed to parse nested YAML: {}", e))?;
|
||||
|
||||
let alert_manager_enabled = parsed_config
|
||||
.get("alertmanager")
|
||||
.and_then(|a| a.get("enableAlertmanagerConfig"))
|
||||
.and_then(|v| v.as_bool())
|
||||
.unwrap_or(false);
|
||||
|
||||
debug!("alertmanagerenabled: {:#?}", alert_manager_enabled);
|
||||
|
||||
let enabled = parsed_config
|
||||
.get("alertmanager")
|
||||
.and_then(|enabled| enabled.get("enabled"))
|
||||
.and_then(|v| v.as_bool())
|
||||
.unwrap_or(false);
|
||||
|
||||
debug!("user workload monitoring enabled: {:#?}", enabled);
|
||||
|
||||
if alert_manager_enabled && enabled == true {
|
||||
Ok(true)
|
||||
} else {
|
||||
Ok(false)
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,70 @@
|
||||
use async_trait::async_trait;
|
||||
use harmony_types::id::Id;
|
||||
use serde::Serialize;
|
||||
|
||||
use crate::{
|
||||
data::Version,
|
||||
interpret::{Interpret, InterpretError, InterpretName, InterpretStatus, Outcome},
|
||||
inventory::Inventory,
|
||||
score::Score,
|
||||
topology::{K8sclient, Topology},
|
||||
};
|
||||
|
||||
/// Score that waits for the user-workload monitoring pods to become ready.
#[derive(Clone, Debug, Serialize)]
pub struct VerifyUserWorkload {}
|
||||
|
||||
impl<T: Topology + K8sclient> Score<T> for VerifyUserWorkload {
|
||||
fn name(&self) -> String {
|
||||
"VerifyUserWorkload".to_string()
|
||||
}
|
||||
|
||||
fn create_interpret(&self) -> Box<dyn Interpret<T>> {
|
||||
Box::new(VerifyUserWorkloadInterpret {})
|
||||
}
|
||||
}
|
||||
|
||||
/// Interpret that blocks until the user-workload Alertmanager and Prometheus
/// pods report ready.
#[derive(Clone, Debug, Serialize)]
pub struct VerifyUserWorkloadInterpret {}
|
||||
|
||||
#[async_trait]
|
||||
impl<T: Topology + K8sclient> Interpret<T> for VerifyUserWorkloadInterpret {
|
||||
async fn execute(
|
||||
&self,
|
||||
_inventory: &Inventory,
|
||||
topology: &T,
|
||||
) -> Result<Outcome, InterpretError> {
|
||||
let client = topology.k8s_client().await?;
|
||||
let namespace = "openshift-user-workload-monitoring";
|
||||
let alertmanager_name = "alertmanager-user-workload-0";
|
||||
let prometheus_name = "prometheus-user-workload-0";
|
||||
|
||||
client
|
||||
.wait_for_pod_ready(alertmanager_name, Some(namespace))
|
||||
.await?;
|
||||
|
||||
client
|
||||
.wait_for_pod_ready(prometheus_name, Some(namespace))
|
||||
.await?;
|
||||
|
||||
Ok(Outcome::success(format!(
|
||||
"pods: {}, {} ready in ns: {}",
|
||||
alertmanager_name, prometheus_name, namespace
|
||||
)))
|
||||
}
|
||||
|
||||
fn get_name(&self) -> InterpretName {
|
||||
InterpretName::Custom("VerifyUserWorkloadInterpret")
|
||||
}
|
||||
|
||||
fn get_version(&self) -> Version {
|
||||
todo!()
|
||||
}
|
||||
|
||||
fn get_status(&self) -> InterpretStatus {
|
||||
todo!()
|
||||
}
|
||||
|
||||
fn get_children(&self) -> Vec<Id> {
|
||||
todo!()
|
||||
}
|
||||
}
|
||||
@@ -1 +1,2 @@
|
||||
pub mod prometheus_config;
|
||||
pub mod prometheus_helm;
|
||||
|
||||
@@ -1,8 +1,10 @@
|
||||
use serde::Serialize;
|
||||
|
||||
use crate::modules::monitoring::kube_prometheus::types::{
|
||||
AlertManagerAdditionalPromRules, AlertManagerChannelConfig, ServiceMonitor,
|
||||
};
|
||||
|
||||
#[derive(Debug)]
|
||||
#[derive(Debug, Clone, Serialize)]
|
||||
pub struct PrometheusConfig {
|
||||
pub namespace: Option<String>,
|
||||
pub default_rules: bool,
|
||||
@@ -3,9 +3,8 @@ use std::sync::{Arc, Mutex};
|
||||
|
||||
use non_blank_string_rs::NonBlankString;
|
||||
|
||||
use crate::modules::{
|
||||
helm::chart::HelmChartScore, monitoring::prometheus::prometheus_config::PrometheusConfig,
|
||||
};
|
||||
use crate::modules::helm::chart::HelmChartScore;
|
||||
use crate::modules::monitoring::prometheus::helm::prometheus_config::PrometheusConfig;
|
||||
|
||||
pub fn prometheus_helm_chart_score(config: Arc<Mutex<PrometheusConfig>>) -> HelmChartScore {
|
||||
let config = config.lock().unwrap();
|
||||
@@ -30,6 +29,7 @@ server:
|
||||
fullnameOverride: prometheus-{ns}
|
||||
"#
|
||||
);
|
||||
|
||||
HelmChartScore {
|
||||
namespace: Some(NonBlankString::from_str(&config.namespace.clone().unwrap()).unwrap()),
|
||||
release_name: NonBlankString::from_str("prometheus").unwrap(),
|
||||
|
||||
@@ -1,4 +1,56 @@
|
||||
use std::sync::{Arc, Mutex};
|
||||
|
||||
use async_trait::async_trait;
|
||||
use serde::Serialize;
|
||||
|
||||
use crate::{
|
||||
modules::monitoring::prometheus::helm::prometheus_config::PrometheusConfig,
|
||||
topology::monitoring::{AlertReceiver, AlertRule, AlertSender, ScrapeTarget},
|
||||
};
|
||||
|
||||
pub mod helm;
|
||||
#[allow(clippy::module_inception)]
|
||||
pub mod prometheus;
|
||||
pub mod prometheus_config;
|
||||
pub mod prometheus_alerting_score;
|
||||
pub mod score_prometheus_alert_receivers;
|
||||
pub mod score_prometheus_ensure_ready;
|
||||
pub mod score_prometheus_install;
|
||||
pub mod score_prometheus_rule;
|
||||
pub mod score_prometheus_scrape_target;
|
||||
|
||||
/// Alert sender backed by a Helm-deployed Prometheus stack.
#[derive(Debug, Clone, Serialize)]
pub struct Prometheus {
    // Shared, mutable configuration; `Arc<Mutex<_>>` so scores and interprets
    // can update it from multiple owners.
    pub config: Arc<Mutex<PrometheusConfig>>,
}
|
||||
|
||||
#[async_trait]
|
||||
impl AlertSender for Prometheus {
|
||||
fn name(&self) -> String {
|
||||
"Prometheus".to_string()
|
||||
}
|
||||
}
|
||||
|
||||
impl Serialize for Box<dyn AlertReceiver<Prometheus>> {
|
||||
fn serialize<S>(&self, _serializer: S) -> Result<S::Ok, S::Error>
|
||||
where
|
||||
S: serde::Serializer,
|
||||
{
|
||||
todo!()
|
||||
}
|
||||
}
|
||||
|
||||
impl Serialize for Box<dyn AlertRule<Prometheus>> {
|
||||
fn serialize<S>(&self, _serializer: S) -> Result<S::Ok, S::Error>
|
||||
where
|
||||
S: serde::Serializer,
|
||||
{
|
||||
todo!()
|
||||
}
|
||||
}
|
||||
|
||||
impl Serialize for Box<dyn ScrapeTarget<Prometheus>> {
|
||||
fn serialize<S>(&self, _serializer: S) -> Result<S::Ok, S::Error>
|
||||
where
|
||||
S: serde::Serializer,
|
||||
{
|
||||
todo!()
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,194 +0,0 @@
|
||||
use std::sync::{Arc, Mutex};
|
||||
|
||||
use async_trait::async_trait;
|
||||
use log::{debug, error};
|
||||
use serde::Serialize;
|
||||
|
||||
use crate::{
|
||||
interpret::{InterpretError, Outcome},
|
||||
inventory::Inventory,
|
||||
modules::monitoring::{
|
||||
alert_rule::prometheus_alert_rule::AlertManagerRuleGroup,
|
||||
grafana::helm::helm_grafana::grafana_helm_chart_score,
|
||||
kube_prometheus::types::{AlertManagerAdditionalPromRules, AlertManagerChannelConfig},
|
||||
},
|
||||
score::Score,
|
||||
topology::{
|
||||
HelmCommand, Topology,
|
||||
installable::Installable,
|
||||
oberservability::monitoring::{AlertReceiver, AlertRule, AlertSender},
|
||||
tenant::TenantManager,
|
||||
},
|
||||
};
|
||||
|
||||
use super::{
|
||||
helm::prometheus_helm::prometheus_helm_chart_score, prometheus_config::PrometheusConfig,
|
||||
};
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct Prometheus {
|
||||
pub config: Arc<Mutex<PrometheusConfig>>,
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl AlertSender for Prometheus {
|
||||
fn name(&self) -> String {
|
||||
"Prometheus".to_string()
|
||||
}
|
||||
}
|
||||
|
||||
impl Default for Prometheus {
|
||||
fn default() -> Self {
|
||||
Self::new()
|
||||
}
|
||||
}
|
||||
|
||||
impl Prometheus {
|
||||
pub fn new() -> Self {
|
||||
Self {
|
||||
config: Arc::new(Mutex::new(PrometheusConfig::new())),
|
||||
}
|
||||
}
|
||||
pub async fn configure_with_topology<T: TenantManager>(&self, topology: &T) {
|
||||
let ns = topology
|
||||
.get_tenant_config()
|
||||
.await
|
||||
.map(|cfg| cfg.name.clone())
|
||||
.unwrap_or_else(|| "monitoring".to_string());
|
||||
error!("This must be refactored, see comments in pr #74");
|
||||
debug!("NS: {}", ns);
|
||||
self.config.lock().unwrap().namespace = Some(ns);
|
||||
}
|
||||
|
||||
pub async fn install_receiver(
|
||||
&self,
|
||||
prometheus_receiver: &dyn PrometheusReceiver,
|
||||
) -> Result<Outcome, InterpretError> {
|
||||
let prom_receiver = prometheus_receiver.configure_receiver().await;
|
||||
debug!(
|
||||
"adding alert receiver to prometheus config: {:#?}",
|
||||
&prom_receiver
|
||||
);
|
||||
let mut config = self.config.lock().unwrap();
|
||||
|
||||
config.alert_receiver_configs.push(prom_receiver);
|
||||
let prom_receiver_name = prometheus_receiver.name();
|
||||
debug!("installed alert receiver {}", &prom_receiver_name);
|
||||
Ok(Outcome::success(format!(
|
||||
"Sucessfully installed receiver {}",
|
||||
prom_receiver_name
|
||||
)))
|
||||
}
|
||||
|
||||
pub async fn install_rule(
|
||||
&self,
|
||||
prometheus_rule: &AlertManagerRuleGroup,
|
||||
) -> Result<Outcome, InterpretError> {
|
||||
let prometheus_rule = prometheus_rule.configure_rule().await;
|
||||
let mut config = self.config.lock().unwrap();
|
||||
|
||||
config.alert_rules.push(prometheus_rule.clone());
|
||||
Ok(Outcome::success(format!(
|
||||
"Successfully installed alert rule: {:#?},",
|
||||
prometheus_rule
|
||||
)))
|
||||
}
|
||||
|
||||
pub async fn install_prometheus<T: Topology + HelmCommand + Send + Sync>(
|
||||
&self,
|
||||
inventory: &Inventory,
|
||||
topology: &T,
|
||||
) -> Result<Outcome, InterpretError> {
|
||||
prometheus_helm_chart_score(self.config.clone())
|
||||
.interpret(inventory, topology)
|
||||
.await
|
||||
}
|
||||
pub async fn install_grafana<T: Topology + HelmCommand + Send + Sync>(
|
||||
&self,
|
||||
inventory: &Inventory,
|
||||
topology: &T,
|
||||
) -> Result<Outcome, InterpretError> {
|
||||
let namespace = {
|
||||
let config = self.config.lock().unwrap();
|
||||
config.namespace.clone()
|
||||
};
|
||||
|
||||
if let Some(ns) = namespace.as_deref() {
|
||||
grafana_helm_chart_score(ns, false)
|
||||
.interpret(inventory, topology)
|
||||
.await
|
||||
} else {
|
||||
Err(InterpretError::new(
|
||||
"could not install grafana, missing namespace".to_string(),
|
||||
))
|
||||
}
|
||||
}
|
||||
}
|
||||
#[async_trait]
|
||||
impl<T: Topology + HelmCommand + TenantManager> Installable<T> for Prometheus {
|
||||
async fn configure(&self, _inventory: &Inventory, topology: &T) -> Result<(), InterpretError> {
|
||||
self.configure_with_topology(topology).await;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn ensure_installed(
|
||||
&self,
|
||||
inventory: &Inventory,
|
||||
topology: &T,
|
||||
) -> Result<(), InterpretError> {
|
||||
self.install_prometheus(inventory, topology).await?;
|
||||
|
||||
let install_grafana = {
|
||||
let config = self.config.lock().unwrap();
|
||||
config.grafana
|
||||
};
|
||||
|
||||
if install_grafana {
|
||||
self.install_grafana(inventory, topology).await?;
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
pub trait PrometheusReceiver: Send + Sync + std::fmt::Debug {
|
||||
fn name(&self) -> String;
|
||||
async fn configure_receiver(&self) -> AlertManagerChannelConfig;
|
||||
}
|
||||
|
||||
impl Serialize for Box<dyn AlertReceiver<Prometheus>> {
|
||||
fn serialize<S>(&self, _serializer: S) -> Result<S::Ok, S::Error>
|
||||
where
|
||||
S: serde::Serializer,
|
||||
{
|
||||
todo!()
|
||||
}
|
||||
}
|
||||
|
||||
impl Clone for Box<dyn AlertReceiver<Prometheus>> {
|
||||
fn clone(&self) -> Self {
|
||||
self.clone_box()
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
pub trait PrometheusRule: Send + Sync + std::fmt::Debug {
|
||||
fn name(&self) -> String;
|
||||
async fn configure_rule(&self) -> AlertManagerAdditionalPromRules;
|
||||
}
|
||||
|
||||
impl Serialize for Box<dyn AlertRule<Prometheus>> {
|
||||
fn serialize<S>(&self, _serializer: S) -> Result<S::Ok, S::Error>
|
||||
where
|
||||
S: serde::Serializer,
|
||||
{
|
||||
todo!()
|
||||
}
|
||||
}
|
||||
|
||||
impl Clone for Box<dyn AlertRule<Prometheus>> {
|
||||
fn clone(&self) -> Self {
|
||||
self.clone_box()
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,48 @@
|
||||
use std::sync::{Arc, Mutex};
|
||||
|
||||
use serde::Serialize;
|
||||
|
||||
use crate::{
|
||||
modules::monitoring::{
|
||||
kube_prometheus::types::ServiceMonitor,
|
||||
prometheus::{Prometheus, helm::prometheus_config::PrometheusConfig},
|
||||
},
|
||||
score::Score,
|
||||
topology::{
|
||||
Topology,
|
||||
monitoring::{AlertReceiver, AlertRule, AlertingInterpret, Observability, ScrapeTarget},
|
||||
},
|
||||
};
|
||||
|
||||
//TODO untested
/// Declarative bundle of Prometheus alerting intent: receivers, rules,
/// optional scrape targets and additional service monitors, plus the shared
/// Prometheus config they are applied against.
#[derive(Clone, Debug, Serialize)]
pub struct PrometheusAlertingScore {
    pub receivers: Vec<Box<dyn AlertReceiver<Prometheus>>>,
    pub rules: Vec<Box<dyn AlertRule<Prometheus>>>,
    pub scrape_targets: Option<Vec<Box<dyn ScrapeTarget<Prometheus>>>>,
    // Extra ServiceMonitors copied into `config` at interpret-creation time.
    pub service_monitors: Vec<ServiceMonitor>,
    pub config: Arc<Mutex<PrometheusConfig>>,
}
|
||||
|
||||
impl<T: Topology + Observability<Prometheus>> Score<T> for PrometheusAlertingScore {
|
||||
fn create_interpret(&self) -> Box<dyn crate::interpret::Interpret<T>> {
|
||||
//TODO test that additional service monitor is added
|
||||
self.config
|
||||
.try_lock()
|
||||
.expect("couldn't lock config")
|
||||
.additional_service_monitors = self.service_monitors.clone();
|
||||
|
||||
Box::new(AlertingInterpret {
|
||||
sender: Prometheus {
|
||||
config: self.config.clone(),
|
||||
},
|
||||
receivers: self.receivers.clone(),
|
||||
rules: self.rules.clone(),
|
||||
scrape_targets: self.scrape_targets.clone(),
|
||||
})
|
||||
}
|
||||
|
||||
fn name(&self) -> String {
|
||||
"HelmPrometheusAlertingScore".to_string()
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,59 @@
|
||||
use kube::api::ObjectMeta;
|
||||
use serde::Serialize;
|
||||
|
||||
use crate::{
|
||||
interpret::Interpret,
|
||||
modules::{
|
||||
k8s::resource::K8sResourceScore,
|
||||
monitoring::{
|
||||
kube_prometheus::crd::crd_alertmanager_config::{
|
||||
AlertmanagerConfig, AlertmanagerConfigSpec,
|
||||
},
|
||||
prometheus::Prometheus,
|
||||
},
|
||||
},
|
||||
score::Score,
|
||||
topology::{K8sclient, Topology, monitoring::AlertReceiver},
|
||||
};
|
||||
|
||||
#[derive(Debug, Clone, Serialize)]
|
||||
pub struct PrometheusReceiverScore {
|
||||
pub sender: Prometheus,
|
||||
pub receiver: Box<dyn AlertReceiver<Prometheus>>,
|
||||
}
|
||||
|
||||
impl<T: Topology + K8sclient> Score<T> for PrometheusReceiverScore {
|
||||
fn name(&self) -> String {
|
||||
"PrometheusReceiverScore".to_string()
|
||||
}
|
||||
|
||||
fn create_interpret(&self) -> Box<dyn Interpret<T>> {
|
||||
let name = self.receiver.name();
|
||||
let namespace = self.sender.config.lock().unwrap().namespace.clone();
|
||||
let route = self.receiver.build_route().expect(&format!(
|
||||
"failed to build route for receveiver {}",
|
||||
name.clone()
|
||||
));
|
||||
|
||||
let receiver = self.receiver.build_receiver().expect(&format!(
|
||||
"failed to build receiver path for receiver {}",
|
||||
name.clone()
|
||||
));
|
||||
|
||||
let data = serde_json::json!({
|
||||
"route": route,
|
||||
"receivers": [receiver]
|
||||
});
|
||||
|
||||
let alertmanager_config = AlertmanagerConfig {
|
||||
metadata: ObjectMeta {
|
||||
name: Some(name),
|
||||
namespace: namespace.clone(),
|
||||
..Default::default()
|
||||
},
|
||||
spec: AlertmanagerConfigSpec { data: data },
|
||||
};
|
||||
|
||||
K8sResourceScore::single(alertmanager_config, namespace).create_interpret()
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,80 @@
|
||||
use async_trait::async_trait;
|
||||
use harmony_types::id::Id;
|
||||
use serde::Serialize;
|
||||
|
||||
use crate::{
|
||||
data::Version,
|
||||
interpret::{Interpret, InterpretError, InterpretName, InterpretStatus, Outcome},
|
||||
inventory::Inventory,
|
||||
modules::monitoring::prometheus::Prometheus,
|
||||
score::Score,
|
||||
topology::{K8sclient, Topology},
|
||||
};
|
||||
|
||||
#[derive(Clone, Debug, Serialize)]
|
||||
pub struct PrometheusEnsureReadyScore {
|
||||
pub sender: Prometheus,
|
||||
}
|
||||
|
||||
impl<T: Topology + K8sclient> Score<T> for PrometheusEnsureReadyScore {
|
||||
fn name(&self) -> String {
|
||||
"PrometheusEnsureReadyScore".to_string()
|
||||
}
|
||||
|
||||
fn create_interpret(&self) -> Box<dyn Interpret<T>> {
|
||||
Box::new(PrometheusEnsureReadyInterpret {
|
||||
sender: self.sender.clone(),
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug, Serialize)]
|
||||
pub struct PrometheusEnsureReadyInterpret {
|
||||
pub sender: Prometheus,
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl<T: Topology + K8sclient> Interpret<T> for PrometheusEnsureReadyInterpret {
|
||||
async fn execute(
|
||||
&self,
|
||||
_inventory: &Inventory,
|
||||
topology: &T,
|
||||
) -> Result<Outcome, InterpretError> {
|
||||
let client = topology.k8s_client().await?;
|
||||
let namespace = self
|
||||
.sender
|
||||
.config
|
||||
.lock()
|
||||
.unwrap()
|
||||
.namespace
|
||||
.clone()
|
||||
.unwrap_or("default".to_string());
|
||||
|
||||
let prometheus_name = "prometheues-prometheus-operator";
|
||||
|
||||
client
|
||||
.wait_until_deployment_ready(prometheus_name, Some(&namespace), None)
|
||||
.await?;
|
||||
|
||||
Ok(Outcome::success(format!(
|
||||
"deployment: {} ready in ns: {}",
|
||||
prometheus_name, namespace
|
||||
)))
|
||||
}
|
||||
|
||||
fn get_name(&self) -> InterpretName {
|
||||
todo!()
|
||||
}
|
||||
|
||||
fn get_version(&self) -> Version {
|
||||
todo!()
|
||||
}
|
||||
|
||||
fn get_status(&self) -> InterpretStatus {
|
||||
todo!()
|
||||
}
|
||||
|
||||
fn get_children(&self) -> Vec<Id> {
|
||||
todo!()
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,65 @@
|
||||
use async_trait::async_trait;
|
||||
use harmony_types::id::Id;
|
||||
use serde::Serialize;
|
||||
|
||||
use crate::{
|
||||
data::Version,
|
||||
interpret::{Interpret, InterpretError, InterpretName, InterpretStatus, Outcome},
|
||||
inventory::Inventory,
|
||||
modules::monitoring::prometheus::{
|
||||
Prometheus, helm::prometheus_helm::prometheus_helm_chart_score,
|
||||
},
|
||||
score::Score,
|
||||
topology::{HelmCommand, K8sclient, Topology},
|
||||
};
|
||||
|
||||
#[derive(Debug, Clone, Serialize)]
|
||||
pub struct PrometheusInstallScore {
|
||||
pub sender: Prometheus,
|
||||
}
|
||||
|
||||
impl<T: Topology + K8sclient + HelmCommand> Score<T> for PrometheusInstallScore {
|
||||
fn name(&self) -> String {
|
||||
"PrometheusInstallScore".to_string()
|
||||
}
|
||||
|
||||
fn create_interpret(&self) -> Box<dyn Interpret<T>> {
|
||||
Box::new(PrometheusInstallInterpret {
|
||||
score: self.clone(),
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize)]
|
||||
pub struct PrometheusInstallInterpret {
|
||||
score: PrometheusInstallScore,
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl<T: Topology + K8sclient + HelmCommand> Interpret<T> for PrometheusInstallInterpret {
|
||||
async fn execute(
|
||||
&self,
|
||||
inventory: &Inventory,
|
||||
topology: &T,
|
||||
) -> Result<Outcome, InterpretError> {
|
||||
//TODO add interpret to install service monitors CRD from sender Prometheus
|
||||
let score = prometheus_helm_chart_score(self.score.sender.config.clone());
|
||||
score.create_interpret().execute(inventory, topology).await
|
||||
}
|
||||
|
||||
fn get_name(&self) -> InterpretName {
|
||||
InterpretName::Custom("PrometheusInstallInterpret")
|
||||
}
|
||||
|
||||
fn get_version(&self) -> Version {
|
||||
todo!()
|
||||
}
|
||||
|
||||
fn get_status(&self) -> InterpretStatus {
|
||||
todo!()
|
||||
}
|
||||
|
||||
fn get_children(&self) -> Vec<Id> {
|
||||
todo!()
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,48 @@
|
||||
use kube::api::ObjectMeta;
|
||||
use serde::Serialize;
|
||||
|
||||
use crate::{
|
||||
interpret::Interpret,
|
||||
modules::{
|
||||
k8s::resource::K8sResourceScore,
|
||||
monitoring::{
|
||||
kube_prometheus::crd::crd_prometheus_rules::{
|
||||
PrometheusRule, PrometheusRuleSpec, RuleGroup,
|
||||
},
|
||||
prometheus::Prometheus,
|
||||
},
|
||||
},
|
||||
score::Score,
|
||||
topology::{K8sclient, Topology, monitoring::AlertRule},
|
||||
};
|
||||
|
||||
#[derive(Debug, Clone, Serialize)]
|
||||
pub struct PrometheusRuleScore {
|
||||
pub sender: Prometheus,
|
||||
pub rule: Box<dyn AlertRule<Prometheus>>,
|
||||
}
|
||||
|
||||
impl<T: Topology + K8sclient> Score<T> for PrometheusRuleScore {
|
||||
fn name(&self) -> String {
|
||||
"PrometheusRuleScore".to_string()
|
||||
}
|
||||
|
||||
fn create_interpret(&self) -> Box<dyn Interpret<T>> {
|
||||
let name = self.rule.name();
|
||||
let namespace = self.sender.config.lock().unwrap().namespace.clone();
|
||||
let groups: Vec<RuleGroup> =
|
||||
serde_json::from_value(self.rule.build_rule().expect("failed to build alert rule"))
|
||||
.expect("failed to serialize rule group");
|
||||
|
||||
let prometheus_rule = PrometheusRule {
|
||||
metadata: ObjectMeta {
|
||||
name: Some(name.clone()),
|
||||
namespace: namespace.clone(),
|
||||
..Default::default()
|
||||
},
|
||||
|
||||
spec: PrometheusRuleSpec { groups },
|
||||
};
|
||||
K8sResourceScore::single(prometheus_rule, namespace).create_interpret()
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,64 @@
|
||||
use kube::api::ObjectMeta;
|
||||
use serde::Serialize;
|
||||
|
||||
use crate::topology::monitoring::AlertRule;
|
||||
use crate::{
|
||||
interpret::Interpret,
|
||||
modules::{
|
||||
k8s::resource::K8sResourceScore,
|
||||
monitoring::{
|
||||
kube_prometheus::crd::crd_scrape_config::{
|
||||
ScrapeConfig, ScrapeConfigSpec, StaticConfig,
|
||||
},
|
||||
prometheus::Prometheus,
|
||||
},
|
||||
},
|
||||
score::Score,
|
||||
topology::{K8sclient, Topology, monitoring::ScrapeTarget},
|
||||
};
|
||||
|
||||
#[derive(Debug, Clone, Serialize)]
|
||||
pub struct PrometheusScrapeTargetScore {
|
||||
pub sender: Prometheus,
|
||||
pub scrape_target: Box<dyn ScrapeTarget<Prometheus>>,
|
||||
}
|
||||
|
||||
impl<T: Topology + K8sclient> Score<T> for PrometheusScrapeTargetScore {
|
||||
fn name(&self) -> String {
|
||||
"PrometheusScrapeTargetScore".to_string()
|
||||
}
|
||||
|
||||
fn create_interpret(&self) -> Box<dyn Interpret<T>> {
|
||||
let name = self.scrape_target.name();
|
||||
let namespace = self.sender.config.lock().unwrap().namespace.clone();
|
||||
|
||||
let external_target = self
|
||||
.scrape_target
|
||||
.build_scrape_target()
|
||||
.expect("failed to build external scrape target");
|
||||
|
||||
//TODO this may need to modified to include a scrapeConfigSelector label from the
|
||||
//prometheus operator
|
||||
let labels = external_target.labels;
|
||||
|
||||
let scrape_target = ScrapeConfig {
|
||||
metadata: ObjectMeta {
|
||||
name: Some(name.clone()),
|
||||
namespace: namespace.clone(),
|
||||
..Default::default()
|
||||
},
|
||||
spec: ScrapeConfigSpec {
|
||||
static_configs: Some(vec![StaticConfig {
|
||||
targets: vec![format!("{}:{}", external_target.ip, external_target.port)],
|
||||
labels,
|
||||
}]),
|
||||
metrics_path: external_target.path,
|
||||
scrape_interval: external_target.interval,
|
||||
job_name: Some(name),
|
||||
..Default::default()
|
||||
},
|
||||
};
|
||||
|
||||
K8sResourceScore::single(scrape_target, namespace).create_interpret()
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,8 @@
|
||||
pub mod rhob_alertmanager_config;
|
||||
pub mod rhob_alertmanagers;
|
||||
pub mod rhob_default_rules;
|
||||
pub mod rhob_monitoring_stack;
|
||||
pub mod rhob_prometheus_rules;
|
||||
pub mod rhob_prometheuses;
|
||||
pub mod rhob_role;
|
||||
pub mod rhob_service_monitor;
|
||||
@@ -0,0 +1,17 @@
|
||||
use kube::CustomResource;
|
||||
use schemars::JsonSchema;
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
#[derive(CustomResource, Serialize, Deserialize, Debug, Clone, JsonSchema, Default)]
|
||||
#[kube(
|
||||
group = "monitoring.rhobs",
|
||||
version = "v1alpha1",
|
||||
kind = "AlertmanagerConfig",
|
||||
plural = "alertmanagerconfigs",
|
||||
namespaced,
|
||||
derive = "Default"
|
||||
)]
|
||||
pub struct AlertmanagerConfigSpec {
|
||||
#[serde(flatten)]
|
||||
pub data: serde_json::Value,
|
||||
}
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user