Xwip: uses a helm chart to deploy a prometheus operator if its CRDs are not present in the cluster, and deploys a grafana operator.
All checks were successful
Run Check Script / check (pull_request) Successful in -32s
All checks were successful
Run Check Script / check (pull_request) Successful in -32s
added a sample dashboard and prometheus data source to grafana
This commit is contained in:
parent
114219385f
commit
1d8b503bd2
@ -73,6 +73,6 @@ pub trait AlertRule<S: AlertSender>: std::fmt::Debug + Send + Sync {
|
|||||||
}
|
}
|
||||||
|
|
||||||
#[async_trait]
|
#[async_trait]
|
||||||
pub trait ScrapeTarger<S: AlertSender> {
|
pub trait ScrapeTarget<S: AlertSender> {
|
||||||
async fn install(&self, sender: &S) -> Result<(), InterpretError>;
|
async fn install(&self, sender: &S) -> Result<(), InterpretError>;
|
||||||
}
|
}
|
||||||
|
|||||||
@ -1,32 +1,23 @@
|
|||||||
use std::sync::Arc;
|
use std::sync::Arc;
|
||||||
|
|
||||||
|
use crate::modules::monitoring::application_monitoring::helm_prometheus_application_alerting::HelmPrometheusApplicationAlertingScore;
|
||||||
|
use crate::modules::monitoring::kube_prometheus::crd::crd_alertmanager_config::CRDAlertManagerReceiver;
|
||||||
use crate::modules::monitoring::kube_prometheus::crd::crd_default_rules::{
|
use crate::modules::monitoring::kube_prometheus::crd::crd_default_rules::{
|
||||||
build_rule_container_restarting, build_rule_pod_failed,
|
build_rule_container_restarting, build_rule_pod_failed,
|
||||||
};
|
};
|
||||||
use crate::modules::monitoring::kube_prometheus::crd::crd_prometheus_rules::RuleGroup;
|
use crate::modules::monitoring::kube_prometheus::crd::crd_prometheus_rules::RuleGroup;
|
||||||
use crate::modules::monitoring::kube_prometheus::service_monitor::{
|
use crate::modules::monitoring::kube_prometheus::crd::service_monitor::{
|
||||||
ServiceMonitor, ServiceMonitorSpec,
|
ServiceMonitor, ServiceMonitorSpec,
|
||||||
};
|
};
|
||||||
use crate::modules::monitoring::kube_prometheus::types::{Selector, ServiceMonitorEndpoint};
|
use crate::modules::monitoring::kube_prometheus::types::ServiceMonitorEndpoint;
|
||||||
use crate::{
|
use crate::{
|
||||||
inventory::Inventory,
|
inventory::Inventory,
|
||||||
modules::{
|
modules::{
|
||||||
application::{Application, ApplicationFeature, OCICompliant},
|
application::{Application, ApplicationFeature, OCICompliant},
|
||||||
monitoring::{
|
monitoring::{alert_channel::webhook_receiver::WebhookReceiver, ntfy::ntfy::NtfyScore},
|
||||||
alert_channel::webhook_receiver::WebhookReceiver,
|
|
||||||
kube_prometheus::{
|
|
||||||
alert_manager_config::{CRDAlertManager, CRDAlertManagerReceiver},
|
|
||||||
helm_prometheus_application_alerting::HelmPrometheusApplicationAlertingScore,
|
|
||||||
types::{NamespaceSelector, ServiceMonitor as KubePrometheusServiceMonitor},
|
|
||||||
},
|
|
||||||
ntfy::ntfy::NtfyScore,
|
|
||||||
},
|
|
||||||
},
|
},
|
||||||
score::Score,
|
score::Score,
|
||||||
topology::{
|
topology::{HelmCommand, K8sclient, Topology, Url, tenant::TenantManager},
|
||||||
HelmCommand, K8sclient, Topology, Url, oberservability::monitoring::AlertReceiver,
|
|
||||||
tenant::TenantManager,
|
|
||||||
},
|
|
||||||
};
|
};
|
||||||
use async_trait::async_trait;
|
use async_trait::async_trait;
|
||||||
use base64::{Engine as _, engine::general_purpose};
|
use base64::{Engine as _, engine::general_purpose};
|
||||||
@ -103,18 +94,32 @@ impl<T: Topology + HelmCommand + 'static + TenantManager + K8sclient + std::fmt:
|
|||||||
|
|
||||||
//TODO add service monitors to PrometheusApplicationMonitoring which can be
|
//TODO add service monitors to PrometheusApplicationMonitoring which can be
|
||||||
//deployed for the namespace using prometheus crd-servicemonitors
|
//deployed for the namespace using prometheus crd-servicemonitors
|
||||||
let service_monitor = ServiceMonitor {
|
let mut service_monitor = ServiceMonitor {
|
||||||
metadata: ObjectMeta {
|
metadata: ObjectMeta {
|
||||||
name: Some(self.application.name().clone()),
|
name: Some(self.application.name().clone()),
|
||||||
labels: Some(std::collections::BTreeMap::from([(
|
labels: Some(std::collections::BTreeMap::from([
|
||||||
"alertmanagerConfig".to_string(),
|
("alertmanagerConfig".to_string(), "enabled".to_string()),
|
||||||
"enabled".to_string(),
|
("client".to_string(), "prometheus".to_string()),
|
||||||
)])),
|
(
|
||||||
|
"app.kubernetes.io/name".to_string(),
|
||||||
|
"kube-state-metrics".to_string(),
|
||||||
|
),
|
||||||
|
])),
|
||||||
namespace: Some(namespace),
|
namespace: Some(namespace),
|
||||||
..Default::default()
|
..Default::default()
|
||||||
},
|
},
|
||||||
spec: ServiceMonitorSpec::default(),
|
spec: ServiceMonitorSpec::default(),
|
||||||
};
|
};
|
||||||
|
let service_mon_endpoint = ServiceMonitorEndpoint {
|
||||||
|
port: Some("http".into()),
|
||||||
|
interval: Some("30s".into()),
|
||||||
|
path: Some("/metrics".into()),
|
||||||
|
scheme: None,
|
||||||
|
relabelings: vec![],
|
||||||
|
metric_relabelings: vec![],
|
||||||
|
};
|
||||||
|
|
||||||
|
service_monitor.spec.endpoints.push(service_mon_endpoint);
|
||||||
|
|
||||||
alerting_score.service_monitors.push(service_monitor);
|
alerting_score.service_monitors.push(service_monitor);
|
||||||
|
|
||||||
|
|||||||
@ -10,13 +10,14 @@ use serde::Serialize;
|
|||||||
use serde_json::json;
|
use serde_json::json;
|
||||||
use serde_yaml::{Mapping, Value};
|
use serde_yaml::{Mapping, Value};
|
||||||
|
|
||||||
use crate::modules::monitoring::kube_prometheus::alert_manager_config::AlertmanagerConfigSpec;
|
use crate::modules::monitoring::kube_prometheus::crd::crd_alertmanager_config::{
|
||||||
|
AlertmanagerConfig, AlertmanagerConfigSpec, CRDAlertManager, CRDAlertManagerReceiver,
|
||||||
|
};
|
||||||
use crate::topology::k8s::K8sClient;
|
use crate::topology::k8s::K8sClient;
|
||||||
use crate::{
|
use crate::{
|
||||||
interpret::{InterpretError, Outcome},
|
interpret::{InterpretError, Outcome},
|
||||||
modules::monitoring::{
|
modules::monitoring::{
|
||||||
kube_prometheus::{
|
kube_prometheus::{
|
||||||
alert_manager_config::{AlertmanagerConfig, CRDAlertManager, CRDAlertManagerReceiver},
|
|
||||||
prometheus::{KubePrometheus, KubePrometheusReceiver},
|
prometheus::{KubePrometheus, KubePrometheusReceiver},
|
||||||
types::{AlertChannelConfig, AlertManagerChannelConfig},
|
types::{AlertChannelConfig, AlertManagerChannelConfig},
|
||||||
},
|
},
|
||||||
|
|||||||
@ -1,4 +1,4 @@
|
|||||||
use std::{collections::BTreeMap, sync::Arc};
|
use std::sync::Arc;
|
||||||
|
|
||||||
use async_trait::async_trait;
|
use async_trait::async_trait;
|
||||||
use k8s_openapi::api::core::v1::Secret;
|
use k8s_openapi::api::core::v1::Secret;
|
||||||
@ -12,7 +12,7 @@ use crate::{
|
|||||||
interpret::{InterpretError, Outcome},
|
interpret::{InterpretError, Outcome},
|
||||||
modules::monitoring::{
|
modules::monitoring::{
|
||||||
kube_prometheus::{
|
kube_prometheus::{
|
||||||
alert_manager_config::{
|
crd::crd_alertmanager_config::{
|
||||||
AlertmanagerConfig, AlertmanagerConfigSpec, CRDAlertManager,
|
AlertmanagerConfig, AlertmanagerConfigSpec, CRDAlertManager,
|
||||||
CRDAlertManagerReceiver,
|
CRDAlertManagerReceiver,
|
||||||
},
|
},
|
||||||
@ -37,24 +37,6 @@ impl CRDAlertManagerReceiver for WebhookReceiver {
|
|||||||
}
|
}
|
||||||
|
|
||||||
async fn configure_receiver(&self, client: &Arc<K8sClient>, ns: String) -> AlertmanagerConfig {
|
async fn configure_receiver(&self, client: &Arc<K8sClient>, ns: String) -> AlertmanagerConfig {
|
||||||
// let secret_name = format!("{}-secret", self.name.clone());
|
|
||||||
// let webhook_key = format!("{}", self.url.clone());
|
|
||||||
//
|
|
||||||
// let mut string_data = BTreeMap::new();
|
|
||||||
// string_data.insert("webhook-url".to_string(), webhook_key.clone());
|
|
||||||
//
|
|
||||||
// let secret = Secret {
|
|
||||||
// metadata: kube::core::ObjectMeta {
|
|
||||||
// name: Some(secret_name.clone()),
|
|
||||||
// ..Default::default()
|
|
||||||
// },
|
|
||||||
// string_data: Some(string_data),
|
|
||||||
// type_: Some("Opaque".to_string()),
|
|
||||||
// ..Default::default()
|
|
||||||
// };
|
|
||||||
//
|
|
||||||
// let _ = client.apply(&secret, Some(&ns)).await;
|
|
||||||
|
|
||||||
let spec = AlertmanagerConfigSpec {
|
let spec = AlertmanagerConfigSpec {
|
||||||
data: json!({
|
data: json!({
|
||||||
"route": {
|
"route": {
|
||||||
|
|||||||
@ -0,0 +1,556 @@
|
|||||||
|
use std::fs;
|
||||||
|
use std::{collections::BTreeMap, sync::Arc};
|
||||||
|
use tempfile::tempdir;
|
||||||
|
use tokio::io::AsyncWriteExt;
|
||||||
|
|
||||||
|
use async_trait::async_trait;
|
||||||
|
use kube::api::ObjectMeta;
|
||||||
|
use log::{debug, info};
|
||||||
|
use serde::Serialize;
|
||||||
|
use tokio::process::Command;
|
||||||
|
|
||||||
|
use crate::modules::monitoring::kube_prometheus::crd::crd_alertmanager_config::{
|
||||||
|
AlertmanagerConfig, CRDAlertManager, CRDAlertManagerReceiver,
|
||||||
|
};
|
||||||
|
use crate::modules::monitoring::kube_prometheus::crd::crd_grafana::{
|
||||||
|
Grafana, GrafanaDashboard, GrafanaDashboardSpec, GrafanaDatasource, GrafanaDatasourceConfig,
|
||||||
|
GrafanaDatasourceSpec, GrafanaSpec,
|
||||||
|
};
|
||||||
|
use crate::modules::monitoring::kube_prometheus::crd::crd_prometheus_rules::{
|
||||||
|
PrometheusRule, PrometheusRuleSpec, RuleGroup,
|
||||||
|
};
|
||||||
|
use crate::modules::monitoring::kube_prometheus::crd::service_monitor::ServiceMonitor;
|
||||||
|
use crate::topology::{K8sclient, Topology, k8s::K8sClient};
|
||||||
|
use crate::{
|
||||||
|
data::{Id, Version},
|
||||||
|
interpret::{Interpret, InterpretError, InterpretName, InterpretStatus, Outcome},
|
||||||
|
inventory::Inventory,
|
||||||
|
modules::monitoring::kube_prometheus::crd::{
|
||||||
|
crd_alertmanagers::{Alertmanager, AlertmanagerSpec},
|
||||||
|
crd_prometheuses::{
|
||||||
|
AlertmanagerEndpoints, LabelSelector, Prometheus, PrometheusSpec,
|
||||||
|
PrometheusSpecAlerting,
|
||||||
|
},
|
||||||
|
role::{build_prom_role, build_prom_rolebinding, build_prom_service_account},
|
||||||
|
},
|
||||||
|
score::Score,
|
||||||
|
};
|
||||||
|
|
||||||
|
#[derive(Clone, Debug, Serialize)]
|
||||||
|
pub struct HelmPrometheusApplicationAlertingScore {
|
||||||
|
pub namespace: String,
|
||||||
|
pub receivers: Vec<Box<dyn CRDAlertManagerReceiver>>,
|
||||||
|
pub service_monitors: Vec<ServiceMonitor>,
|
||||||
|
pub prometheus_rules: Vec<RuleGroup>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<T: Topology + K8sclient> Score<T> for HelmPrometheusApplicationAlertingScore {
|
||||||
|
fn create_interpret(&self) -> Box<dyn crate::interpret::Interpret<T>> {
|
||||||
|
Box::new(HelmPrometheusApplicationAlertingInterpret {
|
||||||
|
namespace: self.namespace.clone(),
|
||||||
|
receivers: self.receivers.clone(),
|
||||||
|
service_monitors: self.service_monitors.clone(),
|
||||||
|
prometheus_rules: self.prometheus_rules.clone(),
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
fn name(&self) -> String {
|
||||||
|
"HelmPrometheusApplicationAlertingScore".into()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Clone, Debug)]
|
||||||
|
pub struct HelmPrometheusApplicationAlertingInterpret {
|
||||||
|
pub namespace: String,
|
||||||
|
pub receivers: Vec<Box<dyn CRDAlertManagerReceiver>>,
|
||||||
|
pub service_monitors: Vec<ServiceMonitor>,
|
||||||
|
pub prometheus_rules: Vec<RuleGroup>,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[async_trait]
|
||||||
|
impl<T: Topology + K8sclient> Interpret<T> for HelmPrometheusApplicationAlertingInterpret {
|
||||||
|
async fn execute(
|
||||||
|
&self,
|
||||||
|
_inventory: &Inventory,
|
||||||
|
topology: &T,
|
||||||
|
) -> Result<Outcome, InterpretError> {
|
||||||
|
let client = topology.k8s_client().await.unwrap();
|
||||||
|
self.ensure_prometheus_operator().await?;
|
||||||
|
self.ensure_grafana_operator().await?;
|
||||||
|
self.install_prometheus(&client).await?;
|
||||||
|
self.install_alert_manager(&client).await?;
|
||||||
|
self.install_grafana(&client).await?;
|
||||||
|
self.install_receivers(&self.receivers, &client).await?;
|
||||||
|
self.install_rules(&self.prometheus_rules, &client).await?;
|
||||||
|
self.install_monitors(self.service_monitors.clone(), &client)
|
||||||
|
.await?;
|
||||||
|
Ok(Outcome::success(format!(
|
||||||
|
"deployed application monitoring composants channels"
|
||||||
|
)))
|
||||||
|
}
|
||||||
|
|
||||||
|
fn get_name(&self) -> InterpretName {
|
||||||
|
todo!()
|
||||||
|
}
|
||||||
|
|
||||||
|
fn get_version(&self) -> Version {
|
||||||
|
todo!()
|
||||||
|
}
|
||||||
|
|
||||||
|
fn get_status(&self) -> InterpretStatus {
|
||||||
|
todo!()
|
||||||
|
}
|
||||||
|
|
||||||
|
fn get_children(&self) -> Vec<Id> {
|
||||||
|
todo!()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl HelmPrometheusApplicationAlertingInterpret {
|
||||||
|
async fn crd_exists(&self, crd: &str) -> bool {
|
||||||
|
let output = Command::new("kubectl")
|
||||||
|
.args(["get", "crd", crd])
|
||||||
|
.output()
|
||||||
|
.await;
|
||||||
|
|
||||||
|
matches!(output, Ok(o) if o.status.success())
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn ensure_prometheus_operator(&self) -> Result<Outcome, InterpretError> {
|
||||||
|
if self.crd_exists("prometheuses.monitoring.coreos.com").await {
|
||||||
|
debug!("Prometheus CRDs already exist — skipping install.");
|
||||||
|
return Ok(Outcome::success(
|
||||||
|
"Prometheus CRDs already exist".to_string(),
|
||||||
|
));
|
||||||
|
}
|
||||||
|
|
||||||
|
let temp_dir =
|
||||||
|
tempdir().map_err(|e| InterpretError::new(format!("Tempdir error: {}", e)))?;
|
||||||
|
let temp_path = temp_dir.path().to_path_buf();
|
||||||
|
debug!("Using temp directory: {}", temp_path.display());
|
||||||
|
|
||||||
|
let pull_output = Command::new("helm")
|
||||||
|
.args(&[
|
||||||
|
"pull",
|
||||||
|
"oci://hub.nationtech.io/harmony/nt-prometheus-operator",
|
||||||
|
"--destination",
|
||||||
|
temp_path.to_str().unwrap(),
|
||||||
|
])
|
||||||
|
.output()
|
||||||
|
.await
|
||||||
|
.map_err(|e| InterpretError::new(format!("Helm pull error: {}", e)))?;
|
||||||
|
|
||||||
|
if !pull_output.status.success() {
|
||||||
|
return Err(InterpretError::new(format!(
|
||||||
|
"Helm pull failed: {}",
|
||||||
|
String::from_utf8_lossy(&pull_output.stderr)
|
||||||
|
)));
|
||||||
|
}
|
||||||
|
|
||||||
|
let tgz_path = fs::read_dir(&temp_path)
|
||||||
|
.unwrap()
|
||||||
|
.filter_map(|entry| {
|
||||||
|
let entry = entry.ok()?;
|
||||||
|
let path = entry.path();
|
||||||
|
if path.extension()? == "tgz" {
|
||||||
|
Some(path)
|
||||||
|
} else {
|
||||||
|
None
|
||||||
|
}
|
||||||
|
})
|
||||||
|
.next()
|
||||||
|
.ok_or_else(|| InterpretError::new("Could not find pulled Helm chart".into()))?;
|
||||||
|
|
||||||
|
debug!("Installing chart from: {}", tgz_path.display());
|
||||||
|
|
||||||
|
let install_output = Command::new("helm")
|
||||||
|
.args(&[
|
||||||
|
"install",
|
||||||
|
"nt-prometheus-operator",
|
||||||
|
tgz_path.to_str().unwrap(),
|
||||||
|
"--namespace",
|
||||||
|
&self.namespace,
|
||||||
|
"--create-namespace",
|
||||||
|
"--wait",
|
||||||
|
"--atomic",
|
||||||
|
])
|
||||||
|
.output()
|
||||||
|
.await
|
||||||
|
.map_err(|e| InterpretError::new(format!("Helm install error: {}", e)))?;
|
||||||
|
|
||||||
|
if !install_output.status.success() {
|
||||||
|
return Err(InterpretError::new(format!(
|
||||||
|
"Helm install failed: {}",
|
||||||
|
String::from_utf8_lossy(&install_output.stderr)
|
||||||
|
)));
|
||||||
|
}
|
||||||
|
|
||||||
|
debug!(
|
||||||
|
"Installed prometheus operator in namespace: {}",
|
||||||
|
self.namespace
|
||||||
|
);
|
||||||
|
Ok(Outcome::success(format!(
|
||||||
|
"Installed prometheus operator in namespace {}",
|
||||||
|
self.namespace
|
||||||
|
)))
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn ensure_grafana_operator(&self) -> Result<Outcome, InterpretError> {
|
||||||
|
if self.crd_exists("grafanas.grafana.integreatly.org").await {
|
||||||
|
debug!("grafana CRDs already exist — skipping install.");
|
||||||
|
return Ok(Outcome::success("Grafana CRDs already exist".to_string()));
|
||||||
|
}
|
||||||
|
|
||||||
|
let _ = Command::new("helm")
|
||||||
|
.args(&[
|
||||||
|
"repo",
|
||||||
|
"add",
|
||||||
|
"grafana-operator",
|
||||||
|
"https://grafana.github.io/helm-charts",
|
||||||
|
])
|
||||||
|
.output()
|
||||||
|
.await
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
|
let _ = Command::new("helm")
|
||||||
|
.args(&["repo", "update"])
|
||||||
|
.output()
|
||||||
|
.await
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
|
let _ = Command::new("helm")
|
||||||
|
.args(&[
|
||||||
|
"install",
|
||||||
|
"grafana-operator",
|
||||||
|
"grafana-operator/grafana-operator",
|
||||||
|
"--namespace",
|
||||||
|
&self.namespace,
|
||||||
|
"--create-namespace",
|
||||||
|
])
|
||||||
|
.output()
|
||||||
|
.await
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
|
Ok(Outcome::success(format!(
|
||||||
|
"installed grafana operator in ns {}",
|
||||||
|
self.namespace.clone()
|
||||||
|
)))
|
||||||
|
}
|
||||||
|
async fn install_prometheus(&self, client: &Arc<K8sClient>) -> Result<Outcome, InterpretError> {
|
||||||
|
debug!(
|
||||||
|
"installing crd-prometheuses in namespace {}",
|
||||||
|
self.namespace.clone()
|
||||||
|
);
|
||||||
|
debug!("building role/rolebinding/serviceaccount for crd-prometheus");
|
||||||
|
let rolename = format!("{}-prom", self.namespace.clone());
|
||||||
|
let sa_name = format!("{}-prom-sa", self.namespace.clone());
|
||||||
|
let role = build_prom_role(rolename.clone(), self.namespace.clone());
|
||||||
|
let rolebinding =
|
||||||
|
build_prom_rolebinding(rolename.clone(), self.namespace.clone(), sa_name.clone());
|
||||||
|
let sa = build_prom_service_account(sa_name.clone(), self.namespace.clone());
|
||||||
|
let prom_spec = PrometheusSpec {
|
||||||
|
alerting: Some(PrometheusSpecAlerting {
|
||||||
|
alertmanagers: Some(vec![AlertmanagerEndpoints {
|
||||||
|
name: Some(format!("alertmanager-operated")),
|
||||||
|
namespace: Some(format!("{}", self.namespace.clone())),
|
||||||
|
port: Some("web".into()),
|
||||||
|
scheme: Some("http".into()),
|
||||||
|
}]),
|
||||||
|
}),
|
||||||
|
service_account_name: sa_name.clone(),
|
||||||
|
service_monitor_namespace_selector: Some(LabelSelector {
|
||||||
|
match_labels: BTreeMap::from([(
|
||||||
|
"kubernetes.io/metadata.name".to_string(),
|
||||||
|
format!("{}", self.namespace.clone()),
|
||||||
|
)]),
|
||||||
|
match_expressions: vec![],
|
||||||
|
}),
|
||||||
|
service_monitor_selector: Some(LabelSelector {
|
||||||
|
match_labels: BTreeMap::from([("client".to_string(), "prometheus".to_string())]),
|
||||||
|
..Default::default()
|
||||||
|
}),
|
||||||
|
|
||||||
|
service_discovery_role: Some("Endpoints".into()),
|
||||||
|
|
||||||
|
pod_monitor_selector: Some(LabelSelector {
|
||||||
|
match_labels: BTreeMap::from([("client".to_string(), "prometheus".to_string())]),
|
||||||
|
..Default::default()
|
||||||
|
}),
|
||||||
|
|
||||||
|
rule_selector: Some(LabelSelector {
|
||||||
|
match_labels: BTreeMap::from([("role".to_string(), "prometheus-rule".to_string())]),
|
||||||
|
..Default::default()
|
||||||
|
}),
|
||||||
|
|
||||||
|
rule_namespace_selector: Some(LabelSelector {
|
||||||
|
match_labels: BTreeMap::from([(
|
||||||
|
"kubernetes.io/metadata.name".to_string(),
|
||||||
|
format!("{}", self.namespace.clone()),
|
||||||
|
)]),
|
||||||
|
match_expressions: vec![],
|
||||||
|
}),
|
||||||
|
};
|
||||||
|
let prom = Prometheus {
|
||||||
|
metadata: ObjectMeta {
|
||||||
|
name: Some(self.namespace.clone()),
|
||||||
|
labels: Some(std::collections::BTreeMap::from([(
|
||||||
|
"alertmanagerConfig".to_string(),
|
||||||
|
"enabled".to_string(),
|
||||||
|
)])),
|
||||||
|
namespace: Some(self.namespace.clone()),
|
||||||
|
..Default::default()
|
||||||
|
},
|
||||||
|
spec: prom_spec,
|
||||||
|
};
|
||||||
|
client
|
||||||
|
.apply(&role, Some(&self.namespace.clone()))
|
||||||
|
.await
|
||||||
|
.map_err(|e| InterpretError::new(e.to_string()))?;
|
||||||
|
info!(
|
||||||
|
"installed prometheus role: {:#?} in ns {:#?}",
|
||||||
|
role.metadata.name.unwrap(),
|
||||||
|
role.metadata.namespace.unwrap()
|
||||||
|
);
|
||||||
|
client
|
||||||
|
.apply(&rolebinding, Some(&self.namespace.clone()))
|
||||||
|
.await
|
||||||
|
.map_err(|e| InterpretError::new(e.to_string()))?;
|
||||||
|
info!(
|
||||||
|
"installed prometheus rolebinding: {:#?} in ns {:#?}",
|
||||||
|
rolebinding.metadata.name.unwrap(),
|
||||||
|
rolebinding.metadata.namespace.unwrap()
|
||||||
|
);
|
||||||
|
client
|
||||||
|
.apply(&sa, Some(&self.namespace.clone()))
|
||||||
|
.await
|
||||||
|
.map_err(|e| InterpretError::new(e.to_string()))?;
|
||||||
|
info!(
|
||||||
|
"installed prometheus service account: {:#?} in ns {:#?}",
|
||||||
|
sa.metadata.name.unwrap(),
|
||||||
|
sa.metadata.namespace.unwrap()
|
||||||
|
);
|
||||||
|
client
|
||||||
|
.apply(&prom, Some(&self.namespace.clone()))
|
||||||
|
.await
|
||||||
|
.map_err(|e| InterpretError::new(e.to_string()))?;
|
||||||
|
info!(
|
||||||
|
"installed prometheus: {:#?} in ns {:#?}",
|
||||||
|
&prom.metadata.name.clone().unwrap(),
|
||||||
|
&prom.metadata.namespace.clone().unwrap()
|
||||||
|
);
|
||||||
|
|
||||||
|
Ok(Outcome::success(format!(
|
||||||
|
"successfully deployed crd-prometheus {:#?}",
|
||||||
|
prom
|
||||||
|
)))
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn install_alert_manager(
|
||||||
|
&self,
|
||||||
|
client: &Arc<K8sClient>,
|
||||||
|
) -> Result<Outcome, InterpretError> {
|
||||||
|
let am = Alertmanager {
|
||||||
|
metadata: ObjectMeta {
|
||||||
|
name: Some(self.namespace.clone()),
|
||||||
|
labels: Some(std::collections::BTreeMap::from([(
|
||||||
|
"alertmanagerConfig".to_string(),
|
||||||
|
"enabled".to_string(),
|
||||||
|
)])),
|
||||||
|
namespace: Some(self.namespace.clone()),
|
||||||
|
..Default::default()
|
||||||
|
},
|
||||||
|
spec: AlertmanagerSpec::default(),
|
||||||
|
};
|
||||||
|
client
|
||||||
|
.apply(&am, Some(&self.namespace.clone()))
|
||||||
|
.await
|
||||||
|
.map_err(|e| InterpretError::new(e.to_string()))?;
|
||||||
|
Ok(Outcome::success(format!(
|
||||||
|
"successfully deployed service monitor {:#?}",
|
||||||
|
am.metadata.name
|
||||||
|
)))
|
||||||
|
}
|
||||||
|
async fn install_monitors(
|
||||||
|
&self,
|
||||||
|
monitors: Vec<ServiceMonitor>,
|
||||||
|
client: &Arc<K8sClient>,
|
||||||
|
) -> Result<Outcome, InterpretError> {
|
||||||
|
for monitor in monitors.iter() {
|
||||||
|
client
|
||||||
|
.apply(monitor, Some(&self.namespace.clone()))
|
||||||
|
.await
|
||||||
|
.map_err(|e| InterpretError::new(e.to_string()))?;
|
||||||
|
}
|
||||||
|
Ok(Outcome::success(
|
||||||
|
"succesfully deployed service monitors".to_string(),
|
||||||
|
))
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn install_rules(
|
||||||
|
&self,
|
||||||
|
rules: &Vec<RuleGroup>,
|
||||||
|
client: &Arc<K8sClient>,
|
||||||
|
) -> Result<Outcome, InterpretError> {
|
||||||
|
let prom_rule_spec = PrometheusRuleSpec {
|
||||||
|
groups: rules.clone(),
|
||||||
|
};
|
||||||
|
|
||||||
|
let prom_rules = PrometheusRule {
|
||||||
|
metadata: ObjectMeta {
|
||||||
|
name: Some(self.namespace.clone()),
|
||||||
|
labels: Some(std::collections::BTreeMap::from([
|
||||||
|
("alertmanagerConfig".to_string(), "enabled".to_string()),
|
||||||
|
("role".to_string(), "prometheus-rule".to_string()),
|
||||||
|
])),
|
||||||
|
namespace: Some(self.namespace.clone()),
|
||||||
|
..Default::default()
|
||||||
|
},
|
||||||
|
spec: prom_rule_spec,
|
||||||
|
};
|
||||||
|
client
|
||||||
|
.apply(&prom_rules, Some(&self.namespace))
|
||||||
|
.await
|
||||||
|
.map_err(|e| InterpretError::new(e.to_string()))?;
|
||||||
|
Ok(Outcome::success(format!(
|
||||||
|
"successfully deployed rules {:#?}",
|
||||||
|
prom_rules.metadata.name
|
||||||
|
)))
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn install_grafana(&self, client: &Arc<K8sClient>) -> Result<Outcome, InterpretError> {
|
||||||
|
let mut label = BTreeMap::new();
|
||||||
|
label.insert("dashboards".to_string(), "grafana".to_string());
|
||||||
|
let labels = LabelSelector {
|
||||||
|
match_labels: label.clone(),
|
||||||
|
match_expressions: vec![],
|
||||||
|
};
|
||||||
|
let mut json_data = BTreeMap::new();
|
||||||
|
json_data.insert("timeInterval".to_string(), "5s".to_string());
|
||||||
|
let namespace = self.namespace.clone();
|
||||||
|
|
||||||
|
let json = format!(
|
||||||
|
r#"{{
|
||||||
|
"title": "UP Status Dashboard",
|
||||||
|
"timezone": "browser",
|
||||||
|
"panels": [
|
||||||
|
{{
|
||||||
|
"type": "table",
|
||||||
|
"title": "Service UP Status",
|
||||||
|
"gridPos": {{ "x": 0, "y": 0, "w": 24, "h": 10 }},
|
||||||
|
"targets": [
|
||||||
|
{{
|
||||||
|
"expr": "up{{namespace=\"{namespace}\"}}",
|
||||||
|
"format": "table",
|
||||||
|
"refId": "A"
|
||||||
|
}}
|
||||||
|
],
|
||||||
|
"options": {{
|
||||||
|
"showHeader": true
|
||||||
|
}},
|
||||||
|
"fieldConfig": {{
|
||||||
|
"defaults": {{
|
||||||
|
"custom": {{}}
|
||||||
|
}},
|
||||||
|
"overrides": []
|
||||||
|
}}
|
||||||
|
}}
|
||||||
|
],
|
||||||
|
"schemaVersion": 30,
|
||||||
|
"version": 1
|
||||||
|
}}"#
|
||||||
|
);
|
||||||
|
|
||||||
|
let graf_data_source = GrafanaDatasource {
|
||||||
|
metadata: ObjectMeta {
|
||||||
|
name: Some(self.namespace.clone()),
|
||||||
|
namespace: Some(self.namespace.clone()),
|
||||||
|
..Default::default()
|
||||||
|
},
|
||||||
|
spec: GrafanaDatasourceSpec {
|
||||||
|
instance_selector: labels.clone(),
|
||||||
|
allow_cross_namespace_import: Some(false),
|
||||||
|
datasource: GrafanaDatasourceConfig {
|
||||||
|
access: "proxy".to_string(),
|
||||||
|
database: Some("prometheus".to_string()),
|
||||||
|
json_data: Some(json_data),
|
||||||
|
//this is fragile
|
||||||
|
name: format!("prometheus-{}-0", self.namespace.clone()),
|
||||||
|
r#type: "prometheus".to_string(),
|
||||||
|
url: format!(
|
||||||
|
"http://prometheus-operated.{}.svc.cluster.local:9090",
|
||||||
|
self.namespace.clone()
|
||||||
|
),
|
||||||
|
},
|
||||||
|
},
|
||||||
|
};
|
||||||
|
|
||||||
|
client
|
||||||
|
.apply(&graf_data_source, Some(&self.namespace))
|
||||||
|
.await
|
||||||
|
.map_err(|e| InterpretError::new(e.to_string()))?;
|
||||||
|
|
||||||
|
let graf_dashboard = GrafanaDashboard {
|
||||||
|
metadata: ObjectMeta {
|
||||||
|
name: Some(self.namespace.clone()),
|
||||||
|
namespace: Some(self.namespace.clone()),
|
||||||
|
..Default::default()
|
||||||
|
},
|
||||||
|
spec: GrafanaDashboardSpec {
|
||||||
|
resync_period: Some("30s".to_string()),
|
||||||
|
instance_selector: labels.clone(),
|
||||||
|
json,
|
||||||
|
},
|
||||||
|
};
|
||||||
|
|
||||||
|
client
|
||||||
|
.apply(&graf_dashboard, Some(&self.namespace))
|
||||||
|
.await
|
||||||
|
.map_err(|e| InterpretError::new(e.to_string()))?;
|
||||||
|
|
||||||
|
let grafana = Grafana {
|
||||||
|
metadata: ObjectMeta {
|
||||||
|
name: Some(self.namespace.clone()),
|
||||||
|
namespace: Some(self.namespace.clone()),
|
||||||
|
labels: Some(label.clone()),
|
||||||
|
..Default::default()
|
||||||
|
},
|
||||||
|
spec: GrafanaSpec {
|
||||||
|
config: None,
|
||||||
|
admin_user: None,
|
||||||
|
admin_password: None,
|
||||||
|
ingress: None,
|
||||||
|
persistence: None,
|
||||||
|
resources: None,
|
||||||
|
},
|
||||||
|
};
|
||||||
|
client
|
||||||
|
.apply(&grafana, Some(&self.namespace))
|
||||||
|
.await
|
||||||
|
.map_err(|e| InterpretError::new(e.to_string()))?;
|
||||||
|
Ok(Outcome::success(format!(
|
||||||
|
"successfully deployed grafana instance {:#?}",
|
||||||
|
grafana.metadata.name
|
||||||
|
)))
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn install_receivers(
|
||||||
|
&self,
|
||||||
|
receivers: &Vec<Box<dyn CRDAlertManagerReceiver>>,
|
||||||
|
client: &Arc<K8sClient>,
|
||||||
|
) -> Result<Outcome, InterpretError> {
|
||||||
|
for receiver in receivers.iter() {
|
||||||
|
let alertmanager_config: AlertmanagerConfig = receiver
|
||||||
|
.configure_receiver(&client, self.namespace.clone())
|
||||||
|
.await;
|
||||||
|
let sender = CRDAlertManager {
|
||||||
|
alertmanager_configs: alertmanager_config,
|
||||||
|
namespace: self.namespace.clone(),
|
||||||
|
client: client.clone(),
|
||||||
|
};
|
||||||
|
receiver.install(&sender).await.map_err(|err| {
|
||||||
|
InterpretError::new(format!("failed to install receiver: {}", err))
|
||||||
|
})?;
|
||||||
|
}
|
||||||
|
Ok(Outcome::success(format!("successfully deployed receivers")))
|
||||||
|
}
|
||||||
|
}
|
||||||
@ -1 +1,2 @@
|
|||||||
|
pub mod helm_prometheus_application_alerting;
|
||||||
pub mod k8s_application_monitoring_score;
|
pub mod k8s_application_monitoring_score;
|
||||||
|
|||||||
@ -1,20 +1,13 @@
|
|||||||
use std::sync::Arc;
|
use std::sync::Arc;
|
||||||
|
|
||||||
use async_trait::async_trait;
|
use async_trait::async_trait;
|
||||||
use kube::{CustomResource, api::ObjectMeta};
|
use kube::CustomResource;
|
||||||
use schemars::JsonSchema;
|
use schemars::JsonSchema;
|
||||||
use serde::{Deserialize, Serialize};
|
use serde::{Deserialize, Serialize};
|
||||||
|
|
||||||
use crate::{
|
use crate::topology::{
|
||||||
interpret::{InterpretError, Outcome},
|
k8s::K8sClient,
|
||||||
inventory::Inventory,
|
oberservability::monitoring::{AlertReceiver, AlertSender},
|
||||||
topology::{
|
|
||||||
HelmCommand, K8sclient, Topology,
|
|
||||||
installable::Installable,
|
|
||||||
k8s::K8sClient,
|
|
||||||
oberservability::monitoring::{AlertReceiver, AlertSender},
|
|
||||||
tenant::TenantManager,
|
|
||||||
},
|
|
||||||
};
|
};
|
||||||
|
|
||||||
#[derive(CustomResource, Serialize, Deserialize, Debug, Clone, JsonSchema)]
|
#[derive(CustomResource, Serialize, Deserialize, Debug, Clone, JsonSchema)]
|
||||||
@ -4,6 +4,10 @@ use kube::CustomResource;
|
|||||||
use schemars::JsonSchema;
|
use schemars::JsonSchema;
|
||||||
use serde::{Deserialize, Serialize};
|
use serde::{Deserialize, Serialize};
|
||||||
|
|
||||||
|
use crate::modules::monitoring::kube_prometheus::types::Operator;
|
||||||
|
|
||||||
|
use super::crd_prometheuses::LabelSelector;
|
||||||
|
|
||||||
#[derive(CustomResource, Serialize, Deserialize, Debug, Clone, JsonSchema)]
|
#[derive(CustomResource, Serialize, Deserialize, Debug, Clone, JsonSchema)]
|
||||||
#[kube(
|
#[kube(
|
||||||
group = "grafana.integreatly.org",
|
group = "grafana.integreatly.org",
|
||||||
@ -132,7 +136,7 @@ pub struct GrafanaDatasourceConfig {
|
|||||||
pub access: String,
|
pub access: String,
|
||||||
pub database: Option<String>,
|
pub database: Option<String>,
|
||||||
#[serde(default, skip_serializing_if = "Option::is_none")]
|
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||||||
pub json_data: Option<BTreeMap<String, serde_json::Value>>,
|
pub json_data: Option<BTreeMap<String, String>>,
|
||||||
pub name: String,
|
pub name: String,
|
||||||
pub r#type: String,
|
pub r#type: String,
|
||||||
pub url: String,
|
pub url: String,
|
||||||
@ -140,25 +144,6 @@ pub struct GrafanaDatasourceConfig {
|
|||||||
|
|
||||||
// ------------------------------------------------------------------------------------------------
|
// ------------------------------------------------------------------------------------------------
|
||||||
|
|
||||||
#[derive(Serialize, Deserialize, Debug, Clone, JsonSchema)]
|
|
||||||
#[serde(rename_all = "camelCase")]
|
|
||||||
pub struct LabelSelector {
|
|
||||||
#[serde(default, skip_serializing_if = "BTreeMap::is_empty")]
|
|
||||||
pub match_labels: BTreeMap<String, String>,
|
|
||||||
|
|
||||||
#[serde(default, skip_serializing_if = "Vec::is_empty")]
|
|
||||||
pub match_expressions: Vec<LabelSelectorRequirement>,
|
|
||||||
}
|
|
||||||
|
|
||||||
#[derive(Serialize, Deserialize, Debug, Clone, JsonSchema)]
|
|
||||||
#[serde(rename_all = "camelCase")]
|
|
||||||
pub struct LabelSelectorRequirement {
|
|
||||||
pub key: String,
|
|
||||||
pub operator: String,
|
|
||||||
#[serde(default, skip_serializing_if = "Vec::is_empty")]
|
|
||||||
pub values: Vec<String>,
|
|
||||||
}
|
|
||||||
|
|
||||||
#[derive(Serialize, Deserialize, Debug, Clone, JsonSchema, Default)]
|
#[derive(Serialize, Deserialize, Debug, Clone, JsonSchema, Default)]
|
||||||
#[serde(rename_all = "camelCase")]
|
#[serde(rename_all = "camelCase")]
|
||||||
pub struct ResourceRequirements {
|
pub struct ResourceRequirements {
|
||||||
|
|||||||
@ -16,6 +16,9 @@ use crate::modules::monitoring::kube_prometheus::types::Operator;
|
|||||||
)]
|
)]
|
||||||
#[serde(rename_all = "camelCase")]
|
#[serde(rename_all = "camelCase")]
|
||||||
pub struct PrometheusSpec {
|
pub struct PrometheusSpec {
|
||||||
|
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||||||
|
pub alerting: Option<PrometheusSpecAlerting>,
|
||||||
|
|
||||||
pub service_account_name: String,
|
pub service_account_name: String,
|
||||||
#[serde(default, skip_serializing_if = "Option::is_none")]
|
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||||||
pub service_monitor_namespace_selector: Option<LabelSelector>,
|
pub service_monitor_namespace_selector: Option<LabelSelector>,
|
||||||
@ -36,6 +39,41 @@ pub struct PrometheusSpec {
|
|||||||
pub rule_namespace_selector: Option<LabelSelector>,
|
pub rule_namespace_selector: Option<LabelSelector>,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema, Default)]
|
||||||
|
#[serde(rename_all = "camelCase")]
|
||||||
|
pub struct NamespaceSelector {
|
||||||
|
#[serde(default, skip_serializing_if = "Vec::is_empty")]
|
||||||
|
pub match_names: Vec<String>,
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Contains alerting configuration, specifically Alertmanager endpoints.
|
||||||
|
#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema, Default)]
|
||||||
|
pub struct PrometheusSpecAlerting {
|
||||||
|
#[serde(skip_serializing_if = "Option::is_none")]
|
||||||
|
pub alertmanagers: Option<Vec<AlertmanagerEndpoints>>,
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Represents an Alertmanager endpoint configuration used by Prometheus.
|
||||||
|
#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema, Default)]
|
||||||
|
pub struct AlertmanagerEndpoints {
|
||||||
|
/// Name of the Alertmanager Service.
|
||||||
|
#[serde(skip_serializing_if = "Option::is_none")]
|
||||||
|
pub name: Option<String>,
|
||||||
|
|
||||||
|
/// Namespace of the Alertmanager Service.
|
||||||
|
#[serde(skip_serializing_if = "Option::is_none")]
|
||||||
|
pub namespace: Option<String>,
|
||||||
|
|
||||||
|
/// Port to access on the Alertmanager Service (e.g. "web").
|
||||||
|
#[serde(skip_serializing_if = "Option::is_none")]
|
||||||
|
pub port: Option<String>,
|
||||||
|
|
||||||
|
/// Scheme to use for connecting (e.g. "http").
|
||||||
|
#[serde(skip_serializing_if = "Option::is_none")]
|
||||||
|
pub scheme: Option<String>,
|
||||||
|
// Other fields like `tls_config`, `path_prefix`, etc., can be added if needed.
|
||||||
|
}
|
||||||
|
|
||||||
#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema, Default)]
|
#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema, Default)]
|
||||||
#[serde(rename_all = "camelCase")]
|
#[serde(rename_all = "camelCase")]
|
||||||
pub struct LabelSelector {
|
pub struct LabelSelector {
|
||||||
@ -58,6 +96,8 @@ pub struct LabelSelectorRequirement {
|
|||||||
impl Default for PrometheusSpec {
|
impl Default for PrometheusSpec {
|
||||||
fn default() -> Self {
|
fn default() -> Self {
|
||||||
PrometheusSpec {
|
PrometheusSpec {
|
||||||
|
alerting: None,
|
||||||
|
|
||||||
service_account_name: "prometheus".into(),
|
service_account_name: "prometheus".into(),
|
||||||
|
|
||||||
// null means "only my namespace"
|
// null means "only my namespace"
|
||||||
|
|||||||
@ -7,7 +7,7 @@ use crate::modules::helm::chart::HelmChartScore;
|
|||||||
pub fn grafana_operator_helm_chart_score(ns: String) -> HelmChartScore {
|
pub fn grafana_operator_helm_chart_score(ns: String) -> HelmChartScore {
|
||||||
HelmChartScore {
|
HelmChartScore {
|
||||||
namespace: Some(NonBlankString::from_str(&ns).unwrap()),
|
namespace: Some(NonBlankString::from_str(&ns).unwrap()),
|
||||||
release_name: NonBlankString::from_str("kube-prometheus").unwrap(),
|
release_name: NonBlankString::from_str("grafana_operator").unwrap(),
|
||||||
chart_name: NonBlankString::from_str(
|
chart_name: NonBlankString::from_str(
|
||||||
"grafana-operator oci://ghcr.io/grafana/helm-charts/grafana-operator",
|
"grafana-operator oci://ghcr.io/grafana/helm-charts/grafana-operator",
|
||||||
)
|
)
|
||||||
|
|||||||
@ -1,7 +1,10 @@
|
|||||||
|
pub mod crd_alertmanager_config;
|
||||||
pub mod crd_alertmanagers;
|
pub mod crd_alertmanagers;
|
||||||
pub mod crd_default_rules;
|
pub mod crd_default_rules;
|
||||||
pub mod crd_grafana;
|
pub mod crd_grafana;
|
||||||
pub mod crd_prometheus_rules;
|
pub mod crd_prometheus_rules;
|
||||||
pub mod crd_prometheuses;
|
pub mod crd_prometheuses;
|
||||||
pub mod grafana_operator;
|
pub mod grafana_operator;
|
||||||
|
pub mod prometheus_operator;
|
||||||
pub mod role;
|
pub mod role;
|
||||||
|
pub mod service_monitor;
|
||||||
|
|||||||
@ -0,0 +1,22 @@
|
|||||||
|
use std::str::FromStr;
|
||||||
|
|
||||||
|
use non_blank_string_rs::NonBlankString;
|
||||||
|
|
||||||
|
use crate::modules::helm::chart::HelmChartScore;
|
||||||
|
|
||||||
|
pub fn prometheus_operator_helm_chart_score(ns: String) -> HelmChartScore {
|
||||||
|
HelmChartScore {
|
||||||
|
namespace: Some(NonBlankString::from_str(&ns).unwrap()),
|
||||||
|
release_name: NonBlankString::from_str("prometheus-operator").unwrap(),
|
||||||
|
chart_name: NonBlankString::from_str(
|
||||||
|
"grafana-operator oci://ghcr.io/grafana/helm-charts/grafana-operator",
|
||||||
|
)
|
||||||
|
.unwrap(),
|
||||||
|
chart_version: None,
|
||||||
|
values_overrides: None,
|
||||||
|
values_yaml: None,
|
||||||
|
create_namespace: true,
|
||||||
|
install_only: true,
|
||||||
|
repository: None,
|
||||||
|
}
|
||||||
|
}
|
||||||
@ -6,7 +6,7 @@ use serde::{Deserialize, Serialize};
|
|||||||
|
|
||||||
use crate::interpret::InterpretError;
|
use crate::interpret::InterpretError;
|
||||||
|
|
||||||
use super::types::{
|
use crate::modules::monitoring::kube_prometheus::types::{
|
||||||
HTTPScheme, MatchExpression, NamespaceSelector, Operator, Selector,
|
HTTPScheme, MatchExpression, NamespaceSelector, Operator, Selector,
|
||||||
ServiceMonitor as KubeServiceMonitor, ServiceMonitorEndpoint,
|
ServiceMonitor as KubeServiceMonitor, ServiceMonitorEndpoint,
|
||||||
};
|
};
|
||||||
@ -1,253 +0,0 @@
|
|||||||
use std::sync::Arc;
|
|
||||||
|
|
||||||
use async_trait::async_trait;
|
|
||||||
use kube::{Api, api::ObjectMeta};
|
|
||||||
use log::{debug, info};
|
|
||||||
use serde::Serialize;
|
|
||||||
|
|
||||||
use crate::{
|
|
||||||
data::{Id, Version},
|
|
||||||
interpret::{Interpret, InterpretError, InterpretName, InterpretStatus, Outcome},
|
|
||||||
inventory::Inventory,
|
|
||||||
modules::monitoring::kube_prometheus::crd::{
|
|
||||||
crd_alertmanagers::{Alertmanager, AlertmanagerSpec},
|
|
||||||
crd_prometheuses::{Prometheus, PrometheusSpec},
|
|
||||||
role::{build_prom_role, build_prom_rolebinding, build_prom_service_account},
|
|
||||||
},
|
|
||||||
score::Score,
|
|
||||||
topology::{K8sclient, Topology, k8s::K8sClient, oberservability::monitoring::AlertReceiver},
|
|
||||||
};
|
|
||||||
|
|
||||||
use super::{
|
|
||||||
alert_manager_config::{
|
|
||||||
AlertmanagerConfig, AlertmanagerConfigSpec, CRDAlertManager, CRDAlertManagerReceiver,
|
|
||||||
},
|
|
||||||
crd::crd_prometheus_rules::{PrometheusRule, PrometheusRuleSpec, RuleGroup},
|
|
||||||
prometheus::KubePrometheus,
|
|
||||||
};
|
|
||||||
|
|
||||||
#[derive(Clone, Debug, Serialize)]
|
|
||||||
pub struct HelmPrometheusApplicationAlertingScore {
|
|
||||||
pub namespace: String,
|
|
||||||
pub receivers: Vec<Box<dyn CRDAlertManagerReceiver>>,
|
|
||||||
pub service_monitors: Vec<super::service_monitor::ServiceMonitor>,
|
|
||||||
pub prometheus_rules: Vec<RuleGroup>,
|
|
||||||
}
|
|
||||||
|
|
||||||
impl<T: Topology + K8sclient> Score<T> for HelmPrometheusApplicationAlertingScore {
|
|
||||||
fn create_interpret(&self) -> Box<dyn crate::interpret::Interpret<T>> {
|
|
||||||
Box::new(HelmPrometheusApplicationAlertingInterpret {
|
|
||||||
namespace: self.namespace.clone(),
|
|
||||||
receivers: self.receivers.clone(),
|
|
||||||
service_monitors: self.service_monitors.clone(),
|
|
||||||
prometheus_rules: self.prometheus_rules.clone(),
|
|
||||||
})
|
|
||||||
}
|
|
||||||
|
|
||||||
fn name(&self) -> String {
|
|
||||||
"HelmPrometheusApplicationAlertingScore".into()
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
#[derive(Clone, Debug)]
|
|
||||||
pub struct HelmPrometheusApplicationAlertingInterpret {
|
|
||||||
pub namespace: String,
|
|
||||||
pub receivers: Vec<Box<dyn CRDAlertManagerReceiver>>,
|
|
||||||
pub service_monitors: Vec<super::service_monitor::ServiceMonitor>,
|
|
||||||
pub prometheus_rules: Vec<RuleGroup>,
|
|
||||||
}
|
|
||||||
|
|
||||||
#[async_trait]
|
|
||||||
impl<T: Topology + K8sclient> Interpret<T> for HelmPrometheusApplicationAlertingInterpret {
|
|
||||||
async fn execute(
|
|
||||||
&self,
|
|
||||||
inventory: &Inventory,
|
|
||||||
topology: &T,
|
|
||||||
) -> Result<Outcome, InterpretError> {
|
|
||||||
let client = topology.k8s_client().await.unwrap();
|
|
||||||
self.install_prometheus(&client).await?;
|
|
||||||
self.install_alert_manager(&client).await?;
|
|
||||||
for receiver in self.receivers.iter() {
|
|
||||||
let alertmanager_config: AlertmanagerConfig = receiver
|
|
||||||
.configure_receiver(&client, self.namespace.clone())
|
|
||||||
.await;
|
|
||||||
let sender = CRDAlertManager {
|
|
||||||
alertmanager_configs: alertmanager_config,
|
|
||||||
namespace: self.namespace.clone(),
|
|
||||||
client: client.clone(),
|
|
||||||
};
|
|
||||||
receiver.install(&sender).await.map_err(|err| {
|
|
||||||
InterpretError::new(format!("failed to install receiver: {}", err))
|
|
||||||
})?;
|
|
||||||
}
|
|
||||||
self.install_rules(self.prometheus_rules.clone(), client.clone())
|
|
||||||
.await
|
|
||||||
.map_err(|err| InterpretError::new(format!("failed to install rules: {}", err)))?;
|
|
||||||
|
|
||||||
debug!("\n\n\n monitors: {:#?}", self.service_monitors.clone());
|
|
||||||
for monitor in self.service_monitors.iter() {
|
|
||||||
self.install_monitor(monitor.clone(), client.clone())
|
|
||||||
.await?;
|
|
||||||
}
|
|
||||||
Ok(Outcome::success(format!("deployed alert channels")))
|
|
||||||
}
|
|
||||||
|
|
||||||
fn get_name(&self) -> InterpretName {
|
|
||||||
todo!()
|
|
||||||
}
|
|
||||||
|
|
||||||
fn get_version(&self) -> Version {
|
|
||||||
todo!()
|
|
||||||
}
|
|
||||||
|
|
||||||
fn get_status(&self) -> InterpretStatus {
|
|
||||||
todo!()
|
|
||||||
}
|
|
||||||
|
|
||||||
fn get_children(&self) -> Vec<Id> {
|
|
||||||
todo!()
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
impl HelmPrometheusApplicationAlertingInterpret {
|
|
||||||
async fn install_prometheus(&self, client: &Arc<K8sClient>) -> Result<Outcome, InterpretError> {
|
|
||||||
debug!(
|
|
||||||
"installing crd-prometheuses in namespace {}",
|
|
||||||
self.namespace.clone()
|
|
||||||
);
|
|
||||||
debug!("building role/rolebinding/serviceaccount for crd-prometheus");
|
|
||||||
let rolename = format!("{}-prom", self.namespace.clone());
|
|
||||||
let sa_name = format!("{}-prom-sa", self.namespace.clone());
|
|
||||||
let role = build_prom_role(rolename.clone(), self.namespace.clone());
|
|
||||||
let rolebinding =
|
|
||||||
build_prom_rolebinding(rolename.clone(), self.namespace.clone(), sa_name.clone());
|
|
||||||
let sa = build_prom_service_account(sa_name.clone(), self.namespace.clone());
|
|
||||||
let mut prom_spec = PrometheusSpec::default();
|
|
||||||
prom_spec.service_account_name = sa_name.clone();
|
|
||||||
let prom = Prometheus {
|
|
||||||
metadata: ObjectMeta {
|
|
||||||
name: Some(self.namespace.clone()),
|
|
||||||
labels: Some(std::collections::BTreeMap::from([(
|
|
||||||
"alertmanagerConfig".to_string(),
|
|
||||||
"enabled".to_string(),
|
|
||||||
)])),
|
|
||||||
namespace: Some(self.namespace.clone()),
|
|
||||||
..Default::default()
|
|
||||||
},
|
|
||||||
spec: prom_spec,
|
|
||||||
};
|
|
||||||
client
|
|
||||||
.apply(&role, Some(&self.namespace.clone()))
|
|
||||||
.await
|
|
||||||
.map_err(|e| InterpretError::new(e.to_string()))?;
|
|
||||||
info!(
|
|
||||||
"installed prometheus role: {:#?} in ns {:#?}",
|
|
||||||
role.metadata.name.unwrap(),
|
|
||||||
role.metadata.namespace.unwrap()
|
|
||||||
);
|
|
||||||
client
|
|
||||||
.apply(&rolebinding, Some(&self.namespace.clone()))
|
|
||||||
.await
|
|
||||||
.map_err(|e| InterpretError::new(e.to_string()))?;
|
|
||||||
info!(
|
|
||||||
"installed prometheus rolebinding: {:#?} in ns {:#?}",
|
|
||||||
rolebinding.metadata.name.unwrap(),
|
|
||||||
rolebinding.metadata.namespace.unwrap()
|
|
||||||
);
|
|
||||||
client
|
|
||||||
.apply(&sa, Some(&self.namespace.clone()))
|
|
||||||
.await
|
|
||||||
.map_err(|e| InterpretError::new(e.to_string()))?;
|
|
||||||
info!(
|
|
||||||
"installed prometheus service account: {:#?} in ns {:#?}",
|
|
||||||
sa.metadata.name.unwrap(),
|
|
||||||
sa.metadata.namespace.unwrap()
|
|
||||||
);
|
|
||||||
client
|
|
||||||
.apply(&prom, Some(&self.namespace.clone()))
|
|
||||||
.await
|
|
||||||
.map_err(|e| InterpretError::new(e.to_string()))?;
|
|
||||||
info!(
|
|
||||||
"installed prometheus: {:#?} in ns {:#?}",
|
|
||||||
&prom.metadata.name.clone().unwrap(),
|
|
||||||
&prom.metadata.namespace.clone().unwrap()
|
|
||||||
);
|
|
||||||
|
|
||||||
Ok(Outcome::success(format!(
|
|
||||||
"successfully deployed crd-prometheus {:#?}",
|
|
||||||
prom
|
|
||||||
)))
|
|
||||||
}
|
|
||||||
|
|
||||||
async fn install_alert_manager(
|
|
||||||
&self,
|
|
||||||
client: &Arc<K8sClient>,
|
|
||||||
) -> Result<Outcome, InterpretError> {
|
|
||||||
let am = Alertmanager {
|
|
||||||
metadata: ObjectMeta {
|
|
||||||
name: Some(self.namespace.clone()),
|
|
||||||
labels: Some(std::collections::BTreeMap::from([(
|
|
||||||
"alertmanagerConfig".to_string(),
|
|
||||||
"enabled".to_string(),
|
|
||||||
)])),
|
|
||||||
namespace: Some(self.namespace.clone()),
|
|
||||||
..Default::default()
|
|
||||||
},
|
|
||||||
spec: AlertmanagerSpec::default(),
|
|
||||||
};
|
|
||||||
client
|
|
||||||
.apply(&am, Some(&self.namespace.clone()))
|
|
||||||
.await
|
|
||||||
.map_err(|e| InterpretError::new(e.to_string()))?;
|
|
||||||
Ok(Outcome::success(format!(
|
|
||||||
"successfully deployed service monitor {:#?}",
|
|
||||||
am.metadata.name
|
|
||||||
)))
|
|
||||||
}
|
|
||||||
|
|
||||||
async fn install_monitor(
|
|
||||||
&self,
|
|
||||||
monitor: super::service_monitor::ServiceMonitor,
|
|
||||||
client: Arc<K8sClient>,
|
|
||||||
) -> Result<Outcome, InterpretError> {
|
|
||||||
debug!("service monitor: \n{:#?}", monitor.clone());
|
|
||||||
let namespace = self.namespace.clone();
|
|
||||||
client
|
|
||||||
.apply(&monitor, Some(&namespace))
|
|
||||||
.await
|
|
||||||
.map_err(|e| InterpretError::new(e.to_string()))?;
|
|
||||||
Ok(Outcome::success(format!(
|
|
||||||
"successfully deployed service monitor {:#?}",
|
|
||||||
monitor.metadata.name
|
|
||||||
)))
|
|
||||||
}
|
|
||||||
|
|
||||||
async fn install_rules(
|
|
||||||
&self,
|
|
||||||
rules: Vec<RuleGroup>,
|
|
||||||
client: Arc<K8sClient>,
|
|
||||||
) -> Result<Outcome, InterpretError> {
|
|
||||||
let prom_rule_spec = PrometheusRuleSpec { groups: rules };
|
|
||||||
|
|
||||||
let prom_rules = PrometheusRule {
|
|
||||||
metadata: ObjectMeta {
|
|
||||||
name: Some(self.namespace.clone()),
|
|
||||||
labels: Some(std::collections::BTreeMap::from([(
|
|
||||||
"alertmanagerConfig".to_string(),
|
|
||||||
"enabled".to_string(),
|
|
||||||
)])),
|
|
||||||
namespace: Some(self.namespace.clone()),
|
|
||||||
..Default::default()
|
|
||||||
},
|
|
||||||
spec: prom_rule_spec,
|
|
||||||
};
|
|
||||||
client
|
|
||||||
.apply(&prom_rules, Some(&self.namespace))
|
|
||||||
.await
|
|
||||||
.map_err(|e| InterpretError::new(e.to_string()))?;
|
|
||||||
Ok(Outcome::success(format!(
|
|
||||||
"successfully deployed service monitor {:#?}",
|
|
||||||
prom_rules.metadata.name
|
|
||||||
)))
|
|
||||||
}
|
|
||||||
}
|
|
||||||
@ -1,8 +1,5 @@
|
|||||||
pub mod alert_manager_config;
|
|
||||||
pub mod crd;
|
pub mod crd;
|
||||||
pub mod helm;
|
pub mod helm;
|
||||||
pub mod helm_prometheus_alert_score;
|
pub mod helm_prometheus_alert_score;
|
||||||
pub mod helm_prometheus_application_alerting;
|
|
||||||
pub mod prometheus;
|
pub mod prometheus;
|
||||||
pub mod service_monitor;
|
|
||||||
pub mod types;
|
pub mod types;
|
||||||
|
|||||||
@ -224,7 +224,7 @@ pub struct Selector {
|
|||||||
pub match_expressions: Vec<MatchExpression>,
|
pub match_expressions: Vec<MatchExpression>,
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Debug, Clone, Serialize)]
|
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||||
#[serde(rename_all = "camelCase")]
|
#[serde(rename_all = "camelCase")]
|
||||||
pub struct ServiceMonitor {
|
pub struct ServiceMonitor {
|
||||||
pub name: String,
|
pub name: String,
|
||||||
@ -268,7 +268,7 @@ pub struct ServiceMonitor {
|
|||||||
pub fallback_scrape_protocol: Option<String>,
|
pub fallback_scrape_protocol: Option<String>,
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Serialize, Deserialize, Debug, Clone, JsonSchema)]
|
#[derive(Serialize, Deserialize, Debug, Clone, JsonSchema, Default)]
|
||||||
#[serde(rename_all = "camelCase")]
|
#[serde(rename_all = "camelCase")]
|
||||||
pub struct NamespaceSelector {
|
pub struct NamespaceSelector {
|
||||||
/// Select all namespaces.
|
/// Select all namespaces.
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user