feat: added a monitoring stack that works with openshift/okd (#134)
* Okd needs to use the cluster observability operator in order to deploy namespaced prometheuses and alertmanagers * allow namespaced deployments of alertmanager and prometheuses as well as its associated rules, etc. Co-authored-by: Ian Letourneau <ian@noma.to> Reviewed-on: https://git.nationtech.io/NationTech/harmony/pulls/134 Co-authored-by: Willem <wrolleman@nationtech.io> Co-committed-by: Willem <wrolleman@nationtech.io>
This commit is contained in:
parent
ed70bfd236
commit
b42815f79c
15
Cargo.lock
generated
15
Cargo.lock
generated
@ -4631,6 +4631,21 @@ dependencies = [
|
|||||||
"subtle",
|
"subtle",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "rhob-application-monitoring"
|
||||||
|
version = "0.1.0"
|
||||||
|
dependencies = [
|
||||||
|
"base64 0.22.1",
|
||||||
|
"env_logger",
|
||||||
|
"harmony",
|
||||||
|
"harmony_cli",
|
||||||
|
"harmony_macros",
|
||||||
|
"harmony_types",
|
||||||
|
"log",
|
||||||
|
"tokio",
|
||||||
|
"url",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "ring"
|
name = "ring"
|
||||||
version = "0.17.14"
|
version = "0.17.14"
|
||||||
|
17
examples/rhob_application_monitoring/Cargo.toml
Normal file
17
examples/rhob_application_monitoring/Cargo.toml
Normal file
@ -0,0 +1,17 @@
|
|||||||
|
[package]
|
||||||
|
name = "rhob-application-monitoring"
|
||||||
|
edition = "2024"
|
||||||
|
version.workspace = true
|
||||||
|
readme.workspace = true
|
||||||
|
license.workspace = true
|
||||||
|
|
||||||
|
[dependencies]
|
||||||
|
harmony = { path = "../../harmony" }
|
||||||
|
harmony_cli = { path = "../../harmony_cli" }
|
||||||
|
harmony_types = { path = "../../harmony_types" }
|
||||||
|
harmony_macros = { path = "../../harmony_macros" }
|
||||||
|
tokio = { workspace = true }
|
||||||
|
log = { workspace = true }
|
||||||
|
env_logger = { workspace = true }
|
||||||
|
url = { workspace = true }
|
||||||
|
base64.workspace = true
|
50
examples/rhob_application_monitoring/src/main.rs
Normal file
50
examples/rhob_application_monitoring/src/main.rs
Normal file
@ -0,0 +1,50 @@
|
|||||||
|
use std::{path::PathBuf, sync::Arc};
|
||||||
|
|
||||||
|
use harmony::{
|
||||||
|
inventory::Inventory,
|
||||||
|
modules::{
|
||||||
|
application::{
|
||||||
|
ApplicationScore, RustWebFramework, RustWebapp,
|
||||||
|
features::rhob_monitoring::RHOBMonitoring,
|
||||||
|
},
|
||||||
|
monitoring::alert_channel::discord_alert_channel::DiscordWebhook,
|
||||||
|
},
|
||||||
|
topology::K8sAnywhereTopology,
|
||||||
|
};
|
||||||
|
use harmony_types::net::Url;
|
||||||
|
|
||||||
|
#[tokio::main]
|
||||||
|
async fn main() {
|
||||||
|
let application = Arc::new(RustWebapp {
|
||||||
|
name: "test-rhob-monitoring".to_string(),
|
||||||
|
domain: Url::Url(url::Url::parse("htps://some-fake-url").unwrap()),
|
||||||
|
project_root: PathBuf::from("./webapp"), // Relative from 'harmony-path' param
|
||||||
|
framework: Some(RustWebFramework::Leptos),
|
||||||
|
service_port: 3000,
|
||||||
|
});
|
||||||
|
|
||||||
|
let discord_receiver = DiscordWebhook {
|
||||||
|
name: "test-discord".to_string(),
|
||||||
|
url: Url::Url(url::Url::parse("https://discord.doesnt.exist.com").unwrap()),
|
||||||
|
};
|
||||||
|
|
||||||
|
let app = ApplicationScore {
|
||||||
|
features: vec![
|
||||||
|
Box::new(RHOBMonitoring {
|
||||||
|
application: application.clone(),
|
||||||
|
alert_receiver: vec![Box::new(discord_receiver)],
|
||||||
|
}),
|
||||||
|
// TODO add backups, multisite ha, etc
|
||||||
|
],
|
||||||
|
application,
|
||||||
|
};
|
||||||
|
|
||||||
|
harmony_cli::run(
|
||||||
|
Inventory::autoload(),
|
||||||
|
K8sAnywhereTopology::from_env(),
|
||||||
|
vec![Box::new(app)],
|
||||||
|
None,
|
||||||
|
)
|
||||||
|
.await
|
||||||
|
.unwrap();
|
||||||
|
}
|
@ -32,6 +32,7 @@ pub enum InterpretName {
|
|||||||
K8sPrometheusCrdAlerting,
|
K8sPrometheusCrdAlerting,
|
||||||
DiscoverInventoryAgent,
|
DiscoverInventoryAgent,
|
||||||
CephClusterHealth,
|
CephClusterHealth,
|
||||||
|
RHOBAlerting,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl std::fmt::Display for InterpretName {
|
impl std::fmt::Display for InterpretName {
|
||||||
@ -60,6 +61,7 @@ impl std::fmt::Display for InterpretName {
|
|||||||
InterpretName::K8sPrometheusCrdAlerting => f.write_str("K8sPrometheusCrdAlerting"),
|
InterpretName::K8sPrometheusCrdAlerting => f.write_str("K8sPrometheusCrdAlerting"),
|
||||||
InterpretName::DiscoverInventoryAgent => f.write_str("DiscoverInventoryAgent"),
|
InterpretName::DiscoverInventoryAgent => f.write_str("DiscoverInventoryAgent"),
|
||||||
InterpretName::CephClusterHealth => f.write_str("CephClusterHealth"),
|
InterpretName::CephClusterHealth => f.write_str("CephClusterHealth"),
|
||||||
|
InterpretName::RHOBAlerting => f.write_str("RHOBAlerting"),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -14,10 +14,11 @@ use crate::{
|
|||||||
monitoring::kube_prometheus::crd::{
|
monitoring::kube_prometheus::crd::{
|
||||||
crd_alertmanager_config::CRDPrometheus,
|
crd_alertmanager_config::CRDPrometheus,
|
||||||
prometheus_operator::prometheus_operator_helm_chart_score,
|
prometheus_operator::prometheus_operator_helm_chart_score,
|
||||||
|
rhob_alertmanager_config::RHOBObservability,
|
||||||
},
|
},
|
||||||
prometheus::{
|
prometheus::{
|
||||||
k8s_prometheus_alerting_score::K8sPrometheusCRDAlertingScore,
|
k8s_prometheus_alerting_score::K8sPrometheusCRDAlertingScore,
|
||||||
prometheus::PrometheusApplicationMonitoring,
|
prometheus::PrometheusApplicationMonitoring, rhob_alerting_score::RHOBAlertingScore,
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
score::Score,
|
score::Score,
|
||||||
@ -108,6 +109,43 @@ impl PrometheusApplicationMonitoring<CRDPrometheus> for K8sAnywhereTopology {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[async_trait]
|
||||||
|
impl PrometheusApplicationMonitoring<RHOBObservability> for K8sAnywhereTopology {
|
||||||
|
async fn install_prometheus(
|
||||||
|
&self,
|
||||||
|
sender: &RHOBObservability,
|
||||||
|
inventory: &Inventory,
|
||||||
|
receivers: Option<Vec<Box<dyn AlertReceiver<RHOBObservability>>>>,
|
||||||
|
) -> Result<PreparationOutcome, PreparationError> {
|
||||||
|
let po_result = self.ensure_cluster_observability_operator(sender).await?;
|
||||||
|
|
||||||
|
if po_result == PreparationOutcome::Noop {
|
||||||
|
debug!("Skipping Prometheus CR installation due to missing operator.");
|
||||||
|
return Ok(po_result);
|
||||||
|
}
|
||||||
|
|
||||||
|
let result = self
|
||||||
|
.get_cluster_observability_operator_prometheus_application_score(
|
||||||
|
sender.clone(),
|
||||||
|
receivers,
|
||||||
|
)
|
||||||
|
.await
|
||||||
|
.interpret(inventory, self)
|
||||||
|
.await;
|
||||||
|
|
||||||
|
match result {
|
||||||
|
Ok(outcome) => match outcome.status {
|
||||||
|
InterpretStatus::SUCCESS => Ok(PreparationOutcome::Success {
|
||||||
|
details: outcome.message,
|
||||||
|
}),
|
||||||
|
InterpretStatus::NOOP => Ok(PreparationOutcome::Noop),
|
||||||
|
_ => Err(PreparationError::new(outcome.message)),
|
||||||
|
},
|
||||||
|
Err(err) => Err(PreparationError::new(err.to_string())),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
impl Serialize for K8sAnywhereTopology {
|
impl Serialize for K8sAnywhereTopology {
|
||||||
fn serialize<S>(&self, _serializer: S) -> Result<S::Ok, S::Error>
|
fn serialize<S>(&self, _serializer: S) -> Result<S::Ok, S::Error>
|
||||||
where
|
where
|
||||||
@ -134,6 +172,19 @@ impl K8sAnywhereTopology {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
async fn get_cluster_observability_operator_prometheus_application_score(
|
||||||
|
&self,
|
||||||
|
sender: RHOBObservability,
|
||||||
|
receivers: Option<Vec<Box<dyn AlertReceiver<RHOBObservability>>>>,
|
||||||
|
) -> RHOBAlertingScore {
|
||||||
|
RHOBAlertingScore {
|
||||||
|
sender,
|
||||||
|
receivers: receivers.unwrap_or_default(),
|
||||||
|
service_monitors: vec![],
|
||||||
|
prometheus_rules: vec![],
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
async fn get_k8s_prometheus_application_score(
|
async fn get_k8s_prometheus_application_score(
|
||||||
&self,
|
&self,
|
||||||
sender: CRDPrometheus,
|
sender: CRDPrometheus,
|
||||||
@ -286,6 +337,60 @@ impl K8sAnywhereTopology {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
async fn ensure_cluster_observability_operator(
|
||||||
|
&self,
|
||||||
|
sender: &RHOBObservability,
|
||||||
|
) -> Result<PreparationOutcome, PreparationError> {
|
||||||
|
let status = Command::new("sh")
|
||||||
|
.args(["-c", "kubectl get crd -A | grep -i rhobs"])
|
||||||
|
.status()
|
||||||
|
.map_err(|e| PreparationError::new(format!("could not connect to cluster: {}", e)))?;
|
||||||
|
|
||||||
|
if !status.success() {
|
||||||
|
if let Some(Some(k8s_state)) = self.k8s_state.get() {
|
||||||
|
match k8s_state.source {
|
||||||
|
K8sSource::LocalK3d => {
|
||||||
|
debug!("installing cluster observability operator");
|
||||||
|
todo!();
|
||||||
|
let op_score =
|
||||||
|
prometheus_operator_helm_chart_score(sender.namespace.clone());
|
||||||
|
let result = op_score.interpret(&Inventory::empty(), self).await;
|
||||||
|
|
||||||
|
return match result {
|
||||||
|
Ok(outcome) => match outcome.status {
|
||||||
|
InterpretStatus::SUCCESS => Ok(PreparationOutcome::Success {
|
||||||
|
details: "installed cluster observability operator".into(),
|
||||||
|
}),
|
||||||
|
InterpretStatus::NOOP => Ok(PreparationOutcome::Noop),
|
||||||
|
_ => Err(PreparationError::new(
|
||||||
|
"failed to install cluster observability operator (unknown error)".into(),
|
||||||
|
)),
|
||||||
|
},
|
||||||
|
Err(err) => Err(PreparationError::new(err.to_string())),
|
||||||
|
};
|
||||||
|
}
|
||||||
|
K8sSource::Kubeconfig => {
|
||||||
|
debug!(
|
||||||
|
"unable to install cluster observability operator, contact cluster admin"
|
||||||
|
);
|
||||||
|
return Ok(PreparationOutcome::Noop);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
warn!(
|
||||||
|
"Unable to detect k8s_state. Skipping Cluster Observability Operator install."
|
||||||
|
);
|
||||||
|
return Ok(PreparationOutcome::Noop);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
debug!("Cluster Observability Operator is already present, skipping install");
|
||||||
|
|
||||||
|
Ok(PreparationOutcome::Success {
|
||||||
|
details: "cluster observability operator present in cluster".into(),
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
async fn ensure_prometheus_operator(
|
async fn ensure_prometheus_operator(
|
||||||
&self,
|
&self,
|
||||||
sender: &CRDPrometheus,
|
sender: &CRDPrometheus,
|
||||||
|
@ -1,4 +1,5 @@
|
|||||||
mod endpoint;
|
mod endpoint;
|
||||||
|
pub mod rhob_monitoring;
|
||||||
pub use endpoint::*;
|
pub use endpoint::*;
|
||||||
|
|
||||||
mod monitoring;
|
mod monitoring;
|
||||||
|
109
harmony/src/modules/application/features/rhob_monitoring.rs
Normal file
109
harmony/src/modules/application/features/rhob_monitoring.rs
Normal file
@ -0,0 +1,109 @@
|
|||||||
|
use std::sync::Arc;
|
||||||
|
|
||||||
|
use crate::modules::application::{Application, ApplicationFeature};
|
||||||
|
use crate::modules::monitoring::application_monitoring::application_monitoring_score::ApplicationMonitoringScore;
|
||||||
|
use crate::modules::monitoring::application_monitoring::rhobs_application_monitoring_score::ApplicationRHOBMonitoringScore;
|
||||||
|
|
||||||
|
use crate::modules::monitoring::kube_prometheus::crd::rhob_alertmanager_config::RHOBObservability;
|
||||||
|
use crate::topology::MultiTargetTopology;
|
||||||
|
use crate::{
|
||||||
|
inventory::Inventory,
|
||||||
|
modules::monitoring::{
|
||||||
|
alert_channel::webhook_receiver::WebhookReceiver, ntfy::ntfy::NtfyScore,
|
||||||
|
},
|
||||||
|
score::Score,
|
||||||
|
topology::{HelmCommand, K8sclient, Topology, tenant::TenantManager},
|
||||||
|
};
|
||||||
|
use crate::{
|
||||||
|
modules::prometheus::prometheus::PrometheusApplicationMonitoring,
|
||||||
|
topology::oberservability::monitoring::AlertReceiver,
|
||||||
|
};
|
||||||
|
use async_trait::async_trait;
|
||||||
|
use base64::{Engine as _, engine::general_purpose};
|
||||||
|
use harmony_types::net::Url;
|
||||||
|
use log::{debug, info};
|
||||||
|
|
||||||
|
#[derive(Debug, Clone)]
|
||||||
|
pub struct RHOBMonitoring {
|
||||||
|
pub application: Arc<dyn Application>,
|
||||||
|
pub alert_receiver: Vec<Box<dyn AlertReceiver<RHOBObservability>>>,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[async_trait]
|
||||||
|
impl<
|
||||||
|
T: Topology
|
||||||
|
+ HelmCommand
|
||||||
|
+ 'static
|
||||||
|
+ TenantManager
|
||||||
|
+ K8sclient
|
||||||
|
+ MultiTargetTopology
|
||||||
|
+ std::fmt::Debug
|
||||||
|
+ PrometheusApplicationMonitoring<RHOBObservability>,
|
||||||
|
> ApplicationFeature<T> for RHOBMonitoring
|
||||||
|
{
|
||||||
|
async fn ensure_installed(&self, topology: &T) -> Result<(), String> {
|
||||||
|
info!("Ensuring monitoring is available for application");
|
||||||
|
let namespace = topology
|
||||||
|
.get_tenant_config()
|
||||||
|
.await
|
||||||
|
.map(|ns| ns.name.clone())
|
||||||
|
.unwrap_or_else(|| self.application.name());
|
||||||
|
|
||||||
|
let mut alerting_score = ApplicationRHOBMonitoringScore {
|
||||||
|
sender: RHOBObservability {
|
||||||
|
namespace: namespace.clone(),
|
||||||
|
client: topology.k8s_client().await.unwrap(),
|
||||||
|
},
|
||||||
|
application: self.application.clone(),
|
||||||
|
receivers: self.alert_receiver.clone(),
|
||||||
|
};
|
||||||
|
let ntfy = NtfyScore {
|
||||||
|
namespace: namespace.clone(),
|
||||||
|
host: "ntfy.harmonydemo.apps.ncd0.harmony.mcd".to_string(),
|
||||||
|
};
|
||||||
|
ntfy.interpret(&Inventory::empty(), topology)
|
||||||
|
.await
|
||||||
|
.map_err(|e| e.to_string())?;
|
||||||
|
|
||||||
|
let ntfy_default_auth_username = "harmony";
|
||||||
|
let ntfy_default_auth_password = "harmony";
|
||||||
|
let ntfy_default_auth_header = format!(
|
||||||
|
"Basic {}",
|
||||||
|
general_purpose::STANDARD.encode(format!(
|
||||||
|
"{ntfy_default_auth_username}:{ntfy_default_auth_password}"
|
||||||
|
))
|
||||||
|
);
|
||||||
|
|
||||||
|
debug!("ntfy_default_auth_header: {ntfy_default_auth_header}");
|
||||||
|
|
||||||
|
let ntfy_default_auth_param = general_purpose::STANDARD
|
||||||
|
.encode(ntfy_default_auth_header)
|
||||||
|
.replace("=", "");
|
||||||
|
|
||||||
|
debug!("ntfy_default_auth_param: {ntfy_default_auth_param}");
|
||||||
|
|
||||||
|
let ntfy_receiver = WebhookReceiver {
|
||||||
|
name: "ntfy-webhook".to_string(),
|
||||||
|
url: Url::Url(
|
||||||
|
url::Url::parse(
|
||||||
|
format!(
|
||||||
|
"http://ntfy.{}.svc.cluster.local/rust-web-app?auth={ntfy_default_auth_param}",
|
||||||
|
namespace.clone()
|
||||||
|
)
|
||||||
|
.as_str(),
|
||||||
|
)
|
||||||
|
.unwrap(),
|
||||||
|
),
|
||||||
|
};
|
||||||
|
|
||||||
|
alerting_score.receivers.push(Box::new(ntfy_receiver));
|
||||||
|
alerting_score
|
||||||
|
.interpret(&Inventory::empty(), topology)
|
||||||
|
.await
|
||||||
|
.map_err(|e| e.to_string())?;
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
fn name(&self) -> String {
|
||||||
|
"Monitoring".to_string()
|
||||||
|
}
|
||||||
|
}
|
@ -4,6 +4,7 @@ use std::collections::BTreeMap;
|
|||||||
use async_trait::async_trait;
|
use async_trait::async_trait;
|
||||||
use k8s_openapi::api::core::v1::Secret;
|
use k8s_openapi::api::core::v1::Secret;
|
||||||
use kube::api::ObjectMeta;
|
use kube::api::ObjectMeta;
|
||||||
|
use log::debug;
|
||||||
use serde::Serialize;
|
use serde::Serialize;
|
||||||
use serde_json::json;
|
use serde_json::json;
|
||||||
use serde_yaml::{Mapping, Value};
|
use serde_yaml::{Mapping, Value};
|
||||||
@ -11,6 +12,7 @@ use serde_yaml::{Mapping, Value};
|
|||||||
use crate::modules::monitoring::kube_prometheus::crd::crd_alertmanager_config::{
|
use crate::modules::monitoring::kube_prometheus::crd::crd_alertmanager_config::{
|
||||||
AlertmanagerConfig, AlertmanagerConfigSpec, CRDPrometheus,
|
AlertmanagerConfig, AlertmanagerConfigSpec, CRDPrometheus,
|
||||||
};
|
};
|
||||||
|
use crate::modules::monitoring::kube_prometheus::crd::rhob_alertmanager_config::RHOBObservability;
|
||||||
use crate::{
|
use crate::{
|
||||||
interpret::{InterpretError, Outcome},
|
interpret::{InterpretError, Outcome},
|
||||||
modules::monitoring::{
|
modules::monitoring::{
|
||||||
@ -30,6 +32,71 @@ pub struct DiscordWebhook {
|
|||||||
pub url: Url,
|
pub url: Url,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[async_trait]
|
||||||
|
impl AlertReceiver<RHOBObservability> for DiscordWebhook {
|
||||||
|
async fn install(&self, sender: &RHOBObservability) -> Result<Outcome, InterpretError> {
|
||||||
|
let spec = crate::modules::monitoring::kube_prometheus::crd::rhob_alertmanager_config::AlertmanagerConfigSpec {
|
||||||
|
data: json!({
|
||||||
|
"route": {
|
||||||
|
"receiver": self.name,
|
||||||
|
},
|
||||||
|
"receivers": [
|
||||||
|
{
|
||||||
|
"name": self.name,
|
||||||
|
"webhookConfigs": [
|
||||||
|
{
|
||||||
|
"url": self.url,
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}),
|
||||||
|
};
|
||||||
|
|
||||||
|
let alertmanager_configs = crate::modules::monitoring::kube_prometheus::crd::rhob_alertmanager_config::AlertmanagerConfig {
|
||||||
|
metadata: ObjectMeta {
|
||||||
|
name: Some(self.name.clone()),
|
||||||
|
labels: Some(std::collections::BTreeMap::from([(
|
||||||
|
"alertmanagerConfig".to_string(),
|
||||||
|
"enabled".to_string(),
|
||||||
|
)])),
|
||||||
|
namespace: Some(sender.namespace.clone()),
|
||||||
|
..Default::default()
|
||||||
|
},
|
||||||
|
spec,
|
||||||
|
};
|
||||||
|
debug!(
|
||||||
|
"alertmanager_configs yaml:\n{:#?}",
|
||||||
|
serde_yaml::to_string(&alertmanager_configs)
|
||||||
|
);
|
||||||
|
debug!(
|
||||||
|
"alert manager configs: \n{:#?}",
|
||||||
|
alertmanager_configs.clone()
|
||||||
|
);
|
||||||
|
|
||||||
|
sender
|
||||||
|
.client
|
||||||
|
.apply(&alertmanager_configs, Some(&sender.namespace))
|
||||||
|
.await?;
|
||||||
|
Ok(Outcome::success(format!(
|
||||||
|
"installed rhob-alertmanagerconfigs for {}",
|
||||||
|
self.name
|
||||||
|
)))
|
||||||
|
}
|
||||||
|
|
||||||
|
fn name(&self) -> String {
|
||||||
|
"webhook-receiver".to_string()
|
||||||
|
}
|
||||||
|
|
||||||
|
fn clone_box(&self) -> Box<dyn AlertReceiver<RHOBObservability>> {
|
||||||
|
Box::new(self.clone())
|
||||||
|
}
|
||||||
|
|
||||||
|
fn as_any(&self) -> &dyn Any {
|
||||||
|
self
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
#[async_trait]
|
#[async_trait]
|
||||||
impl AlertReceiver<CRDPrometheus> for DiscordWebhook {
|
impl AlertReceiver<CRDPrometheus> for DiscordWebhook {
|
||||||
async fn install(&self, sender: &CRDPrometheus) -> Result<Outcome, InterpretError> {
|
async fn install(&self, sender: &CRDPrometheus) -> Result<Outcome, InterpretError> {
|
||||||
|
@ -11,8 +11,8 @@ use crate::{
|
|||||||
interpret::{InterpretError, Outcome},
|
interpret::{InterpretError, Outcome},
|
||||||
modules::monitoring::{
|
modules::monitoring::{
|
||||||
kube_prometheus::{
|
kube_prometheus::{
|
||||||
crd::crd_alertmanager_config::{
|
crd::{
|
||||||
AlertmanagerConfig, AlertmanagerConfigSpec, CRDPrometheus,
|
crd_alertmanager_config::CRDPrometheus, rhob_alertmanager_config::RHOBObservability,
|
||||||
},
|
},
|
||||||
prometheus::{KubePrometheus, KubePrometheusReceiver},
|
prometheus::{KubePrometheus, KubePrometheusReceiver},
|
||||||
types::{AlertChannelConfig, AlertManagerChannelConfig},
|
types::{AlertChannelConfig, AlertManagerChannelConfig},
|
||||||
@ -30,9 +30,9 @@ pub struct WebhookReceiver {
|
|||||||
}
|
}
|
||||||
|
|
||||||
#[async_trait]
|
#[async_trait]
|
||||||
impl AlertReceiver<CRDPrometheus> for WebhookReceiver {
|
impl AlertReceiver<RHOBObservability> for WebhookReceiver {
|
||||||
async fn install(&self, sender: &CRDPrometheus) -> Result<Outcome, InterpretError> {
|
async fn install(&self, sender: &RHOBObservability) -> Result<Outcome, InterpretError> {
|
||||||
let spec = AlertmanagerConfigSpec {
|
let spec = crate::modules::monitoring::kube_prometheus::crd::rhob_alertmanager_config::AlertmanagerConfigSpec {
|
||||||
data: json!({
|
data: json!({
|
||||||
"route": {
|
"route": {
|
||||||
"receiver": self.name,
|
"receiver": self.name,
|
||||||
@ -50,7 +50,68 @@ impl AlertReceiver<CRDPrometheus> for WebhookReceiver {
|
|||||||
}),
|
}),
|
||||||
};
|
};
|
||||||
|
|
||||||
let alertmanager_configs = AlertmanagerConfig {
|
let alertmanager_configs = crate::modules::monitoring::kube_prometheus::crd::rhob_alertmanager_config::AlertmanagerConfig {
|
||||||
|
metadata: ObjectMeta {
|
||||||
|
name: Some(self.name.clone()),
|
||||||
|
labels: Some(std::collections::BTreeMap::from([(
|
||||||
|
"alertmanagerConfig".to_string(),
|
||||||
|
"enabled".to_string(),
|
||||||
|
)])),
|
||||||
|
namespace: Some(sender.namespace.clone()),
|
||||||
|
..Default::default()
|
||||||
|
},
|
||||||
|
spec,
|
||||||
|
};
|
||||||
|
debug!(
|
||||||
|
"alert manager configs: \n{:#?}",
|
||||||
|
alertmanager_configs.clone()
|
||||||
|
);
|
||||||
|
|
||||||
|
sender
|
||||||
|
.client
|
||||||
|
.apply(&alertmanager_configs, Some(&sender.namespace))
|
||||||
|
.await?;
|
||||||
|
Ok(Outcome::success(format!(
|
||||||
|
"installed rhob-alertmanagerconfigs for {}",
|
||||||
|
self.name
|
||||||
|
)))
|
||||||
|
}
|
||||||
|
|
||||||
|
fn name(&self) -> String {
|
||||||
|
"webhook-receiver".to_string()
|
||||||
|
}
|
||||||
|
|
||||||
|
fn clone_box(&self) -> Box<dyn AlertReceiver<RHOBObservability>> {
|
||||||
|
Box::new(self.clone())
|
||||||
|
}
|
||||||
|
|
||||||
|
fn as_any(&self) -> &dyn Any {
|
||||||
|
self
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[async_trait]
|
||||||
|
impl AlertReceiver<CRDPrometheus> for WebhookReceiver {
|
||||||
|
async fn install(&self, sender: &CRDPrometheus) -> Result<Outcome, InterpretError> {
|
||||||
|
let spec = crate::modules::monitoring::kube_prometheus::crd::crd_alertmanager_config::AlertmanagerConfigSpec {
|
||||||
|
data: json!({
|
||||||
|
"route": {
|
||||||
|
"receiver": self.name,
|
||||||
|
},
|
||||||
|
"receivers": [
|
||||||
|
{
|
||||||
|
"name": self.name,
|
||||||
|
"webhookConfigs": [
|
||||||
|
{
|
||||||
|
"url": self.url,
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}),
|
||||||
|
};
|
||||||
|
|
||||||
|
let alertmanager_configs = crate::modules::monitoring::kube_prometheus::crd::crd_alertmanager_config::AlertmanagerConfig {
|
||||||
metadata: ObjectMeta {
|
metadata: ObjectMeta {
|
||||||
name: Some(self.name.clone()),
|
name: Some(self.name.clone()),
|
||||||
labels: Some(std::collections::BTreeMap::from([(
|
labels: Some(std::collections::BTreeMap::from([(
|
||||||
@ -115,6 +176,7 @@ impl PrometheusReceiver for WebhookReceiver {
|
|||||||
self.get_config().await
|
self.get_config().await
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#[async_trait]
|
#[async_trait]
|
||||||
impl AlertReceiver<KubePrometheus> for WebhookReceiver {
|
impl AlertReceiver<KubePrometheus> for WebhookReceiver {
|
||||||
async fn install(&self, sender: &KubePrometheus) -> Result<Outcome, InterpretError> {
|
async fn install(&self, sender: &KubePrometheus) -> Result<Outcome, InterpretError> {
|
||||||
|
@ -1 +1,2 @@
|
|||||||
pub mod application_monitoring_score;
|
pub mod application_monitoring_score;
|
||||||
|
pub mod rhobs_application_monitoring_score;
|
||||||
|
@ -0,0 +1,94 @@
|
|||||||
|
use std::sync::Arc;
|
||||||
|
|
||||||
|
use async_trait::async_trait;
|
||||||
|
use serde::Serialize;
|
||||||
|
|
||||||
|
use crate::{
|
||||||
|
data::Version,
|
||||||
|
interpret::{Interpret, InterpretError, InterpretName, InterpretStatus, Outcome},
|
||||||
|
inventory::Inventory,
|
||||||
|
modules::{
|
||||||
|
application::Application,
|
||||||
|
monitoring::kube_prometheus::crd::{
|
||||||
|
crd_alertmanager_config::CRDPrometheus, rhob_alertmanager_config::RHOBObservability,
|
||||||
|
},
|
||||||
|
prometheus::prometheus::PrometheusApplicationMonitoring,
|
||||||
|
},
|
||||||
|
score::Score,
|
||||||
|
topology::{PreparationOutcome, Topology, oberservability::monitoring::AlertReceiver},
|
||||||
|
};
|
||||||
|
use harmony_types::id::Id;
|
||||||
|
|
||||||
|
#[derive(Debug, Clone, Serialize)]
|
||||||
|
pub struct ApplicationRHOBMonitoringScore {
|
||||||
|
pub sender: RHOBObservability,
|
||||||
|
pub application: Arc<dyn Application>,
|
||||||
|
pub receivers: Vec<Box<dyn AlertReceiver<RHOBObservability>>>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<T: Topology + PrometheusApplicationMonitoring<RHOBObservability>> Score<T>
|
||||||
|
for ApplicationRHOBMonitoringScore
|
||||||
|
{
|
||||||
|
fn create_interpret(&self) -> Box<dyn Interpret<T>> {
|
||||||
|
Box::new(ApplicationRHOBMonitoringInterpret {
|
||||||
|
score: self.clone(),
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
fn name(&self) -> String {
|
||||||
|
format!(
|
||||||
|
"{} monitoring [ApplicationRHOBMonitoringScore]",
|
||||||
|
self.application.name()
|
||||||
|
)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Debug)]
|
||||||
|
pub struct ApplicationRHOBMonitoringInterpret {
|
||||||
|
score: ApplicationRHOBMonitoringScore,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[async_trait]
|
||||||
|
impl<T: Topology + PrometheusApplicationMonitoring<RHOBObservability>> Interpret<T>
|
||||||
|
for ApplicationRHOBMonitoringInterpret
|
||||||
|
{
|
||||||
|
async fn execute(
|
||||||
|
&self,
|
||||||
|
inventory: &Inventory,
|
||||||
|
topology: &T,
|
||||||
|
) -> Result<Outcome, InterpretError> {
|
||||||
|
let result = topology
|
||||||
|
.install_prometheus(
|
||||||
|
&self.score.sender,
|
||||||
|
inventory,
|
||||||
|
Some(self.score.receivers.clone()),
|
||||||
|
)
|
||||||
|
.await;
|
||||||
|
|
||||||
|
match result {
|
||||||
|
Ok(outcome) => match outcome {
|
||||||
|
PreparationOutcome::Success { details: _ } => {
|
||||||
|
Ok(Outcome::success("Prometheus installed".into()))
|
||||||
|
}
|
||||||
|
PreparationOutcome::Noop => Ok(Outcome::noop()),
|
||||||
|
},
|
||||||
|
Err(err) => Err(InterpretError::from(err)),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn get_name(&self) -> InterpretName {
|
||||||
|
InterpretName::ApplicationMonitoring
|
||||||
|
}
|
||||||
|
|
||||||
|
fn get_version(&self) -> Version {
|
||||||
|
todo!()
|
||||||
|
}
|
||||||
|
|
||||||
|
fn get_status(&self) -> InterpretStatus {
|
||||||
|
todo!()
|
||||||
|
}
|
||||||
|
|
||||||
|
fn get_children(&self) -> Vec<Id> {
|
||||||
|
todo!()
|
||||||
|
}
|
||||||
|
}
|
@ -7,5 +7,15 @@ pub mod crd_prometheuses;
|
|||||||
pub mod grafana_default_dashboard;
|
pub mod grafana_default_dashboard;
|
||||||
pub mod grafana_operator;
|
pub mod grafana_operator;
|
||||||
pub mod prometheus_operator;
|
pub mod prometheus_operator;
|
||||||
|
pub mod rhob_alertmanager_config;
|
||||||
|
pub mod rhob_alertmanagers;
|
||||||
|
pub mod rhob_cluster_observability_operator;
|
||||||
|
pub mod rhob_default_rules;
|
||||||
|
pub mod rhob_grafana;
|
||||||
|
pub mod rhob_monitoring_stack;
|
||||||
|
pub mod rhob_prometheus_rules;
|
||||||
|
pub mod rhob_prometheuses;
|
||||||
|
pub mod rhob_role;
|
||||||
|
pub mod rhob_service_monitor;
|
||||||
pub mod role;
|
pub mod role;
|
||||||
pub mod service_monitor;
|
pub mod service_monitor;
|
||||||
|
@ -0,0 +1,50 @@
|
|||||||
|
use std::sync::Arc;
|
||||||
|
|
||||||
|
use kube::CustomResource;
|
||||||
|
use schemars::JsonSchema;
|
||||||
|
use serde::{Deserialize, Serialize};
|
||||||
|
|
||||||
|
use crate::topology::{
|
||||||
|
k8s::K8sClient,
|
||||||
|
oberservability::monitoring::{AlertReceiver, AlertSender},
|
||||||
|
};
|
||||||
|
|
||||||
|
#[derive(CustomResource, Serialize, Deserialize, Debug, Clone, JsonSchema)]
|
||||||
|
#[kube(
|
||||||
|
group = "monitoring.rhobs",
|
||||||
|
version = "v1alpha1",
|
||||||
|
kind = "AlertmanagerConfig",
|
||||||
|
plural = "alertmanagerconfigs",
|
||||||
|
namespaced
|
||||||
|
)]
|
||||||
|
pub struct AlertmanagerConfigSpec {
|
||||||
|
#[serde(flatten)]
|
||||||
|
pub data: serde_json::Value,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Debug, Clone, Serialize)]
|
||||||
|
pub struct RHOBObservability {
|
||||||
|
pub namespace: String,
|
||||||
|
pub client: Arc<K8sClient>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl AlertSender for RHOBObservability {
|
||||||
|
fn name(&self) -> String {
|
||||||
|
"RHOBAlertManager".to_string()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Clone for Box<dyn AlertReceiver<RHOBObservability>> {
|
||||||
|
fn clone(&self) -> Self {
|
||||||
|
self.clone_box()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Serialize for Box<dyn AlertReceiver<RHOBObservability>> {
|
||||||
|
fn serialize<S>(&self, _serializer: S) -> Result<S::Ok, S::Error>
|
||||||
|
where
|
||||||
|
S: serde::Serializer,
|
||||||
|
{
|
||||||
|
todo!()
|
||||||
|
}
|
||||||
|
}
|
@ -0,0 +1,52 @@
|
|||||||
|
use kube::CustomResource;
|
||||||
|
use schemars::JsonSchema;
|
||||||
|
use serde::{Deserialize, Serialize};
|
||||||
|
|
||||||
|
use super::crd_prometheuses::LabelSelector;
|
||||||
|
|
||||||
|
/// Rust CRD for `Alertmanager` from Prometheus Operator
|
||||||
|
#[derive(CustomResource, Serialize, Deserialize, Debug, Clone, JsonSchema)]
|
||||||
|
#[kube(
|
||||||
|
group = "monitoring.rhobs",
|
||||||
|
version = "v1",
|
||||||
|
kind = "Alertmanager",
|
||||||
|
plural = "alertmanagers",
|
||||||
|
namespaced
|
||||||
|
)]
|
||||||
|
#[serde(rename_all = "camelCase")]
|
||||||
|
pub struct AlertmanagerSpec {
|
||||||
|
/// Number of replicas for HA
|
||||||
|
pub replicas: i32,
|
||||||
|
|
||||||
|
/// Selectors for AlertmanagerConfig CRDs
|
||||||
|
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||||||
|
pub alertmanager_config_selector: Option<LabelSelector>,
|
||||||
|
|
||||||
|
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||||||
|
pub alertmanager_config_namespace_selector: Option<LabelSelector>,
|
||||||
|
|
||||||
|
/// Optional pod template metadata (annotations, labels)
|
||||||
|
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||||||
|
pub pod_metadata: Option<LabelSelector>,
|
||||||
|
|
||||||
|
/// Optional topology spread settings
|
||||||
|
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||||||
|
pub version: Option<String>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Default for AlertmanagerSpec {
|
||||||
|
fn default() -> Self {
|
||||||
|
AlertmanagerSpec {
|
||||||
|
replicas: 1,
|
||||||
|
|
||||||
|
// Match all AlertmanagerConfigs in the same namespace
|
||||||
|
alertmanager_config_namespace_selector: None,
|
||||||
|
|
||||||
|
// Empty selector matches all AlertmanagerConfigs in that namespace
|
||||||
|
alertmanager_config_selector: Some(LabelSelector::default()),
|
||||||
|
|
||||||
|
pod_metadata: None,
|
||||||
|
version: None,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
@ -0,0 +1,22 @@
|
|||||||
|
use std::str::FromStr;
|
||||||
|
|
||||||
|
use non_blank_string_rs::NonBlankString;
|
||||||
|
|
||||||
|
use crate::modules::helm::chart::HelmChartScore;
|
||||||
|
//TODO package chart or something for COO okd
|
||||||
|
pub fn rhob_cluster_observability_operator() -> HelmChartScore {
|
||||||
|
HelmChartScore {
|
||||||
|
namespace: None,
|
||||||
|
release_name: NonBlankString::from_str("").unwrap(),
|
||||||
|
chart_name: NonBlankString::from_str(
|
||||||
|
"oci://hub.nationtech.io/harmony/nt-prometheus-operator",
|
||||||
|
)
|
||||||
|
.unwrap(),
|
||||||
|
chart_version: None,
|
||||||
|
values_overrides: None,
|
||||||
|
values_yaml: None,
|
||||||
|
create_namespace: true,
|
||||||
|
install_only: true,
|
||||||
|
repository: None,
|
||||||
|
}
|
||||||
|
}
|
@ -0,0 +1,26 @@
|
|||||||
|
use crate::modules::{
|
||||||
|
monitoring::kube_prometheus::crd::rhob_prometheus_rules::Rule,
|
||||||
|
prometheus::alerts::k8s::{
|
||||||
|
deployment::alert_deployment_unavailable,
|
||||||
|
pod::{alert_container_restarting, alert_pod_not_ready, pod_failed},
|
||||||
|
pvc::high_pvc_fill_rate_over_two_days,
|
||||||
|
service::alert_service_down,
|
||||||
|
},
|
||||||
|
};
|
||||||
|
|
||||||
|
pub fn build_default_application_rules() -> Vec<Rule> {
|
||||||
|
let pod_failed: Rule = pod_failed().into();
|
||||||
|
let container_restarting: Rule = alert_container_restarting().into();
|
||||||
|
let pod_not_ready: Rule = alert_pod_not_ready().into();
|
||||||
|
let service_down: Rule = alert_service_down().into();
|
||||||
|
let deployment_unavailable: Rule = alert_deployment_unavailable().into();
|
||||||
|
let high_pvc_fill_rate: Rule = high_pvc_fill_rate_over_two_days().into();
|
||||||
|
vec![
|
||||||
|
pod_failed,
|
||||||
|
container_restarting,
|
||||||
|
pod_not_ready,
|
||||||
|
service_down,
|
||||||
|
deployment_unavailable,
|
||||||
|
high_pvc_fill_rate,
|
||||||
|
]
|
||||||
|
}
|
@ -0,0 +1,153 @@
|
|||||||
|
use std::collections::BTreeMap;
|
||||||
|
|
||||||
|
use kube::CustomResource;
|
||||||
|
use schemars::JsonSchema;
|
||||||
|
use serde::{Deserialize, Serialize};
|
||||||
|
|
||||||
|
use crate::modules::monitoring::kube_prometheus::crd::rhob_prometheuses::LabelSelector;
|
||||||
|
|
||||||
|
#[derive(CustomResource, Serialize, Deserialize, Debug, Clone, JsonSchema)]
|
||||||
|
#[kube(
|
||||||
|
group = "grafana.integreatly.org",
|
||||||
|
version = "v1beta1",
|
||||||
|
kind = "Grafana",
|
||||||
|
plural = "grafanas",
|
||||||
|
namespaced
|
||||||
|
)]
|
||||||
|
#[serde(rename_all = "camelCase")]
|
||||||
|
pub struct GrafanaSpec {
|
||||||
|
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||||||
|
pub config: Option<GrafanaConfig>,
|
||||||
|
|
||||||
|
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||||||
|
pub admin_user: Option<String>,
|
||||||
|
|
||||||
|
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||||||
|
pub admin_password: Option<String>,
|
||||||
|
|
||||||
|
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||||||
|
pub ingress: Option<GrafanaIngress>,
|
||||||
|
|
||||||
|
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||||||
|
pub persistence: Option<GrafanaPersistence>,
|
||||||
|
|
||||||
|
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||||||
|
pub resources: Option<ResourceRequirements>,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Serialize, Deserialize, Debug, Clone, JsonSchema)]
|
||||||
|
#[serde(rename_all = "camelCase")]
|
||||||
|
pub struct GrafanaConfig {
|
||||||
|
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||||||
|
pub log: Option<GrafanaLogConfig>,
|
||||||
|
|
||||||
|
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||||||
|
pub security: Option<GrafanaSecurityConfig>,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Serialize, Deserialize, Debug, Clone, JsonSchema)]
|
||||||
|
#[serde(rename_all = "camelCase")]
|
||||||
|
pub struct GrafanaLogConfig {
|
||||||
|
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||||||
|
pub mode: Option<String>,
|
||||||
|
|
||||||
|
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||||||
|
pub level: Option<String>,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Serialize, Deserialize, Debug, Clone, JsonSchema)]
|
||||||
|
#[serde(rename_all = "camelCase")]
|
||||||
|
pub struct GrafanaSecurityConfig {
|
||||||
|
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||||||
|
pub admin_user: Option<String>,
|
||||||
|
|
||||||
|
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||||||
|
pub admin_password: Option<String>,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Serialize, Deserialize, Debug, Clone, JsonSchema)]
|
||||||
|
#[serde(rename_all = "camelCase")]
|
||||||
|
pub struct GrafanaIngress {
|
||||||
|
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||||||
|
pub enabled: Option<bool>,
|
||||||
|
|
||||||
|
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||||||
|
pub hosts: Option<Vec<String>>,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Serialize, Deserialize, Debug, Clone, JsonSchema)]
|
||||||
|
#[serde(rename_all = "camelCase")]
|
||||||
|
pub struct GrafanaPersistence {
|
||||||
|
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||||||
|
pub enabled: Option<bool>,
|
||||||
|
|
||||||
|
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||||||
|
pub storage_class_name: Option<String>,
|
||||||
|
|
||||||
|
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||||||
|
pub size: Option<String>,
|
||||||
|
}
|
||||||
|
|
||||||
|
// ------------------------------------------------------------------------------------------------
|
||||||
|
|
||||||
|
#[derive(CustomResource, Serialize, Deserialize, Debug, Clone, JsonSchema)]
|
||||||
|
#[kube(
|
||||||
|
group = "grafana.integreatly.org",
|
||||||
|
version = "v1beta1",
|
||||||
|
kind = "GrafanaDashboard",
|
||||||
|
plural = "grafanadashboards",
|
||||||
|
namespaced
|
||||||
|
)]
|
||||||
|
#[serde(rename_all = "camelCase")]
|
||||||
|
pub struct GrafanaDashboardSpec {
|
||||||
|
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||||||
|
pub resync_period: Option<String>,
|
||||||
|
|
||||||
|
pub instance_selector: LabelSelector,
|
||||||
|
|
||||||
|
pub json: String,
|
||||||
|
}
|
||||||
|
|
||||||
|
// ------------------------------------------------------------------------------------------------
|
||||||
|
|
||||||
|
#[derive(CustomResource, Serialize, Deserialize, Debug, Clone, JsonSchema)]
|
||||||
|
#[kube(
|
||||||
|
group = "grafana.integreatly.org",
|
||||||
|
version = "v1beta1",
|
||||||
|
kind = "GrafanaDatasource",
|
||||||
|
plural = "grafanadatasources",
|
||||||
|
namespaced
|
||||||
|
)]
|
||||||
|
#[serde(rename_all = "camelCase")]
|
||||||
|
pub struct GrafanaDatasourceSpec {
|
||||||
|
pub instance_selector: LabelSelector,
|
||||||
|
|
||||||
|
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||||||
|
pub allow_cross_namespace_import: Option<bool>,
|
||||||
|
|
||||||
|
pub datasource: GrafanaDatasourceConfig,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Serialize, Deserialize, Debug, Clone, JsonSchema)]
|
||||||
|
#[serde(rename_all = "camelCase")]
|
||||||
|
pub struct GrafanaDatasourceConfig {
|
||||||
|
pub access: String,
|
||||||
|
pub database: Option<String>,
|
||||||
|
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||||||
|
pub json_data: Option<BTreeMap<String, String>>,
|
||||||
|
pub name: String,
|
||||||
|
pub r#type: String,
|
||||||
|
pub url: String,
|
||||||
|
}
|
||||||
|
|
||||||
|
// ------------------------------------------------------------------------------------------------
|
||||||
|
|
||||||
|
#[derive(Serialize, Deserialize, Debug, Clone, JsonSchema, Default)]
|
||||||
|
#[serde(rename_all = "camelCase")]
|
||||||
|
pub struct ResourceRequirements {
|
||||||
|
#[serde(default, skip_serializing_if = "BTreeMap::is_empty")]
|
||||||
|
pub limits: BTreeMap<String, String>,
|
||||||
|
|
||||||
|
#[serde(default, skip_serializing_if = "BTreeMap::is_empty")]
|
||||||
|
pub requests: BTreeMap<String, String>,
|
||||||
|
}
|
@ -0,0 +1,41 @@
|
|||||||
|
use std::collections::BTreeMap;
|
||||||
|
|
||||||
|
use kube::CustomResource;
|
||||||
|
use schemars::JsonSchema;
|
||||||
|
use serde::{Deserialize, Serialize};
|
||||||
|
|
||||||
|
use crate::modules::monitoring::kube_prometheus::crd::rhob_prometheuses::LabelSelector;
|
||||||
|
|
||||||
|
/// MonitoringStack CRD for monitoring.rhobs/v1alpha1
|
||||||
|
#[derive(CustomResource, Serialize, Deserialize, Debug, Clone, JsonSchema)]
|
||||||
|
#[kube(
|
||||||
|
group = "monitoring.rhobs",
|
||||||
|
version = "v1alpha1",
|
||||||
|
kind = "MonitoringStack",
|
||||||
|
plural = "monitoringstacks",
|
||||||
|
namespaced
|
||||||
|
)]
|
||||||
|
#[serde(rename_all = "camelCase")]
|
||||||
|
pub struct MonitoringStackSpec {
|
||||||
|
/// Verbosity of logs (e.g. "debug", "info", "warn", "error").
|
||||||
|
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||||||
|
pub log_level: Option<String>,
|
||||||
|
|
||||||
|
/// Retention period for Prometheus TSDB data (e.g. "1d").
|
||||||
|
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||||||
|
pub retention: Option<String>,
|
||||||
|
|
||||||
|
/// Resource selector for workloads monitored by this stack.
|
||||||
|
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||||||
|
pub resource_selector: Option<LabelSelector>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Default for MonitoringStackSpec {
|
||||||
|
fn default() -> Self {
|
||||||
|
MonitoringStackSpec {
|
||||||
|
log_level: Some("info".into()),
|
||||||
|
retention: Some("7d".into()),
|
||||||
|
resource_selector: Some(LabelSelector::default()),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
@ -0,0 +1,57 @@
|
|||||||
|
use std::collections::BTreeMap;
|
||||||
|
|
||||||
|
use kube::CustomResource;
|
||||||
|
use schemars::JsonSchema;
|
||||||
|
use serde::{Deserialize, Serialize};
|
||||||
|
|
||||||
|
use crate::modules::monitoring::alert_rule::prometheus_alert_rule::PrometheusAlertRule;
|
||||||
|
|
||||||
|
#[derive(CustomResource, Debug, Serialize, Deserialize, Clone, JsonSchema)]
|
||||||
|
#[kube(
|
||||||
|
group = "monitoring.rhobs",
|
||||||
|
version = "v1",
|
||||||
|
kind = "PrometheusRule",
|
||||||
|
plural = "prometheusrules",
|
||||||
|
namespaced
|
||||||
|
)]
|
||||||
|
#[serde(rename_all = "camelCase")]
|
||||||
|
pub struct PrometheusRuleSpec {
|
||||||
|
pub groups: Vec<RuleGroup>,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
|
||||||
|
pub struct RuleGroup {
|
||||||
|
pub name: String,
|
||||||
|
pub rules: Vec<Rule>,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
|
||||||
|
#[serde(rename_all = "camelCase")]
|
||||||
|
pub struct Rule {
|
||||||
|
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||||||
|
pub alert: Option<String>,
|
||||||
|
|
||||||
|
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||||||
|
pub expr: Option<String>,
|
||||||
|
|
||||||
|
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||||||
|
pub for_: Option<String>,
|
||||||
|
|
||||||
|
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||||||
|
pub labels: Option<std::collections::BTreeMap<String, String>>,
|
||||||
|
|
||||||
|
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||||||
|
pub annotations: Option<std::collections::BTreeMap<String, String>>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl From<PrometheusAlertRule> for Rule {
|
||||||
|
fn from(value: PrometheusAlertRule) -> Self {
|
||||||
|
Rule {
|
||||||
|
alert: Some(value.alert),
|
||||||
|
expr: Some(value.expr),
|
||||||
|
for_: value.r#for,
|
||||||
|
labels: Some(value.labels.into_iter().collect::<BTreeMap<_, _>>()),
|
||||||
|
annotations: Some(value.annotations.into_iter().collect::<BTreeMap<_, _>>()),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
@ -0,0 +1,118 @@
|
|||||||
|
use std::collections::BTreeMap;
|
||||||
|
|
||||||
|
use kube::CustomResource;
|
||||||
|
use schemars::JsonSchema;
|
||||||
|
use serde::{Deserialize, Serialize};
|
||||||
|
|
||||||
|
use crate::modules::monitoring::kube_prometheus::types::Operator;
|
||||||
|
|
||||||
|
#[derive(CustomResource, Serialize, Deserialize, Debug, Clone, JsonSchema)]
|
||||||
|
#[kube(
|
||||||
|
group = "monitoring.rhobs",
|
||||||
|
version = "v1",
|
||||||
|
kind = "Prometheus",
|
||||||
|
plural = "prometheuses",
|
||||||
|
namespaced
|
||||||
|
)]
|
||||||
|
#[serde(rename_all = "camelCase")]
|
||||||
|
pub struct PrometheusSpec {
|
||||||
|
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||||||
|
pub alerting: Option<PrometheusSpecAlerting>,
|
||||||
|
|
||||||
|
pub service_account_name: String,
|
||||||
|
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||||||
|
pub service_monitor_namespace_selector: Option<LabelSelector>,
|
||||||
|
|
||||||
|
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||||||
|
pub service_monitor_selector: Option<LabelSelector>,
|
||||||
|
|
||||||
|
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||||||
|
pub service_discovery_role: Option<String>,
|
||||||
|
|
||||||
|
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||||||
|
pub pod_monitor_selector: Option<LabelSelector>,
|
||||||
|
|
||||||
|
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||||||
|
pub rule_selector: Option<LabelSelector>,
|
||||||
|
|
||||||
|
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||||||
|
pub rule_namespace_selector: Option<LabelSelector>,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema, Default)]
|
||||||
|
#[serde(rename_all = "camelCase")]
|
||||||
|
pub struct NamespaceSelector {
|
||||||
|
#[serde(default, skip_serializing_if = "Vec::is_empty")]
|
||||||
|
pub match_names: Vec<String>,
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Contains alerting configuration, specifically Alertmanager endpoints.
|
||||||
|
#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema, Default)]
|
||||||
|
pub struct PrometheusSpecAlerting {
|
||||||
|
#[serde(skip_serializing_if = "Option::is_none")]
|
||||||
|
pub alertmanagers: Option<Vec<AlertmanagerEndpoints>>,
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Represents an Alertmanager endpoint configuration used by Prometheus.
|
||||||
|
#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema, Default)]
|
||||||
|
pub struct AlertmanagerEndpoints {
|
||||||
|
/// Name of the Alertmanager Service.
|
||||||
|
#[serde(skip_serializing_if = "Option::is_none")]
|
||||||
|
pub name: Option<String>,
|
||||||
|
|
||||||
|
/// Namespace of the Alertmanager Service.
|
||||||
|
#[serde(skip_serializing_if = "Option::is_none")]
|
||||||
|
pub namespace: Option<String>,
|
||||||
|
|
||||||
|
/// Port to access on the Alertmanager Service (e.g. "web").
|
||||||
|
#[serde(skip_serializing_if = "Option::is_none")]
|
||||||
|
pub port: Option<String>,
|
||||||
|
|
||||||
|
/// Scheme to use for connecting (e.g. "http").
|
||||||
|
#[serde(skip_serializing_if = "Option::is_none")]
|
||||||
|
pub scheme: Option<String>,
|
||||||
|
// Other fields like `tls_config`, `path_prefix`, etc., can be added if needed.
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema, Default)]
|
||||||
|
#[serde(rename_all = "camelCase")]
|
||||||
|
pub struct LabelSelector {
|
||||||
|
#[serde(default, skip_serializing_if = "BTreeMap::is_empty")]
|
||||||
|
pub match_labels: BTreeMap<String, String>,
|
||||||
|
|
||||||
|
#[serde(default, skip_serializing_if = "Vec::is_empty")]
|
||||||
|
pub match_expressions: Vec<LabelSelectorRequirement>,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
|
||||||
|
#[serde(rename_all = "camelCase")]
|
||||||
|
pub struct LabelSelectorRequirement {
|
||||||
|
pub key: String,
|
||||||
|
pub operator: Operator,
|
||||||
|
#[serde(default, skip_serializing_if = "Vec::is_empty")]
|
||||||
|
pub values: Vec<String>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Default for PrometheusSpec {
|
||||||
|
fn default() -> Self {
|
||||||
|
PrometheusSpec {
|
||||||
|
alerting: None,
|
||||||
|
|
||||||
|
service_account_name: "prometheus".into(),
|
||||||
|
|
||||||
|
// null means "only my namespace"
|
||||||
|
service_monitor_namespace_selector: None,
|
||||||
|
|
||||||
|
// empty selector means match all ServiceMonitors in that namespace
|
||||||
|
service_monitor_selector: Some(LabelSelector::default()),
|
||||||
|
|
||||||
|
service_discovery_role: Some("Endpoints".into()),
|
||||||
|
|
||||||
|
pod_monitor_selector: None,
|
||||||
|
|
||||||
|
rule_selector: None,
|
||||||
|
|
||||||
|
rule_namespace_selector: Some(LabelSelector::default()),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
@ -0,0 +1,62 @@
|
|||||||
|
use k8s_openapi::api::{
|
||||||
|
core::v1::ServiceAccount,
|
||||||
|
rbac::v1::{PolicyRule, Role, RoleBinding, RoleRef, Subject},
|
||||||
|
};
|
||||||
|
use kube::api::ObjectMeta;
|
||||||
|
|
||||||
|
pub fn build_prom_role(role_name: String, namespace: String) -> Role {
|
||||||
|
Role {
|
||||||
|
metadata: ObjectMeta {
|
||||||
|
name: Some(role_name),
|
||||||
|
namespace: Some(namespace),
|
||||||
|
..Default::default()
|
||||||
|
},
|
||||||
|
rules: Some(vec![PolicyRule {
|
||||||
|
api_groups: Some(vec!["".into()]), // core API group
|
||||||
|
resources: Some(vec!["services".into(), "endpoints".into(), "pods".into()]),
|
||||||
|
verbs: vec!["get".into(), "list".into(), "watch".into()],
|
||||||
|
..Default::default()
|
||||||
|
}]),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn build_prom_rolebinding(
|
||||||
|
role_name: String,
|
||||||
|
namespace: String,
|
||||||
|
service_account_name: String,
|
||||||
|
) -> RoleBinding {
|
||||||
|
RoleBinding {
|
||||||
|
metadata: ObjectMeta {
|
||||||
|
name: Some(format!("{}-rolebinding", role_name)),
|
||||||
|
namespace: Some(namespace.clone()),
|
||||||
|
..Default::default()
|
||||||
|
},
|
||||||
|
role_ref: RoleRef {
|
||||||
|
api_group: "rbac.authorization.k8s.io".into(),
|
||||||
|
kind: "Role".into(),
|
||||||
|
name: role_name,
|
||||||
|
},
|
||||||
|
subjects: Some(vec![Subject {
|
||||||
|
kind: "ServiceAccount".into(),
|
||||||
|
name: service_account_name,
|
||||||
|
namespace: Some(namespace.clone()),
|
||||||
|
..Default::default()
|
||||||
|
}]),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn build_prom_service_account(
|
||||||
|
service_account_name: String,
|
||||||
|
namespace: String,
|
||||||
|
) -> ServiceAccount {
|
||||||
|
ServiceAccount {
|
||||||
|
automount_service_account_token: None,
|
||||||
|
image_pull_secrets: None,
|
||||||
|
metadata: ObjectMeta {
|
||||||
|
name: Some(service_account_name),
|
||||||
|
namespace: Some(namespace),
|
||||||
|
..Default::default()
|
||||||
|
},
|
||||||
|
secrets: None,
|
||||||
|
}
|
||||||
|
}
|
@ -0,0 +1,87 @@
|
|||||||
|
use std::collections::HashMap;
|
||||||
|
|
||||||
|
use kube::CustomResource;
|
||||||
|
use schemars::JsonSchema;
|
||||||
|
use serde::{Deserialize, Serialize};
|
||||||
|
|
||||||
|
use crate::modules::monitoring::kube_prometheus::types::{
|
||||||
|
HTTPScheme, MatchExpression, NamespaceSelector, Operator, Selector,
|
||||||
|
ServiceMonitor as KubeServiceMonitor, ServiceMonitorEndpoint,
|
||||||
|
};
|
||||||
|
|
||||||
|
/// This is the top-level struct for the ServiceMonitor Custom Resource.
|
||||||
|
/// The `#[derive(CustomResource)]` macro handles all the boilerplate for you,
|
||||||
|
/// including the `impl Resource`.
|
||||||
|
#[derive(CustomResource, Serialize, Deserialize, Debug, Clone, JsonSchema)]
|
||||||
|
#[kube(
|
||||||
|
group = "monitoring.rhobs",
|
||||||
|
version = "v1",
|
||||||
|
kind = "ServiceMonitor",
|
||||||
|
plural = "servicemonitors",
|
||||||
|
namespaced
|
||||||
|
)]
|
||||||
|
#[serde(rename_all = "camelCase")]
|
||||||
|
pub struct ServiceMonitorSpec {
|
||||||
|
/// A label selector to select services to monitor.
|
||||||
|
pub selector: Selector,
|
||||||
|
|
||||||
|
/// A list of endpoints on the selected services to be monitored.
|
||||||
|
pub endpoints: Vec<ServiceMonitorEndpoint>,
|
||||||
|
|
||||||
|
/// Selector to select which namespaces the Kubernetes Endpoints objects
|
||||||
|
/// are discovered from.
|
||||||
|
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||||||
|
pub namespace_selector: Option<NamespaceSelector>,
|
||||||
|
|
||||||
|
/// The label to use to retrieve the job name from.
|
||||||
|
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||||||
|
pub job_label: Option<String>,
|
||||||
|
|
||||||
|
/// Pod-based target labels to transfer from the Kubernetes Pod onto the target.
|
||||||
|
#[serde(default, skip_serializing_if = "Vec::is_empty")]
|
||||||
|
pub pod_target_labels: Vec<String>,
|
||||||
|
|
||||||
|
/// TargetLabels transfers labels on the Kubernetes Service object to the target.
|
||||||
|
#[serde(default, skip_serializing_if = "Vec::is_empty")]
|
||||||
|
pub target_labels: Vec<String>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Default for ServiceMonitorSpec {
|
||||||
|
fn default() -> Self {
|
||||||
|
let labels = HashMap::new();
|
||||||
|
Self {
|
||||||
|
selector: Selector {
|
||||||
|
match_labels: { labels },
|
||||||
|
match_expressions: vec![MatchExpression {
|
||||||
|
key: "app.kubernetes.io/name".into(),
|
||||||
|
operator: Operator::Exists,
|
||||||
|
values: vec![],
|
||||||
|
}],
|
||||||
|
},
|
||||||
|
endpoints: vec![ServiceMonitorEndpoint {
|
||||||
|
port: Some("http".to_string()),
|
||||||
|
path: Some("/metrics".into()),
|
||||||
|
interval: Some("30s".into()),
|
||||||
|
scheme: Some(HTTPScheme::HTTP),
|
||||||
|
..Default::default()
|
||||||
|
}],
|
||||||
|
namespace_selector: None, // only the same namespace
|
||||||
|
job_label: Some("app".into()),
|
||||||
|
pod_target_labels: vec![],
|
||||||
|
target_labels: vec![],
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl From<KubeServiceMonitor> for ServiceMonitorSpec {
|
||||||
|
fn from(value: KubeServiceMonitor) -> Self {
|
||||||
|
Self {
|
||||||
|
selector: value.selector,
|
||||||
|
endpoints: value.endpoints,
|
||||||
|
namespace_selector: value.namespace_selector,
|
||||||
|
job_label: value.job_label,
|
||||||
|
pod_target_labels: value.pod_target_labels,
|
||||||
|
target_labels: value.target_labels,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
@ -2,3 +2,4 @@ pub mod alerts;
|
|||||||
pub mod k8s_prometheus_alerting_score;
|
pub mod k8s_prometheus_alerting_score;
|
||||||
#[allow(clippy::module_inception)]
|
#[allow(clippy::module_inception)]
|
||||||
pub mod prometheus;
|
pub mod prometheus;
|
||||||
|
pub mod rhob_alerting_score;
|
||||||
|
486
harmony/src/modules/prometheus/rhob_alerting_score.rs
Normal file
486
harmony/src/modules/prometheus/rhob_alerting_score.rs
Normal file
@ -0,0 +1,486 @@
|
|||||||
|
use std::fs;
|
||||||
|
use std::{collections::BTreeMap, sync::Arc};
|
||||||
|
use tempfile::tempdir;
|
||||||
|
|
||||||
|
use async_trait::async_trait;
|
||||||
|
use kube::api::ObjectMeta;
|
||||||
|
use log::{debug, info};
|
||||||
|
use serde::Serialize;
|
||||||
|
use std::process::Command;
|
||||||
|
|
||||||
|
use crate::modules::monitoring::kube_prometheus::crd::grafana_default_dashboard::build_default_dashboard;
|
||||||
|
use crate::modules::monitoring::kube_prometheus::crd::rhob_alertmanager_config::RHOBObservability;
|
||||||
|
use crate::modules::monitoring::kube_prometheus::crd::rhob_alertmanagers::{
|
||||||
|
Alertmanager, AlertmanagerSpec,
|
||||||
|
};
|
||||||
|
use crate::modules::monitoring::kube_prometheus::crd::rhob_grafana::{
|
||||||
|
Grafana, GrafanaDashboard, GrafanaDashboardSpec, GrafanaDatasource, GrafanaDatasourceConfig,
|
||||||
|
GrafanaDatasourceSpec, GrafanaSpec,
|
||||||
|
};
|
||||||
|
use crate::modules::monitoring::kube_prometheus::crd::rhob_monitoring_stack::{
|
||||||
|
MonitoringStack, MonitoringStackSpec,
|
||||||
|
};
|
||||||
|
use crate::modules::monitoring::kube_prometheus::crd::rhob_prometheus_rules::{
|
||||||
|
PrometheusRule, PrometheusRuleSpec, RuleGroup,
|
||||||
|
};
|
||||||
|
use crate::modules::monitoring::kube_prometheus::crd::rhob_prometheuses::LabelSelector;
|
||||||
|
|
||||||
|
use crate::modules::monitoring::kube_prometheus::crd::rhob_service_monitor::{
|
||||||
|
ServiceMonitor, ServiceMonitorSpec,
|
||||||
|
};
|
||||||
|
use crate::score::Score;
|
||||||
|
use crate::topology::oberservability::monitoring::AlertReceiver;
|
||||||
|
use crate::topology::{K8sclient, Topology, k8s::K8sClient};
|
||||||
|
use crate::{
|
||||||
|
data::Version,
|
||||||
|
interpret::{Interpret, InterpretError, InterpretName, InterpretStatus, Outcome},
|
||||||
|
inventory::Inventory,
|
||||||
|
};
|
||||||
|
use harmony_types::id::Id;
|
||||||
|
|
||||||
|
use super::prometheus::PrometheusApplicationMonitoring;
|
||||||
|
|
||||||
|
#[derive(Clone, Debug, Serialize)]
|
||||||
|
pub struct RHOBAlertingScore {
|
||||||
|
pub sender: RHOBObservability,
|
||||||
|
pub receivers: Vec<Box<dyn AlertReceiver<RHOBObservability>>>,
|
||||||
|
pub service_monitors: Vec<ServiceMonitor>,
|
||||||
|
pub prometheus_rules: Vec<RuleGroup>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<T: Topology + K8sclient + PrometheusApplicationMonitoring<RHOBObservability>> Score<T>
|
||||||
|
for RHOBAlertingScore
|
||||||
|
{
|
||||||
|
fn create_interpret(&self) -> Box<dyn crate::interpret::Interpret<T>> {
|
||||||
|
Box::new(RHOBAlertingInterpret {
|
||||||
|
sender: self.sender.clone(),
|
||||||
|
receivers: self.receivers.clone(),
|
||||||
|
service_monitors: self.service_monitors.clone(),
|
||||||
|
prometheus_rules: self.prometheus_rules.clone(),
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
fn name(&self) -> String {
|
||||||
|
"RHOB alerting [RHOBAlertingScore]".into()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Clone, Debug)]
|
||||||
|
pub struct RHOBAlertingInterpret {
|
||||||
|
pub sender: RHOBObservability,
|
||||||
|
pub receivers: Vec<Box<dyn AlertReceiver<RHOBObservability>>>,
|
||||||
|
pub service_monitors: Vec<ServiceMonitor>,
|
||||||
|
pub prometheus_rules: Vec<RuleGroup>,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[async_trait]
|
||||||
|
impl<T: Topology + K8sclient + PrometheusApplicationMonitoring<RHOBObservability>> Interpret<T>
|
||||||
|
for RHOBAlertingInterpret
|
||||||
|
{
|
||||||
|
async fn execute(
|
||||||
|
&self,
|
||||||
|
_inventory: &Inventory,
|
||||||
|
topology: &T,
|
||||||
|
) -> Result<Outcome, InterpretError> {
|
||||||
|
let client = topology.k8s_client().await.unwrap();
|
||||||
|
self.ensure_grafana_operator().await?;
|
||||||
|
self.install_prometheus(&client).await?;
|
||||||
|
self.install_client_kube_metrics().await?;
|
||||||
|
self.install_grafana(&client).await?;
|
||||||
|
self.install_receivers(&self.sender, &self.receivers)
|
||||||
|
.await?;
|
||||||
|
self.install_rules(&self.prometheus_rules, &client).await?;
|
||||||
|
self.install_monitors(self.service_monitors.clone(), &client)
|
||||||
|
.await?;
|
||||||
|
Ok(Outcome::success(
|
||||||
|
"K8s monitoring components installed".to_string(),
|
||||||
|
))
|
||||||
|
}
|
||||||
|
|
||||||
|
fn get_name(&self) -> InterpretName {
|
||||||
|
InterpretName::RHOBAlerting
|
||||||
|
}
|
||||||
|
|
||||||
|
fn get_version(&self) -> Version {
|
||||||
|
todo!()
|
||||||
|
}
|
||||||
|
|
||||||
|
fn get_status(&self) -> InterpretStatus {
|
||||||
|
todo!()
|
||||||
|
}
|
||||||
|
|
||||||
|
fn get_children(&self) -> Vec<Id> {
|
||||||
|
todo!()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl RHOBAlertingInterpret {
|
||||||
|
async fn crd_exists(&self, crd: &str) -> bool {
|
||||||
|
let status = Command::new("sh")
|
||||||
|
.args(["-c", &format!("kubectl get crd -A | grep -i {crd}")])
|
||||||
|
.status()
|
||||||
|
.map_err(|e| InterpretError::new(format!("could not connect to cluster: {}", e)))
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
|
status.success()
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn install_chart(
|
||||||
|
&self,
|
||||||
|
chart_path: String,
|
||||||
|
chart_name: String,
|
||||||
|
) -> Result<(), InterpretError> {
|
||||||
|
let temp_dir =
|
||||||
|
tempdir().map_err(|e| InterpretError::new(format!("Tempdir error: {}", e)))?;
|
||||||
|
let temp_path = temp_dir.path().to_path_buf();
|
||||||
|
debug!("Using temp directory: {}", temp_path.display());
|
||||||
|
let chart = format!("{}/{}", chart_path, chart_name);
|
||||||
|
let pull_output = Command::new("helm")
|
||||||
|
.args(["pull", &chart, "--destination", temp_path.to_str().unwrap()])
|
||||||
|
.output()
|
||||||
|
.map_err(|e| InterpretError::new(format!("Helm pull error: {}", e)))?;
|
||||||
|
|
||||||
|
if !pull_output.status.success() {
|
||||||
|
return Err(InterpretError::new(format!(
|
||||||
|
"Helm pull failed: {}",
|
||||||
|
String::from_utf8_lossy(&pull_output.stderr)
|
||||||
|
)));
|
||||||
|
}
|
||||||
|
|
||||||
|
let tgz_path = fs::read_dir(&temp_path)
|
||||||
|
.unwrap()
|
||||||
|
.filter_map(|entry| {
|
||||||
|
let entry = entry.ok()?;
|
||||||
|
let path = entry.path();
|
||||||
|
if path.extension()? == "tgz" {
|
||||||
|
Some(path)
|
||||||
|
} else {
|
||||||
|
None
|
||||||
|
}
|
||||||
|
})
|
||||||
|
.next()
|
||||||
|
.ok_or_else(|| InterpretError::new("Could not find pulled Helm chart".into()))?;
|
||||||
|
|
||||||
|
debug!("Installing chart from: {}", tgz_path.display());
|
||||||
|
|
||||||
|
let install_output = Command::new("helm")
|
||||||
|
.args([
|
||||||
|
"upgrade",
|
||||||
|
"--install",
|
||||||
|
&chart_name,
|
||||||
|
tgz_path.to_str().unwrap(),
|
||||||
|
"--namespace",
|
||||||
|
&self.sender.namespace.clone(),
|
||||||
|
"--create-namespace",
|
||||||
|
"--wait",
|
||||||
|
"--atomic",
|
||||||
|
])
|
||||||
|
.output()
|
||||||
|
.map_err(|e| InterpretError::new(format!("Helm install error: {}", e)))?;
|
||||||
|
|
||||||
|
if !install_output.status.success() {
|
||||||
|
return Err(InterpretError::new(format!(
|
||||||
|
"Helm install failed: {}",
|
||||||
|
String::from_utf8_lossy(&install_output.stderr)
|
||||||
|
)));
|
||||||
|
}
|
||||||
|
|
||||||
|
debug!(
|
||||||
|
"Installed chart {}/{} in namespace: {}",
|
||||||
|
&chart_path,
|
||||||
|
&chart_name,
|
||||||
|
self.sender.namespace.clone()
|
||||||
|
);
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn ensure_grafana_operator(&self) -> Result<Outcome, InterpretError> {
|
||||||
|
let _ = Command::new("helm")
|
||||||
|
.args([
|
||||||
|
"repo",
|
||||||
|
"add",
|
||||||
|
"grafana-operator",
|
||||||
|
"https://grafana.github.io/helm-charts",
|
||||||
|
])
|
||||||
|
.output()
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
|
let _ = Command::new("helm")
|
||||||
|
.args(["repo", "update"])
|
||||||
|
.output()
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
|
let output = Command::new("helm")
|
||||||
|
.args([
|
||||||
|
"install",
|
||||||
|
"grafana-operator",
|
||||||
|
"grafana-operator/grafana-operator",
|
||||||
|
"--namespace",
|
||||||
|
&self.sender.namespace.clone(),
|
||||||
|
"--create-namespace",
|
||||||
|
"--set",
|
||||||
|
"namespaceScope=true",
|
||||||
|
])
|
||||||
|
.output()
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
|
if !output.status.success() {
|
||||||
|
return Err(InterpretError::new(format!(
|
||||||
|
"helm install failed:\nstdout: {}\nstderr: {}",
|
||||||
|
String::from_utf8_lossy(&output.stdout),
|
||||||
|
String::from_utf8_lossy(&output.stderr)
|
||||||
|
)));
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(Outcome::success(format!(
|
||||||
|
"installed grafana operator in ns {}",
|
||||||
|
self.sender.namespace.clone()
|
||||||
|
)))
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn install_prometheus(&self, client: &Arc<K8sClient>) -> Result<Outcome, InterpretError> {
|
||||||
|
debug!(
|
||||||
|
"installing crd-prometheuses in namespace {}",
|
||||||
|
self.sender.namespace.clone()
|
||||||
|
);
|
||||||
|
|
||||||
|
let stack = MonitoringStack {
|
||||||
|
metadata: ObjectMeta {
|
||||||
|
name: Some(format!("{}-monitoring", self.sender.namespace.clone()).into()),
|
||||||
|
namespace: Some(self.sender.namespace.clone()),
|
||||||
|
labels: Some([("coo".into(), "example".into())].into()),
|
||||||
|
..Default::default()
|
||||||
|
},
|
||||||
|
spec: MonitoringStackSpec {
|
||||||
|
log_level: Some("debug".into()),
|
||||||
|
retention: Some("1d".into()),
|
||||||
|
resource_selector: Some(LabelSelector {
|
||||||
|
match_labels: [("app".into(), "demo".into())].into(),
|
||||||
|
..Default::default()
|
||||||
|
}),
|
||||||
|
},
|
||||||
|
};
|
||||||
|
|
||||||
|
client
|
||||||
|
.apply(&stack, Some(&self.sender.namespace.clone()))
|
||||||
|
.await
|
||||||
|
.map_err(|e| InterpretError::new(e.to_string()))?;
|
||||||
|
info!("installed rhob monitoring stack",);
|
||||||
|
Ok(Outcome::success(format!(
|
||||||
|
"successfully deployed rhob-prometheus {:#?}",
|
||||||
|
stack
|
||||||
|
)))
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn install_alert_manager(
|
||||||
|
&self,
|
||||||
|
client: &Arc<K8sClient>,
|
||||||
|
) -> Result<Outcome, InterpretError> {
|
||||||
|
let am = Alertmanager {
|
||||||
|
metadata: ObjectMeta {
|
||||||
|
name: Some(self.sender.namespace.clone()),
|
||||||
|
labels: Some(std::collections::BTreeMap::from([(
|
||||||
|
"alertmanagerConfig".to_string(),
|
||||||
|
"enabled".to_string(),
|
||||||
|
)])),
|
||||||
|
namespace: Some(self.sender.namespace.clone()),
|
||||||
|
..Default::default()
|
||||||
|
},
|
||||||
|
spec: AlertmanagerSpec::default(),
|
||||||
|
};
|
||||||
|
client
|
||||||
|
.apply(&am, Some(&self.sender.namespace.clone()))
|
||||||
|
.await
|
||||||
|
.map_err(|e| InterpretError::new(e.to_string()))?;
|
||||||
|
Ok(Outcome::success(format!(
|
||||||
|
"successfully deployed service monitor {:#?}",
|
||||||
|
am.metadata.name
|
||||||
|
)))
|
||||||
|
}
|
||||||
|
async fn install_monitors(
|
||||||
|
&self,
|
||||||
|
mut monitors: Vec<ServiceMonitor>,
|
||||||
|
client: &Arc<K8sClient>,
|
||||||
|
) -> Result<Outcome, InterpretError> {
|
||||||
|
let default_service_monitor = ServiceMonitor {
|
||||||
|
metadata: ObjectMeta {
|
||||||
|
name: Some(self.sender.namespace.clone()),
|
||||||
|
labels: Some(std::collections::BTreeMap::from([
|
||||||
|
("alertmanagerConfig".to_string(), "enabled".to_string()),
|
||||||
|
("client".to_string(), "prometheus".to_string()),
|
||||||
|
(
|
||||||
|
"app.kubernetes.io/name".to_string(),
|
||||||
|
"kube-state-metrics".to_string(),
|
||||||
|
),
|
||||||
|
])),
|
||||||
|
namespace: Some(self.sender.namespace.clone()),
|
||||||
|
..Default::default()
|
||||||
|
},
|
||||||
|
spec: ServiceMonitorSpec::default(),
|
||||||
|
};
|
||||||
|
monitors.push(default_service_monitor);
|
||||||
|
for monitor in monitors.iter() {
|
||||||
|
client
|
||||||
|
.apply(monitor, Some(&self.sender.namespace.clone()))
|
||||||
|
.await
|
||||||
|
.map_err(|e| InterpretError::new(e.to_string()))?;
|
||||||
|
}
|
||||||
|
Ok(Outcome::success(
|
||||||
|
"succesfully deployed service monitors".to_string(),
|
||||||
|
))
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn install_rules(
|
||||||
|
&self,
|
||||||
|
#[allow(clippy::ptr_arg)] rules: &Vec<RuleGroup>,
|
||||||
|
client: &Arc<K8sClient>,
|
||||||
|
) -> Result<Outcome, InterpretError> {
|
||||||
|
let mut prom_rule_spec = PrometheusRuleSpec {
|
||||||
|
groups: rules.clone(),
|
||||||
|
};
|
||||||
|
|
||||||
|
let default_rules_group = RuleGroup {
|
||||||
|
name: "default-rules".to_string(),
|
||||||
|
rules: crate::modules::monitoring::kube_prometheus::crd::rhob_default_rules::build_default_application_rules(),
|
||||||
|
};
|
||||||
|
|
||||||
|
prom_rule_spec.groups.push(default_rules_group);
|
||||||
|
let prom_rules = PrometheusRule {
|
||||||
|
metadata: ObjectMeta {
|
||||||
|
name: Some(self.sender.namespace.clone()),
|
||||||
|
labels: Some(std::collections::BTreeMap::from([
|
||||||
|
("alertmanagerConfig".to_string(), "enabled".to_string()),
|
||||||
|
("role".to_string(), "prometheus-rule".to_string()),
|
||||||
|
])),
|
||||||
|
namespace: Some(self.sender.namespace.clone()),
|
||||||
|
..Default::default()
|
||||||
|
},
|
||||||
|
spec: prom_rule_spec,
|
||||||
|
};
|
||||||
|
client
|
||||||
|
.apply(&prom_rules, Some(&self.sender.namespace.clone()))
|
||||||
|
.await
|
||||||
|
.map_err(|e| InterpretError::new(e.to_string()))?;
|
||||||
|
Ok(Outcome::success(format!(
|
||||||
|
"successfully deployed rules {:#?}",
|
||||||
|
prom_rules.metadata.name
|
||||||
|
)))
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn install_client_kube_metrics(&self) -> Result<Outcome, InterpretError> {
|
||||||
|
self.install_chart(
|
||||||
|
"oci://hub.nationtech.io/harmony".to_string(),
|
||||||
|
"nt-kube-metrics".to_string(),
|
||||||
|
)
|
||||||
|
.await?;
|
||||||
|
Ok(Outcome::success(format!(
|
||||||
|
"Installed client kube metrics in ns {}",
|
||||||
|
&self.sender.namespace.clone()
|
||||||
|
)))
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn install_grafana(&self, client: &Arc<K8sClient>) -> Result<Outcome, InterpretError> {
|
||||||
|
let mut label = BTreeMap::new();
|
||||||
|
label.insert("dashboards".to_string(), "grafana".to_string());
|
||||||
|
let labels = LabelSelector {
|
||||||
|
match_labels: label.clone(),
|
||||||
|
match_expressions: vec![],
|
||||||
|
};
|
||||||
|
let mut json_data = BTreeMap::new();
|
||||||
|
json_data.insert("timeInterval".to_string(), "5s".to_string());
|
||||||
|
let namespace = self.sender.namespace.clone();
|
||||||
|
|
||||||
|
let json = build_default_dashboard(&namespace);
|
||||||
|
|
||||||
|
let graf_data_source = GrafanaDatasource {
|
||||||
|
metadata: ObjectMeta {
|
||||||
|
name: Some(format!(
|
||||||
|
"grafana-datasource-{}",
|
||||||
|
self.sender.namespace.clone()
|
||||||
|
)),
|
||||||
|
namespace: Some(self.sender.namespace.clone()),
|
||||||
|
..Default::default()
|
||||||
|
},
|
||||||
|
spec: GrafanaDatasourceSpec {
|
||||||
|
instance_selector: labels.clone(),
|
||||||
|
allow_cross_namespace_import: Some(false),
|
||||||
|
datasource: GrafanaDatasourceConfig {
|
||||||
|
access: "proxy".to_string(),
|
||||||
|
database: Some("prometheus".to_string()),
|
||||||
|
json_data: Some(json_data),
|
||||||
|
//this is fragile
|
||||||
|
name: format!("prometheus-{}-0", self.sender.namespace.clone()),
|
||||||
|
r#type: "prometheus".to_string(),
|
||||||
|
url: format!(
|
||||||
|
"http://prometheus-operated.{}.svc.cluster.local:9090",
|
||||||
|
self.sender.namespace.clone()
|
||||||
|
),
|
||||||
|
},
|
||||||
|
},
|
||||||
|
};
|
||||||
|
|
||||||
|
client
|
||||||
|
.apply(&graf_data_source, Some(&self.sender.namespace.clone()))
|
||||||
|
.await
|
||||||
|
.map_err(|e| InterpretError::new(e.to_string()))?;
|
||||||
|
|
||||||
|
let graf_dashboard = GrafanaDashboard {
|
||||||
|
metadata: ObjectMeta {
|
||||||
|
name: Some(format!(
|
||||||
|
"grafana-dashboard-{}",
|
||||||
|
self.sender.namespace.clone()
|
||||||
|
)),
|
||||||
|
namespace: Some(self.sender.namespace.clone()),
|
||||||
|
..Default::default()
|
||||||
|
},
|
||||||
|
spec: GrafanaDashboardSpec {
|
||||||
|
resync_period: Some("30s".to_string()),
|
||||||
|
instance_selector: labels.clone(),
|
||||||
|
json,
|
||||||
|
},
|
||||||
|
};
|
||||||
|
|
||||||
|
client
|
||||||
|
.apply(&graf_dashboard, Some(&self.sender.namespace.clone()))
|
||||||
|
.await
|
||||||
|
.map_err(|e| InterpretError::new(e.to_string()))?;
|
||||||
|
|
||||||
|
let grafana = Grafana {
|
||||||
|
metadata: ObjectMeta {
|
||||||
|
name: Some(format!("grafana-{}", self.sender.namespace.clone())),
|
||||||
|
namespace: Some(self.sender.namespace.clone()),
|
||||||
|
labels: Some(label.clone()),
|
||||||
|
..Default::default()
|
||||||
|
},
|
||||||
|
spec: GrafanaSpec {
|
||||||
|
config: None,
|
||||||
|
admin_user: None,
|
||||||
|
admin_password: None,
|
||||||
|
ingress: None,
|
||||||
|
persistence: None,
|
||||||
|
resources: None,
|
||||||
|
},
|
||||||
|
};
|
||||||
|
client
|
||||||
|
.apply(&grafana, Some(&self.sender.namespace.clone()))
|
||||||
|
.await
|
||||||
|
.map_err(|e| InterpretError::new(e.to_string()))?;
|
||||||
|
Ok(Outcome::success(format!(
|
||||||
|
"successfully deployed grafana instance {:#?}",
|
||||||
|
grafana.metadata.name
|
||||||
|
)))
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn install_receivers(
|
||||||
|
&self,
|
||||||
|
sender: &RHOBObservability,
|
||||||
|
receivers: &Vec<Box<dyn AlertReceiver<RHOBObservability>>>,
|
||||||
|
) -> Result<Outcome, InterpretError> {
|
||||||
|
for receiver in receivers.iter() {
|
||||||
|
receiver.install(sender).await.map_err(|err| {
|
||||||
|
InterpretError::new(format!("failed to install receiver: {}", err))
|
||||||
|
})?;
|
||||||
|
}
|
||||||
|
Ok(Outcome::success("successfully deployed receivers".into()))
|
||||||
|
}
|
||||||
|
}
|
Loading…
Reference in New Issue
Block a user