added a monitoring stack that works with openshift/okd. Okd needs to use the cluster observability operator in order to deploy namespaced prometheuses and alertmanagers #134
19
Cargo.lock
generated
19
Cargo.lock
generated
@ -4615,6 +4615,21 @@ dependencies = [
|
||||
"subtle",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "rhob-application-monitoring"
|
||||
version = "0.1.0"
|
||||
dependencies = [
|
||||
"base64 0.22.1",
|
||||
"env_logger",
|
||||
"harmony",
|
||||
"harmony_cli",
|
||||
"harmony_macros",
|
||||
"harmony_types",
|
||||
"log",
|
||||
"tokio",
|
||||
"url",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "ring"
|
||||
version = "0.17.14"
|
||||
@ -6273,6 +6288,10 @@ version = "0.2.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "e421abadd41a4225275504ea4d6566923418b7f05506fbc9c0fe86ba7396114b"
|
||||
|
||||
[[package]]
|
||||
name = "try_rust_webapp"
|
||||
version = "0.1.0"
|
||||
|
||||
[[package]]
|
||||
name = "tui-logger"
|
||||
version = "0.14.5"
|
||||
|
17
examples/rhob_application_monitoring/Cargo.toml
Normal file
17
examples/rhob_application_monitoring/Cargo.toml
Normal file
@ -0,0 +1,17 @@
|
||||
[package]
|
||||
name = "rhob-application-monitoring"
|
||||
edition = "2024"
|
||||
version.workspace = true
|
||||
readme.workspace = true
|
||||
license.workspace = true
|
||||
|
||||
[dependencies]
|
||||
harmony = { path = "../../harmony" }
|
||||
harmony_cli = { path = "../../harmony_cli" }
|
||||
harmony_types = { path = "../../harmony_types" }
|
||||
harmony_macros = { path = "../../harmony_macros" }
|
||||
tokio = { workspace = true }
|
||||
log = { workspace = true }
|
||||
env_logger = { workspace = true }
|
||||
url = { workspace = true }
|
||||
base64.workspace = true
|
51
examples/rhob_application_monitoring/src/main.rs
Normal file
51
examples/rhob_application_monitoring/src/main.rs
Normal file
@ -0,0 +1,51 @@
|
||||
use std::{path::PathBuf, sync::Arc};
|
||||
|
||||
use harmony::{
|
||||
inventory::Inventory,
|
||||
modules::{
|
||||
application::{
|
||||
ApplicationScore, RustWebFramework, RustWebapp,
|
||||
features::rhob_monitoring::RHOBMonitoring,
|
||||
},
|
||||
monitoring::alert_channel::{
|
||||
discord_alert_channel::DiscordWebhook, webhook_receiver::WebhookReceiver,
|
||||
},
|
||||
},
|
||||
topology::K8sAnywhereTopology,
|
||||
};
|
||||
use harmony_types::net::Url;
|
||||
|
||||
#[tokio::main]
|
||||
async fn main() {
|
||||
let application = Arc::new(RustWebapp {
|
||||
name: "test-rhob-monitoring".to_string(),
|
||||
domain: Url::Url(url::Url::parse("hhtps://some-fake-url").unwrap()),
|
||||
project_root: PathBuf::from("./webapp"), // Relative from 'harmony-path' param
|
||||
framework: Some(RustWebFramework::Leptos),
|
||||
});
|
||||
|
||||
let discord_receiver = DiscordWebhook {
|
||||
name: "test-discord".to_string(),
|
||||
url: Url::Url(url::Url::parse("https://discord.doesnt.exist.com").unwrap()),
|
||||
};
|
||||
|
||||
let app = ApplicationScore {
|
||||
features: vec![
|
||||
Box::new(RHOBMonitoring {
|
||||
|
||||
application: application.clone(),
|
||||
alert_receiver: vec![Box::new(discord_receiver)],
|
||||
}),
|
||||
// TODO add backups, multisite ha, etc
|
||||
],
|
||||
application,
|
||||
};
|
||||
|
||||
harmony_cli::run(
|
||||
Inventory::autoload(),
|
||||
K8sAnywhereTopology::from_env(),
|
||||
vec![Box::new(app)],
|
||||
None,
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
}
|
@ -32,6 +32,7 @@ pub enum InterpretName {
|
||||
K8sPrometheusCrdAlerting,
|
||||
DiscoverInventoryAgent,
|
||||
CephClusterHealth,
|
||||
RHOBAlerting,
|
||||
}
|
||||
|
||||
impl std::fmt::Display for InterpretName {
|
||||
@ -60,6 +61,7 @@ impl std::fmt::Display for InterpretName {
|
||||
InterpretName::K8sPrometheusCrdAlerting => f.write_str("K8sPrometheusCrdAlerting"),
|
||||
InterpretName::DiscoverInventoryAgent => f.write_str("DiscoverInventoryAgent"),
|
||||
InterpretName::CephClusterHealth => f.write_str("CephClusterHealth"),
|
||||
InterpretName::RHOBAlerting => f.write_str("RHOBAlerting"),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -14,10 +14,11 @@ use crate::{
|
||||
monitoring::kube_prometheus::crd::{
|
||||
crd_alertmanager_config::CRDPrometheus,
|
||||
prometheus_operator::prometheus_operator_helm_chart_score,
|
||||
rhob_alertmanager_config::RHOBObservability,
|
||||
},
|
||||
prometheus::{
|
||||
k8s_prometheus_alerting_score::K8sPrometheusCRDAlertingScore,
|
||||
prometheus::PrometheusApplicationMonitoring,
|
||||
prometheus::PrometheusApplicationMonitoring, rhob_alerting_score::RHOBAlertingScore,
|
||||
},
|
||||
},
|
||||
score::Score,
|
||||
@ -108,6 +109,43 @@ impl PrometheusApplicationMonitoring<CRDPrometheus> for K8sAnywhereTopology {
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl PrometheusApplicationMonitoring<RHOBObservability> for K8sAnywhereTopology {
|
||||
async fn install_prometheus(
|
||||
&self,
|
||||
sender: &RHOBObservability,
|
||||
inventory: &Inventory,
|
||||
receivers: Option<Vec<Box<dyn AlertReceiver<RHOBObservability>>>>,
|
||||
) -> Result<PreparationOutcome, PreparationError> {
|
||||
let po_result = self.ensure_cluster_observability_operator(sender).await?;
|
||||
|
||||
if po_result == PreparationOutcome::Noop {
|
||||
debug!("Skipping Prometheus CR installation due to missing operator.");
|
||||
return Ok(po_result);
|
||||
}
|
||||
|
||||
let result = self
|
||||
.get_cluster_observability_operator_prometheus_application_score(
|
||||
sender.clone(),
|
||||
receivers,
|
||||
)
|
||||
.await
|
||||
.interpret(inventory, self)
|
||||
.await;
|
||||
|
||||
match result {
|
||||
Ok(outcome) => match outcome.status {
|
||||
InterpretStatus::SUCCESS => Ok(PreparationOutcome::Success {
|
||||
details: outcome.message,
|
||||
}),
|
||||
InterpretStatus::NOOP => Ok(PreparationOutcome::Noop),
|
||||
_ => Err(PreparationError::new(outcome.message)),
|
||||
},
|
||||
Err(err) => Err(PreparationError::new(err.to_string())),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Serialize for K8sAnywhereTopology {
|
||||
fn serialize<S>(&self, _serializer: S) -> Result<S::Ok, S::Error>
|
||||
where
|
||||
@ -134,6 +172,19 @@ impl K8sAnywhereTopology {
|
||||
}
|
||||
}
|
||||
|
||||
async fn get_cluster_observability_operator_prometheus_application_score(
|
||||
&self,
|
||||
sender: RHOBObservability,
|
||||
receivers: Option<Vec<Box<dyn AlertReceiver<RHOBObservability>>>>,
|
||||
) -> RHOBAlertingScore {
|
||||
RHOBAlertingScore {
|
||||
sender,
|
||||
receivers: receivers.unwrap_or_default(),
|
||||
service_monitors: vec![],
|
||||
prometheus_rules: vec![],
|
||||
}
|
||||
}
|
||||
|
||||
async fn get_k8s_prometheus_application_score(
|
||||
&self,
|
||||
sender: CRDPrometheus,
|
||||
@ -286,6 +337,60 @@ impl K8sAnywhereTopology {
|
||||
}
|
||||
}
|
||||
|
||||
async fn ensure_cluster_observability_operator(
|
||||
&self,
|
||||
sender: &RHOBObservability,
|
||||
) -> Result<PreparationOutcome, PreparationError> {
|
||||
let status = Command::new("sh")
|
||||
.args(["-c", "kubectl get crd -A | grep -i rhobs"])
|
||||
.status()
|
||||
.map_err(|e| PreparationError::new(format!("could not connect to cluster: {}", e)))?;
|
||||
|
||||
if !status.success() {
|
||||
if let Some(Some(k8s_state)) = self.k8s_state.get() {
|
||||
match k8s_state.source {
|
||||
K8sSource::LocalK3d => {
|
||||
debug!("installing cluster observability operator");
|
||||
todo!();
|
||||
let op_score =
|
||||
prometheus_operator_helm_chart_score(sender.namespace.clone());
|
||||
let result = op_score.interpret(&Inventory::empty(), self).await;
|
||||
|
||||
return match result {
|
||||
Ok(outcome) => match outcome.status {
|
||||
InterpretStatus::SUCCESS => Ok(PreparationOutcome::Success {
|
||||
details: "installed cluster observability operator".into(),
|
||||
}),
|
||||
InterpretStatus::NOOP => Ok(PreparationOutcome::Noop),
|
||||
_ => Err(PreparationError::new(
|
||||
"failed to install cluster observability operator (unknown error)".into(),
|
||||
)),
|
||||
},
|
||||
Err(err) => Err(PreparationError::new(err.to_string())),
|
||||
};
|
||||
}
|
||||
K8sSource::Kubeconfig => {
|
||||
debug!(
|
||||
"unable to install cluster observability operator, contact cluster admin"
|
||||
);
|
||||
return Ok(PreparationOutcome::Noop);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
warn!(
|
||||
"Unable to detect k8s_state. Skipping Cluster Observability Operator install."
|
||||
);
|
||||
return Ok(PreparationOutcome::Noop);
|
||||
}
|
||||
}
|
||||
|
||||
debug!("Cluster Observability Operator is already present, skipping install");
|
||||
|
||||
Ok(PreparationOutcome::Success {
|
||||
details: "cluster observability operator present in cluster".into(),
|
||||
})
|
||||
}
|
||||
|
||||
async fn ensure_prometheus_operator(
|
||||
&self,
|
||||
sender: &CRDPrometheus,
|
||||
|
@ -1,4 +1,5 @@
|
||||
mod endpoint;
|
||||
pub mod rhob_monitoring;
|
||||
pub use endpoint::*;
|
||||
|
||||
mod monitoring;
|
||||
|
109
harmony/src/modules/application/features/rhob_monitoring.rs
Normal file
109
harmony/src/modules/application/features/rhob_monitoring.rs
Normal file
@ -0,0 +1,109 @@
|
||||
use std::sync::Arc;
|
||||
|
||||
use crate::modules::application::{Application, ApplicationFeature};
|
||||
use crate::modules::monitoring::application_monitoring::application_monitoring_score::ApplicationMonitoringScore;
|
||||
use crate::modules::monitoring::application_monitoring::rhobs_application_monitoring_score::ApplicationRHOBMonitoringScore;
|
||||
|
||||
use crate::modules::monitoring::kube_prometheus::crd::rhob_alertmanager_config::RHOBObservability;
|
||||
use crate::topology::MultiTargetTopology;
|
||||
use crate::{
|
||||
inventory::Inventory,
|
||||
modules::monitoring::{
|
||||
alert_channel::webhook_receiver::WebhookReceiver, ntfy::ntfy::NtfyScore,
|
||||
},
|
||||
score::Score,
|
||||
topology::{HelmCommand, K8sclient, Topology, tenant::TenantManager},
|
||||
};
|
||||
use crate::{
|
||||
modules::prometheus::prometheus::PrometheusApplicationMonitoring,
|
||||
topology::oberservability::monitoring::AlertReceiver,
|
||||
};
|
||||
use async_trait::async_trait;
|
||||
use base64::{Engine as _, engine::general_purpose};
|
||||
use harmony_types::net::Url;
|
||||
use log::{debug, info};
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct RHOBMonitoring {
|
||||
pub application: Arc<dyn Application>,
|
||||
pub alert_receiver: Vec<Box<dyn AlertReceiver<RHOBObservability>>>,
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl<
|
||||
T: Topology
|
||||
+ HelmCommand
|
||||
+ 'static
|
||||
+ TenantManager
|
||||
+ K8sclient
|
||||
+ MultiTargetTopology
|
||||
+ std::fmt::Debug
|
||||
+ PrometheusApplicationMonitoring<RHOBObservability>,
|
||||
> ApplicationFeature<T> for RHOBMonitoring
|
||||
{
|
||||
async fn ensure_installed(&self, topology: &T) -> Result<(), String> {
|
||||
info!("Ensuring monitoring is available for application");
|
||||
let namespace = topology
|
||||
.get_tenant_config()
|
||||
.await
|
||||
.map(|ns| ns.name.clone())
|
||||
.unwrap_or_else(|| self.application.name());
|
||||
|
||||
let mut alerting_score = ApplicationRHOBMonitoringScore {
|
||||
sender: RHOBObservability {
|
||||
namespace: namespace.clone(),
|
||||
client: topology.k8s_client().await.unwrap(),
|
||||
},
|
||||
application: self.application.clone(),
|
||||
receivers: self.alert_receiver.clone(),
|
||||
};
|
||||
let ntfy = NtfyScore {
|
||||
namespace: namespace.clone(),
|
||||
host: "ntfy.harmonydemo.apps.ncd0.harmony.mcd".to_string(),
|
||||
};
|
||||
ntfy.interpret(&Inventory::empty(), topology)
|
||||
.await
|
||||
.map_err(|e| e.to_string())?;
|
||||
|
||||
let ntfy_default_auth_username = "harmony";
|
||||
let ntfy_default_auth_password = "harmony";
|
||||
let ntfy_default_auth_header = format!(
|
||||
"Basic {}",
|
||||
general_purpose::STANDARD.encode(format!(
|
||||
"{ntfy_default_auth_username}:{ntfy_default_auth_password}"
|
||||
))
|
||||
);
|
||||
|
||||
debug!("ntfy_default_auth_header: {ntfy_default_auth_header}");
|
||||
|
||||
let ntfy_default_auth_param = general_purpose::STANDARD
|
||||
.encode(ntfy_default_auth_header)
|
||||
.replace("=", "");
|
||||
|
||||
debug!("ntfy_default_auth_param: {ntfy_default_auth_param}");
|
||||
|
||||
let ntfy_receiver = WebhookReceiver {
|
||||
name: "ntfy-webhook".to_string(),
|
||||
url: Url::Url(
|
||||
url::Url::parse(
|
||||
format!(
|
||||
"http://ntfy.{}.svc.cluster.local/rust-web-app?auth={ntfy_default_auth_param}",
|
||||
namespace.clone()
|
||||
)
|
||||
.as_str(),
|
||||
)
|
||||
.unwrap(),
|
||||
),
|
||||
};
|
||||
|
||||
alerting_score.receivers.push(Box::new(ntfy_receiver));
|
||||
alerting_score
|
||||
.interpret(&Inventory::empty(), topology)
|
||||
.await
|
||||
.map_err(|e| e.to_string())?;
|
||||
Ok(())
|
||||
}
|
||||
fn name(&self) -> String {
|
||||
"Monitoring".to_string()
|
||||
}
|
||||
}
|
@ -4,6 +4,7 @@ use std::collections::BTreeMap;
|
||||
use async_trait::async_trait;
|
||||
use k8s_openapi::api::core::v1::Secret;
|
||||
use kube::api::ObjectMeta;
|
||||
use log::debug;
|
||||
use serde::Serialize;
|
||||
use serde_json::json;
|
||||
use serde_yaml::{Mapping, Value};
|
||||
@ -11,6 +12,7 @@ use serde_yaml::{Mapping, Value};
|
||||
use crate::modules::monitoring::kube_prometheus::crd::crd_alertmanager_config::{
|
||||
AlertmanagerConfig, AlertmanagerConfigSpec, CRDPrometheus,
|
||||
};
|
||||
use crate::modules::monitoring::kube_prometheus::crd::rhob_alertmanager_config::RHOBObservability;
|
||||
use crate::{
|
||||
interpret::{InterpretError, Outcome},
|
||||
modules::monitoring::{
|
||||
@ -30,6 +32,67 @@ pub struct DiscordWebhook {
|
||||
pub url: Url,
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl AlertReceiver<RHOBObservability> for DiscordWebhook {
|
||||
async fn install(&self, sender: &RHOBObservability) -> Result<Outcome, InterpretError> {
|
||||
let spec = crate::modules::monitoring::kube_prometheus::crd::rhob_alertmanager_config::AlertmanagerConfigSpec {
|
||||
data: json!({
|
||||
"route": {
|
||||
"receiver": self.name,
|
||||
},
|
||||
"receivers": [
|
||||
{
|
||||
"name": self.name,
|
||||
"webhookConfigs": [
|
||||
{
|
||||
"url": self.url,
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
}),
|
||||
};
|
||||
|
||||
let alertmanager_configs = crate::modules::monitoring::kube_prometheus::crd::rhob_alertmanager_config::AlertmanagerConfig {
|
||||
metadata: ObjectMeta {
|
||||
name: Some(self.name.clone()),
|
||||
labels: Some(std::collections::BTreeMap::from([(
|
||||
"alertmanagerConfig".to_string(),
|
||||
"enabled".to_string(),
|
||||
)])),
|
||||
namespace: Some(sender.namespace.clone()),
|
||||
..Default::default()
|
||||
},
|
||||
spec,
|
||||
};
|
||||
debug!(
|
||||
"alert manager configs: \n{:#?}",
|
||||
alertmanager_configs.clone()
|
||||
);
|
||||
|
||||
sender
|
||||
.client
|
||||
.apply(&alertmanager_configs, Some(&sender.namespace))
|
||||
.await?;
|
||||
Ok(Outcome::success(format!(
|
||||
"installed rhob-alertmanagerconfigs for {}",
|
||||
self.name
|
||||
)))
|
||||
}
|
||||
|
||||
fn name(&self) -> String {
|
||||
"webhook-receiver".to_string()
|
||||
}
|
||||
|
||||
fn clone_box(&self) -> Box<dyn AlertReceiver<RHOBObservability>> {
|
||||
Box::new(self.clone())
|
||||
}
|
||||
|
||||
fn as_any(&self) -> &dyn Any {
|
||||
self
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl AlertReceiver<CRDPrometheus> for DiscordWebhook {
|
||||
async fn install(&self, sender: &CRDPrometheus) -> Result<Outcome, InterpretError> {
|
||||
|
@ -11,8 +11,8 @@ use crate::{
|
||||
interpret::{InterpretError, Outcome},
|
||||
modules::monitoring::{
|
||||
kube_prometheus::{
|
||||
crd::crd_alertmanager_config::{
|
||||
AlertmanagerConfig, AlertmanagerConfigSpec, CRDPrometheus,
|
||||
crd::{
|
||||
crd_alertmanager_config::CRDPrometheus, rhob_alertmanager_config::RHOBObservability,
|
||||
},
|
||||
prometheus::{KubePrometheus, KubePrometheusReceiver},
|
||||
types::{AlertChannelConfig, AlertManagerChannelConfig},
|
||||
@ -30,9 +30,9 @@ pub struct WebhookReceiver {
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl AlertReceiver<CRDPrometheus> for WebhookReceiver {
|
||||
async fn install(&self, sender: &CRDPrometheus) -> Result<Outcome, InterpretError> {
|
||||
let spec = AlertmanagerConfigSpec {
|
||||
impl AlertReceiver<RHOBObservability> for WebhookReceiver {
|
||||
async fn install(&self, sender: &RHOBObservability) -> Result<Outcome, InterpretError> {
|
||||
let spec = crate::modules::monitoring::kube_prometheus::crd::rhob_alertmanager_config::AlertmanagerConfigSpec {
|
||||
data: json!({
|
||||
"route": {
|
||||
"receiver": self.name,
|
||||
@ -50,7 +50,68 @@ impl AlertReceiver<CRDPrometheus> for WebhookReceiver {
|
||||
}),
|
||||
};
|
||||
|
||||
let alertmanager_configs = AlertmanagerConfig {
|
||||
let alertmanager_configs = crate::modules::monitoring::kube_prometheus::crd::rhob_alertmanager_config::AlertmanagerConfig {
|
||||
metadata: ObjectMeta {
|
||||
name: Some(self.name.clone()),
|
||||
labels: Some(std::collections::BTreeMap::from([(
|
||||
"alertmanagerConfig".to_string(),
|
||||
"enabled".to_string(),
|
||||
)])),
|
||||
namespace: Some(sender.namespace.clone()),
|
||||
..Default::default()
|
||||
},
|
||||
spec,
|
||||
};
|
||||
debug!(
|
||||
"alert manager configs: \n{:#?}",
|
||||
alertmanager_configs.clone()
|
||||
);
|
||||
|
||||
sender
|
||||
.client
|
||||
.apply(&alertmanager_configs, Some(&sender.namespace))
|
||||
.await?;
|
||||
Ok(Outcome::success(format!(
|
||||
"installed rhob-alertmanagerconfigs for {}",
|
||||
self.name
|
||||
)))
|
||||
}
|
||||
|
||||
fn name(&self) -> String {
|
||||
"webhook-receiver".to_string()
|
||||
}
|
||||
|
||||
fn clone_box(&self) -> Box<dyn AlertReceiver<RHOBObservability>> {
|
||||
Box::new(self.clone())
|
||||
}
|
||||
|
||||
fn as_any(&self) -> &dyn Any {
|
||||
self
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl AlertReceiver<CRDPrometheus> for WebhookReceiver {
|
||||
async fn install(&self, sender: &CRDPrometheus) -> Result<Outcome, InterpretError> {
|
||||
let spec = crate::modules::monitoring::kube_prometheus::crd::crd_alertmanager_config::AlertmanagerConfigSpec {
|
||||
data: json!({
|
||||
"route": {
|
||||
"receiver": self.name,
|
||||
},
|
||||
"receivers": [
|
||||
{
|
||||
"name": self.name,
|
||||
"webhookConfigs": [
|
||||
{
|
||||
"url": self.url,
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
}),
|
||||
};
|
||||
|
||||
let alertmanager_configs = crate::modules::monitoring::kube_prometheus::crd::crd_alertmanager_config::AlertmanagerConfig {
|
||||
metadata: ObjectMeta {
|
||||
name: Some(self.name.clone()),
|
||||
labels: Some(std::collections::BTreeMap::from([(
|
||||
@ -115,6 +176,7 @@ impl PrometheusReceiver for WebhookReceiver {
|
||||
self.get_config().await
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl AlertReceiver<KubePrometheus> for WebhookReceiver {
|
||||
async fn install(&self, sender: &KubePrometheus) -> Result<Outcome, InterpretError> {
|
||||
|
@ -1 +1,2 @@
|
||||
pub mod application_monitoring_score;
|
||||
pub mod rhobs_application_monitoring_score;
|
||||
|
@ -0,0 +1,94 @@
|
||||
use std::sync::Arc;
|
||||
|
||||
use async_trait::async_trait;
|
||||
use serde::Serialize;
|
||||
|
||||
use crate::{
|
||||
data::Version,
|
||||
interpret::{Interpret, InterpretError, InterpretName, InterpretStatus, Outcome},
|
||||
inventory::Inventory,
|
||||
modules::{
|
||||
application::Application,
|
||||
monitoring::kube_prometheus::crd::{
|
||||
crd_alertmanager_config::CRDPrometheus, rhob_alertmanager_config::RHOBObservability,
|
||||
},
|
||||
prometheus::prometheus::PrometheusApplicationMonitoring,
|
||||
},
|
||||
score::Score,
|
||||
topology::{PreparationOutcome, Topology, oberservability::monitoring::AlertReceiver},
|
||||
};
|
||||
use harmony_types::id::Id;
|
||||
|
||||
#[derive(Debug, Clone, Serialize)]
|
||||
pub struct ApplicationRHOBMonitoringScore {
|
||||
pub sender: RHOBObservability,
|
||||
pub application: Arc<dyn Application>,
|
||||
pub receivers: Vec<Box<dyn AlertReceiver<RHOBObservability>>>,
|
||||
}
|
||||
|
||||
impl<T: Topology + PrometheusApplicationMonitoring<RHOBObservability>> Score<T>
|
||||
for ApplicationRHOBMonitoringScore
|
||||
{
|
||||
fn create_interpret(&self) -> Box<dyn Interpret<T>> {
|
||||
Box::new(ApplicationRHOBMonitoringInterpret {
|
||||
score: self.clone(),
|
||||
})
|
||||
}
|
||||
|
||||
fn name(&self) -> String {
|
||||
format!(
|
||||
"{} monitoring [ApplicationRHOBMonitoringScore]",
|
||||
self.application.name()
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct ApplicationRHOBMonitoringInterpret {
|
||||
score: ApplicationRHOBMonitoringScore,
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl<T: Topology + PrometheusApplicationMonitoring<RHOBObservability>> Interpret<T>
|
||||
for ApplicationRHOBMonitoringInterpret
|
||||
{
|
||||
async fn execute(
|
||||
&self,
|
||||
inventory: &Inventory,
|
||||
topology: &T,
|
||||
) -> Result<Outcome, InterpretError> {
|
||||
let result = topology
|
||||
.install_prometheus(
|
||||
&self.score.sender,
|
||||
inventory,
|
||||
Some(self.score.receivers.clone()),
|
||||
)
|
||||
.await;
|
||||
|
||||
match result {
|
||||
Ok(outcome) => match outcome {
|
||||
PreparationOutcome::Success { details: _ } => {
|
||||
Ok(Outcome::success("Prometheus installed".into()))
|
||||
}
|
||||
PreparationOutcome::Noop => Ok(Outcome::noop()),
|
||||
},
|
||||
Err(err) => Err(InterpretError::from(err)),
|
||||
}
|
||||
}
|
||||
|
||||
fn get_name(&self) -> InterpretName {
|
||||
InterpretName::ApplicationMonitoring
|
||||
}
|
||||
|
||||
fn get_version(&self) -> Version {
|
||||
todo!()
|
||||
}
|
||||
|
||||
fn get_status(&self) -> InterpretStatus {
|
||||
todo!()
|
||||
}
|
||||
|
||||
fn get_children(&self) -> Vec<Id> {
|
||||
todo!()
|
||||
}
|
||||
}
|
@ -7,5 +7,14 @@ pub mod crd_prometheuses;
|
||||
pub mod grafana_default_dashboard;
|
||||
pub mod grafana_operator;
|
||||
pub mod prometheus_operator;
|
||||
pub mod rhob_alertmanager_config;
|
||||
pub mod rhob_alertmanagers;
|
||||
pub mod rhob_cluster_observability_operator;
|
||||
pub mod rhob_default_rules;
|
||||
pub mod rhob_grafana;
|
||||
pub mod rhob_monitoring_stack;
|
||||
pub mod rhob_prometheus_rules;
|
||||
pub mod rhob_prometheuses;
|
||||
pub mod rhob_role;
|
||||
pub mod role;
|
||||
pub mod service_monitor;
|
||||
|
@ -0,0 +1,50 @@
|
||||
use std::sync::Arc;
|
||||
|
||||
use kube::CustomResource;
|
||||
use schemars::JsonSchema;
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
use crate::topology::{
|
||||
k8s::K8sClient,
|
||||
oberservability::monitoring::{AlertReceiver, AlertSender},
|
||||
};
|
||||
|
||||
#[derive(CustomResource, Serialize, Deserialize, Debug, Clone, JsonSchema)]
|
||||
#[kube(
|
||||
group = "monitoring.rhobs",
|
||||
version = "v1alpha",
|
||||
kind = "AlertmanagerConfig",
|
||||
plural = "alertmanagerconfigs",
|
||||
namespaced
|
||||
)]
|
||||
pub struct AlertmanagerConfigSpec {
|
||||
#[serde(flatten)]
|
||||
pub data: serde_json::Value,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize)]
|
||||
pub struct RHOBObservability {
|
||||
pub namespace: String,
|
||||
pub client: Arc<K8sClient>,
|
||||
}
|
||||
|
||||
impl AlertSender for RHOBObservability {
|
||||
fn name(&self) -> String {
|
||||
"RHOBAlertManager".to_string()
|
||||
}
|
||||
}
|
||||
|
||||
impl Clone for Box<dyn AlertReceiver<RHOBObservability>> {
|
||||
fn clone(&self) -> Self {
|
||||
self.clone_box()
|
||||
}
|
||||
}
|
||||
|
||||
impl Serialize for Box<dyn AlertReceiver<RHOBObservability>> {
|
||||
fn serialize<S>(&self, _serializer: S) -> Result<S::Ok, S::Error>
|
||||
where
|
||||
S: serde::Serializer,
|
||||
{
|
||||
todo!()
|
||||
}
|
||||
}
|
@ -0,0 +1,52 @@
|
||||
use kube::CustomResource;
|
||||
use schemars::JsonSchema;
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
use super::crd_prometheuses::LabelSelector;
|
||||
|
||||
/// Rust CRD for `Alertmanager` from Prometheus Operator
|
||||
#[derive(CustomResource, Serialize, Deserialize, Debug, Clone, JsonSchema)]
|
||||
#[kube(
|
||||
group = "monitoring.rhobs",
|
||||
version = "v1",
|
||||
kind = "Alertmanager",
|
||||
plural = "alertmanagers",
|
||||
namespaced
|
||||
)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
pub struct AlertmanagerSpec {
|
||||
/// Number of replicas for HA
|
||||
pub replicas: i32,
|
||||
|
||||
/// Selectors for AlertmanagerConfig CRDs
|
||||
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||||
pub alertmanager_config_selector: Option<LabelSelector>,
|
||||
|
||||
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||||
pub alertmanager_config_namespace_selector: Option<LabelSelector>,
|
||||
|
||||
/// Optional pod template metadata (annotations, labels)
|
||||
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||||
pub pod_metadata: Option<LabelSelector>,
|
||||
|
||||
/// Optional topology spread settings
|
||||
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||||
pub version: Option<String>,
|
||||
}
|
||||
|
||||
impl Default for AlertmanagerSpec {
|
||||
fn default() -> Self {
|
||||
AlertmanagerSpec {
|
||||
replicas: 1,
|
||||
|
||||
// Match all AlertmanagerConfigs in the same namespace
|
||||
alertmanager_config_namespace_selector: None,
|
||||
|
||||
// Empty selector matches all AlertmanagerConfigs in that namespace
|
||||
alertmanager_config_selector: Some(LabelSelector::default()),
|
||||
|
||||
pod_metadata: None,
|
||||
version: None,
|
||||
}
|
||||
}
|
||||
}
|
@ -0,0 +1,22 @@
|
||||
use std::str::FromStr;
|
||||
|
||||
use non_blank_string_rs::NonBlankString;
|
||||
|
||||
use crate::modules::helm::chart::HelmChartScore;
|
||||
//TODO package chart or something for COO okd
|
||||
pub fn rhob_cluster_observability_operator() -> HelmChartScore {
|
||||
HelmChartScore {
|
||||
namespace: None,
|
||||
release_name: NonBlankString::from_str("").unwrap(),
|
||||
letian marked this conversation as resolved
letian
commented
shouldn't it have a release name? shouldn't it have a release name?
|
||||
chart_name: NonBlankString::from_str(
|
||||
"oci://hub.nationtech.io/harmony/nt-prometheus-operator",
|
||||
)
|
||||
.unwrap(),
|
||||
chart_version: None,
|
||||
values_overrides: None,
|
||||
values_yaml: None,
|
||||
create_namespace: true,
|
||||
install_only: true,
|
||||
repository: None,
|
||||
}
|
||||
}
|
@ -0,0 +1,26 @@
|
||||
use crate::modules::{
|
||||
monitoring::kube_prometheus::crd::rhob_prometheus_rules::Rule,
|
||||
prometheus::alerts::k8s::{
|
||||
deployment::alert_deployment_unavailable,
|
||||
pod::{alert_container_restarting, alert_pod_not_ready, pod_failed},
|
||||
pvc::high_pvc_fill_rate_over_two_days,
|
||||
service::alert_service_down,
|
||||
},
|
||||
};
|
||||
|
||||
pub fn build_default_application_rules() -> Vec<Rule> {
|
||||
let pod_failed: Rule = pod_failed().into();
|
||||
let container_restarting: Rule = alert_container_restarting().into();
|
||||
let pod_not_ready: Rule = alert_pod_not_ready().into();
|
||||
let service_down: Rule = alert_service_down().into();
|
||||
let deployment_unavailable: Rule = alert_deployment_unavailable().into();
|
||||
let high_pvc_fill_rate: Rule = high_pvc_fill_rate_over_two_days().into();
|
||||
vec![
|
||||
pod_failed,
|
||||
container_restarting,
|
||||
pod_not_ready,
|
||||
service_down,
|
||||
deployment_unavailable,
|
||||
high_pvc_fill_rate,
|
||||
]
|
||||
}
|
@ -0,0 +1,153 @@
|
||||
use std::collections::BTreeMap;
|
||||
|
||||
use kube::CustomResource;
|
||||
use schemars::JsonSchema;
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
use crate::modules::monitoring::kube_prometheus::crd::rhob_prometheuses::LabelSelector;
|
||||
|
||||
#[derive(CustomResource, Serialize, Deserialize, Debug, Clone, JsonSchema)]
|
||||
#[kube(
|
||||
group = "grafana.integreatly.org",
|
||||
version = "v1beta1",
|
||||
kind = "Grafana",
|
||||
plural = "grafanas",
|
||||
namespaced
|
||||
)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
pub struct GrafanaSpec {
|
||||
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||||
pub config: Option<GrafanaConfig>,
|
||||
|
||||
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||||
pub admin_user: Option<String>,
|
||||
|
||||
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||||
pub admin_password: Option<String>,
|
||||
|
||||
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||||
pub ingress: Option<GrafanaIngress>,
|
||||
|
||||
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||||
pub persistence: Option<GrafanaPersistence>,
|
||||
|
||||
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||||
pub resources: Option<ResourceRequirements>,
|
||||
}
|
||||
|
||||
#[derive(Serialize, Deserialize, Debug, Clone, JsonSchema)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
pub struct GrafanaConfig {
|
||||
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||||
pub log: Option<GrafanaLogConfig>,
|
||||
|
||||
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||||
pub security: Option<GrafanaSecurityConfig>,
|
||||
}
|
||||
|
||||
#[derive(Serialize, Deserialize, Debug, Clone, JsonSchema)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
pub struct GrafanaLogConfig {
|
||||
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||||
pub mode: Option<String>,
|
||||
|
||||
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||||
pub level: Option<String>,
|
||||
}
|
||||
|
||||
#[derive(Serialize, Deserialize, Debug, Clone, JsonSchema)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
pub struct GrafanaSecurityConfig {
|
||||
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||||
pub admin_user: Option<String>,
|
||||
|
||||
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||||
pub admin_password: Option<String>,
|
||||
}
|
||||
|
||||
#[derive(Serialize, Deserialize, Debug, Clone, JsonSchema)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
pub struct GrafanaIngress {
|
||||
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||||
pub enabled: Option<bool>,
|
||||
|
||||
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||||
pub hosts: Option<Vec<String>>,
|
||||
}
|
||||
|
||||
#[derive(Serialize, Deserialize, Debug, Clone, JsonSchema)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
pub struct GrafanaPersistence {
|
||||
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||||
pub enabled: Option<bool>,
|
||||
|
||||
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||||
pub storage_class_name: Option<String>,
|
||||
|
||||
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||||
pub size: Option<String>,
|
||||
}
|
||||
|
||||
// ------------------------------------------------------------------------------------------------
|
||||
|
||||
#[derive(CustomResource, Serialize, Deserialize, Debug, Clone, JsonSchema)]
|
||||
#[kube(
|
||||
group = "grafana.integreatly.org",
|
||||
version = "v1beta1",
|
||||
kind = "GrafanaDashboard",
|
||||
plural = "grafanadashboards",
|
||||
namespaced
|
||||
)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
pub struct GrafanaDashboardSpec {
|
||||
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||||
pub resync_period: Option<String>,
|
||||
|
||||
pub instance_selector: LabelSelector,
|
||||
|
||||
pub json: String,
|
||||
}
|
||||
|
||||
// ------------------------------------------------------------------------------------------------
|
||||
|
||||
#[derive(CustomResource, Serialize, Deserialize, Debug, Clone, JsonSchema)]
|
||||
#[kube(
|
||||
group = "grafana.integreatly.org",
|
||||
version = "v1beta1",
|
||||
kind = "GrafanaDatasource",
|
||||
plural = "grafanadatasources",
|
||||
namespaced
|
||||
)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
pub struct GrafanaDatasourceSpec {
|
||||
pub instance_selector: LabelSelector,
|
||||
|
||||
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||||
pub allow_cross_namespace_import: Option<bool>,
|
||||
|
||||
pub datasource: GrafanaDatasourceConfig,
|
||||
}
|
||||
|
||||
#[derive(Serialize, Deserialize, Debug, Clone, JsonSchema)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
pub struct GrafanaDatasourceConfig {
|
||||
pub access: String,
|
||||
pub database: Option<String>,
|
||||
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||||
pub json_data: Option<BTreeMap<String, String>>,
|
||||
pub name: String,
|
||||
pub r#type: String,
|
||||
pub url: String,
|
||||
}
|
||||
|
||||
// ------------------------------------------------------------------------------------------------
|
||||
|
||||
#[derive(Serialize, Deserialize, Debug, Clone, JsonSchema, Default)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
pub struct ResourceRequirements {
|
||||
#[serde(default, skip_serializing_if = "BTreeMap::is_empty")]
|
||||
pub limits: BTreeMap<String, String>,
|
||||
|
||||
#[serde(default, skip_serializing_if = "BTreeMap::is_empty")]
|
||||
pub requests: BTreeMap<String, String>,
|
||||
}
|
@ -0,0 +1,41 @@
|
||||
use std::collections::BTreeMap;
|
||||
|
||||
use kube::CustomResource;
|
||||
use schemars::JsonSchema;
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
use crate::modules::monitoring::kube_prometheus::crd::rhob_prometheuses::LabelSelector;
|
||||
|
||||
/// MonitoringStack CRD for monitoring.rhobs/v1alpha1
|
||||
#[derive(CustomResource, Serialize, Deserialize, Debug, Clone, JsonSchema)]
|
||||
#[kube(
|
||||
group = "monitoring.rhobs",
|
||||
version = "v1alpha1",
|
||||
kind = "MonitoringStack",
|
||||
plural = "monitoringstacks",
|
||||
namespaced
|
||||
)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
pub struct MonitoringStackSpec {
|
||||
/// Verbosity of logs (e.g. "debug", "info", "warn", "error").
|
||||
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||||
pub log_level: Option<String>,
|
||||
|
||||
/// Retention period for Prometheus TSDB data (e.g. "1d").
|
||||
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||||
pub retention: Option<String>,
|
||||
|
||||
/// Resource selector for workloads monitored by this stack.
|
||||
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||||
pub resource_selector: Option<LabelSelector>,
|
||||
}
|
||||
|
||||
impl Default for MonitoringStackSpec {
|
||||
fn default() -> Self {
|
||||
MonitoringStackSpec {
|
||||
log_level: Some("info".into()),
|
||||
retention: Some("7d".into()),
|
||||
resource_selector: Some(LabelSelector::default()),
|
||||
}
|
||||
}
|
||||
}
|
@ -0,0 +1,57 @@
|
||||
use std::collections::BTreeMap;
|
||||
|
||||
use kube::CustomResource;
|
||||
use schemars::JsonSchema;
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
use crate::modules::monitoring::alert_rule::prometheus_alert_rule::PrometheusAlertRule;
|
||||
|
||||
#[derive(CustomResource, Debug, Serialize, Deserialize, Clone, JsonSchema)]
|
||||
#[kube(
|
||||
group = "monitoring.rhobs.com",
|
||||
version = "v1",
|
||||
kind = "PrometheusRule",
|
||||
plural = "prometheusrules",
|
||||
namespaced
|
||||
)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
pub struct PrometheusRuleSpec {
|
||||
pub groups: Vec<RuleGroup>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
|
||||
pub struct RuleGroup {
|
||||
pub name: String,
|
||||
pub rules: Vec<Rule>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
pub struct Rule {
|
||||
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||||
pub alert: Option<String>,
|
||||
|
||||
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||||
pub expr: Option<String>,
|
||||
|
||||
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||||
pub for_: Option<String>,
|
||||
|
||||
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||||
pub labels: Option<std::collections::BTreeMap<String, String>>,
|
||||
|
||||
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||||
pub annotations: Option<std::collections::BTreeMap<String, String>>,
|
||||
}
|
||||
|
||||
impl From<PrometheusAlertRule> for Rule {
|
||||
fn from(value: PrometheusAlertRule) -> Self {
|
||||
Rule {
|
||||
alert: Some(value.alert),
|
||||
expr: Some(value.expr),
|
||||
for_: value.r#for,
|
||||
labels: Some(value.labels.into_iter().collect::<BTreeMap<_, _>>()),
|
||||
annotations: Some(value.annotations.into_iter().collect::<BTreeMap<_, _>>()),
|
||||
}
|
||||
}
|
||||
}
|
@ -0,0 +1,118 @@
|
||||
use std::collections::BTreeMap;
|
||||
|
||||
use kube::CustomResource;
|
||||
use schemars::JsonSchema;
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
use crate::modules::monitoring::kube_prometheus::types::Operator;
|
||||
|
||||
#[derive(CustomResource, Serialize, Deserialize, Debug, Clone, JsonSchema)]
|
||||
#[kube(
|
||||
group = "monitoring.rhobs",
|
||||
version = "v1",
|
||||
kind = "Prometheus",
|
||||
plural = "prometheuses",
|
||||
namespaced
|
||||
)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
pub struct PrometheusSpec {
|
||||
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||||
pub alerting: Option<PrometheusSpecAlerting>,
|
||||
|
||||
pub service_account_name: String,
|
||||
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||||
pub service_monitor_namespace_selector: Option<LabelSelector>,
|
||||
|
||||
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||||
pub service_monitor_selector: Option<LabelSelector>,
|
||||
|
||||
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||||
pub service_discovery_role: Option<String>,
|
||||
|
||||
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||||
pub pod_monitor_selector: Option<LabelSelector>,
|
||||
|
||||
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||||
pub rule_selector: Option<LabelSelector>,
|
||||
|
||||
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||||
pub rule_namespace_selector: Option<LabelSelector>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema, Default)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
pub struct NamespaceSelector {
|
||||
#[serde(default, skip_serializing_if = "Vec::is_empty")]
|
||||
pub match_names: Vec<String>,
|
||||
}
|
||||
|
||||
/// Contains alerting configuration, specifically Alertmanager endpoints.
|
||||
#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema, Default)]
|
||||
pub struct PrometheusSpecAlerting {
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub alertmanagers: Option<Vec<AlertmanagerEndpoints>>,
|
||||
}
|
||||
|
||||
/// Represents an Alertmanager endpoint configuration used by Prometheus.
|
||||
#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema, Default)]
|
||||
pub struct AlertmanagerEndpoints {
|
||||
/// Name of the Alertmanager Service.
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub name: Option<String>,
|
||||
|
||||
/// Namespace of the Alertmanager Service.
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub namespace: Option<String>,
|
||||
|
||||
/// Port to access on the Alertmanager Service (e.g. "web").
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub port: Option<String>,
|
||||
|
||||
/// Scheme to use for connecting (e.g. "http").
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub scheme: Option<String>,
|
||||
// Other fields like `tls_config`, `path_prefix`, etc., can be added if needed.
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema, Default)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
pub struct LabelSelector {
|
||||
#[serde(default, skip_serializing_if = "BTreeMap::is_empty")]
|
||||
pub match_labels: BTreeMap<String, String>,
|
||||
|
||||
#[serde(default, skip_serializing_if = "Vec::is_empty")]
|
||||
pub match_expressions: Vec<LabelSelectorRequirement>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
pub struct LabelSelectorRequirement {
|
||||
pub key: String,
|
||||
pub operator: Operator,
|
||||
#[serde(default, skip_serializing_if = "Vec::is_empty")]
|
||||
pub values: Vec<String>,
|
||||
}
|
||||
|
||||
impl Default for PrometheusSpec {
|
||||
fn default() -> Self {
|
||||
PrometheusSpec {
|
||||
alerting: None,
|
||||
|
||||
service_account_name: "prometheus".into(),
|
||||
|
||||
// null means "only my namespace"
|
||||
service_monitor_namespace_selector: None,
|
||||
|
||||
// empty selector means match all ServiceMonitors in that namespace
|
||||
service_monitor_selector: Some(LabelSelector::default()),
|
||||
|
||||
service_discovery_role: Some("Endpoints".into()),
|
||||
|
||||
pod_monitor_selector: None,
|
||||
|
||||
rule_selector: None,
|
||||
|
||||
rule_namespace_selector: Some(LabelSelector::default()),
|
||||
}
|
||||
}
|
||||
}
|
@ -0,0 +1,62 @@
|
||||
use k8s_openapi::api::{
|
||||
core::v1::ServiceAccount,
|
||||
rbac::v1::{PolicyRule, Role, RoleBinding, RoleRef, Subject},
|
||||
};
|
||||
use kube::api::ObjectMeta;
|
||||
|
||||
pub fn build_prom_role(role_name: String, namespace: String) -> Role {
|
||||
Role {
|
||||
metadata: ObjectMeta {
|
||||
name: Some(role_name),
|
||||
namespace: Some(namespace),
|
||||
..Default::default()
|
||||
},
|
||||
rules: Some(vec![PolicyRule {
|
||||
api_groups: Some(vec!["".into()]), // core API group
|
||||
resources: Some(vec!["services".into(), "endpoints".into(), "pods".into()]),
|
||||
verbs: vec!["get".into(), "list".into(), "watch".into()],
|
||||
..Default::default()
|
||||
}]),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn build_prom_rolebinding(
|
||||
role_name: String,
|
||||
namespace: String,
|
||||
service_account_name: String,
|
||||
) -> RoleBinding {
|
||||
RoleBinding {
|
||||
metadata: ObjectMeta {
|
||||
name: Some(format!("{}-rolebinding", role_name)),
|
||||
namespace: Some(namespace.clone()),
|
||||
..Default::default()
|
||||
},
|
||||
role_ref: RoleRef {
|
||||
api_group: "rbac.authorization.k8s.io".into(),
|
||||
kind: "Role".into(),
|
||||
name: role_name,
|
||||
},
|
||||
subjects: Some(vec![Subject {
|
||||
kind: "ServiceAccount".into(),
|
||||
name: service_account_name,
|
||||
namespace: Some(namespace.clone()),
|
||||
..Default::default()
|
||||
}]),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn build_prom_service_account(
|
||||
service_account_name: String,
|
||||
namespace: String,
|
||||
) -> ServiceAccount {
|
||||
ServiceAccount {
|
||||
automount_service_account_token: None,
|
||||
image_pull_secrets: None,
|
||||
metadata: ObjectMeta {
|
||||
name: Some(service_account_name),
|
||||
namespace: Some(namespace),
|
||||
..Default::default()
|
||||
},
|
||||
secrets: None,
|
||||
}
|
||||
}
|
@ -2,3 +2,4 @@ pub mod alerts;
|
||||
pub mod k8s_prometheus_alerting_score;
|
||||
#[allow(clippy::module_inception)]
|
||||
pub mod prometheus;
|
||||
pub mod rhob_alerting_score;
|
||||
|
544
harmony/src/modules/prometheus/rhob_alerting_score.rs
Normal file
544
harmony/src/modules/prometheus/rhob_alerting_score.rs
Normal file
@ -0,0 +1,544 @@
|
||||
use std::fs;
|
||||
use std::{collections::BTreeMap, sync::Arc};
|
||||
use tempfile::tempdir;
|
||||
|
||||
use async_trait::async_trait;
|
||||
use kube::api::ObjectMeta;
|
||||
use log::{debug, info};
|
||||
use serde::Serialize;
|
||||
use std::process::Command;
|
||||
|
||||
use crate::modules::monitoring::kube_prometheus::crd::grafana_default_dashboard::build_default_dashboard;
|
||||
use crate::modules::monitoring::kube_prometheus::crd::rhob_alertmanager_config::RHOBObservability;
|
||||
use crate::modules::monitoring::kube_prometheus::crd::rhob_alertmanagers::{
|
||||
Alertmanager, AlertmanagerSpec,
|
||||
};
|
||||
use crate::modules::monitoring::kube_prometheus::crd::rhob_grafana::{
|
||||
Grafana, GrafanaDashboard, GrafanaDashboardSpec, GrafanaDatasource, GrafanaDatasourceConfig,
|
||||
GrafanaDatasourceSpec, GrafanaSpec,
|
||||
};
|
||||
use crate::modules::monitoring::kube_prometheus::crd::rhob_monitoring_stack::{
|
||||
MonitoringStack, MonitoringStackSpec,
|
||||
};
|
||||
use crate::modules::monitoring::kube_prometheus::crd::rhob_prometheus_rules::{
|
||||
PrometheusRule, PrometheusRuleSpec, RuleGroup,
|
||||
};
|
||||
use crate::modules::monitoring::kube_prometheus::crd::rhob_prometheuses::{
|
||||
AlertmanagerEndpoints, LabelSelector, Prometheus, PrometheusSpec, PrometheusSpecAlerting,
|
||||
};
|
||||
use crate::modules::monitoring::kube_prometheus::crd::role::{
|
||||
build_prom_role, build_prom_rolebinding, build_prom_service_account,
|
||||
};
|
||||
use crate::modules::monitoring::kube_prometheus::crd::service_monitor::{
|
||||
ServiceMonitor, ServiceMonitorSpec,
|
||||
};
|
||||
use crate::score::Score;
|
||||
use crate::topology::oberservability::monitoring::AlertReceiver;
|
||||
use crate::topology::{K8sclient, Topology, k8s::K8sClient};
|
||||
use crate::{
|
||||
data::Version,
|
||||
interpret::{Interpret, InterpretError, InterpretName, InterpretStatus, Outcome},
|
||||
inventory::Inventory,
|
||||
};
|
||||
use harmony_types::id::Id;
|
||||
|
||||
use super::prometheus::PrometheusApplicationMonitoring;
|
||||
|
||||
#[derive(Clone, Debug, Serialize)]
|
||||
pub struct RHOBAlertingScore {
|
||||
pub sender: RHOBObservability,
|
||||
pub receivers: Vec<Box<dyn AlertReceiver<RHOBObservability>>>,
|
||||
pub service_monitors: Vec<ServiceMonitor>,
|
||||
pub prometheus_rules: Vec<RuleGroup>,
|
||||
}
|
||||
|
||||
impl<T: Topology + K8sclient + PrometheusApplicationMonitoring<RHOBObservability>> Score<T>
|
||||
for RHOBAlertingScore
|
||||
{
|
||||
fn create_interpret(&self) -> Box<dyn crate::interpret::Interpret<T>> {
|
||||
Box::new(RHOBAlertingInterpret {
|
||||
sender: self.sender.clone(),
|
||||
receivers: self.receivers.clone(),
|
||||
service_monitors: self.service_monitors.clone(),
|
||||
prometheus_rules: self.prometheus_rules.clone(),
|
||||
})
|
||||
}
|
||||
|
||||
fn name(&self) -> String {
|
||||
"RHOB alerting [RHOBAlertingScore]".into()
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct RHOBAlertingInterpret {
|
||||
pub sender: RHOBObservability,
|
||||
pub receivers: Vec<Box<dyn AlertReceiver<RHOBObservability>>>,
|
||||
pub service_monitors: Vec<ServiceMonitor>,
|
||||
pub prometheus_rules: Vec<RuleGroup>,
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl<T: Topology + K8sclient + PrometheusApplicationMonitoring<RHOBObservability>> Interpret<T>
|
||||
for RHOBAlertingInterpret
|
||||
{
|
||||
async fn execute(
|
||||
&self,
|
||||
_inventory: &Inventory,
|
||||
topology: &T,
|
||||
) -> Result<Outcome, InterpretError> {
|
||||
let client = topology.k8s_client().await.unwrap();
|
||||
self.ensure_grafana_operator().await?;
|
||||
self.install_prometheus(&client).await?;
|
||||
self.install_client_kube_metrics().await?;
|
||||
// self.install_grafana(&client).await?;
|
||||
// self.install_receivers(&self.sender, &self.receivers)
|
||||
// .await?;
|
||||
// self.install_rules(&self.prometheus_rules, &client).await?;
|
||||
// self.install_monitors(self.service_monitors.clone(), &client)
|
||||
// .await?;
|
||||
Ok(Outcome::success(
|
||||
"K8s monitoring components installed".to_string(),
|
||||
))
|
||||
}
|
||||
|
||||
fn get_name(&self) -> InterpretName {
|
||||
InterpretName::RHOBAlerting
|
||||
}
|
||||
|
||||
fn get_version(&self) -> Version {
|
||||
todo!()
|
||||
}
|
||||
|
||||
fn get_status(&self) -> InterpretStatus {
|
||||
todo!()
|
||||
}
|
||||
|
||||
fn get_children(&self) -> Vec<Id> {
|
||||
todo!()
|
||||
}
|
||||
}
|
||||
|
||||
impl RHOBAlertingInterpret {
|
||||
async fn crd_exists(&self, crd: &str) -> bool {
|
||||
let status = Command::new("sh")
|
||||
.args(["-c", &format!("kubectl get crd -A | grep -i {crd}")])
|
||||
.status()
|
||||
.map_err(|e| InterpretError::new(format!("could not connect to cluster: {}", e)))
|
||||
.unwrap();
|
||||
|
||||
status.success()
|
||||
}
|
||||
|
||||
async fn install_chart(
|
||||
&self,
|
||||
chart_path: String,
|
||||
chart_name: String,
|
||||
) -> Result<(), InterpretError> {
|
||||
let temp_dir =
|
||||
tempdir().map_err(|e| InterpretError::new(format!("Tempdir error: {}", e)))?;
|
||||
let temp_path = temp_dir.path().to_path_buf();
|
||||
debug!("Using temp directory: {}", temp_path.display());
|
||||
let chart = format!("{}/{}", chart_path, chart_name);
|
||||
let pull_output = Command::new("helm")
|
||||
.args(["pull", &chart, "--destination", temp_path.to_str().unwrap()])
|
||||
.output()
|
||||
.map_err(|e| InterpretError::new(format!("Helm pull error: {}", e)))?;
|
||||
|
||||
if !pull_output.status.success() {
|
||||
return Err(InterpretError::new(format!(
|
||||
"Helm pull failed: {}",
|
||||
String::from_utf8_lossy(&pull_output.stderr)
|
||||
)));
|
||||
}
|
||||
|
||||
let tgz_path = fs::read_dir(&temp_path)
|
||||
.unwrap()
|
||||
.filter_map(|entry| {
|
||||
let entry = entry.ok()?;
|
||||
let path = entry.path();
|
||||
if path.extension()? == "tgz" {
|
||||
Some(path)
|
||||
} else {
|
||||
None
|
||||
}
|
||||
})
|
||||
.next()
|
||||
.ok_or_else(|| InterpretError::new("Could not find pulled Helm chart".into()))?;
|
||||
|
||||
debug!("Installing chart from: {}", tgz_path.display());
|
||||
|
||||
let install_output = Command::new("helm")
|
||||
.args([
|
||||
"upgrade",
|
||||
"--install",
|
||||
&chart_name,
|
||||
tgz_path.to_str().unwrap(),
|
||||
"--namespace",
|
||||
&self.sender.namespace.clone(),
|
||||
"--create-namespace",
|
||||
"--wait",
|
||||
"--atomic",
|
||||
])
|
||||
.output()
|
||||
.map_err(|e| InterpretError::new(format!("Helm install error: {}", e)))?;
|
||||
|
||||
if !install_output.status.success() {
|
||||
return Err(InterpretError::new(format!(
|
||||
"Helm install failed: {}",
|
||||
String::from_utf8_lossy(&install_output.stderr)
|
||||
)));
|
||||
}
|
||||
|
||||
debug!(
|
||||
"Installed chart {}/{} in namespace: {}",
|
||||
&chart_path,
|
||||
&chart_name,
|
||||
self.sender.namespace.clone()
|
||||
);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn ensure_grafana_operator(&self) -> Result<Outcome, InterpretError> {
|
||||
letian marked this conversation as resolved
letian
commented
considering we run the update below, I don't think we need to keep this considering we run the update below, I don't think we need to keep this
|
||||
// if self.crd_exists("grafanas.grafana.integreatly.org").await {
|
||||
// debug!("grafana CRDs already exist — skipping install.");
|
||||
// return Ok(Outcome::success("Grafana CRDs already exist".to_string()));
|
||||
// }
|
||||
|
||||
let _ = Command::new("helm")
|
||||
.args([
|
||||
"repo",
|
||||
"add",
|
||||
"grafana-operator",
|
||||
"https://grafana.github.io/helm-charts",
|
||||
])
|
||||
.output()
|
||||
.unwrap();
|
||||
|
||||
let _ = Command::new("helm")
|
||||
.args(["repo", "update"])
|
||||
.output()
|
||||
.unwrap();
|
||||
|
||||
let output = Command::new("helm")
|
||||
.args([
|
||||
"install",
|
||||
"grafana-operator",
|
||||
"grafana-operator/grafana-operator",
|
||||
"--namespace",
|
||||
&self.sender.namespace.clone(),
|
||||
"--create-namespace",
|
||||
"--set",
|
||||
"namespaceScope=true",
|
||||
])
|
||||
.output()
|
||||
.unwrap();
|
||||
|
||||
if !output.status.success() {
|
||||
return Err(InterpretError::new(format!(
|
||||
"helm install failed:\nstdout: {}\nstderr: {}",
|
||||
String::from_utf8_lossy(&output.stdout),
|
||||
String::from_utf8_lossy(&output.stderr)
|
||||
)));
|
||||
}
|
||||
|
||||
Ok(Outcome::success(format!(
|
||||
"installed grafana operator in ns {}",
|
||||
self.sender.namespace.clone()
|
||||
)))
|
||||
}
|
||||
|
||||
async fn install_prometheus(&self, client: &Arc<K8sClient>) -> Result<Outcome, InterpretError> {
|
||||
debug!(
|
||||
"installing crd-prometheuses in namespace {}",
|
||||
self.sender.namespace.clone()
|
||||
);
|
||||
|
||||
let stack = MonitoringStack {
|
||||
metadata: ObjectMeta {
|
||||
name: Some(format!("{}-monitoring", self.sender.namespace.clone()).into()),
|
||||
namespace: Some(self.sender.namespace.clone()),
|
||||
labels: Some([("coo".into(), "example".into())].into()),
|
||||
..Default::default()
|
||||
},
|
||||
spec: MonitoringStackSpec {
|
||||
log_level: Some("debug".into()),
|
||||
retention: Some("1d".into()),
|
||||
resource_selector: Some(LabelSelector {
|
||||
match_labels: [("app".into(), "demo".into())].into(),
|
||||
..Default::default()
|
||||
}),
|
||||
},
|
||||
};
|
||||
|
||||
client
|
||||
.apply(&stack, Some(&self.sender.namespace.clone()))
|
||||
.await
|
||||
.map_err(|e| InterpretError::new(e.to_string()))?;
|
||||
info!("installed rhob monitoring stack",);
|
||||
// let prom = Prometheus {
|
||||
// metadata: ObjectMeta {
|
||||
// name: Some(self.sender.namespace.clone()),
|
||||
// labels: Some(std::collections::BTreeMap::from([
|
||||
// ("alertmanagerConfig".to_string(), "enabled".to_string()),
|
||||
// ("client".to_string(), "prometheus".to_string()),
|
||||
// ])),
|
||||
// namespace: Some(self.sender.namespace.clone()),
|
||||
// ..Default::default()
|
||||
// },
|
||||
// spec: prom_spec,
|
||||
// };
|
||||
// client
|
||||
// .apply(&role, Some(&self.sender.namespace.clone()))
|
||||
// .await
|
||||
// .map_err(|e| InterpretError::new(e.to_string()))?;
|
||||
// info!(
|
||||
// "installed prometheus role: {:#?} in ns {:#?}",
|
||||
// role.metadata.name.unwrap(),
|
||||
// role.metadata.namespace.unwrap()
|
||||
// );
|
||||
// client
|
||||
// .apply(&rolebinding, Some(&self.sender.namespace.clone()))
|
||||
// .await
|
||||
// .map_err(|e| InterpretError::new(e.to_string()))?;
|
||||
// info!(
|
||||
// "installed prometheus rolebinding: {:#?} in ns {:#?}",
|
||||
// rolebinding.metadata.name.unwrap(),
|
||||
// rolebinding.metadata.namespace.unwrap()
|
||||
// );
|
||||
// client
|
||||
// .apply(&sa, Some(&self.sender.namespace.clone()))
|
||||
// .await
|
||||
// .map_err(|e| InterpretError::new(e.to_string()))?;
|
||||
// info!(
|
||||
// "installed prometheus service account: {:#?} in ns {:#?}",
|
||||
// sa.metadata.name.unwrap(),
|
||||
// sa.metadata.namespace.unwrap()
|
||||
// );
|
||||
// client
|
||||
// .apply(&prom, Some(&self.sender.namespace.clone()))
|
||||
// .await
|
||||
// .map_err(|e| InterpretError::new(e.to_string()))?;
|
||||
// info!(
|
||||
// "installed prometheus: {:#?} in ns {:#?}",
|
||||
// &prom.metadata.name.clone().unwrap(),
|
||||
// &prom.metadata.namespace.clone().unwrap()
|
||||
// );
|
||||
//
|
||||
Ok(Outcome::success(format!(
|
||||
"successfully deployed rhob-prometheus {:#?}",
|
||||
stack
|
||||
)))
|
||||
}
|
||||
|
||||
async fn install_alert_manager(
|
||||
&self,
|
||||
client: &Arc<K8sClient>,
|
||||
) -> Result<Outcome, InterpretError> {
|
||||
let am = Alertmanager {
|
||||
metadata: ObjectMeta {
|
||||
name: Some(self.sender.namespace.clone()),
|
||||
labels: Some(std::collections::BTreeMap::from([(
|
||||
"alertmanagerConfig".to_string(),
|
||||
"enabled".to_string(),
|
||||
)])),
|
||||
namespace: Some(self.sender.namespace.clone()),
|
||||
..Default::default()
|
||||
},
|
||||
spec: AlertmanagerSpec::default(),
|
||||
};
|
||||
client
|
||||
.apply(&am, Some(&self.sender.namespace.clone()))
|
||||
.await
|
||||
.map_err(|e| InterpretError::new(e.to_string()))?;
|
||||
Ok(Outcome::success(format!(
|
||||
"successfully deployed service monitor {:#?}",
|
||||
am.metadata.name
|
||||
)))
|
||||
}
|
||||
async fn install_monitors(
|
||||
&self,
|
||||
mut monitors: Vec<ServiceMonitor>,
|
||||
client: &Arc<K8sClient>,
|
||||
) -> Result<Outcome, InterpretError> {
|
||||
let default_service_monitor = ServiceMonitor {
|
||||
metadata: ObjectMeta {
|
||||
name: Some(self.sender.namespace.clone()),
|
||||
labels: Some(std::collections::BTreeMap::from([
|
||||
("alertmanagerConfig".to_string(), "enabled".to_string()),
|
||||
("client".to_string(), "prometheus".to_string()),
|
||||
(
|
||||
"app.kubernetes.io/name".to_string(),
|
||||
"kube-state-metrics".to_string(),
|
||||
),
|
||||
])),
|
||||
namespace: Some(self.sender.namespace.clone()),
|
||||
..Default::default()
|
||||
},
|
||||
spec: ServiceMonitorSpec::default(),
|
||||
};
|
||||
monitors.push(default_service_monitor);
|
||||
for monitor in monitors.iter() {
|
||||
client
|
||||
.apply(monitor, Some(&self.sender.namespace.clone()))
|
||||
.await
|
||||
.map_err(|e| InterpretError::new(e.to_string()))?;
|
||||
}
|
||||
Ok(Outcome::success(
|
||||
"succesfully deployed service monitors".to_string(),
|
||||
))
|
||||
}
|
||||
|
||||
async fn install_rules(
|
||||
&self,
|
||||
#[allow(clippy::ptr_arg)] rules: &Vec<RuleGroup>,
|
||||
client: &Arc<K8sClient>,
|
||||
) -> Result<Outcome, InterpretError> {
|
||||
let mut prom_rule_spec = PrometheusRuleSpec {
|
||||
groups: rules.clone(),
|
||||
};
|
||||
|
||||
let default_rules_group = RuleGroup {
|
||||
name: "default-rules".to_string(),
|
||||
rules: crate::modules::monitoring::kube_prometheus::crd::rhob_default_rules::build_default_application_rules(),
|
||||
};
|
||||
|
||||
prom_rule_spec.groups.push(default_rules_group);
|
||||
let prom_rules = PrometheusRule {
|
||||
metadata: ObjectMeta {
|
||||
name: Some(self.sender.namespace.clone()),
|
||||
labels: Some(std::collections::BTreeMap::from([
|
||||
("alertmanagerConfig".to_string(), "enabled".to_string()),
|
||||
("role".to_string(), "prometheus-rule".to_string()),
|
||||
])),
|
||||
namespace: Some(self.sender.namespace.clone()),
|
||||
..Default::default()
|
||||
},
|
||||
spec: prom_rule_spec,
|
||||
};
|
||||
client
|
||||
.apply(&prom_rules, Some(&self.sender.namespace.clone()))
|
||||
.await
|
||||
.map_err(|e| InterpretError::new(e.to_string()))?;
|
||||
Ok(Outcome::success(format!(
|
||||
"successfully deployed rules {:#?}",
|
||||
prom_rules.metadata.name
|
||||
)))
|
||||
}
|
||||
|
||||
async fn install_client_kube_metrics(&self) -> Result<Outcome, InterpretError> {
|
||||
self.install_chart(
|
||||
"oci://hub.nationtech.io/harmony".to_string(),
|
||||
"nt-kube-metrics".to_string(),
|
||||
)
|
||||
.await?;
|
||||
Ok(Outcome::success(format!(
|
||||
"Installed client kube metrics in ns {}",
|
||||
&self.sender.namespace.clone()
|
||||
)))
|
||||
}
|
||||
|
||||
async fn install_grafana(&self, client: &Arc<K8sClient>) -> Result<Outcome, InterpretError> {
|
||||
let mut label = BTreeMap::new();
|
||||
label.insert("dashboards".to_string(), "grafana".to_string());
|
||||
let labels = LabelSelector {
|
||||
match_labels: label.clone(),
|
||||
match_expressions: vec![],
|
||||
};
|
||||
let mut json_data = BTreeMap::new();
|
||||
json_data.insert("timeInterval".to_string(), "5s".to_string());
|
||||
let namespace = self.sender.namespace.clone();
|
||||
|
||||
let json = build_default_dashboard(&namespace);
|
||||
|
||||
let graf_data_source = GrafanaDatasource {
|
||||
metadata: ObjectMeta {
|
||||
name: Some(format!(
|
||||
"grafana-datasource-{}",
|
||||
self.sender.namespace.clone()
|
||||
)),
|
||||
namespace: Some(self.sender.namespace.clone()),
|
||||
..Default::default()
|
||||
},
|
||||
spec: GrafanaDatasourceSpec {
|
||||
instance_selector: labels.clone(),
|
||||
allow_cross_namespace_import: Some(false),
|
||||
datasource: GrafanaDatasourceConfig {
|
||||
access: "proxy".to_string(),
|
||||
database: Some("prometheus".to_string()),
|
||||
json_data: Some(json_data),
|
||||
//this is fragile
|
||||
name: format!("prometheus-{}-0", self.sender.namespace.clone()),
|
||||
r#type: "prometheus".to_string(),
|
||||
url: format!(
|
||||
"http://prometheus-operated.{}.svc.cluster.local:9090",
|
||||
self.sender.namespace.clone()
|
||||
),
|
||||
},
|
||||
},
|
||||
};
|
||||
|
||||
client
|
||||
.apply(&graf_data_source, Some(&self.sender.namespace.clone()))
|
||||
.await
|
||||
.map_err(|e| InterpretError::new(e.to_string()))?;
|
||||
|
||||
let graf_dashboard = GrafanaDashboard {
|
||||
metadata: ObjectMeta {
|
||||
name: Some(format!(
|
||||
"grafana-dashboard-{}",
|
||||
self.sender.namespace.clone()
|
||||
)),
|
||||
namespace: Some(self.sender.namespace.clone()),
|
||||
..Default::default()
|
||||
},
|
||||
spec: GrafanaDashboardSpec {
|
||||
resync_period: Some("30s".to_string()),
|
||||
instance_selector: labels.clone(),
|
||||
json,
|
||||
},
|
||||
};
|
||||
|
||||
client
|
||||
.apply(&graf_dashboard, Some(&self.sender.namespace.clone()))
|
||||
.await
|
||||
.map_err(|e| InterpretError::new(e.to_string()))?;
|
||||
|
||||
let grafana = Grafana {
|
||||
metadata: ObjectMeta {
|
||||
name: Some(format!("grafana-{}", self.sender.namespace.clone())),
|
||||
namespace: Some(self.sender.namespace.clone()),
|
||||
labels: Some(label.clone()),
|
||||
..Default::default()
|
||||
},
|
||||
spec: GrafanaSpec {
|
||||
config: None,
|
||||
admin_user: None,
|
||||
admin_password: None,
|
||||
ingress: None,
|
||||
persistence: None,
|
||||
resources: None,
|
||||
},
|
||||
};
|
||||
client
|
||||
.apply(&grafana, Some(&self.sender.namespace.clone()))
|
||||
.await
|
||||
.map_err(|e| InterpretError::new(e.to_string()))?;
|
||||
Ok(Outcome::success(format!(
|
||||
"successfully deployed grafana instance {:#?}",
|
||||
grafana.metadata.name
|
||||
)))
|
||||
}
|
||||
|
||||
async fn install_receivers(
|
||||
&self,
|
||||
sender: &RHOBObservability,
|
||||
receivers: &Vec<Box<dyn AlertReceiver<RHOBObservability>>>,
|
||||
) -> Result<Outcome, InterpretError> {
|
||||
for receiver in receivers.iter() {
|
||||
receiver.install(sender).await.map_err(|err| {
|
||||
InterpretError::new(format!("failed to install receiver: {}", err))
|
||||
})?;
|
||||
}
|
||||
Ok(Outcome::success("successfully deployed receivers".into()))
|
||||
}
|
||||
}
|
Loading…
Reference in New Issue
Block a user
Do we really want to keep different monitoring strategies? Instead of just providing one (opinionated)
Monitoring
feature?Especially when some part of it is copy-paste from the previous monitoring strategy.