Merge pull request 'monitoring-alerting' (#30) from monitoring-alerting into master
Reviewed-on: https://git.nationtech.io/NationTech/harmony/pulls/30
This commit is contained in:
commit
4a9b95acad
@ -1,3 +1,4 @@
|
|||||||
|
pub mod monitoring_alerting;
|
||||||
mod ha_cluster;
|
mod ha_cluster;
|
||||||
mod host_binding;
|
mod host_binding;
|
||||||
mod http;
|
mod http;
|
||||||
|
|||||||
108
harmony/src/domain/topology/monitoring_alerting.rs
Normal file
108
harmony/src/domain/topology/monitoring_alerting.rs
Normal file
@ -0,0 +1,108 @@
|
|||||||
|
use std::sync::Arc;
|
||||||
|
|
||||||
|
use log::warn;
|
||||||
|
use tokio::sync::OnceCell;
|
||||||
|
|
||||||
|
use k8s_openapi::api::core::v1::Pod;
|
||||||
|
use kube::{
|
||||||
|
Client,
|
||||||
|
api::{Api, ListParams},
|
||||||
|
};
|
||||||
|
|
||||||
|
use async_trait::async_trait;
|
||||||
|
|
||||||
|
use crate::{
|
||||||
|
interpret::{InterpretError, Outcome},
|
||||||
|
inventory::Inventory,
|
||||||
|
maestro::Maestro,
|
||||||
|
modules::monitoring::monitoring_alerting::MonitoringAlertingStackScore,
|
||||||
|
score::Score,
|
||||||
|
};
|
||||||
|
|
||||||
|
use super::{HelmCommand, K8sAnywhereTopology, Topology, k8s::K8sClient};
|
||||||
|
|
||||||
|
/// Snapshot describing a monitoring stack that was found running.
#[derive(Debug, Clone)]
struct MonitoringState {
    // Human-readable description of what was detected.
    message: String,
}
|
||||||
|
|
||||||
|
#[derive(Debug)]
|
||||||
|
pub struct MonitoringAlertingTopology {
|
||||||
|
monitoring_state: OnceCell<Option<MonitoringState>>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl MonitoringAlertingTopology {
|
||||||
|
pub fn new() -> Self {
|
||||||
|
Self {
|
||||||
|
monitoring_state: OnceCell::new(),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn get_monitoring_state(&self) -> Result<Option<MonitoringState>, InterpretError> {
|
||||||
|
let client = Client::try_default()
|
||||||
|
.await
|
||||||
|
.map_err(|e| InterpretError::new(format!("Kubernetes client error: {}", e)))?;
|
||||||
|
|
||||||
|
for ns in &["monitoring", "openshift-monitoring"] {
|
||||||
|
let pods: Api<Pod> = Api::namespaced(client.clone(), ns);
|
||||||
|
//TODO hardcoding the label is a problem
|
||||||
|
//check all pods are ready
|
||||||
|
let lp = ListParams::default().labels("app.kubernetes.io/name=prometheus");
|
||||||
|
|
||||||
|
match pods.list(&lp).await {
|
||||||
|
Ok(pod_list) => {
|
||||||
|
for p in pod_list.items {
|
||||||
|
if let Some(status) = p.status {
|
||||||
|
if let Some(conditions) = status.conditions {
|
||||||
|
if conditions
|
||||||
|
.iter()
|
||||||
|
.any(|c| c.type_ == "Ready" && c.status == "True")
|
||||||
|
{
|
||||||
|
return Ok(Some(MonitoringState {
|
||||||
|
message: format!(
|
||||||
|
"Prometheus is ready in namespace: {}",
|
||||||
|
ns
|
||||||
|
),
|
||||||
|
}));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Err(e) => {
|
||||||
|
warn!("Failed to query pods in ns {}: {}", ns, e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(None)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Makes boxed scores cloneable by delegating to the score's own `clone_box`.
impl<T: Topology> Clone for Box<dyn Score<T>> {
    fn clone(&self) -> Self {
        self.clone_box()
    }
}
|
||||||
|
|
||||||
|
#[async_trait]
|
||||||
|
impl Topology for MonitoringAlertingTopology {
|
||||||
|
fn name(&self) -> &str {
|
||||||
|
"MonitoringAlertingTopology"
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn ensure_ready(&self) -> Result<Outcome, InterpretError> {
|
||||||
|
if let Some(state) = self.get_monitoring_state().await? {
|
||||||
|
// Monitoring stack is already ready — stop app.
|
||||||
|
println!("{}", state.message);
|
||||||
|
std::process::exit(0);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Monitoring not found — proceed with installation.
|
||||||
|
Ok(Outcome::success(
|
||||||
|
"Monitoring stack installation started.".to_string(),
|
||||||
|
))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Marker impl: declares that this topology satisfies `HelmCommand`. No methods
// are overridden here, so whatever the trait provides by default applies.
impl HelmCommand for MonitoringAlertingTopology {}
|
||||||
@ -12,3 +12,4 @@ pub mod load_balancer;
|
|||||||
pub mod okd;
|
pub mod okd;
|
||||||
pub mod opnsense;
|
pub mod opnsense;
|
||||||
pub mod tftp;
|
pub mod tftp;
|
||||||
|
pub mod monitoring;
|
||||||
|
|||||||
49
harmony/src/modules/monitoring/kube_prometheus.rs
Normal file
49
harmony/src/modules/monitoring/kube_prometheus.rs
Normal file
@ -0,0 +1,49 @@
|
|||||||
|
use std::str::FromStr;
|
||||||
|
|
||||||
|
use non_blank_string_rs::NonBlankString;
|
||||||
|
|
||||||
|
use crate::modules::helm::chart::HelmChartScore;
|
||||||
|
|
||||||
|
/// Builds the Helm chart score that installs the `kube-prometheus-stack` chart
/// as release `kube-prometheus` into namespace `ns`.
///
/// The score carries a values document declaring one additional Prometheus rule
/// group (`pvc-alerts`) with a warning-severity alert on PVC usage. No chart
/// version is pinned, namespace creation is enabled and `install_only` is set.
///
/// # Panics
///
/// Panics if `NonBlankString::from_str` rejects `ns` (presumably when it is
/// blank — confirm against the `non_blank_string_rs` documentation).
///
/// NOTE(review): the values document depends on YAML indentation for its
/// nesting; verify the literal below against the committed file.
pub fn kube_prometheus_score(ns: &str) -> HelmChartScore {
|
||||||
|
//TODO this should be made into a rule with default formatting that can be easily passed as a vec
|
||||||
|
//to the overrides or something; leaving the user to deal with formatting here seems bad
|
||||||
|
let values = r#"
|
||||||
|
additionalPrometheusRulesMap:
|
||||||
|
pvc-alerts:
|
||||||
|
groups:
|
||||||
|
- name: pvc-alerts
|
||||||
|
rules:
|
||||||
|
- alert: 'PVC Fill Over 95 Percent In 2 Days'
|
||||||
|
expr: |
|
||||||
|
(
|
||||||
|
kubelet_volume_stats_used_bytes
|
||||||
|
/
|
||||||
|
kubelet_volume_stats_capacity_bytes
|
||||||
|
) > 0.95
|
||||||
|
AND
|
||||||
|
predict_linear(kubelet_volume_stats_used_bytes[2d], 2 * 24 * 60 * 60)
|
||||||
|
/
|
||||||
|
kubelet_volume_stats_capacity_bytes
|
||||||
|
> 0.95
|
||||||
|
for: 1m
|
||||||
|
labels:
|
||||||
|
severity: warning
|
||||||
|
annotations:
|
||||||
|
description: The PVC {{ $labels.persistentvolumeclaim }} in namespace {{ $labels.namespace }} is predicted to fill over 95% in less than 2 days.
|
||||||
|
title: PVC {{ $labels.persistentvolumeclaim }} in namespace {{ $labels.namespace }} will fill over 95% in less than 2 days
|
||||||
|
"#;
|
||||||
|
HelmChartScore {
|
||||||
|
namespace: Some(NonBlankString::from_str(ns).unwrap()),
|
||||||
|
release_name: NonBlankString::from_str("kube-prometheus").unwrap(),
|
||||||
|
chart_name: NonBlankString::from_str(
|
||||||
|
"oci://ghcr.io/prometheus-community/charts/kube-prometheus-stack", //use kube prometheus chart which includes grafana, prometheus, alert
|
||||||
|
//manager, etc
|
||||||
|
)
|
||||||
|
.unwrap(),
|
||||||
|
chart_version: None,
|
||||||
|
values_overrides: None,
|
||||||
|
values_yaml: Some(values.to_string()),
|
||||||
|
create_namespace: true,
|
||||||
|
install_only: true,
|
||||||
|
}
|
||||||
|
}
|
||||||
3
harmony/src/modules/monitoring/mod.rs
Normal file
3
harmony/src/modules/monitoring/mod.rs
Normal file
@ -0,0 +1,3 @@
|
|||||||
|
pub mod monitoring_alerting;
|
||||||
|
mod kube_prometheus;
|
||||||
|
|
||||||
144
harmony/src/modules/monitoring/monitoring_alerting.rs
Normal file
144
harmony/src/modules/monitoring/monitoring_alerting.rs
Normal file
@ -0,0 +1,144 @@
|
|||||||
|
use async_trait::async_trait;
|
||||||
|
use log::info;
|
||||||
|
use serde::Serialize;
|
||||||
|
|
||||||
|
use crate::{
|
||||||
|
data::{Id, Version},
|
||||||
|
interpret::{Interpret, InterpretError, InterpretName, InterpretStatus, Outcome},
|
||||||
|
inventory::Inventory,
|
||||||
|
maestro::Maestro,
|
||||||
|
score::{CloneBoxScore, Score},
|
||||||
|
topology::{HelmCommand, Topology, monitoring_alerting::MonitoringAlertingTopology},
|
||||||
|
};
|
||||||
|
|
||||||
|
use super::kube_prometheus::kube_prometheus_score;
|
||||||
|
|
||||||
|
#[derive(Debug)]
|
||||||
|
pub struct MonitoringAlertingStackScore {
|
||||||
|
//TODO add documenation to explain why its here
|
||||||
|
//keeps it open for the end user to specify which stack they want
|
||||||
|
//if it isnt default kube-prometheus
|
||||||
|
pub monitoring_stack: Vec<Box<dyn Score<MonitoringAlertingTopology>>>,
|
||||||
|
pub namespace: String,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl MonitoringAlertingStackScore {
|
||||||
|
pub fn new(
|
||||||
|
monitoring_stack: Vec<Box<dyn Score<MonitoringAlertingTopology>>>,
|
||||||
|
namespace: String,
|
||||||
|
) -> Self {
|
||||||
|
Self {
|
||||||
|
monitoring_stack,
|
||||||
|
namespace,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Default for MonitoringAlertingStackScore {
|
||||||
|
fn default() -> Self {
|
||||||
|
let ns = "monitoring";
|
||||||
|
Self {
|
||||||
|
monitoring_stack: vec![Box::new(kube_prometheus_score(ns))],
|
||||||
|
namespace: ns.to_string(),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
impl Clone for MonitoringAlertingStackScore {
|
||||||
|
fn clone(&self) -> Self {
|
||||||
|
Self {
|
||||||
|
monitoring_stack: self
|
||||||
|
.monitoring_stack
|
||||||
|
.iter()
|
||||||
|
.map(|s| s.clone_box())
|
||||||
|
.collect(),
|
||||||
|
namespace: self.namespace.clone(),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Serialize for MonitoringAlertingStackScore {
|
||||||
|
fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
|
||||||
|
where
|
||||||
|
S: serde::Serializer,
|
||||||
|
{
|
||||||
|
use serde::ser::SerializeStruct;
|
||||||
|
let mut s = serializer.serialize_struct("MonitoringAlertingStackScore", 1)?;
|
||||||
|
let monitoring_values: Vec<_> = self
|
||||||
|
.monitoring_stack
|
||||||
|
.iter()
|
||||||
|
.map(|m| m.serialize())
|
||||||
|
.collect();
|
||||||
|
s.serialize_field("monitoring", &monitoring_values)?;
|
||||||
|
s.end()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<T:Topology> Score<T> for MonitoringAlertingStackScore {
|
||||||
|
fn create_interpret(&self) -> Box<dyn Interpret<T>> {
|
||||||
|
Box::new(MonitoringAlertingStackInterpret {
|
||||||
|
score: MonitoringAlertingStackScore {
|
||||||
|
monitoring_stack: self
|
||||||
|
.monitoring_stack
|
||||||
|
.iter()
|
||||||
|
.map(|s| s.clone_box())
|
||||||
|
.collect(),
|
||||||
|
namespace: self.namespace.clone(),
|
||||||
|
},
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
fn name(&self) -> String {
|
||||||
|
format!("MonitoringAlertingStackScore")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Debug)]
|
||||||
|
struct MonitoringAlertingStackInterpret {
|
||||||
|
pub score: MonitoringAlertingStackScore,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[async_trait]
|
||||||
|
impl <T: Topology> Interpret<T> for MonitoringAlertingStackInterpret {
|
||||||
|
async fn execute(
|
||||||
|
&self,
|
||||||
|
_inventory: &Inventory,
|
||||||
|
_topology: &T,
|
||||||
|
) -> Result<Outcome, InterpretError> {
|
||||||
|
let inventory = Inventory::autoload();
|
||||||
|
let topology = MonitoringAlertingTopology::new();
|
||||||
|
let maestro = match Maestro::initialize(inventory, topology).await {
|
||||||
|
Ok(m) => m,
|
||||||
|
Err(e) => {
|
||||||
|
println!("failed to initialize Maestro: {}", e);
|
||||||
|
std::process::exit(1);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
let scores_vec = self.score.monitoring_stack.clone();
|
||||||
|
for s in scores_vec{
|
||||||
|
info!("Running: {}", s.name());
|
||||||
|
maestro.interpret(s).await?;
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(Outcome::success(format!(
|
||||||
|
"monitoring stack installed in {} namespace",
|
||||||
|
self.score.namespace
|
||||||
|
)))
|
||||||
|
}
|
||||||
|
|
||||||
|
fn get_name(&self) -> InterpretName {
|
||||||
|
todo!()
|
||||||
|
}
|
||||||
|
|
||||||
|
fn get_version(&self) -> Version {
|
||||||
|
todo!()
|
||||||
|
}
|
||||||
|
|
||||||
|
fn get_status(&self) -> InterpretStatus {
|
||||||
|
todo!()
|
||||||
|
}
|
||||||
|
|
||||||
|
fn get_children(&self) -> Vec<Id> {
|
||||||
|
todo!()
|
||||||
|
}
|
||||||
|
}
|
||||||
Loading…
Reference in New Issue
Block a user