Merge pull request 'feat/monitoring-application-feature' (#83) from feat/monitoring-application-feature into master
Some checks are pending
Compile and package harmony_composer / package_harmony_composer (push) Has started running
Run Check Script / check (push) Successful in 2s

Reviewed-on: https://git.nationtech.io/NationTech/harmony/pulls/83
Reviewed-by: johnride <jg@nationtech.io>
This commit is contained in:
johnride 2025-07-05 01:16:08 +00:00
commit 9b7456e148
20 changed files with 499 additions and 33 deletions

View File

@ -4,7 +4,8 @@ use harmony::{
inventory::Inventory,
maestro::Maestro,
modules::application::{
ApplicationScore, RustWebFramework, RustWebapp, features::ContinuousDelivery,
ApplicationScore, RustWebFramework, RustWebapp,
features::{ContinuousDelivery, Monitoring},
},
topology::{K8sAnywhereTopology, Url},
};
@ -24,6 +25,7 @@ async fn main() {
Box::new(ContinuousDelivery {
application: application.clone(),
}),
Box::new(Monitoring {}),
// TODO add monitoring, backups, multisite ha, etc
],
application,

View File

@ -2,18 +2,45 @@ use async_trait::async_trait;
use log::info;
use crate::{
modules::application::ApplicationFeature,
topology::{HelmCommand, Topology},
inventory::Inventory,
modules::{
application::{Application, ApplicationFeature},
monitoring::{
application_monitoring::k8s_application_monitoring_score::ApplicationPrometheusMonitoringScore,
kube_prometheus::{
helm_prometheus_alert_score::HelmPrometheusAlertingScore,
types::{NamespaceSelector, ServiceMonitor},
},
},
},
score::Score,
topology::{HelmCommand, Topology, tenant::TenantManager},
};
#[derive(Debug, Default, Clone)]
pub struct Monitoring {}
#[async_trait]
impl<T: Topology + HelmCommand + 'static> ApplicationFeature<T> for Monitoring {
async fn ensure_installed(&self, _topology: &T) -> Result<(), String> {
impl<T: Topology + HelmCommand + 'static + TenantManager> ApplicationFeature<T> for Monitoring {
async fn ensure_installed(&self, topology: &T) -> Result<(), String> {
info!("Ensuring monitoring is available for application");
todo!("create and execute k8s prometheus score, depends on Will's work")
let mut service_monitor = ServiceMonitor::default();
service_monitor.namespace_selector = Some(NamespaceSelector {
any: true,
match_names: vec![],
});
let alerting_score = ApplicationPrometheusMonitoringScore {
receivers: vec![],
rules: vec![],
service_monitors: vec![service_monitor],
};
alerting_score
.create_interpret()
.execute(&Inventory::empty(), topology)
.await
.unwrap();
Ok(())
}
fn name(&self) -> String {
"Monitoring".to_string()

View File

@ -4,9 +4,12 @@ use serde_yaml::{Mapping, Value};
use crate::{
interpret::{InterpretError, Outcome},
modules::monitoring::kube_prometheus::{
prometheus::{Prometheus, PrometheusReceiver},
types::{AlertChannelConfig, AlertManagerChannelConfig},
modules::monitoring::{
kube_prometheus::{
prometheus::{KubePrometheus, KubePrometheusReceiver},
types::{AlertChannelConfig, AlertManagerChannelConfig},
},
prometheus::prometheus::{Prometheus, PrometheusReceiver},
},
topology::{Url, oberservability::monitoring::AlertReceiver},
};
@ -37,6 +40,26 @@ impl PrometheusReceiver for DiscordWebhook {
}
}
#[async_trait]
impl AlertReceiver<KubePrometheus> for DiscordWebhook {
async fn install(&self, sender: &KubePrometheus) -> Result<Outcome, InterpretError> {
sender.install_receiver(self).await
}
fn clone_box(&self) -> Box<dyn AlertReceiver<KubePrometheus>> {
Box::new(self.clone())
}
}
#[async_trait]
impl KubePrometheusReceiver for DiscordWebhook {
fn name(&self) -> String {
self.name.clone()
}
async fn configure_receiver(&self) -> AlertManagerChannelConfig {
self.get_config().await
}
}
#[async_trait]
impl AlertChannelConfig for DiscordWebhook {
async fn get_config(&self) -> AlertManagerChannelConfig {

View File

@ -4,9 +4,12 @@ use serde_yaml::{Mapping, Value};
use crate::{
interpret::{InterpretError, Outcome},
modules::monitoring::kube_prometheus::{
prometheus::{Prometheus, PrometheusReceiver},
types::{AlertChannelConfig, AlertManagerChannelConfig},
modules::monitoring::{
kube_prometheus::{
prometheus::{KubePrometheus, KubePrometheusReceiver},
types::{AlertChannelConfig, AlertManagerChannelConfig},
},
prometheus::prometheus::{Prometheus, PrometheusReceiver},
},
topology::{Url, oberservability::monitoring::AlertReceiver},
};
@ -36,6 +39,25 @@ impl PrometheusReceiver for WebhookReceiver {
self.get_config().await
}
}
#[async_trait]
impl AlertReceiver<KubePrometheus> for WebhookReceiver {
async fn install(&self, sender: &KubePrometheus) -> Result<Outcome, InterpretError> {
sender.install_receiver(self).await
}
fn clone_box(&self) -> Box<dyn AlertReceiver<KubePrometheus>> {
Box::new(self.clone())
}
}
#[async_trait]
impl KubePrometheusReceiver for WebhookReceiver {
fn name(&self) -> String {
self.name.clone()
}
async fn configure_receiver(&self) -> AlertManagerChannelConfig {
self.get_config().await
}
}
#[async_trait]
impl AlertChannelConfig for WebhookReceiver {

View File

@ -5,13 +5,26 @@ use serde::Serialize;
use crate::{
interpret::{InterpretError, Outcome},
modules::monitoring::kube_prometheus::{
prometheus::{Prometheus, PrometheusRule},
types::{AlertGroup, AlertManagerAdditionalPromRules},
modules::monitoring::{
kube_prometheus::{
prometheus::{KubePrometheus, KubePrometheusRule},
types::{AlertGroup, AlertManagerAdditionalPromRules},
},
prometheus::prometheus::{Prometheus, PrometheusRule},
},
topology::oberservability::monitoring::AlertRule,
};
#[async_trait]
impl AlertRule<KubePrometheus> for AlertManagerRuleGroup {
async fn install(&self, sender: &KubePrometheus) -> Result<Outcome, InterpretError> {
sender.install_rule(&self).await
}
fn clone_box(&self) -> Box<dyn AlertRule<KubePrometheus>> {
Box::new(self.clone())
}
}
#[async_trait]
impl AlertRule<Prometheus> for AlertManagerRuleGroup {
async fn install(&self, sender: &Prometheus) -> Result<Outcome, InterpretError> {
@ -41,6 +54,25 @@ impl PrometheusRule for AlertManagerRuleGroup {
}
}
}
#[async_trait]
impl KubePrometheusRule for AlertManagerRuleGroup {
fn name(&self) -> String {
self.name.clone()
}
async fn configure_rule(&self) -> AlertManagerAdditionalPromRules {
let mut additional_prom_rules = BTreeMap::new();
additional_prom_rules.insert(
self.name.clone(),
AlertGroup {
groups: vec![self.clone()],
},
);
AlertManagerAdditionalPromRules {
rules: additional_prom_rules,
}
}
}
impl AlertManagerRuleGroup {
pub fn new(name: &str, rules: Vec<PrometheusAlertRule>) -> AlertManagerRuleGroup {

View File

@ -0,0 +1,41 @@
use std::sync::{Arc, Mutex};
use serde::Serialize;
use crate::{
modules::monitoring::{
kube_prometheus::types::ServiceMonitor,
prometheus::{prometheus::Prometheus, prometheus_config::PrometheusConfig},
},
score::Score,
topology::{
HelmCommand, Topology,
oberservability::monitoring::{AlertReceiver, AlertRule, AlertingInterpret},
tenant::TenantManager,
},
};
#[derive(Clone, Debug, Serialize)]
pub struct ApplicationPrometheusMonitoringScore {
pub receivers: Vec<Box<dyn AlertReceiver<Prometheus>>>,
pub rules: Vec<Box<dyn AlertRule<Prometheus>>>,
pub service_monitors: Vec<ServiceMonitor>,
}
impl<T: Topology + HelmCommand + TenantManager> Score<T> for ApplicationPrometheusMonitoringScore {
fn create_interpret(&self) -> Box<dyn crate::interpret::Interpret<T>> {
let config = Arc::new(Mutex::new(PrometheusConfig::new()));
config
.try_lock()
.expect("couldn't lock config")
.additional_service_monitors = self.service_monitors.clone();
Box::new(AlertingInterpret {
sender: Prometheus::new(),
receivers: self.receivers.clone(),
rules: self.rules.clone(),
})
}
fn name(&self) -> String {
"ApplicationPrometheusMonitoringScore".to_string()
}
}

View File

@ -0,0 +1 @@
pub mod k8s_application_monitoring_score;

View File

@ -0,0 +1,28 @@
use non_blank_string_rs::NonBlankString;
use std::str::FromStr;
use crate::modules::helm::chart::HelmChartScore;
pub fn grafana_helm_chart_score(ns: &str) -> HelmChartScore {
let values = format!(
r#"
rbac:
namespaced: true
sidecar:
dashboards:
enabled: true
"#
);
HelmChartScore {
namespace: Some(NonBlankString::from_str(ns).unwrap()),
release_name: NonBlankString::from_str("grafana").unwrap(),
chart_name: NonBlankString::from_str("oci://ghcr.io/grafana/helm-charts/grafana").unwrap(),
chart_version: None,
values_overrides: None,
values_yaml: Some(values.to_string()),
create_namespace: true,
install_only: true,
repository: None,
}
}

View File

@ -0,0 +1 @@
pub mod helm_grafana;

View File

@ -0,0 +1 @@
pub mod helm;

View File

@ -68,6 +68,9 @@ pub fn kube_prometheus_helm_chart_score(
let mut values = format!(
r#"
global:
rbac:
create: false
prometheus:
enabled: {prometheus}
prometheusSpec:
@ -242,7 +245,7 @@ prometheus-node-exporter:
cpu: 200m
memory: 250Mi
prometheusOperator:
enabled: {prometheus_operator}
enabled: false
resources:
requests:
cpu: 100m

View File

@ -2,7 +2,7 @@ use std::sync::{Arc, Mutex};
use serde::Serialize;
use super::{helm::config::KubePrometheusConfig, prometheus::Prometheus};
use super::{helm::config::KubePrometheusConfig, prometheus::KubePrometheus};
use crate::{
modules::monitoring::kube_prometheus::types::ServiceMonitor,
score::Score,
@ -15,8 +15,8 @@ use crate::{
#[derive(Clone, Debug, Serialize)]
pub struct HelmPrometheusAlertingScore {
pub receivers: Vec<Box<dyn AlertReceiver<Prometheus>>>,
pub rules: Vec<Box<dyn AlertRule<Prometheus>>>,
pub receivers: Vec<Box<dyn AlertReceiver<KubePrometheus>>>,
pub rules: Vec<Box<dyn AlertRule<KubePrometheus>>>,
pub service_monitors: Vec<ServiceMonitor>,
}
@ -28,7 +28,7 @@ impl<T: Topology + HelmCommand + TenantManager> Score<T> for HelmPrometheusAlert
.expect("couldn't lock config")
.additional_service_monitors = self.service_monitors.clone();
Box::new(AlertingInterpret {
sender: Prometheus::new(),
sender: KubePrometheus::new(),
receivers: self.receivers.clone(),
rules: self.rules.clone(),
})

View File

@ -27,14 +27,14 @@ use super::{
};
#[async_trait]
impl AlertSender for Prometheus {
impl AlertSender for KubePrometheus {
fn name(&self) -> String {
"HelmKubePrometheus".to_string()
}
}
#[async_trait]
impl<T: Topology + HelmCommand + TenantManager> Installable<T> for Prometheus {
impl<T: Topology + HelmCommand + TenantManager> Installable<T> for KubePrometheus {
async fn configure(&self, _inventory: &Inventory, topology: &T) -> Result<(), InterpretError> {
self.configure_with_topology(topology).await;
Ok(())
@ -51,11 +51,11 @@ impl<T: Topology + HelmCommand + TenantManager> Installable<T> for Prometheus {
}
#[derive(Debug)]
pub struct Prometheus {
pub struct KubePrometheus {
pub config: Arc<Mutex<KubePrometheusConfig>>,
}
impl Prometheus {
impl KubePrometheus {
pub fn new() -> Self {
Self {
config: Arc::new(Mutex::new(KubePrometheusConfig::new())),
@ -75,7 +75,7 @@ impl Prometheus {
pub async fn install_receiver(
&self,
prometheus_receiver: &dyn PrometheusReceiver,
prometheus_receiver: &dyn KubePrometheusReceiver,
) -> Result<Outcome, InterpretError> {
let prom_receiver = prometheus_receiver.configure_receiver().await;
debug!(
@ -120,12 +120,12 @@ impl Prometheus {
}
#[async_trait]
pub trait PrometheusReceiver: Send + Sync + std::fmt::Debug {
pub trait KubePrometheusReceiver: Send + Sync + std::fmt::Debug {
fn name(&self) -> String;
async fn configure_receiver(&self) -> AlertManagerChannelConfig;
}
impl Serialize for Box<dyn AlertReceiver<Prometheus>> {
impl Serialize for Box<dyn AlertReceiver<KubePrometheus>> {
fn serialize<S>(&self, _serializer: S) -> Result<S::Ok, S::Error>
where
S: serde::Serializer,
@ -134,19 +134,19 @@ impl Serialize for Box<dyn AlertReceiver<Prometheus>> {
}
}
impl Clone for Box<dyn AlertReceiver<Prometheus>> {
impl Clone for Box<dyn AlertReceiver<KubePrometheus>> {
fn clone(&self) -> Self {
self.clone_box()
}
}
#[async_trait]
pub trait PrometheusRule: Send + Sync + std::fmt::Debug {
pub trait KubePrometheusRule: Send + Sync + std::fmt::Debug {
fn name(&self) -> String;
async fn configure_rule(&self) -> AlertManagerAdditionalPromRules;
}
impl Serialize for Box<dyn AlertRule<Prometheus>> {
impl Serialize for Box<dyn AlertRule<KubePrometheus>> {
fn serialize<S>(&self, _serializer: S) -> Result<S::Ok, S::Error>
where
S: serde::Serializer,
@ -155,7 +155,7 @@ impl Serialize for Box<dyn AlertRule<Prometheus>> {
}
}
impl Clone for Box<dyn AlertRule<Prometheus>> {
impl Clone for Box<dyn AlertRule<KubePrometheus>> {
fn clone(&self) -> Self {
self.clone_box()
}

View File

@ -212,7 +212,7 @@ pub struct ServiceMonitor {
pub name: String,
// # Additional labels to set used for the ServiceMonitorSelector. Together with standard labels from the chart
pub additional_labels: Option<Mapping>,
pub additional_labels: Option<HashMap<String, String>>,
// # Service label for use in assembling a job name of the form <label value>-<port>
// # If no label is specified, the service name is used.
@ -240,7 +240,7 @@ pub struct ServiceMonitor {
// any: bool,
// # Explicit list of namespace names to select
// matchNames: Vec,
pub namespace_selector: Option<Mapping>,
pub namespace_selector: Option<NamespaceSelector>,
// # Endpoints of the selected service to be monitored
pub endpoints: Vec<ServiceMonitorEndpoint>,
@ -250,6 +250,13 @@ pub struct ServiceMonitor {
pub fallback_scrape_protocol: Option<String>,
}
#[derive(Debug, Serialize, Clone)]
#[serde(rename_all = "camelCase")]
pub struct NamespaceSelector {
pub any: bool,
pub match_names: Vec<String>,
}
impl Default for ServiceMonitor {
fn default() -> Self {
Self {

View File

@ -1,4 +1,7 @@
pub mod alert_channel;
pub mod alert_rule;
pub mod application_monitoring;
pub mod grafana;
pub mod kube_prometheus;
pub mod ntfy;
pub mod prometheus;

View File

@ -0,0 +1 @@
pub mod prometheus_helm;

View File

@ -0,0 +1,47 @@
use std::str::FromStr;
use std::sync::{Arc, Mutex};
use non_blank_string_rs::NonBlankString;
use crate::modules::{
helm::chart::HelmChartScore, monitoring::prometheus::prometheus_config::PrometheusConfig,
};
pub fn prometheus_helm_chart_score(config: Arc<Mutex<PrometheusConfig>>) -> HelmChartScore {
let config = config.lock().unwrap();
let ns = config.namespace.clone().unwrap();
let values = format!(
r#"
rbac:
create: true
kube-state-metrics:
enabled: false
nodeExporter:
enabled: false
alertmanager:
enabled: false
pushgateway:
enabled: false
server:
serviceAccount:
create: false
rbac:
create: true
fullnameOverride: prometheus-{ns}
"#
);
HelmChartScore {
namespace: Some(NonBlankString::from_str(&config.namespace.clone().unwrap()).unwrap()),
release_name: NonBlankString::from_str("prometheus").unwrap(),
chart_name: NonBlankString::from_str(
"oci://ghcr.io/prometheus-community/charts/prometheus",
)
.unwrap(),
chart_version: None,
values_overrides: None,
values_yaml: Some(values.to_string()),
create_namespace: true,
install_only: true,
repository: None,
}
}

View File

@ -0,0 +1,3 @@
pub mod helm;
pub mod prometheus;
pub mod prometheus_config;

View File

@ -0,0 +1,190 @@
use std::sync::{Arc, Mutex};
use async_trait::async_trait;
use log::{debug, error};
use serde::Serialize;
use crate::{
interpret::{InterpretError, Outcome},
inventory::Inventory,
modules::monitoring::{
alert_rule::prometheus_alert_rule::AlertManagerRuleGroup,
grafana::helm::helm_grafana::grafana_helm_chart_score,
kube_prometheus::types::{AlertManagerAdditionalPromRules, AlertManagerChannelConfig},
},
score::Score,
topology::{
HelmCommand, Topology,
installable::Installable,
oberservability::monitoring::{AlertReceiver, AlertRule, AlertSender},
tenant::TenantManager,
},
};
use super::{
helm::prometheus_helm::prometheus_helm_chart_score, prometheus_config::PrometheusConfig,
};
#[derive(Debug)]
pub struct Prometheus {
pub config: Arc<Mutex<PrometheusConfig>>,
}
#[async_trait]
impl AlertSender for Prometheus {
fn name(&self) -> String {
"Prometheus".to_string()
}
}
impl Prometheus {
pub fn new() -> Self {
Self {
config: Arc::new(Mutex::new(PrometheusConfig::new())),
}
}
pub async fn configure_with_topology<T: TenantManager>(&self, topology: &T) {
let ns = topology
.get_tenant_config()
.await
.map(|cfg| cfg.name.clone())
.unwrap_or_else(|| "monitoring".to_string());
error!("This must be refactored, see comments in pr #74");
debug!("NS: {}", ns);
self.config.lock().unwrap().namespace = Some(ns);
}
pub async fn install_receiver(
&self,
prometheus_receiver: &dyn PrometheusReceiver,
) -> Result<Outcome, InterpretError> {
let prom_receiver = prometheus_receiver.configure_receiver().await;
debug!(
"adding alert receiver to prometheus config: {:#?}",
&prom_receiver
);
let mut config = self.config.lock().unwrap();
config.alert_receiver_configs.push(prom_receiver);
let prom_receiver_name = prometheus_receiver.name();
debug!("installed alert receiver {}", &prom_receiver_name);
Ok(Outcome::success(format!(
"Sucessfully installed receiver {}",
prom_receiver_name
)))
}
pub async fn install_rule(
&self,
prometheus_rule: &AlertManagerRuleGroup,
) -> Result<Outcome, InterpretError> {
let prometheus_rule = prometheus_rule.configure_rule().await;
let mut config = self.config.lock().unwrap();
config.alert_rules.push(prometheus_rule.clone());
Ok(Outcome::success(format!(
"Successfully installed alert rule: {:#?},",
prometheus_rule
)))
}
pub async fn install_prometheus<T: Topology + HelmCommand + Send + Sync>(
&self,
inventory: &Inventory,
topology: &T,
) -> Result<Outcome, InterpretError> {
prometheus_helm_chart_score(self.config.clone())
.create_interpret()
.execute(inventory, topology)
.await
}
pub async fn install_grafana<T: Topology + HelmCommand + Send + Sync>(
&self,
inventory: &Inventory,
topology: &T,
) -> Result<Outcome, InterpretError> {
let namespace = {
let config = self.config.lock().unwrap();
config.namespace.clone()
};
if let Some(ns) = namespace.as_deref() {
grafana_helm_chart_score(ns)
.create_interpret()
.execute(inventory, topology)
.await
} else {
Err(InterpretError::new(format!(
"could not install grafana, missing namespace",
)))
}
}
}
#[async_trait]
impl<T: Topology + HelmCommand + TenantManager> Installable<T> for Prometheus {
async fn configure(&self, _inventory: &Inventory, topology: &T) -> Result<(), InterpretError> {
self.configure_with_topology(topology).await;
Ok(())
}
async fn ensure_installed(
&self,
inventory: &Inventory,
topology: &T,
) -> Result<(), InterpretError> {
self.install_prometheus(inventory, topology).await?;
let install_grafana = {
let config = self.config.lock().unwrap();
config.grafana
};
if install_grafana {
self.install_grafana(inventory, topology).await?;
}
Ok(())
}
}
#[async_trait]
pub trait PrometheusReceiver: Send + Sync + std::fmt::Debug {
fn name(&self) -> String;
async fn configure_receiver(&self) -> AlertManagerChannelConfig;
}
impl Serialize for Box<dyn AlertReceiver<Prometheus>> {
fn serialize<S>(&self, _serializer: S) -> Result<S::Ok, S::Error>
where
S: serde::Serializer,
{
todo!()
}
}
impl Clone for Box<dyn AlertReceiver<Prometheus>> {
fn clone(&self) -> Self {
self.clone_box()
}
}
#[async_trait]
pub trait PrometheusRule: Send + Sync + std::fmt::Debug {
fn name(&self) -> String;
async fn configure_rule(&self) -> AlertManagerAdditionalPromRules;
}
impl Serialize for Box<dyn AlertRule<Prometheus>> {
fn serialize<S>(&self, _serializer: S) -> Result<S::Ok, S::Error>
where
S: serde::Serializer,
{
todo!()
}
}
impl Clone for Box<dyn AlertRule<Prometheus>> {
fn clone(&self) -> Self {
self.clone_box()
}
}

View File

@ -0,0 +1,34 @@
use crate::modules::monitoring::kube_prometheus::types::{
AlertManagerAdditionalPromRules, AlertManagerChannelConfig, ServiceMonitor,
};
#[derive(Debug)]
pub struct PrometheusConfig {
pub namespace: Option<String>,
pub default_rules: bool,
pub alert_manager: bool,
pub node_exporter: bool,
pub kube_state_metrics: bool,
pub grafana: bool,
pub prometheus_pushgateway: bool,
pub alert_receiver_configs: Vec<AlertManagerChannelConfig>,
pub alert_rules: Vec<AlertManagerAdditionalPromRules>,
pub additional_service_monitors: Vec<ServiceMonitor>,
}
impl PrometheusConfig {
pub fn new() -> Self {
Self {
namespace: None,
default_rules: true,
alert_manager: true,
node_exporter: false,
kube_state_metrics: false,
grafana: true,
prometheus_pushgateway: false,
alert_receiver_configs: vec![],
alert_rules: vec![],
additional_service_monitors: vec![],
}
}
}