Monitor an application within a tenant #86

Merged
letian merged 22 commits from feat/crd-alertmanager-configs into master 2025-08-04 21:42:05 +00:00
10 changed files with 247 additions and 48 deletions
Showing only changes of commit b56a30de3c

Cargo.lock generated
View File

@@ -96,6 +96,12 @@ dependencies = [
"libc",
]
[[package]]
name = "ansi_term"
version = "0.10.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6b3568b48b7cefa6b8ce125f9bb4989e52fbcc29ebea88df04cc7c5f12f70455"
[[package]]
name = "anstream"
version = "0.6.19"
@@ -1240,6 +1246,18 @@ dependencies = [
name = "example"
version = "0.0.0"
[[package]]
name = "example-application-monitoring-with-tenant"
version = "0.1.0"
dependencies = [
"env_logger",
"harmony",
"harmony_cli",
"logging",
"tokio",
"url",
]
[[package]]
name = "example-cli"
version = "0.1.0"
@@ -2808,6 +2826,15 @@ dependencies = [
"log",
]
[[package]]
name = "logging"
version = "0.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "461a8beca676e8ab1bd468c92e9b4436d6368e11e96ae038209e520cfe665e46"
dependencies = [
"ansi_term",
]
[[package]]
name = "lru"
version = "0.12.5"

View File

@@ -0,0 +1,14 @@
[package]
name = "example-application-monitoring-with-tenant"
edition = "2024"
version.workspace = true
readme.workspace = true
license.workspace = true
[dependencies]
env_logger.workspace = true
harmony = { version = "0.1.0", path = "../../harmony" }
harmony_cli = { version = "0.1.0", path = "../../harmony_cli" }
logging = "0.1.0"
tokio.workspace = true
url.workspace = true

View File

@@ -0,0 +1,64 @@
use std::{path::PathBuf, sync::Arc};
use harmony::{
data::Id,
inventory::Inventory,
maestro::Maestro,
modules::{
application::{
ApplicationScore, RustWebFramework, RustWebapp,
features::{ContinuousDelivery, Monitoring},
},
monitoring::alert_channel::{
discord_alert_channel::DiscordWebhook, webhook_receiver::WebhookReceiver,
},
tenant::TenantScore,
},
topology::{K8sAnywhereTopology, Url, tenant::TenantConfig},
};
#[tokio::main]
async fn main() {
env_logger::init();
let topology = K8sAnywhereTopology::from_env();
let mut maestro = Maestro::initialize(Inventory::autoload(), topology)
.await
.unwrap();
//TODO there is a bug where the application is deployed into the namespace matching the
//application name while the tenant is created in the namespace matching the tenant name.
//For the application to be deployed in the tenant namespace, application.name and
//TenantConfig.name must match.
Review

Personal opinion, but I'm not even sure the end user should have to worry about providing names for the tenant, the application, or any other scores. It would be simpler if there is one "global" name for the whole configuration.

Something like this:

    let tenant = TenantScore {
        config: TenantConfig {
            id: Id::from_str("test-tenant-id"),
            ..Default::default()
        },
    };

    let application = Arc::new(RustWebapp {
        domain: Url::Url(url::Url::parse("https://rustapp.harmony.example.com").unwrap()),
        project_root: PathBuf::from("./examples/rust/webapp"),
        framework: Some(RustWebFramework::Leptos),
    });

    let webhook_receiver = WebhookReceiver {
        url: Url::Url(url::Url::parse("https://webhook-doesnt-exist.com").unwrap()),
    };

    let app = ApplicationScore {
        features: vec![Box::new(Monitoring {
            alert_receiver: vec![Box::new(webhook_receiver)],
            application: application.clone(),
        })],
        application,
    };

    harmony_cli::run(
        "example-monitoring",
        vec![Box::new(tenant), Box::new(app)],
        None
    ).await.unwrap();

That would simplify the setup and we would be able to enforce that the tenant name is the same as the application name behind the scenes.
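
A minimal sketch of that enforcement, assuming a hypothetical `Named` trait that no current score implements; `harmony_cli::run` could stamp the one global name onto every score before interpretation:

    // Hypothetical only: none of these items exist in the current API.
    trait Named {
        fn set_name(&mut self, name: &str);
    }

    fn apply_global_name(global_name: &str, scores: &mut [Box<dyn Named>]) {
        for score in scores.iter_mut() {
            // TenantConfig.name and RustWebapp.name can then never diverge.
            score.set_name(global_name);
        }
    }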

let tenant = TenantScore {
config: TenantConfig {
id: Id::from_str("test-tenant-id"),
name: "example-monitoring".to_string(),
..Default::default()
},
};
let application = Arc::new(RustWebapp {
name: "example-monitoring".to_string(),
domain: Url::Url(url::Url::parse("https://rustapp.harmony.example.com").unwrap()),
project_root: PathBuf::from("./examples/rust/webapp"),
framework: Some(RustWebFramework::Leptos),
});
let webhook_receiver = WebhookReceiver {
name: "sample-webhook-receiver".to_string(),
url: Url::Url(url::Url::parse("https://webhook-doesnt-exist.com").unwrap()),
};
let app = ApplicationScore {
features: vec![Box::new(Monitoring {
application: application.clone(),
alert_receiver: vec![Box::new(webhook_receiver)],
service_monitors: vec![],
alert_rules: vec![],
})],
application,
};
maestro.register_all(vec![Box::new(tenant), Box::new(app)]);
harmony_cli::init(maestro, None).await.unwrap();
}

View File

@@ -1,9 +1,10 @@
use std::{process::Command, sync::Arc};
use std::{fs, process::Command, sync::Arc};
use async_trait::async_trait;
use inquire::Confirm;
use log::{debug, info, warn};
use serde::Serialize;
use tempfile::tempdir;
use tokio::sync::OnceCell;
use crate::{
@@ -11,20 +12,24 @@ use crate::{
interpret::{InterpretError, Outcome},
inventory::Inventory,
maestro::Maestro,
modules::k3d::K3DInstallationScore,
modules::{
k3d::K3DInstallationScore,
monitoring::kube_prometheus::crd::prometheus_operator::prometheus_operator_helm_chart_score,
},
topology::LocalhostTopology,
};
use super::{
DeploymentTarget, HelmCommand, K8sclient, MultiTargetTopology, Topology,
k8s::K8sClient,
oberservability::monitoring::PrometheusK8sAnywhere,
tenant::{TenantConfig, TenantManager, k8s::K8sTenantManager},
};
#[derive(Clone, Debug)]
struct K8sState {
client: Arc<K8sClient>,
_source: K8sSource,
source: K8sSource,
message: String,
}
@@ -58,6 +63,47 @@ impl K8sclient for K8sAnywhereTopology {
}
}
#[async_trait]
impl PrometheusK8sAnywhere for K8sAnywhereTopology {
async fn ensure_prometheus_operator(
&self,
namespace: Option<String>,
) -> Result<Outcome, InterpretError> {
if let Some(Some(k8s_state)) = self.k8s_state.get() {
match k8s_state.source {
K8sSource::LocalK3d => {
debug!("Working on LocalK3d, installing prometheus operator");
let output = Command::new("sh")
.args(["-c", "kubectl get all -A | grep -i kube-prome-operator"])
.output()
.map_err(|e| {
InterpretError::new(format!("could not connect to cluster: {}", e))
})?;
if output.status.success() && !output.stdout.is_empty() {
debug!("Prometheus operator is already present, skipping install");
return Ok(Outcome::noop());
}
self.install_k3d_prometheus_operator(namespace).await?;
Ok(Outcome::success("prometheus operator available".to_string()))
}
K8sSource::Kubeconfig => {
//TODO this doesn't feel robust enough to ensure that the operator is indeed
//available
debug!(
"Working outside of LocalK3d topology, skipping install of client prometheus operator"
);
Ok(Outcome::success("prometheus operator available".to_string()))
}
}
} else {
Err(InterpretError::new(
"failed to install prometheus operator".to_string(),
))
}
}
}
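// Sketch only, for the TODO above and not part of this change: instead of
// grepping `kubectl get all`, check for the Prometheus CRD directly (the
// same CRD name this PR already queries via crd_exists elsewhere).
fn prometheus_operator_crd_exists() -> bool {
    std::process::Command::new("kubectl")
        .args(["get", "crd", "prometheuses.monitoring.coreos.com"])
        .output()
        .map(|o| o.status.success())
        .unwrap_or(false)
}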
impl Serialize for K8sAnywhereTopology {
fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
where
@@ -84,6 +130,20 @@ impl K8sAnywhereTopology {
}
}
async fn install_k3d_prometheus_operator(
&self,
namespace: Option<String>,
) -> Result<Outcome, InterpretError> {
let maestro = Maestro::initialize(Inventory::autoload(), LocalhostTopology::new()).await?;
let tenant = self.get_k8s_tenant_manager().unwrap();
let namespace_name = tenant.get_tenant_config().await;
let namespace = namespace_name
.map(|ns| ns.name.clone())
.unwrap_or_else(|| namespace.unwrap_or_else(|| "default".to_string()));
let score = crate::modules::monitoring::kube_prometheus::crd::prometheus_operator::prometheus_operator_helm_chart_score(namespace);
maestro.interpret(Box::new(score)).await
}
fn is_helm_available(&self) -> Result<(), String> {
let version_result = Command::new("helm")
.arg("version")
@@ -134,7 +194,7 @@ impl K8sAnywhereTopology {
Some(client) => {
return Ok(Some(K8sState {
client: Arc::new(client),
_source: K8sSource::Kubeconfig,
source: K8sSource::Kubeconfig,
message: format!("Loaded k8s client from kubeconfig {kubeconfig}"),
}));
}
@@ -185,7 +245,7 @@ impl K8sAnywhereTopology {
let state = match k3d.get_client().await {
Ok(client) => K8sState {
client: Arc::new(K8sClient::new(client)),
_source: K8sSource::LocalK3d,
source: K8sSource::LocalK3d,
message: "Successfully installed K3D cluster and acquired client".to_string(),
},
Err(_) => todo!(),

View File

@@ -76,3 +76,11 @@ pub trait AlertRule<S: AlertSender>: std::fmt::Debug + Send + Sync {
pub trait ScrapeTarget<S: AlertSender> {
async fn install(&self, sender: &S) -> Result<(), InterpretError>;
}
#[async_trait]
pub trait PrometheusK8sAnywhere {
async fn ensure_prometheus_operator(
&self,
namespace: Option<String>,
) -> Result<Outcome, InterpretError>;
}
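// Minimal sketch, illustrative only (assumes LocalhostTopology, Outcome, and
// InterpretError are in scope): a topology with no operator to manage could
// satisfy this bound with a no-op implementation.
#[async_trait]
impl PrometheusK8sAnywhere for LocalhostTopology {
    async fn ensure_prometheus_operator(
        &self,
        _namespace: Option<String>,
    ) -> Result<Outcome, InterpretError> {
        Ok(Outcome::noop())
    }
}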

View File

@@ -231,8 +231,13 @@ impl K8sTenantManager {
{
"to": [
{
//TODO this ip is from the docker network that k3d is running on.
//Since k3d does not deploy kube-apiserver as a pod, its ip address
//needs to be opened up here.
//We need a way to automatically detect the ip address from the docker
//network.
"ipBlock": {
"cidr": "172.23.0.0/16",
"cidr": "172.24.0.0/16",
Review

Depending on the context (e.g. K3dLocal), introduce a new `K3d/sTenantManager` implementation that can fetch from Docker the IP address to set here (see the sketch after this hunk).
}
}
]
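
A sketch of that suggestion, illustrative only: resolve the CIDR from Docker at runtime instead of hardcoding it. The network name is an assumption; k3d names its bridge network `k3d-<cluster-name>`.

    // Ask Docker for the subnet of the k3d bridge network, e.g. "172.24.0.0/16".
    fn k3d_network_cidr(network: &str) -> Option<String> {
        let out = std::process::Command::new("docker")
            .args([
                "network",
                "inspect",
                network,
                "--format",
                "{{(index .IPAM.Config 0).Subnet}}",
            ])
            .output()
            .ok()?;
        out.status
            .success()
            .then(|| String::from_utf8_lossy(&out.stdout).trim().to_string())
    }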

View File

@@ -1,5 +1,6 @@
use std::sync::Arc;
use crate::modules::application::{ApplicationFeature, OCICompliant};
use crate::modules::monitoring::application_monitoring::crd_application_monitoring_alerting::CRDApplicationAlertingScore;
use crate::modules::monitoring::kube_prometheus::crd::crd_alertmanager_config::CRDAlertManagerReceiver;
use crate::modules::monitoring::kube_prometheus::crd::crd_default_rules::build_default_application_rules;
@@ -7,12 +8,11 @@ use crate::modules::monitoring::kube_prometheus::crd::crd_prometheus_rules::Rule
use crate::modules::monitoring::kube_prometheus::crd::service_monitor::{
ServiceMonitor, ServiceMonitorSpec,
};
use crate::modules::monitoring::kube_prometheus::types::ServiceMonitorEndpoint;
use crate::topology::oberservability::monitoring::PrometheusK8sAnywhere;
use crate::{
inventory::Inventory,
modules::{
application::{Application, ApplicationFeature, OCICompliant},
monitoring::{alert_channel::webhook_receiver::WebhookReceiver, ntfy::ntfy::NtfyScore},
modules::monitoring::{
alert_channel::webhook_receiver::WebhookReceiver, ntfy::ntfy::NtfyScore,
},
score::Score,
topology::{HelmCommand, K8sclient, Topology, Url, tenant::TenantManager},
@@ -31,8 +31,15 @@ pub struct Monitoring {
}
#[async_trait]
impl<T: Topology + HelmCommand + 'static + TenantManager + K8sclient + std::fmt::Debug>
ApplicationFeature<T> for Monitoring
impl<
T: Topology
+ HelmCommand
+ 'static
+ TenantManager
+ K8sclient
+ std::fmt::Debug
+ PrometheusK8sAnywhere,
> ApplicationFeature<T> for Monitoring
{
async fn ensure_installed(&self, topology: &T) -> Result<(), String> {
info!("Ensuring monitoring is available for application");

View File

@@ -1,7 +1,6 @@
use std::fs;
use std::{collections::BTreeMap, sync::Arc};
use tempfile::tempdir;
use tokio::io::AsyncWriteExt;
use async_trait::async_trait;
use kube::api::ObjectMeta;
@@ -21,6 +20,7 @@ use crate::modules::monitoring::kube_prometheus::crd::crd_prometheus_rules::{
};
use crate::modules::monitoring::kube_prometheus::crd::grafana_default_dashboard::build_default_dashboard;
use crate::modules::monitoring::kube_prometheus::crd::service_monitor::ServiceMonitor;
use crate::topology::oberservability::monitoring::PrometheusK8sAnywhere;
use crate::topology::{K8sclient, Topology, k8s::K8sClient};
use crate::{
data::{Id, Version},
@@ -45,7 +45,7 @@ pub struct CRDApplicationAlertingScore {
pub prometheus_rules: Vec<RuleGroup>,
}
impl<T: Topology + K8sclient> Score<T> for CRDApplicationAlertingScore {
impl<T: Topology + K8sclient + PrometheusK8sAnywhere> Score<T> for CRDApplicationAlertingScore {
fn create_interpret(&self) -> Box<dyn crate::interpret::Interpret<T>> {
Box::new(CRDApplicationAlertingInterpret {
namespace: self.namespace.clone(),
@@ -69,17 +69,22 @@ pub struct CRDApplicationAlertingInterpret {
}
#[async_trait]
impl<T: Topology + K8sclient> Interpret<T> for CRDApplicationAlertingInterpret {
impl<T: Topology + K8sclient + PrometheusK8sAnywhere> Interpret<T>
for CRDApplicationAlertingInterpret
{
async fn execute(
&self,
_inventory: &Inventory,
topology: &T,
) -> Result<Outcome, InterpretError> {
let client = topology.k8s_client().await.unwrap();
self.ensure_prometheus_operator().await?;
topology
.ensure_prometheus_operator(Some(self.namespace.clone()))
.await?;
self.ensure_grafana_operator().await?;
self.install_prometheus(&client).await?;
self.install_alert_manager(&client).await?;
self.install_client_kube_metrics().await?;
self.install_grafana(&client).await?;
self.install_receivers(&self.receivers, &client).await?;
self.install_rules(&self.prometheus_rules, &client).await?;
@@ -117,26 +122,18 @@ impl CRDApplicationAlertingInterpret {
matches!(output, Ok(o) if o.status.success())
}
async fn ensure_prometheus_operator(&self) -> Result<Outcome, InterpretError> {
if self.crd_exists("prometheuses.monitoring.coreos.com").await {
debug!("Prometheus CRDs already exist — skipping install.");
return Ok(Outcome::success(
"Prometheus CRDs already exist".to_string(),
));
}
async fn install_chart(
&self,
chart_path: String,
chart_name: String,
) -> Result<(), InterpretError> {
let temp_dir =
tempdir().map_err(|e| InterpretError::new(format!("Tempdir error: {}", e)))?;
let temp_path = temp_dir.path().to_path_buf();
debug!("Using temp directory: {}", temp_path.display());
let chart = format!("{}/{}", chart_path, chart_name);
let pull_output = Command::new("helm")
.args(&[
"pull",
"oci://hub.nationtech.io/harmony/nt-prometheus-operator",
"--destination",
temp_path.to_str().unwrap(),
])
.args(&["pull", &chart, "--destination", temp_path.to_str().unwrap()])
.output()
.await
.map_err(|e| InterpretError::new(format!("Helm pull error: {}", e)))?;
@@ -167,7 +164,7 @@ impl CRDApplicationAlertingInterpret {
let install_output = Command::new("helm")
.args(&[
"install",
"nt-prometheus-operator",
&chart_name,
tgz_path.to_str().unwrap(),
"--namespace",
&self.namespace,
@@ -187,13 +184,10 @@ impl CRDApplicationAlertingInterpret {
}
debug!(
"Installed prometheus operator in namespace: {}",
self.namespace
"Installed chart {}/{} in namespace: {}",
&chart_path, &chart_name, self.namespace
);
Ok(Outcome::success(format!(
"Installed prometheus operator in namespace {}",
self.namespace
)))
Ok(())
}
async fn ensure_grafana_operator(&self) -> Result<Outcome, InterpretError> {
@@ -219,7 +213,7 @@ impl CRDApplicationAlertingInterpret {
.await
.unwrap();
let _ = Command::new("helm")
let output = Command::new("helm")
.args(&[
"install",
"grafana-operator",
@@ -227,11 +221,21 @@ impl CRDApplicationAlertingInterpret {
"--namespace",
&self.namespace,
"--create-namespace",
"--set",
"namespaceScope=true",
])
.output()
.await
.unwrap();
if !output.status.success() {
return Err(InterpretError::new(format!(
"helm install failed:\nstdout: {}\nstderr: {}",
String::from_utf8_lossy(&output.stdout),
String::from_utf8_lossy(&output.stderr)
)));
}
Ok(Outcome::success(format!(
"installed grafana operator in ns {}",
self.namespace.clone()
@@ -294,10 +298,10 @@ impl CRDApplicationAlertingInterpret {
let prom = Prometheus {
metadata: ObjectMeta {
name: Some(self.namespace.clone()),
labels: Some(std::collections::BTreeMap::from([(
"alertmanagerConfig".to_string(),
"enabled".to_string(),
)])),
labels: Some(std::collections::BTreeMap::from([
("alertmanagerConfig".to_string(), "enabled".to_string()),
("client".to_string(), "prometheus".to_string()),
])),
namespace: Some(self.namespace.clone()),
..Default::default()
},
@@ -418,6 +422,18 @@ impl CRDApplicationAlertingInterpret {
)))
}
async fn install_client_kube_metrics(&self) -> Result<Outcome, InterpretError> {
self.install_chart(
"oci://hub.nationtech.io/harmony".to_string(),
"nt-kube-metrics".to_string(),
)
.await?;
Ok(Outcome::success(format!(
"Installed client kube metrics in ns {}",
&self.namespace
)))
}
async fn install_grafana(&self, client: &Arc<K8sClient>) -> Result<Outcome, InterpretError> {
let mut label = BTreeMap::new();
label.insert("dashboards".to_string(), "grafana".to_string());

View File

@@ -8,10 +8,8 @@ pub fn grafana_operator_helm_chart_score(ns: String) -> HelmChartScore {
HelmChartScore {
namespace: Some(NonBlankString::from_str(&ns).unwrap()),
release_name: NonBlankString::from_str("grafana_operator").unwrap(),
chart_name: NonBlankString::from_str(
"grafana-operator oci://ghcr.io/grafana/helm-charts/grafana-operator",
)
.unwrap(),
chart_name: NonBlankString::from_str("oci://ghcr.io/grafana/helm-charts/grafana-operator")
.unwrap(),
chart_version: None,
values_overrides: None,
values_yaml: None,

View File

@@ -9,7 +9,7 @@ pub fn prometheus_operator_helm_chart_score(ns: String) -> HelmChartScore {
namespace: Some(NonBlankString::from_str(&ns).unwrap()),
release_name: NonBlankString::from_str("prometheus-operator").unwrap(),
chart_name: NonBlankString::from_str(
"grafana-operator oci://ghcr.io/grafana/helm-charts/grafana-operator",
"oci://hub.nationtech.io/harmony/nt-prometheus-operator",
)
.unwrap(),
chart_version: None,