fix: prometheus operator and grafana operator deploy to the application namespace on local k3d

If kube-prometheus-operator is already present, installation of prometheus-operator is skipped.
Outside of a local k3d installation, installation of the operator is skipped.
This commit is contained in:
Willem 2025-07-28 15:15:10 -04:00
parent b9e208f4cf
commit b56a30de3c
10 changed files with 247 additions and 48 deletions

27
Cargo.lock generated
View File

@ -96,6 +96,12 @@ dependencies = [
"libc", "libc",
] ]
[[package]]
name = "ansi_term"
version = "0.10.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6b3568b48b7cefa6b8ce125f9bb4989e52fbcc29ebea88df04cc7c5f12f70455"
[[package]] [[package]]
name = "anstream" name = "anstream"
version = "0.6.19" version = "0.6.19"
@ -1240,6 +1246,18 @@ dependencies = [
name = "example" name = "example"
version = "0.0.0" version = "0.0.0"
[[package]]
name = "example-application-monitoring-with-tenant"
version = "0.1.0"
dependencies = [
"env_logger",
"harmony",
"harmony_cli",
"logging",
"tokio",
"url",
]
[[package]] [[package]]
name = "example-cli" name = "example-cli"
version = "0.1.0" version = "0.1.0"
@ -2808,6 +2826,15 @@ dependencies = [
"log", "log",
] ]
[[package]]
name = "logging"
version = "0.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "461a8beca676e8ab1bd468c92e9b4436d6368e11e96ae038209e520cfe665e46"
dependencies = [
"ansi_term",
]
[[package]] [[package]]
name = "lru" name = "lru"
version = "0.12.5" version = "0.12.5"

View File

@ -0,0 +1,14 @@
[package]
name = "example-application-monitoring-with-tenant"
edition = "2024"
version.workspace = true
readme.workspace = true
license.workspace = true
[dependencies]
env_logger.workspace = true
harmony = { version = "0.1.0", path = "../../harmony" }
harmony_cli = { version = "0.1.0", path = "../../harmony_cli" }
logging = "0.1.0"
tokio.workspace = true
url.workspace = true

View File

@ -0,0 +1,64 @@
use std::{path::PathBuf, sync::Arc};
use harmony::{
data::Id,
inventory::Inventory,
maestro::Maestro,
modules::{
application::{
ApplicationScore, RustWebFramework, RustWebapp,
features::{ContinuousDelivery, Monitoring},
},
monitoring::alert_channel::{
discord_alert_channel::DiscordWebhook, webhook_receiver::WebhookReceiver,
},
tenant::TenantScore,
},
topology::{K8sAnywhereTopology, Url, tenant::TenantConfig},
};
/// Example entry point: deploys a tenant and a Rust web application with monitoring.
///
/// Builds a `Maestro` over a `K8sAnywhereTopology` created from the environment,
/// registers a `TenantScore` and an `ApplicationScore` carrying the `Monitoring`
/// feature, then hands the maestro to `harmony_cli`.
#[tokio::main]
async fn main() {
    env_logger::init();

    let k8s_topology = K8sAnywhereTopology::from_env();
    let mut maestro = Maestro::initialize(Inventory::autoload(), k8s_topology)
        .await
        .unwrap();

    // TODO: there is a bug where the application is deployed into the namespace matching
    // the application name, while the tenant is created in the namespace matching the
    // tenant name. For the application to land in the tenant namespace, the application
    // name and `TenantConfig.name` must match — hence the single shared value below.
    let shared_name = "example-monitoring";

    let tenant_score = TenantScore {
        config: TenantConfig {
            id: Id::from_str("test-tenant-id"),
            name: shared_name.to_string(),
            ..Default::default()
        },
    };

    let webapp = Arc::new(RustWebapp {
        name: shared_name.to_string(),
        domain: Url::Url(url::Url::parse("https://rustapp.harmony.example.com").unwrap()),
        project_root: PathBuf::from("./examples/rust/webapp"),
        framework: Some(RustWebFramework::Leptos),
    });

    // Alert receiver pointing at a placeholder webhook URL.
    let receiver = WebhookReceiver {
        name: "sample-webhook-receiver".to_string(),
        url: Url::Url(url::Url::parse("https://webhook-doesnt-exist.com").unwrap()),
    };

    let monitoring = Monitoring {
        application: webapp.clone(),
        alert_receiver: vec![Box::new(receiver)],
        service_monitors: vec![],
        alert_rules: vec![],
    };

    let application_score = ApplicationScore {
        features: vec![Box::new(monitoring)],
        application: webapp,
    };

    maestro.register_all(vec![Box::new(tenant_score), Box::new(application_score)]);
    harmony_cli::init(maestro, None).await.unwrap();
}

View File

@ -1,9 +1,10 @@
use std::{process::Command, sync::Arc}; use std::{fs, process::Command, sync::Arc};
use async_trait::async_trait; use async_trait::async_trait;
use inquire::Confirm; use inquire::Confirm;
use log::{debug, info, warn}; use log::{debug, info, warn};
use serde::Serialize; use serde::Serialize;
use tempfile::tempdir;
use tokio::sync::OnceCell; use tokio::sync::OnceCell;
use crate::{ use crate::{
@ -11,20 +12,24 @@ use crate::{
interpret::{InterpretError, Outcome}, interpret::{InterpretError, Outcome},
inventory::Inventory, inventory::Inventory,
maestro::Maestro, maestro::Maestro,
modules::k3d::K3DInstallationScore, modules::{
k3d::K3DInstallationScore,
monitoring::kube_prometheus::crd::prometheus_operator::prometheus_operator_helm_chart_score,
},
topology::LocalhostTopology, topology::LocalhostTopology,
}; };
use super::{ use super::{
DeploymentTarget, HelmCommand, K8sclient, MultiTargetTopology, Topology, DeploymentTarget, HelmCommand, K8sclient, MultiTargetTopology, Topology,
k8s::K8sClient, k8s::K8sClient,
oberservability::monitoring::PrometheusK8sAnywhere,
tenant::{TenantConfig, TenantManager, k8s::K8sTenantManager}, tenant::{TenantConfig, TenantManager, k8s::K8sTenantManager},
}; };
#[derive(Clone, Debug)] #[derive(Clone, Debug)]
struct K8sState { struct K8sState {
client: Arc<K8sClient>, client: Arc<K8sClient>,
_source: K8sSource, source: K8sSource,
message: String, message: String,
} }
@ -58,6 +63,47 @@ impl K8sclient for K8sAnywhereTopology {
} }
} }
/// Prometheus operator provisioning for `K8sAnywhereTopology`.
#[async_trait]
impl PrometheusK8sAnywhere for K8sAnywhereTopology {
    /// Ensures a prometheus operator is available for the current cluster.
    ///
    /// * On a local k3d cluster: looks for an existing `kube-prome-operator` resource
    ///   with `kubectl`; if one is found nothing is installed, otherwise the operator
    ///   chart is installed through `install_k3d_prometheus_operator`.
    /// * On a cluster loaded from a kubeconfig: no installation is attempted and the
    ///   operator is reported as available.
    ///
    /// `namespace` is forwarded to the k3d installation as the requested namespace.
    ///
    /// # Errors
    ///
    /// Returns an `InterpretError` when no k8s cluster state is available, when the
    /// `kubectl` probe cannot be spawned, or when the operator installation fails.
    async fn ensure_prometheus_operator(
        &self,
        namespace: Option<String>,
    ) -> Result<Outcome, InterpretError> {
        // Guard first: without cluster state we cannot tell which kind of cluster we
        // are targeting, so nothing has been attempted yet.
        let Some(Some(k8s_state)) = self.k8s_state.get() else {
            return Err(InterpretError::new(
                "cannot ensure prometheus operator: no k8s cluster state is available"
                    .to_string(),
            ));
        };

        match k8s_state.source {
            K8sSource::LocalK3d => {
                debug!("Working on LocalK3d, installing prometheus operator");
                // The pipeline's exit status is grep's: success only when a matching
                // resource line was found.
                // NOTE(review): this is a blocking process call inside an async fn.
                let output = Command::new("sh")
                    .args(["-c", "kubectl get all -A | grep -i kube-prome-operator"])
                    .output()
                    .map_err(|e| {
                        InterpretError::new(format!("could not connect to cluster: {}", e))
                    })?;
                if output.status.success() && !output.stdout.is_empty() {
                    debug!("Prometheus operator is already present, skipping install");
                    return Ok(Outcome::noop());
                }

                self.install_k3d_prometheus_operator(namespace).await?;
                Ok(Outcome::success(
                    "prometheus operator available".to_string(),
                ))
            }
            K8sSource::Kubeconfig => {
                // TODO: this does not feel robust enough to ensure that the operator
                // is indeed available.
                debug!(
                    "Working outside of LocalK3d topology, skipping install of client prometheus operator"
                );
                Ok(Outcome::success(
                    "prometheus operator available".to_string(),
                ))
            }
        }
    }
}
impl Serialize for K8sAnywhereTopology { impl Serialize for K8sAnywhereTopology {
fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error> fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
where where
@ -84,6 +130,20 @@ impl K8sAnywhereTopology {
} }
} }
/// Installs the prometheus operator Helm chart score through a `Maestro` built on
/// `LocalhostTopology`.
///
/// Namespace precedence: the tenant config name when a tenant config exists, then the
/// caller-supplied `namespace`, and finally `"default"`.
///
/// # Panics
///
/// Panics if `get_k8s_tenant_manager` does not yield a tenant manager.
async fn install_k3d_prometheus_operator(
    &self,
    namespace: Option<String>,
) -> Result<Outcome, InterpretError> {
    let maestro = Maestro::initialize(Inventory::autoload(), LocalhostTopology::new()).await?;

    let tenant_manager = self.get_k8s_tenant_manager().unwrap();
    let target_namespace = match tenant_manager.get_tenant_config().await {
        Some(tenant_config) => tenant_config.name.clone(),
        None => namespace.unwrap_or_else(|| "default".to_string()),
    };

    let operator_score = crate::modules::monitoring::kube_prometheus::crd::prometheus_operator::prometheus_operator_helm_chart_score(target_namespace);
    maestro.interpret(Box::new(operator_score)).await
}
fn is_helm_available(&self) -> Result<(), String> { fn is_helm_available(&self) -> Result<(), String> {
let version_result = Command::new("helm") let version_result = Command::new("helm")
.arg("version") .arg("version")
@ -134,7 +194,7 @@ impl K8sAnywhereTopology {
Some(client) => { Some(client) => {
return Ok(Some(K8sState { return Ok(Some(K8sState {
client: Arc::new(client), client: Arc::new(client),
_source: K8sSource::Kubeconfig, source: K8sSource::Kubeconfig,
message: format!("Loaded k8s client from kubeconfig {kubeconfig}"), message: format!("Loaded k8s client from kubeconfig {kubeconfig}"),
})); }));
} }
@ -185,7 +245,7 @@ impl K8sAnywhereTopology {
let state = match k3d.get_client().await { let state = match k3d.get_client().await {
Ok(client) => K8sState { Ok(client) => K8sState {
client: Arc::new(K8sClient::new(client)), client: Arc::new(K8sClient::new(client)),
_source: K8sSource::LocalK3d, source: K8sSource::LocalK3d,
message: "Successfully installed K3D cluster and acquired client".to_string(), message: "Successfully installed K3D cluster and acquired client".to_string(),
}, },
Err(_) => todo!(), Err(_) => todo!(),

View File

@ -76,3 +76,11 @@ pub trait AlertRule<S: AlertSender>: std::fmt::Debug + Send + Sync {
pub trait ScrapeTarget<S: AlertSender> { pub trait ScrapeTarget<S: AlertSender> {
async fn install(&self, sender: &S) -> Result<(), InterpretError>; async fn install(&self, sender: &S) -> Result<(), InterpretError>;
} }
/// Capability of a topology to make sure a prometheus operator is available.
///
/// Used as an extra bound on topologies that run application monitoring.
#[async_trait]
pub trait PrometheusK8sAnywhere {
/// Ensures the prometheus operator is available, optionally targeting `namespace`.
///
/// Returns an `Outcome` describing what was done, or an `InterpretError` on failure.
/// NOTE(review): whether the operator is installed or merely assumed present is
/// presumably left to each implementation — confirm against the implementors.
async fn ensure_prometheus_operator(
&self,
namespace: Option<String>,
) -> Result<Outcome, InterpretError>;
}

View File

@ -231,8 +231,13 @@ impl K8sTenantManager {
{ {
"to": [ "to": [
{ {
//TODO this IP range comes from the docker network that k3d is running on.
//Since k3d does not deploy kube-api-server as a pod, its IP address has to be
//opened up explicitly here.
//We need to find a way to automatically detect the IP range from the docker
//network.
"ipBlock": { "ipBlock": {
"cidr": "172.23.0.0/16", "cidr": "172.24.0.0/16",
} }
} }
] ]

View File

@ -1,5 +1,6 @@
use std::sync::Arc; use std::sync::Arc;
use crate::modules::application::{ApplicationFeature, OCICompliant};
use crate::modules::monitoring::application_monitoring::crd_application_monitoring_alerting::CRDApplicationAlertingScore; use crate::modules::monitoring::application_monitoring::crd_application_monitoring_alerting::CRDApplicationAlertingScore;
use crate::modules::monitoring::kube_prometheus::crd::crd_alertmanager_config::CRDAlertManagerReceiver; use crate::modules::monitoring::kube_prometheus::crd::crd_alertmanager_config::CRDAlertManagerReceiver;
use crate::modules::monitoring::kube_prometheus::crd::crd_default_rules::build_default_application_rules; use crate::modules::monitoring::kube_prometheus::crd::crd_default_rules::build_default_application_rules;
@ -7,12 +8,11 @@ use crate::modules::monitoring::kube_prometheus::crd::crd_prometheus_rules::Rule
use crate::modules::monitoring::kube_prometheus::crd::service_monitor::{ use crate::modules::monitoring::kube_prometheus::crd::service_monitor::{
ServiceMonitor, ServiceMonitorSpec, ServiceMonitor, ServiceMonitorSpec,
}; };
use crate::modules::monitoring::kube_prometheus::types::ServiceMonitorEndpoint; use crate::topology::oberservability::monitoring::PrometheusK8sAnywhere;
use crate::{ use crate::{
inventory::Inventory, inventory::Inventory,
modules::{ modules::monitoring::{
application::{Application, ApplicationFeature, OCICompliant}, alert_channel::webhook_receiver::WebhookReceiver, ntfy::ntfy::NtfyScore,
monitoring::{alert_channel::webhook_receiver::WebhookReceiver, ntfy::ntfy::NtfyScore},
}, },
score::Score, score::Score,
topology::{HelmCommand, K8sclient, Topology, Url, tenant::TenantManager}, topology::{HelmCommand, K8sclient, Topology, Url, tenant::TenantManager},
@ -31,8 +31,15 @@ pub struct Monitoring {
} }
#[async_trait] #[async_trait]
impl<T: Topology + HelmCommand + 'static + TenantManager + K8sclient + std::fmt::Debug> impl<
ApplicationFeature<T> for Monitoring T: Topology
+ HelmCommand
+ 'static
+ TenantManager
+ K8sclient
+ std::fmt::Debug
+ PrometheusK8sAnywhere,
> ApplicationFeature<T> for Monitoring
{ {
async fn ensure_installed(&self, topology: &T) -> Result<(), String> { async fn ensure_installed(&self, topology: &T) -> Result<(), String> {
info!("Ensuring monitoring is available for application"); info!("Ensuring monitoring is available for application");

View File

@ -1,7 +1,6 @@
use std::fs; use std::fs;
use std::{collections::BTreeMap, sync::Arc}; use std::{collections::BTreeMap, sync::Arc};
use tempfile::tempdir; use tempfile::tempdir;
use tokio::io::AsyncWriteExt;
use async_trait::async_trait; use async_trait::async_trait;
use kube::api::ObjectMeta; use kube::api::ObjectMeta;
@ -21,6 +20,7 @@ use crate::modules::monitoring::kube_prometheus::crd::crd_prometheus_rules::{
}; };
use crate::modules::monitoring::kube_prometheus::crd::grafana_default_dashboard::build_default_dashboard; use crate::modules::monitoring::kube_prometheus::crd::grafana_default_dashboard::build_default_dashboard;
use crate::modules::monitoring::kube_prometheus::crd::service_monitor::ServiceMonitor; use crate::modules::monitoring::kube_prometheus::crd::service_monitor::ServiceMonitor;
use crate::topology::oberservability::monitoring::PrometheusK8sAnywhere;
use crate::topology::{K8sclient, Topology, k8s::K8sClient}; use crate::topology::{K8sclient, Topology, k8s::K8sClient};
use crate::{ use crate::{
data::{Id, Version}, data::{Id, Version},
@ -45,7 +45,7 @@ pub struct CRDApplicationAlertingScore {
pub prometheus_rules: Vec<RuleGroup>, pub prometheus_rules: Vec<RuleGroup>,
} }
impl<T: Topology + K8sclient> Score<T> for CRDApplicationAlertingScore { impl<T: Topology + K8sclient + PrometheusK8sAnywhere> Score<T> for CRDApplicationAlertingScore {
fn create_interpret(&self) -> Box<dyn crate::interpret::Interpret<T>> { fn create_interpret(&self) -> Box<dyn crate::interpret::Interpret<T>> {
Box::new(CRDApplicationAlertingInterpret { Box::new(CRDApplicationAlertingInterpret {
namespace: self.namespace.clone(), namespace: self.namespace.clone(),
@ -69,17 +69,22 @@ pub struct CRDApplicationAlertingInterpret {
} }
#[async_trait] #[async_trait]
impl<T: Topology + K8sclient> Interpret<T> for CRDApplicationAlertingInterpret { impl<T: Topology + K8sclient + PrometheusK8sAnywhere> Interpret<T>
for CRDApplicationAlertingInterpret
{
async fn execute( async fn execute(
&self, &self,
_inventory: &Inventory, _inventory: &Inventory,
topology: &T, topology: &T,
) -> Result<Outcome, InterpretError> { ) -> Result<Outcome, InterpretError> {
let client = topology.k8s_client().await.unwrap(); let client = topology.k8s_client().await.unwrap();
self.ensure_prometheus_operator().await?; topology
.ensure_prometheus_operator(Some(self.namespace.clone()))
.await?;
self.ensure_grafana_operator().await?; self.ensure_grafana_operator().await?;
self.install_prometheus(&client).await?; self.install_prometheus(&client).await?;
self.install_alert_manager(&client).await?; self.install_alert_manager(&client).await?;
self.install_client_kube_metrics().await?;
self.install_grafana(&client).await?; self.install_grafana(&client).await?;
self.install_receivers(&self.receivers, &client).await?; self.install_receivers(&self.receivers, &client).await?;
self.install_rules(&self.prometheus_rules, &client).await?; self.install_rules(&self.prometheus_rules, &client).await?;
@ -117,26 +122,18 @@ impl CRDApplicationAlertingInterpret {
matches!(output, Ok(o) if o.status.success()) matches!(output, Ok(o) if o.status.success())
} }
async fn ensure_prometheus_operator(&self) -> Result<Outcome, InterpretError> { async fn install_chart(
if self.crd_exists("prometheuses.monitoring.coreos.com").await { &self,
debug!("Prometheus CRDs already exist — skipping install."); chart_path: String,
return Ok(Outcome::success( chart_name: String,
"Prometheus CRDs already exist".to_string(), ) -> Result<(), InterpretError> {
));
}
let temp_dir = let temp_dir =
tempdir().map_err(|e| InterpretError::new(format!("Tempdir error: {}", e)))?; tempdir().map_err(|e| InterpretError::new(format!("Tempdir error: {}", e)))?;
let temp_path = temp_dir.path().to_path_buf(); let temp_path = temp_dir.path().to_path_buf();
debug!("Using temp directory: {}", temp_path.display()); debug!("Using temp directory: {}", temp_path.display());
let chart = format!("{}/{}", chart_path, chart_name);
let pull_output = Command::new("helm") let pull_output = Command::new("helm")
.args(&[ .args(&["pull", &chart, "--destination", temp_path.to_str().unwrap()])
"pull",
"oci://hub.nationtech.io/harmony/nt-prometheus-operator",
"--destination",
temp_path.to_str().unwrap(),
])
.output() .output()
.await .await
.map_err(|e| InterpretError::new(format!("Helm pull error: {}", e)))?; .map_err(|e| InterpretError::new(format!("Helm pull error: {}", e)))?;
@ -167,7 +164,7 @@ impl CRDApplicationAlertingInterpret {
let install_output = Command::new("helm") let install_output = Command::new("helm")
.args(&[ .args(&[
"install", "install",
"nt-prometheus-operator", &chart_name,
tgz_path.to_str().unwrap(), tgz_path.to_str().unwrap(),
"--namespace", "--namespace",
&self.namespace, &self.namespace,
@ -187,13 +184,10 @@ impl CRDApplicationAlertingInterpret {
} }
debug!( debug!(
"Installed prometheus operator in namespace: {}", "Installed chart {}/{} in namespace: {}",
self.namespace &chart_path, &chart_name, self.namespace
); );
Ok(Outcome::success(format!( Ok(())
"Installed prometheus operator in namespace {}",
self.namespace
)))
} }
async fn ensure_grafana_operator(&self) -> Result<Outcome, InterpretError> { async fn ensure_grafana_operator(&self) -> Result<Outcome, InterpretError> {
@ -219,7 +213,7 @@ impl CRDApplicationAlertingInterpret {
.await .await
.unwrap(); .unwrap();
let _ = Command::new("helm") let output = Command::new("helm")
.args(&[ .args(&[
"install", "install",
"grafana-operator", "grafana-operator",
@ -227,11 +221,21 @@ impl CRDApplicationAlertingInterpret {
"--namespace", "--namespace",
&self.namespace, &self.namespace,
"--create-namespace", "--create-namespace",
"--set",
"namespaceScope=true",
]) ])
.output() .output()
.await .await
.unwrap(); .unwrap();
if !output.status.success() {
return Err(InterpretError::new(format!(
"helm install failed:\nstdout: {}\nstderr: {}",
String::from_utf8_lossy(&output.stdout),
String::from_utf8_lossy(&output.stderr)
)));
}
Ok(Outcome::success(format!( Ok(Outcome::success(format!(
"installed grafana operator in ns {}", "installed grafana operator in ns {}",
self.namespace.clone() self.namespace.clone()
@ -294,10 +298,10 @@ impl CRDApplicationAlertingInterpret {
let prom = Prometheus { let prom = Prometheus {
metadata: ObjectMeta { metadata: ObjectMeta {
name: Some(self.namespace.clone()), name: Some(self.namespace.clone()),
labels: Some(std::collections::BTreeMap::from([( labels: Some(std::collections::BTreeMap::from([
"alertmanagerConfig".to_string(), ("alertmanagerConfig".to_string(), "enabled".to_string()),
"enabled".to_string(), ("client".to_string(), "prometheus".to_string()),
)])), ])),
namespace: Some(self.namespace.clone()), namespace: Some(self.namespace.clone()),
..Default::default() ..Default::default()
}, },
@ -418,6 +422,18 @@ impl CRDApplicationAlertingInterpret {
))) )))
} }
/// Installs the `nt-kube-metrics` chart from the harmony OCI registry via
/// `install_chart`, then reports success for this interpret's namespace.
///
/// # Errors
///
/// Propagates any `InterpretError` returned by `install_chart`.
async fn install_client_kube_metrics(&self) -> Result<Outcome, InterpretError> {
    let chart_path = String::from("oci://hub.nationtech.io/harmony");
    let chart_name = String::from("nt-kube-metrics");
    self.install_chart(chart_path, chart_name).await?;

    let message = format!("Installed client kube metrics in ns {}", &self.namespace);
    Ok(Outcome::success(message))
}
async fn install_grafana(&self, client: &Arc<K8sClient>) -> Result<Outcome, InterpretError> { async fn install_grafana(&self, client: &Arc<K8sClient>) -> Result<Outcome, InterpretError> {
let mut label = BTreeMap::new(); let mut label = BTreeMap::new();
label.insert("dashboards".to_string(), "grafana".to_string()); label.insert("dashboards".to_string(), "grafana".to_string());

View File

@ -8,10 +8,8 @@ pub fn grafana_operator_helm_chart_score(ns: String) -> HelmChartScore {
HelmChartScore { HelmChartScore {
namespace: Some(NonBlankString::from_str(&ns).unwrap()), namespace: Some(NonBlankString::from_str(&ns).unwrap()),
release_name: NonBlankString::from_str("grafana_operator").unwrap(), release_name: NonBlankString::from_str("grafana_operator").unwrap(),
chart_name: NonBlankString::from_str( chart_name: NonBlankString::from_str("oci://ghcr.io/grafana/helm-charts/grafana-operator")
"grafana-operator oci://ghcr.io/grafana/helm-charts/grafana-operator", .unwrap(),
)
.unwrap(),
chart_version: None, chart_version: None,
values_overrides: None, values_overrides: None,
values_yaml: None, values_yaml: None,

View File

@ -9,7 +9,7 @@ pub fn prometheus_operator_helm_chart_score(ns: String) -> HelmChartScore {
namespace: Some(NonBlankString::from_str(&ns).unwrap()), namespace: Some(NonBlankString::from_str(&ns).unwrap()),
release_name: NonBlankString::from_str("prometheus-operator").unwrap(), release_name: NonBlankString::from_str("prometheus-operator").unwrap(),
chart_name: NonBlankString::from_str( chart_name: NonBlankString::from_str(
"grafana-operator oci://ghcr.io/grafana/helm-charts/grafana-operator", "oci://hub.nationtech.io/harmony/nt-prometheus-operator",
) )
.unwrap(), .unwrap(),
chart_version: None, chart_version: None,