fix: prometheus operator and grafana operator deploy to the application namespace on local k3d
Some checks failed
Run Check Script / check (pull_request) Failing after 1m5s

If kube-prometheus-operator is already present, installation of prometheus-operator is skipped.
Outside of a local k3d installation, installation of the operator is skipped.
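A minimal sketch of the skip decision described above (hypothetical standalone helper; the real logic lands in K8sAnywhereTopology::ensure_prometheus_operator below):

// Sketch: when to install the Prometheus operator, per this commit's behavior.
fn should_install_operator(source: &K8sSource, operator_already_present: bool) -> bool {
    match source {
        // Local k3d: install unless kube-prometheus-operator is already running.
        K8sSource::LocalK3d => !operator_already_present,
        // Any other cluster (kubeconfig): skip; the operator is assumed to be managed externally.
        K8sSource::Kubeconfig => false,
    }
}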
Willem 2025-07-28 15:15:10 -04:00
parent b9e208f4cf
commit b56a30de3c
10 changed files with 247 additions and 48 deletions

Cargo.lock (generated)
View File

@@ -96,6 +96,12 @@ dependencies = [
"libc",
]
[[package]]
name = "ansi_term"
version = "0.10.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6b3568b48b7cefa6b8ce125f9bb4989e52fbcc29ebea88df04cc7c5f12f70455"
[[package]]
name = "anstream"
version = "0.6.19"
@@ -1240,6 +1246,18 @@ dependencies = [
name = "example"
version = "0.0.0"
[[package]]
name = "example-application-monitoring-with-tenant"
version = "0.1.0"
dependencies = [
"env_logger",
"harmony",
"harmony_cli",
"logging",
"tokio",
"url",
]
[[package]]
name = "example-cli"
version = "0.1.0"
@@ -2808,6 +2826,15 @@ dependencies = [
"log",
]
[[package]]
name = "logging"
version = "0.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "461a8beca676e8ab1bd468c92e9b4436d6368e11e96ae038209e520cfe665e46"
dependencies = [
"ansi_term",
]
[[package]]
name = "lru"
version = "0.12.5"

View File

@@ -0,0 +1,14 @@
[package]
name = "example-application-monitoring-with-tenant"
edition = "2024"
version.workspace = true
readme.workspace = true
license.workspace = true
[dependencies]
env_logger.workspace = true
harmony = { version = "0.1.0", path = "../../harmony" }
harmony_cli = { version = "0.1.0", path = "../../harmony_cli" }
logging = "0.1.0"
tokio.workspace = true
url.workspace = true

View File

@@ -0,0 +1,64 @@
use std::{path::PathBuf, sync::Arc};
use harmony::{
data::Id,
inventory::Inventory,
maestro::Maestro,
modules::{
application::{
ApplicationScore, RustWebFramework, RustWebapp,
features::{ContinuousDelivery, Monitoring},
},
monitoring::alert_channel::{
discord_alert_channel::DiscordWebhook, webhook_receiver::WebhookReceiver,
},
tenant::TenantScore,
},
topology::{K8sAnywhereTopology, Url, tenant::TenantConfig},
};
#[tokio::main]
async fn main() {
env_logger::init();
let topology = K8sAnywhereTopology::from_env();
let mut maestro = Maestro::initialize(Inventory::autoload(), topology)
.await
.unwrap();
//TODO there is a bug where the application is deployed into the namespace matching the
//application name and the tenant is created in the namespace matching the tenant name.
//In order for the application to be deployed in the tenant namespace, the application.name and
//the TenantConfig.name must match.
let tenant = TenantScore {
config: TenantConfig {
id: Id::from_str("test-tenant-id"),
name: "example-monitoring".to_string(),
..Default::default()
},
};
let application = Arc::new(RustWebapp {
name: "example-monitoring".to_string(),
domain: Url::Url(url::Url::parse("https://rustapp.harmony.example.com").unwrap()),
project_root: PathBuf::from("./examples/rust/webapp"),
framework: Some(RustWebFramework::Leptos),
});
let webhook_receiver = WebhookReceiver {
name: "sample-webhook-receiver".to_string(),
url: Url::Url(url::Url::parse("https://webhook-doesnt-exist.com").unwrap()),
};
let app = ApplicationScore {
features: vec![Box::new(Monitoring {
application: application.clone(),
alert_receiver: vec![Box::new(webhook_receiver)],
service_monitors: vec![],
alert_rules: vec![],
})],
application,
};
maestro.register_all(vec![Box::new(tenant), Box::new(app)]);
harmony_cli::init(maestro, None).await.unwrap();
}
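To try the example locally, something like the following should work (assuming a local k3d-capable environment):

cargo run -p example-application-monitoring-with-tenant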

View File

@@ -1,9 +1,10 @@
use std::{process::Command, sync::Arc};
use std::{fs, process::Command, sync::Arc};
use async_trait::async_trait;
use inquire::Confirm;
use log::{debug, info, warn};
use serde::Serialize;
use tempfile::tempdir;
use tokio::sync::OnceCell;
use crate::{
@@ -11,20 +12,24 @@ use crate::{
interpret::{InterpretError, Outcome},
inventory::Inventory,
maestro::Maestro,
modules::k3d::K3DInstallationScore,
modules::{
k3d::K3DInstallationScore,
monitoring::kube_prometheus::crd::prometheus_operator::prometheus_operator_helm_chart_score,
},
topology::LocalhostTopology,
};
use super::{
DeploymentTarget, HelmCommand, K8sclient, MultiTargetTopology, Topology,
k8s::K8sClient,
oberservability::monitoring::PrometheusK8sAnywhere,
tenant::{TenantConfig, TenantManager, k8s::K8sTenantManager},
};
#[derive(Clone, Debug)]
struct K8sState {
client: Arc<K8sClient>,
_source: K8sSource,
source: K8sSource,
message: String,
}
@@ -58,6 +63,47 @@ impl K8sclient for K8sAnywhereTopology {
}
}
#[async_trait]
impl PrometheusK8sAnywhere for K8sAnywhereTopology {
async fn ensure_prometheus_operator(
&self,
namespace: Option<String>,
) -> Result<Outcome, InterpretError> {
if let Some(Some(k8s_state)) = self.k8s_state.get() {
match k8s_state.source {
K8sSource::LocalK3d => {
debug!("Working on LocalK3d, installing prometheus operator");
let output = Command::new("sh")
.args(["-c", "kubectl get all -A | grep -i kube-prome-operator"])
.output()
.map_err(|e| {
InterpretError::new(format!("could not connect to cluster: {}", e))
})?;
if output.status.success() && !output.stdout.is_empty() {
debug!("Prometheus operator is already present, skipping install");
return Ok(Outcome::noop());
}
self.install_k3d_prometheus_operator(namespace).await?;
Ok(Outcome::success(format!("prometheus operator available")))
}
K8sSource::Kubeconfig => {
//TODO this doesn't feel robust enough to ensure that the operator is indeed
//available (a stronger check is sketched after this block)
debug!(
"Working outside of LocalK3d topology, skipping install of client prometheus operator"
);
Ok(Outcome::success(format!("prometheus operator available")))
}
}
} else {
Err(InterpretError::new(
"failed to install prometheus operator".to_string(),
))
}
}
}
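The Kubeconfig arm's TODO above could be tightened by probing for the operator's CRD instead of assuming availability; a sketch mirroring the crd_exists check used elsewhere in this commit (hypothetical helper, assuming kubectl is on PATH):

use std::process::Command;

// Hypothetical: treat the operator as available only if its CRD is registered.
fn prometheus_crd_registered() -> bool {
    Command::new("kubectl")
        .args(["get", "crd", "prometheuses.monitoring.coreos.com"])
        .output()
        .map(|o| o.status.success())
        .unwrap_or(false)
}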
impl Serialize for K8sAnywhereTopology {
fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
where
@@ -84,6 +130,20 @@ impl K8sAnywhereTopology {
}
}
async fn install_k3d_prometheus_operator(
&self,
namespace: Option<String>,
) -> Result<Outcome, InterpretError> {
let maestro = Maestro::initialize(Inventory::autoload(), LocalhostTopology::new()).await?;
let tenant = self.get_k8s_tenant_manager().unwrap();
let namespace_name = tenant.get_tenant_config().await;
let namespace = namespace_name
.map(|ns| ns.name.clone())
.unwrap_or_else(|| namespace.unwrap_or_else(|| "default".to_string()));
let score = crate::modules::monitoring::kube_prometheus::crd::prometheus_operator::prometheus_operator_helm_chart_score(namespace);
maestro.interpret(Box::new(score)).await
}
fn is_helm_available(&self) -> Result<(), String> {
let version_result = Command::new("helm")
.arg("version")
@@ -134,7 +194,7 @@ impl K8sAnywhereTopology {
Some(client) => {
return Ok(Some(K8sState {
client: Arc::new(client),
_source: K8sSource::Kubeconfig,
source: K8sSource::Kubeconfig,
message: format!("Loaded k8s client from kubeconfig {kubeconfig}"),
}));
}
@@ -185,7 +245,7 @@ impl K8sAnywhereTopology {
let state = match k3d.get_client().await {
Ok(client) => K8sState {
client: Arc::new(K8sClient::new(client)),
_source: K8sSource::LocalK3d,
source: K8sSource::LocalK3d,
message: "Successfully installed K3D cluster and acquired client".to_string(),
},
Err(_) => todo!(),

View File

@@ -76,3 +76,11 @@ pub trait AlertRule<S: AlertSender>: std::fmt::Debug + Send + Sync {
pub trait ScrapeTarget<S: AlertSender> {
async fn install(&self, sender: &S) -> Result<(), InterpretError>;
}
#[async_trait]
pub trait PrometheusK8sAnywhere {
async fn ensure_prometheus_operator(
&self,
namespace: Option<String>,
) -> Result<Outcome, InterpretError>;
}
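A sketch of what an implementation looks like for a topology that delegates operator management elsewhere (hypothetical NoopTopology; the K8sAnywhereTopology impl in this commit is the real example):

#[async_trait]
impl PrometheusK8sAnywhere for NoopTopology {
    async fn ensure_prometheus_operator(
        &self,
        _namespace: Option<String>,
    ) -> Result<Outcome, InterpretError> {
        // Assume the operator is managed outside of Harmony; report a no-op.
        Ok(Outcome::noop())
    }
}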

View File

@@ -231,8 +231,13 @@ impl K8sTenantManager {
{
"to": [
{
//TODO this IP is from the docker network that k3d is running on.
//Since k3d does not deploy kube-api-server as a pod, its IP address needs to be
//opened up.
//Need to find a way to automatically detect the IP address from the docker
//network (see the sketch after this block).
"ipBlock": {
"cidr": "172.23.0.0/16",
"cidr": "172.24.0.0/16",
}
}
]
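One way to address the TODO above is to read the subnet from the k3d docker network rather than hard-coding it; a sketch, assuming the default k3d network naming (hypothetical helper):

use std::process::Command;

// Hypothetical: return the subnet of the cluster's docker network, e.g. "172.24.0.0/16".
fn k3d_network_cidr(cluster_name: &str) -> Option<String> {
    let output = Command::new("docker")
        .args([
            "network",
            "inspect",
            &format!("k3d-{cluster_name}"),
            "--format",
            "{{(index .IPAM.Config 0).Subnet}}",
        ])
        .output()
        .ok()?;
    output
        .status
        .success()
        .then(|| String::from_utf8_lossy(&output.stdout).trim().to_string())
}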

View File

@@ -1,5 +1,6 @@
use std::sync::Arc;
use crate::modules::application::{ApplicationFeature, OCICompliant};
use crate::modules::monitoring::application_monitoring::crd_application_monitoring_alerting::CRDApplicationAlertingScore;
use crate::modules::monitoring::kube_prometheus::crd::crd_alertmanager_config::CRDAlertManagerReceiver;
use crate::modules::monitoring::kube_prometheus::crd::crd_default_rules::build_default_application_rules;
@@ -7,12 +8,11 @@ use crate::modules::monitoring::kube_prometheus::crd::crd_prometheus_rules::Rule
use crate::modules::monitoring::kube_prometheus::crd::service_monitor::{
ServiceMonitor, ServiceMonitorSpec,
};
use crate::modules::monitoring::kube_prometheus::types::ServiceMonitorEndpoint;
use crate::topology::oberservability::monitoring::PrometheusK8sAnywhere;
use crate::{
inventory::Inventory,
modules::{
application::{Application, ApplicationFeature, OCICompliant},
monitoring::{alert_channel::webhook_receiver::WebhookReceiver, ntfy::ntfy::NtfyScore},
modules::monitoring::{
alert_channel::webhook_receiver::WebhookReceiver, ntfy::ntfy::NtfyScore,
},
score::Score,
topology::{HelmCommand, K8sclient, Topology, Url, tenant::TenantManager},
@@ -31,8 +31,15 @@ pub struct Monitoring {
}
#[async_trait]
impl<T: Topology + HelmCommand + 'static + TenantManager + K8sclient + std::fmt::Debug>
ApplicationFeature<T> for Monitoring
impl<
T: Topology
+ HelmCommand
+ 'static
+ TenantManager
+ K8sclient
+ std::fmt::Debug
+ PrometheusK8sAnywhere,
> ApplicationFeature<T> for Monitoring
{
async fn ensure_installed(&self, topology: &T) -> Result<(), String> {
info!("Ensuring monitoring is available for application");

View File

@@ -1,7 +1,6 @@
use std::fs;
use std::{collections::BTreeMap, sync::Arc};
use tempfile::tempdir;
use tokio::io::AsyncWriteExt;
use async_trait::async_trait;
use kube::api::ObjectMeta;
@@ -21,6 +20,7 @@ use crate::modules::monitoring::kube_prometheus::crd::crd_prometheus_rules::{
};
use crate::modules::monitoring::kube_prometheus::crd::grafana_default_dashboard::build_default_dashboard;
use crate::modules::monitoring::kube_prometheus::crd::service_monitor::ServiceMonitor;
use crate::topology::oberservability::monitoring::PrometheusK8sAnywhere;
use crate::topology::{K8sclient, Topology, k8s::K8sClient};
use crate::{
data::{Id, Version},
@@ -45,7 +45,7 @@ pub struct CRDApplicationAlertingScore {
pub prometheus_rules: Vec<RuleGroup>,
}
impl<T: Topology + K8sclient> Score<T> for CRDApplicationAlertingScore {
impl<T: Topology + K8sclient + PrometheusK8sAnywhere> Score<T> for CRDApplicationAlertingScore {
fn create_interpret(&self) -> Box<dyn crate::interpret::Interpret<T>> {
Box::new(CRDApplicationAlertingInterpret {
namespace: self.namespace.clone(),
@@ -69,17 +69,22 @@ pub struct CRDApplicationAlertingInterpret {
}
#[async_trait]
impl<T: Topology + K8sclient> Interpret<T> for CRDApplicationAlertingInterpret {
impl<T: Topology + K8sclient + PrometheusK8sAnywhere> Interpret<T>
for CRDApplicationAlertingInterpret
{
async fn execute(
&self,
_inventory: &Inventory,
topology: &T,
) -> Result<Outcome, InterpretError> {
let client = topology.k8s_client().await.unwrap();
self.ensure_prometheus_operator().await?;
topology
.ensure_prometheus_operator(Some(self.namespace.clone()))
.await?;
self.ensure_grafana_operator().await?;
self.install_prometheus(&client).await?;
self.install_alert_manager(&client).await?;
self.install_client_kube_metrics().await?;
self.install_grafana(&client).await?;
self.install_receivers(&self.receivers, &client).await?;
self.install_rules(&self.prometheus_rules, &client).await?;
@@ -117,26 +122,18 @@ impl CRDApplicationAlertingInterpret {
matches!(output, Ok(o) if o.status.success())
}
async fn ensure_prometheus_operator(&self) -> Result<Outcome, InterpretError> {
if self.crd_exists("prometheuses.monitoring.coreos.com").await {
debug!("Prometheus CRDs already exist — skipping install.");
return Ok(Outcome::success(
"Prometheus CRDs already exist".to_string(),
));
}
async fn install_chart(
&self,
chart_path: String,
chart_name: String,
) -> Result<(), InterpretError> {
let temp_dir =
tempdir().map_err(|e| InterpretError::new(format!("Tempdir error: {}", e)))?;
let temp_path = temp_dir.path().to_path_buf();
debug!("Using temp directory: {}", temp_path.display());
let chart = format!("{}/{}", chart_path, chart_name);
let pull_output = Command::new("helm")
.args(&[
"pull",
"oci://hub.nationtech.io/harmony/nt-prometheus-operator",
"--destination",
temp_path.to_str().unwrap(),
])
.args(&["pull", &chart, "--destination", temp_path.to_str().unwrap()])
.output()
.await
.map_err(|e| InterpretError::new(format!("Helm pull error: {}", e)))?;
@@ -167,7 +164,7 @@ impl CRDApplicationAlertingInterpret {
let install_output = Command::new("helm")
.args(&[
"install",
"nt-prometheus-operator",
&chart_name,
tgz_path.to_str().unwrap(),
"--namespace",
&self.namespace,
@@ -187,13 +184,10 @@ impl CRDApplicationAlertingInterpret {
}
debug!(
"Installed prometheus operator in namespace: {}",
self.namespace
"Installed chart {}/{} in namespace: {}",
&chart_path, &chart_name, self.namespace
);
Ok(Outcome::success(format!(
"Installed prometheus operator in namespace {}",
self.namespace
)))
Ok(())
}
async fn ensure_grafana_operator(&self) -> Result<Outcome, InterpretError> {
@@ -219,7 +213,7 @@ impl CRDApplicationAlertingInterpret {
.await
.unwrap();
let _ = Command::new("helm")
let output = Command::new("helm")
.args(&[
"install",
"grafana-operator",
@@ -227,11 +221,21 @@ impl CRDApplicationAlertingInterpret {
"--namespace",
&self.namespace,
"--create-namespace",
"--set",
"namespaceScope=true",
])
.output()
.await
.unwrap();
if !output.status.success() {
return Err(InterpretError::new(format!(
"helm install failed:\nstdout: {}\nstderr: {}",
String::from_utf8_lossy(&output.stdout),
String::from_utf8_lossy(&output.stderr)
)));
}
Ok(Outcome::success(format!(
"installed grafana operator in ns {}",
self.namespace.clone()
@@ -294,10 +298,10 @@ impl CRDApplicationAlertingInterpret {
let prom = Prometheus {
metadata: ObjectMeta {
name: Some(self.namespace.clone()),
labels: Some(std::collections::BTreeMap::from([(
"alertmanagerConfig".to_string(),
"enabled".to_string(),
)])),
labels: Some(std::collections::BTreeMap::from([
("alertmanagerConfig".to_string(), "enabled".to_string()),
("client".to_string(), "prometheus".to_string()),
])),
namespace: Some(self.namespace.clone()),
..Default::default()
},
@@ -418,6 +422,18 @@ impl CRDApplicationAlertingInterpret {
)))
}
async fn install_client_kube_metrics(&self) -> Result<Outcome, InterpretError> {
self.install_chart(
"oci://hub.nationtech.io/harmony".to_string(),
"nt-kube-metrics".to_string(),
)
.await?;
Ok(Outcome::success(format!(
"Installed client kube metrics in ns {}",
&self.namespace
)))
}
async fn install_grafana(&self, client: &Arc<K8sClient>) -> Result<Outcome, InterpretError> {
let mut label = BTreeMap::new();
label.insert("dashboards".to_string(), "grafana".to_string());

View File

@@ -8,10 +8,8 @@ pub fn grafana_operator_helm_chart_score(ns: String) -> HelmChartScore {
HelmChartScore {
namespace: Some(NonBlankString::from_str(&ns).unwrap()),
release_name: NonBlankString::from_str("grafana_operator").unwrap(),
chart_name: NonBlankString::from_str(
"grafana-operator oci://ghcr.io/grafana/helm-charts/grafana-operator",
)
.unwrap(),
chart_name: NonBlankString::from_str("oci://ghcr.io/grafana/helm-charts/grafana-operator")
.unwrap(),
chart_version: None,
values_overrides: None,
values_yaml: None,

View File

@@ -9,7 +9,7 @@ pub fn prometheus_operator_helm_chart_score(ns: String) -> HelmChartScore {
namespace: Some(NonBlankString::from_str(&ns).unwrap()),
release_name: NonBlankString::from_str("prometheus-operator").unwrap(),
chart_name: NonBlankString::from_str(
"grafana-operator oci://ghcr.io/grafana/helm-charts/grafana-operator",
"oci://hub.nationtech.io/harmony/nt-prometheus-operator",
)
.unwrap(),
chart_version: None,