refactor/ns #74

Merged
wjro merged 8 commits from refactor/ns into master 2025-07-02 19:54:33 +00:00
9 changed files with 246 additions and 13 deletions
Showing only changes of commit 460c8b59e1

View File

@@ -0,0 +1,13 @@
[package]
name = "example-monitoring-with-tenant"
edition = "2024"
version.workspace = true
readme.workspace = true
license.workspace = true
[dependencies]
cidr.workspace = true
harmony = { version = "0.1.0", path = "../../harmony" }
harmony_cli = { version = "0.1.0", path = "../../harmony_cli" }
tokio.workspace = true
url.workspace = true

View File

@@ -0,0 +1,63 @@
use cidr::Ipv4Cidr;
use harmony::{
data::Id,
inventory::Inventory,
maestro::Maestro,
modules::{
monitoring::{
alert_channel::discord_alert_channel::DiscordWebhook,
alert_rule::prometheus_alert_rule::AlertManagerRuleGroup,
kube_prometheus::helm_prometheus_alert_score::HelmPrometheusAlertingScore,
},
prometheus::alerts::k8s::pvc::high_pvc_fill_rate_over_two_days,
tenant::TenantScore,
},
topology::{
K8sAnywhereTopology, Url,
tenant::{InternetEgressPolicy, ResourceLimits, TenantConfig, TenantNetworkPolicy},
},
};
use std::net::Ipv4Addr;
use std::str::FromStr;
#[tokio::main]
async fn main() {
let tenant = TenantScore {
config: TenantConfig {
id: Id::from_string("1234".to_string()),
name: "test-tenant".to_string(),
resource_limits: ResourceLimits {
cpu_request_cores: 4.0,
cpu_limit_cores: 4.0,
memory_request_gb: 4.0,
memory_limit_gb: 4.0,
storage_total_gb: 10.0,
},
network_policy: TenantNetworkPolicy::default(),
},
};
let discord_receiver = DiscordWebhook {
name: "test-discord".to_string(),
url: Url::Url(url::Url::parse("https://discord.doesnt.exist.com").unwrap()),
};
let high_pvc_fill_rate_over_two_days_alert = high_pvc_fill_rate_over_two_days();
let additional_rules =
AlertManagerRuleGroup::new("pvc-alerts", vec![high_pvc_fill_rate_over_two_days_alert]);
let alerting_score = HelmPrometheusAlertingScore {
receivers: vec![Box::new(discord_receiver)],
rules: vec![Box::new(additional_rules)],
};
let mut maestro = Maestro::<K8sAnywhereTopology>::initialize(
Inventory::autoload(),
K8sAnywhereTopology::from_env(),
)
.await
.unwrap();
maestro.register_all(vec![Box::new(tenant), Box::new(alerting_score)]);
harmony_cli::init(maestro, None).await.unwrap();
}

View File

@@ -185,13 +185,18 @@ impl K8sAnywhereTopology {
self.tenant_manager
.get_or_try_init(async || -> Result<K8sTenantManager, String> {
let k8s_client = self.k8s_client().await?;
Ok(K8sTenantManager::new(k8s_client, TenantConfig::default()))
Ok(K8sTenantManager::new(k8s_client))
})
.await
.unwrap();
Ok(())
}
async fn store_tenant_config(&self, config: TenantConfig) {
self.tenant_manager_config
.get_or_init(|| async { config })
.await;
}
fn get_k8s_tenant_manager(&self) -> Result<&K8sTenantManager, ExecutorError> {
match self.tenant_manager.get() {
@@ -271,6 +276,7 @@ impl HelmCommand for K8sAnywhereTopology {}
#[async_trait]
impl TenantManager for K8sAnywhereTopology {
async fn provision_tenant(&self, config: &TenantConfig) -> Result<(), ExecutorError> {
self.store_tenant_config(config.clone()).await;
self.get_k8s_tenant_manager()?
.provision_tenant(config)
.await

View File

@@ -25,7 +25,6 @@ use super::{TenantConfig, TenantManager};
#[derive(new)]
pub struct K8sTenantManager {
k8s_client: Arc<K8sClient>,
k8s_tenant_config: TenantConfig,
}
impl K8sTenantManager {
@@ -326,6 +325,6 @@ impl TenantManager for K8sTenantManager {
Ok(())
}
fn get_tenant_config(&self) -> Option<TenantConfig> {
Some(self.k8s_tenant_config.clone())
todo!()
}
}

View File

@@ -1,8 +1,8 @@
use serde::Serialize;
use crate::modules::monitoring::
kube_prometheus::types::{AlertManagerAdditionalPromRules, AlertManagerChannelConfig}
;
use crate::modules::monitoring::kube_prometheus::types::{
AlertManagerAdditionalPromRules, AlertManagerChannelConfig,
};
#[derive(Debug, Clone, Serialize)]
pub struct KubePrometheusConfig {

View File

@@ -12,7 +12,8 @@ use crate::modules::{
helm::chart::HelmChartScore,
monitoring::kube_prometheus::types::{
AlertGroup, AlertManager, AlertManagerAdditionalPromRules, AlertManagerConfig,
AlertManagerRoute, AlertManagerValues,
AlertManagerRoute, AlertManagerSpec, AlertManagerValues, Cpu, CpuUnit, Limits, Memory,
MemoryUnit, Requests, Resources,
},
};
@@ -36,8 +37,53 @@ pub fn kube_prometheus_helm_chart_score(
let node_exporter = config.node_exporter.to_string();
let prometheus_operator = config.prometheus_operator.to_string();
let prometheus = config.prometheus.to_string();
let resource_limit = Resources {
limits: Limits {
memory: Memory {
value: 100,
unit: MemoryUnit::Mi,
},
cpu: Cpu {
value: 100,
unit: CpuUnit::Milli,
},
},
requests: Requests {
memory: Memory {
value: 100,
unit: MemoryUnit::Mi,
},
cpu: Cpu {
value: 100,
unit: CpuUnit::Milli,
},
},
};
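// Prefix every line of `s` with `spaces` spaces, so serde_yaml output can be
// spliced into the indented values.yaml template below.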
fn indent_lines(s: &str, spaces: usize) -> String {
let pad = " ".repeat(spaces);
s.lines()
.map(|line| format!("{pad}{line}"))
.collect::<Vec<_>>()
.join("\n")
}
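// Serialize `resource` to YAML and wrap it in a `resources:` key at the
// given indent level.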
fn resource_block(resource: &Resources, indent_level: usize) -> String {
let yaml = serde_yaml::to_string(resource).unwrap();
format!(
"{}resources:\n{}",
" ".repeat(indent_level),
indent_lines(&yaml, indent_level + 2)
)
}
let resource_section = resource_block(&resource_limit, 2);
let mut values = format!(
r#"
prometheus:
enabled: {prometheus}
prometheusSpec:
{resource_section}
defaultRules:
create: {default_rules}
rules:
@@ -77,32 +123,59 @@ defaultRules:
windows: true
windowsMonitoring:
enabled: {windows_monitoring}
{resource_section}
grafana:
enabled: {grafana}
{resource_section}
kubernetesServiceMonitors:
enabled: {kubernetes_service_monitors}
{resource_section}
kubeApiServer:
enabled: {kubernetes_api_server}
{resource_section}
kubelet:
enabled: {kubelet}
{resource_section}
kubeControllerManager:
enabled: {kube_controller_manager}
{resource_section}
coreDns:
enabled: {core_dns}
{resource_section}
kubeEtcd:
enabled: {kube_etcd}
{resource_section}
kubeScheduler:
enabled: {kube_scheduler}
{resource_section}
kubeProxy:
enabled: {kube_proxy}
{resource_section}
kubeStateMetrics:
enabled: {kube_state_metrics}
{resource_section}
nodeExporter:
enabled: {node_exporter}
{resource_section}
prometheusOperator:
enabled: {prometheus_operator}
prometheus:
enabled: {prometheus}
admissionWebhooks:
deployment:
resources:
limits:
cpu: 10m
memory: 100Mi
requests:
cpu: 10m
memory: 100Mi
patch:
resources:
limits:
cpu: 10m
memory: 100Mi
requests:
cpu: 10m
memory: 100Mi
"#,
);
@@ -145,6 +218,30 @@ prometheus:
alertmanager: AlertManager {
enabled: config.alert_manager,
config: alert_manager_channel_config,
alertManagerSpec: AlertManagerSpec {
resources: Resources {
limits: Limits {
memory: Memory {
value: 100,
unit: MemoryUnit::Mi,
},
cpu: Cpu {
value: 100,
unit: CpuUnit::Milli,
},
},
requests: Requests {
memory: Memory {
value: 100,
unit: MemoryUnit::Mi,
},
cpu: Cpu {
value: 100,
unit: CpuUnit::Milli,
},
},
},
},
},
};
@@ -184,7 +281,6 @@ prometheus:
values.push_str(&alert_manager_additional_rules_yaml);
debug!("full values.yaml: \n {:#}", values);
HelmChartScore {
namespace: Some(NonBlankString::from_str(&config.namespace.clone().unwrap()).unwrap()),
release_name: NonBlankString::from_str("kube-prometheus").unwrap(),

View File

@@ -21,7 +21,7 @@ pub struct HelmPrometheusAlertingScore {
impl<T: Topology + HelmCommand + TenantManager> Score<T> for HelmPrometheusAlertingScore {
fn create_interpret(&self) -> Box<dyn crate::interpret::Interpret<T>> {
Box::new(AlertingInterpret {
Review

as noted in a comment below, should remove TenantManager later

sender: Prometheus::new() ,
sender: Prometheus::new(),
receivers: self.receivers.clone(),
rules: self.rules.clone(),
})
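Following up on the review note above, a minimal sketch of what this impl could look like once the TenantManager bound is dropped (hypothetical follow-up, not part of this PR):

// Hypothetical sketch: with tenant logic owned by the topology, the
// alerting score only needs Topology + HelmCommand.
impl<T: Topology + HelmCommand> Score<T> for HelmPrometheusAlertingScore {
    fn create_interpret(&self) -> Box<dyn crate::interpret::Interpret<T>> {
        Box::new(AlertingInterpret {
            sender: Prometheus::new(),
            receivers: self.receivers.clone(),
            rules: self.rules.clone(),
        })
    }
}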

View File

@@ -63,8 +63,11 @@ impl Prometheus {
}
pub fn configure_with_topology<T: TenantManager>(&self, topology: &T) {
let ns = topology.get_tenant_config().map(|cfg| cfg.name.clone())
.unwrap_or_else(|| "monitoring".to_string());
let ns = topology
Review

Higher-level components such as Monitoring, Alerting, etc. should not be aware of the Tenant concept.

The topology itself should manage internally the logic related to the tenant.

error!("This must be refactored, see comments in pr #74");

.get_tenant_config()
.map(|cfg| cfg.name.clone())
.unwrap_or_else(|| "monitoring".to_string());
debug!("NS: {}", ns);
self.config.lock().unwrap().namespace = Some(ns);
}
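A rough sketch of the refactor the review suggests: the topology resolves the namespace internally, so monitoring code never reads TenantConfig. Here monitoring_namespace is a hypothetical method name; tenant_manager_config is the cell populated via store_tenant_config in the hunk above.

// Hypothetical sketch of the suggested refactor: the topology decides the
// target namespace itself, falling back to "monitoring" when no tenant
// config was stored during provision_tenant.
impl K8sAnywhereTopology {
    fn monitoring_namespace(&self) -> String {
        self.tenant_manager_config
            .get()
            .map(|cfg| cfg.name.clone())
            .unwrap_or_else(|| "monitoring".to_string())
    }
}

configure_with_topology could then call topology.monitoring_namespace() and drop the get_tenant_config call entirely.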

View File

@@ -19,6 +19,7 @@ pub struct AlertManagerValues {
pub struct AlertManager {
pub enabled: bool,
pub config: AlertManagerConfig,
pub alertManagerSpec: AlertManagerSpec,
}
#[derive(Debug, Clone, Serialize)]
@@ -43,6 +44,58 @@ pub struct AlertManagerChannelConfig {
pub channel_receiver: Value,
}
#[derive(Debug, Clone, Serialize)]
pub struct AlertManagerSpec {
pub(crate) resources: Resources,
}
#[derive(Debug, Clone, Serialize)]
pub struct Resources {
pub limits: Limits,
pub requests: Requests,
}
#[derive(Debug, Clone, Serialize)]
pub struct Limits {
pub memory: Memory,
pub cpu: Cpu,
Review

I wonder if we can replace this with structs from k8s-openapi or kube-rs

}
#[derive(Debug, Clone, Serialize)]
pub struct Requests {
pub memory: Memory,
pub cpu: Cpu,
}
#[derive(Debug, Clone, Serialize)]
pub struct Memory {
pub value: u64,
pub unit: MemoryUnit,
}
#[derive(Debug, Clone, Serialize)]
pub struct Cpu {
pub value: u64,
pub unit: CpuUnit,
}
#[derive(Debug, Clone, Serialize)]
#[serde(rename_all = "lowercase")]
pub enum MemoryUnit {
Ki,
Mi,
Gi,
Ti,
}
#[derive(Debug, Clone, Serialize)]
#[serde(rename_all = "lowercase")]
pub enum CpuUnit {
// 1 = 1 core, m = millicore
Core,
Milli,
}
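On the review question above: a minimal sketch of the same 100m CPU / 100Mi memory defaults built with k8s-openapi types instead of the hand-rolled structs (assumes adding k8s-openapi as a dependency; default_resources is a hypothetical helper):

// Hypothetical sketch for the review suggestion: k8s-openapi's
// ResourceRequirements serializes to the quantity strings Kubernetes
// expects (e.g. cpu: 100m, memory: 100Mi).
use std::collections::BTreeMap;

use k8s_openapi::api::core::v1::ResourceRequirements;
use k8s_openapi::apimachinery::pkg::api::resource::Quantity;

fn default_resources() -> ResourceRequirements {
    let quantities = BTreeMap::from([
        ("cpu".to_string(), Quantity("100m".to_string())),
        ("memory".to_string(), Quantity("100Mi".to_string())),
    ]);
    ResourceRequirements {
        limits: Some(quantities.clone()),
        requests: Some(quantities),
        ..Default::default()
    }
}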
#[derive(Debug, Clone, Serialize)]
pub struct AlertManagerAdditionalPromRules {
#[serde(flatten)]