wip: Helm chart deploys to a namespace with resource limits and requests; trying to fix a "connection refused" error when reaching the API

This commit is contained in:
Willem 2025-06-27 14:47:28 -04:00
parent 8e857bc72a
commit 460c8b59e1
9 changed files with 246 additions and 13 deletions

View File

@ -0,0 +1,13 @@
[package]
name = "example-monitoring-with-tenant"
edition = "2024"
version.workspace = true
readme.workspace = true
license.workspace = true
[dependencies]
cidr.workspace = true
harmony = { version = "0.1.0", path = "../../harmony" }
harmony_cli = { version = "0.1.0", path = "../../harmony_cli" }
tokio.workspace = true
url.workspace = true

View File

@ -0,0 +1,63 @@
use cidr::Ipv4Cidr;
use harmony::{
data::Id,
inventory::Inventory,
maestro::Maestro,
modules::{
monitoring::{
alert_channel::discord_alert_channel::DiscordWebhook,
alert_rule::prometheus_alert_rule::AlertManagerRuleGroup,
kube_prometheus::helm_prometheus_alert_score::HelmPrometheusAlertingScore,
},
prometheus::alerts::k8s::pvc::high_pvc_fill_rate_over_two_days,
tenant::TenantScore,
},
topology::{
K8sAnywhereTopology, Url,
tenant::{InternetEgressPolicy, ResourceLimits, TenantConfig, TenantNetworkPolicy},
},
};
use std::net::Ipv4Addr;
use std::str::FromStr;
#[tokio::main]
async fn main() {
    // Tenant with identical requests and limits (guaranteed-QoS shape)
    // and a 10 GB total storage quota.
    let tenant = TenantScore {
        config: TenantConfig {
            id: Id::from_string("1234".to_string()),
            name: "test-tenant".to_string(),
            resource_limits: ResourceLimits {
                cpu_request_cores: 4.0,
                cpu_limit_cores: 4.0,
                memory_request_gb: 4.0,
                memory_limit_gb: 4.0,
                storage_total_gb: 10.0,
            },
            network_policy: TenantNetworkPolicy::default(),
        },
    };

    // Alerting: a Discord webhook receiver plus one PVC fill-rate rule group.
    let webhook = DiscordWebhook {
        name: "test-discord".to_string(),
        url: Url::Url(url::Url::parse("https://discord.doesnt.exist.com").unwrap()),
    };
    let pvc_rules =
        AlertManagerRuleGroup::new("pvc-alerts", vec![high_pvc_fill_rate_over_two_days()]);
    let alerting = HelmPrometheusAlertingScore {
        receivers: vec![Box::new(webhook)],
        rules: vec![Box::new(pvc_rules)],
    };

    // Build the maestro from the autoloaded inventory and env-derived
    // topology, register both scores, then hand off to the CLI runner.
    let mut maestro = Maestro::<K8sAnywhereTopology>::initialize(
        Inventory::autoload(),
        K8sAnywhereTopology::from_env(),
    )
    .await
    .unwrap();
    maestro.register_all(vec![Box::new(tenant), Box::new(alerting)]);
    harmony_cli::init(maestro, None).await.unwrap();
}

View File

@ -185,13 +185,18 @@ impl K8sAnywhereTopology {
self.tenant_manager
.get_or_try_init(async || -> Result<K8sTenantManager, String> {
let k8s_client = self.k8s_client().await?;
Ok(K8sTenantManager::new(k8s_client, TenantConfig::default()))
Ok(K8sTenantManager::new(k8s_client))
})
.await
.unwrap();
Ok(())
}
// Caches the tenant config on the topology for later lookups.
// Backed by a once-cell: only the FIRST call's config is retained;
// subsequent calls are silently ignored — TODO confirm that is the
// intended behavior for multi-tenant provisioning.
async fn store_tenant_config(&self, config: TenantConfig) {
self.tenant_manager_config
.get_or_init(|| async { config })
.await;
}
fn get_k8s_tenant_manager(&self) -> Result<&K8sTenantManager, ExecutorError> {
match self.tenant_manager.get() {
@ -271,6 +276,7 @@ impl HelmCommand for K8sAnywhereTopology {}
#[async_trait]
impl TenantManager for K8sAnywhereTopology {
async fn provision_tenant(&self, config: &TenantConfig) -> Result<(), ExecutorError> {
self.store_tenant_config(config.clone()).await;
self.get_k8s_tenant_manager()?
.provision_tenant(config)
.await

View File

@ -25,7 +25,6 @@ use super::{TenantConfig, TenantManager};
#[derive(new)]
pub struct K8sTenantManager {
k8s_client: Arc<K8sClient>,
k8s_tenant_config: TenantConfig,
}
impl K8sTenantManager {
@ -326,6 +325,6 @@ impl TenantManager for K8sTenantManager {
Ok(())
}
// NOTE(review): this commit replaces the stored-config lookup with
// `todo!()`, which panics whenever a caller asks the K8sTenantManager
// for its tenant config. The config now appears to live on
// K8sAnywhereTopology (see store_tenant_config); this accessor should
// read it from there rather than panic — TODO confirm intended design
// before merging this WIP.
fn get_tenant_config(&self) -> Option<TenantConfig> {
Some(self.k8s_tenant_config.clone())
todo!()
}
}

View File

@ -1,8 +1,8 @@
use serde::Serialize;
use crate::modules::monitoring::
kube_prometheus::types::{AlertManagerAdditionalPromRules, AlertManagerChannelConfig}
;
use crate::modules::monitoring::kube_prometheus::types::{
AlertManagerAdditionalPromRules, AlertManagerChannelConfig,
};
#[derive(Debug, Clone, Serialize)]
pub struct KubePrometheusConfig {

View File

@ -12,7 +12,8 @@ use crate::modules::{
helm::chart::HelmChartScore,
monitoring::kube_prometheus::types::{
AlertGroup, AlertManager, AlertManagerAdditionalPromRules, AlertManagerConfig,
AlertManagerRoute, AlertManagerValues,
AlertManagerRoute, AlertManagerSpec, AlertManagerValues, Cpu, CpuUnit, Limits, Memory,
MemoryUnit, Requests, Resources,
},
};
@ -36,8 +37,53 @@ pub fn kube_prometheus_helm_chart_score(
let node_exporter = config.node_exporter.to_string();
let prometheus_operator = config.prometheus_operator.to_string();
let prometheus = config.prometheus.to_string();
let resource_limit = Resources {
limits: Limits {
memory: Memory {
value: 100,
unit: MemoryUnit::Mi,
},
cpu: Cpu {
value: 100,
unit: CpuUnit::Milli,
},
},
requests: Requests {
memory: Memory {
value: 100,
unit: MemoryUnit::Mi,
},
cpu: Cpu {
value: 100,
unit: CpuUnit::Milli,
},
},
};
/// Shifts every line of `s` right by `spaces` spaces.
///
/// Lines are joined with `\n`; any trailing newline in the input is
/// not reproduced (mirrors `str::lines` semantics). An empty input
/// yields an empty string.
fn indent_lines(s: &str, spaces: usize) -> String {
    let prefix = " ".repeat(spaces);
    let mut out = String::with_capacity(s.len() + prefix.len());
    for (i, line) in s.lines().enumerate() {
        if i > 0 {
            out.push('\n');
        }
        out.push_str(&prefix);
        out.push_str(line);
    }
    out
}
// Renders a `resources:` key at `indent_level` spaces, followed by the
// YAML serialization of `resource` indented two spaces deeper, so the
// result can be spliced verbatim into the values.yaml template.
//
// NOTE(review): the `.unwrap()` assumes YAML serialization cannot fail.
// Also, with plain `#[derive(Serialize)]` on Memory/Cpu the serialized
// output looks like `memory: { value: 100, unit: mi }` rather than the
// `memory: 100Mi` quantity string Kubernetes expects — verify the
// Serialize impls, otherwise every resource section in this chart is
// invalid and the release will fail to deploy.
fn resource_block(resource: &Resources, indent_level: usize) -> String {
let yaml = serde_yaml::to_string(resource).unwrap();
format!(
"{}resources:\n{}",
" ".repeat(indent_level),
indent_lines(&yaml, indent_level + 2)
)
}
let resource_section = resource_block(&resource_limit, 2);
let mut values = format!(
r#"
prometheus:
enabled: {prometheus}
prometheusSpec:
{resource_section}
defaultRules:
create: {default_rules}
rules:
@ -77,32 +123,59 @@ defaultRules:
windows: true
windowsMonitoring:
enabled: {windows_monitoring}
{resource_section}
grafana:
enabled: {grafana}
{resource_section}
kubernetesServiceMonitors:
enabled: {kubernetes_service_monitors}
{resource_section}
kubeApiServer:
enabled: {kubernetes_api_server}
{resource_section}
kubelet:
enabled: {kubelet}
{resource_section}
kubeControllerManager:
enabled: {kube_controller_manager}
{resource_section}
coreDns:
enabled: {core_dns}
{resource_section}
kubeEtcd:
enabled: {kube_etcd}
{resource_section}
kubeScheduler:
enabled: {kube_scheduler}
{resource_section}
kubeProxy:
enabled: {kube_proxy}
{resource_section}
kubeStateMetrics:
enabled: {kube_state_metrics}
{resource_section}
nodeExporter:
enabled: {node_exporter}
{resource_section}
prometheusOperator:
enabled: {prometheus_operator}
prometheus:
enabled: {prometheus}
admissionWebhooks:
deployment:
resources:
limits:
cpu: 10m
memory: 100Mi
requests:
cpu: 10m
memory: 100Mi
patch:
resources:
limits:
cpu: 10m
memory: 100Mi
requests:
cpu: 10m
memory: 100Mi
"#,
);
@ -145,6 +218,30 @@ prometheus:
alertmanager: AlertManager {
enabled: config.alert_manager,
config: alert_manager_channel_config,
alertManagerSpec: AlertManagerSpec {
resources: Resources {
limits: Limits {
memory: Memory {
value: 100,
unit: MemoryUnit::Mi,
},
cpu: Cpu {
value: 100,
unit: CpuUnit::Milli,
},
},
requests: Requests {
memory: Memory {
value: 100,
unit: MemoryUnit::Mi,
},
cpu: Cpu {
value: 100,
unit: CpuUnit::Milli,
},
},
},
},
},
};
@ -184,7 +281,6 @@ prometheus:
values.push_str(&alert_manager_additional_rules_yaml);
debug!("full values.yaml: \n {:#}", values);
HelmChartScore {
namespace: Some(NonBlankString::from_str(&config.namespace.clone().unwrap()).unwrap()),
release_name: NonBlankString::from_str("kube-prometheus").unwrap(),

View File

@ -63,8 +63,11 @@ impl Prometheus {
}
pub fn configure_with_topology<T: TenantManager>(&self, topology: &T) {
let ns = topology.get_tenant_config().map(|cfg| cfg.name.clone())
let ns = topology
.get_tenant_config()
.map(|cfg| cfg.name.clone())
.unwrap_or_else(|| "monitoring".to_string());
debug!("NS: {}", ns);
self.config.lock().unwrap().namespace = Some(ns);
}

View File

@ -19,6 +19,7 @@ pub struct AlertManagerValues {
/// The `alertmanager` section of the kube-prometheus Helm values.
pub struct AlertManager {
pub enabled: bool,
pub config: AlertManagerConfig,
// NOTE(review): camelCase field name mirrors the Helm values key but
// violates Rust naming (non_snake_case warning); prefer
// `alert_manager_spec` with #[serde(rename = "alertManagerSpec")].
// Left unchanged here because the field name is constructed elsewhere.
pub alertManagerSpec: AlertManagerSpec,
}
#[derive(Debug, Clone, Serialize)]
@ -43,6 +44,58 @@ pub struct AlertManagerChannelConfig {
pub channel_receiver: Value,
}
#[derive(Debug, Clone, Serialize)]
pub struct AlertManagerSpec {
pub(crate) resources: Resources,
}
#[derive(Debug, Clone, Serialize)]
pub struct Resources {
pub limits: Limits,
pub requests: Requests,
}
#[derive(Debug, Clone, Serialize)]
pub struct Limits {
pub memory: Memory,
pub cpu: Cpu,
}
#[derive(Debug, Clone, Serialize)]
pub struct Requests {
pub memory: Memory,
pub cpu: Cpu,
}
#[derive(Debug, Clone, Serialize)]
pub struct Memory {
pub value: u64,
pub unit: MemoryUnit,
}
#[derive(Debug, Clone, Serialize)]
pub struct Cpu {
pub value: u64,
pub unit: CpuUnit,
}
#[derive(Debug, Clone, Serialize)]
#[serde(rename_all = "lowercase")]
pub enum MemoryUnit {
Ki,
Mi,
Gi,
Ti,
}
#[derive(Debug, Clone, Serialize)]
#[serde(rename_all = "lowercase")]
pub enum CpuUnit {
// 1 = 1 core, m = millicore
Core,
Milli,
}
#[derive(Debug, Clone, Serialize)]
pub struct AlertManagerAdditionalPromRules {
#[serde(flatten)]