Merge pull request 'refactor/ns' (#74) from refactor/ns into master
All checks were successful
Run Check Script / check (push) Successful in 0s
Compile and package harmony_composer / package_harmony_composer (push) Successful in 4m7s

Reviewed-on: https://git.nationtech.io/NationTech/harmony/pulls/74
Reviewed-by: taha <taha@noreply.git.nationtech.io>
wjro 2025-07-02 19:54:28 +00:00
commit 6bf10b093c
14 changed files with 502 additions and 67 deletions

Cargo.lock (generated), 11 lines changed
View File

@ -1300,6 +1300,17 @@ dependencies = [
"url",
]
[[package]]
name = "example-monitoring-with-tenant"
version = "0.1.0"
dependencies = [
"cidr",
"harmony",
"harmony_cli",
"tokio",
"url",
]
[[package]]
name = "example-nanodc"
version = "0.1.0"

View File

@ -7,7 +7,13 @@ use harmony::{
monitoring::{
alert_channel::discord_alert_channel::DiscordWebhook,
alert_rule::prometheus_alert_rule::AlertManagerRuleGroup,
kube_prometheus::helm_prometheus_alert_score::HelmPrometheusAlertingScore,
kube_prometheus::{
helm_prometheus_alert_score::HelmPrometheusAlertingScore,
types::{
HTTPScheme, MatchExpression, Operator, Selector, ServiceMonitor,
ServiceMonitorEndpoint,
},
},
},
prometheus::alerts::{
infra::dell_server::{

View File

@ -0,0 +1,13 @@
[package]
name = "example-monitoring-with-tenant"
edition = "2024"
version.workspace = true
readme.workspace = true
license.workspace = true
[dependencies]
cidr.workspace = true
harmony = { version = "0.1.0", path = "../../harmony" }
harmony_cli = { version = "0.1.0", path = "../../harmony_cli" }
tokio.workspace = true
url.workspace = true

View File

@ -0,0 +1,90 @@
use std::collections::HashMap;
use harmony::{
data::Id,
inventory::Inventory,
maestro::Maestro,
modules::{
monitoring::{
alert_channel::discord_alert_channel::DiscordWebhook,
alert_rule::prometheus_alert_rule::AlertManagerRuleGroup,
kube_prometheus::{
helm_prometheus_alert_score::HelmPrometheusAlertingScore,
types::{
HTTPScheme, MatchExpression, Operator, Selector, ServiceMonitor,
ServiceMonitorEndpoint,
},
},
},
prometheus::alerts::k8s::pvc::high_pvc_fill_rate_over_two_days,
tenant::TenantScore,
},
topology::{
K8sAnywhereTopology, Url,
tenant::{ResourceLimits, TenantConfig, TenantNetworkPolicy},
},
};
#[tokio::main]
async fn main() {
let tenant = TenantScore {
config: TenantConfig {
id: Id::from_string("1234".to_string()),
name: "test-tenant".to_string(),
resource_limits: ResourceLimits {
cpu_request_cores: 6.0,
cpu_limit_cores: 4.0,
memory_request_gb: 4.0,
memory_limit_gb: 4.0,
storage_total_gb: 10.0,
},
network_policy: TenantNetworkPolicy::default(),
},
};
let discord_receiver = DiscordWebhook {
name: "test-discord".to_string(),
url: Url::Url(url::Url::parse("https://discord.doesnt.exist.com").unwrap()),
};
let high_pvc_fill_rate_over_two_days_alert = high_pvc_fill_rate_over_two_days();
let additional_rules =
AlertManagerRuleGroup::new("pvc-alerts", vec![high_pvc_fill_rate_over_two_days_alert]);
let service_monitor_endpoint = ServiceMonitorEndpoint {
port: Some("80".to_string()),
path: "/metrics".to_string(),
scheme: HTTPScheme::HTTP,
..Default::default()
};
let service_monitor = ServiceMonitor {
name: "test-service-monitor".to_string(),
selector: Selector {
match_labels: HashMap::new(),
match_expressions: vec![MatchExpression {
key: "test".to_string(),
operator: Operator::In,
values: vec!["test-service".to_string()],
}],
},
endpoints: vec![service_monitor_endpoint],
..Default::default()
};
let alerting_score = HelmPrometheusAlertingScore {
receivers: vec![Box::new(discord_receiver)],
rules: vec![Box::new(additional_rules)],
service_monitors: vec![service_monitor],
};
let mut maestro = Maestro::<K8sAnywhereTopology>::initialize(
Inventory::autoload(),
K8sAnywhereTopology::from_env(),
)
.await
.unwrap();
maestro.register_all(vec![Box::new(tenant), Box::new(alerting_score)]);
harmony_cli::init(maestro, None).await.unwrap();
}

View File

@ -4,6 +4,8 @@ use crate::{interpret::InterpretError, inventory::Inventory};
#[async_trait]
pub trait Installable<T>: Send + Sync {
async fn configure(&self, inventory: &Inventory, topology: &T) -> Result<(), InterpretError>;
async fn ensure_installed(
&self,
inventory: &Inventory,

View File

@ -196,8 +196,7 @@ impl K8sAnywhereTopology {
let k8s_client = self.k8s_client().await?;
Ok(K8sTenantManager::new(k8s_client))
})
.await
.unwrap();
.await?;
Ok(())
}
@ -285,4 +284,11 @@ impl TenantManager for K8sAnywhereTopology {
.provision_tenant(config)
.await
}
async fn get_tenant_config(&self) -> Option<TenantConfig> {
self.get_k8s_tenant_manager()
.ok()?
.get_tenant_config()
.await
}
}
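
The new get_tenant_config simply forwards to the K8sTenantManager, using `.ok()?` to turn a failed manager lookup into None. A minimal standalone sketch of that pattern, with made-up names rather than the crate's types:

// Illustrative only: shows the early-return behaviour of `.ok()?`.
fn tenant_name(manager: Result<&str, String>) -> Option<String> {
    // `.ok()?` converts the Result into an Option and returns None
    // immediately when building the tenant manager failed, which is
    // how get_tenant_config above falls back to "no config".
    let manager = manager.ok()?;
    Some(format!("{manager}-config"))
}

fn main() {
    assert_eq!(tenant_name(Ok("k8s")), Some("k8s-config".to_string()));
    assert_eq!(tenant_name(Err("no client".to_string())), None);
}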

View File

@ -27,6 +27,7 @@ impl<S: AlertSender + Installable<T>, T: Topology> Interpret<T> for AlertingInte
inventory: &Inventory,
topology: &T,
) -> Result<Outcome, InterpretError> {
self.sender.configure(inventory, topology).await?;
for receiver in self.receivers.iter() {
receiver.install(&self.sender).await?;
}

View File

@ -5,7 +5,6 @@ use crate::{
topology::k8s::{ApplyStrategy, K8sClient},
};
use async_trait::async_trait;
use derive_new::new;
use k8s_openapi::{
api::{
core::v1::{LimitRange, Namespace, ResourceQuota},
@ -19,12 +18,23 @@ use kube::Resource;
use log::{debug, info, warn};
use serde::de::DeserializeOwned;
use serde_json::json;
use tokio::sync::OnceCell;
use super::{TenantConfig, TenantManager};
#[derive(new, Clone, Debug)]
#[derive(Clone, Debug)]
pub struct K8sTenantManager {
k8s_client: Arc<K8sClient>,
k8s_tenant_config: Arc<OnceCell<TenantConfig>>,
}
impl K8sTenantManager {
pub fn new(client: Arc<K8sClient>) -> Self {
Self {
k8s_client: client,
k8s_tenant_config: Arc::new(OnceCell::new()),
}
}
}
impl K8sTenantManager {
@ -112,8 +122,8 @@ impl K8sTenantManager {
"requests.storage": format!("{:.3}Gi", config.resource_limits.storage_total_gb),
"pods": "20",
"services": "10",
"configmaps": "30",
"secrets": "30",
"configmaps": "60",
"secrets": "60",
"persistentvolumeclaims": "15",
"services.loadbalancers": "2",
"services.nodeports": "5",
@ -147,7 +157,7 @@ impl K8sTenantManager {
"spec": {
"limits": [
{
"type": "Container",
"type": "Container",
"default": {
"cpu": "500m",
"memory": "500Mi"
@ -180,60 +190,94 @@ impl K8sTenantManager {
"spec": {
"podSelector": {},
"egress": [
{ "to": [ {"podSelector": {}}]},
{ "to":
[
{
"podSelector": {},
"namespaceSelector": {
"matchLabels": {
"kubernetes.io/metadata.name":"openshift-dns"
}
}
},
]
},
{ "to": [
{
"ipBlock": {
"cidr": "0.0.0.0/0",
// See https://en.wikipedia.org/wiki/Reserved_IP_addresses
"except": [
"10.0.0.0/8",
"172.16.0.0/12",
"192.168.0.0/16",
"192.0.0.0/24",
"192.0.2.0/24",
"192.88.99.0/24",
"192.18.0.0/15",
"198.51.100.0/24",
"169.254.0.0/16",
"203.0.113.0/24",
"127.0.0.0/8",
// Not sure we should block this one as it is
// used for multicast. But better block more than less.
"224.0.0.0/4",
"240.0.0.0/4",
"100.64.0.0/10",
"233.252.0.0/24",
"0.0.0.0/8",
],
}
{
"to": [
{ "podSelector": {} }
]
},
{
"to": [
{
"podSelector": {},
"namespaceSelector": {
"matchLabels": {
"kubernetes.io/metadata.name": "kube-system"
}
]
},
}
}
]
},
{
"to": [
{
"podSelector": {},
"namespaceSelector": {
"matchLabels": {
"kubernetes.io/metadata.name": "openshift-dns"
}
}
}
]
},
{
"to": [
{
"ipBlock": {
"cidr": "10.43.0.1/32",
}
}
]
},
{
"to": [
{
"ipBlock": {
"cidr": "172.23.0.0/16",
}
}
]
},
{
"to": [
{
"ipBlock": {
"cidr": "0.0.0.0/0",
"except": [
"10.0.0.0/8",
"172.16.0.0/12",
"192.168.0.0/16",
"192.0.0.0/24",
"192.0.2.0/24",
"192.88.99.0/24",
"192.18.0.0/15",
"198.51.100.0/24",
"169.254.0.0/16",
"203.0.113.0/24",
"127.0.0.0/8",
"224.0.0.0/4",
"240.0.0.0/4",
"100.64.0.0/10",
"233.252.0.0/24",
"0.0.0.0/8"
]
}
}
]
}
],
"ingress": [
{ "from": [ {"podSelector": {}}]}
{
"from": [
{ "podSelector": {} }
]
}
],
"policyTypes": [
"Ingress", "Egress",
"Ingress",
"Egress"
]
}
});
let mut network_policy: NetworkPolicy =
serde_json::from_value(network_policy).map_err(|e| {
ExecutorError::ConfigurationError(format!(
@ -357,6 +401,9 @@ impl K8sTenantManager {
Ok(network_policy)
}
fn store_config(&self, config: &TenantConfig) {
let _ = self.k8s_tenant_config.set(config.clone());
}
}
#[async_trait]
@ -385,6 +432,10 @@ impl TenantManager for K8sTenantManager {
"Success provisionning K8s tenant id {} name {}",
config.id, config.name
);
self.store_config(config);
Ok(())
}
async fn get_tenant_config(&self) -> Option<TenantConfig> {
self.k8s_tenant_config.get().cloned()
}
}
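
The tenant config cache added here is a tokio::sync::OnceCell: the first provision_tenant call stores the config, later attempts are ignored, and get_tenant_config reads it back. A standalone sketch of that behaviour, assuming only the tokio sync primitive already imported above:

use tokio::sync::OnceCell;

#[tokio::main]
async fn main() {
    let cached: OnceCell<String> = OnceCell::new();

    // The first set wins; a second set returns Err and is ignored,
    // which is why store_config can discard the Result with `let _ =`.
    assert!(cached.set("tenant-a".to_string()).is_ok());
    assert!(cached.set("tenant-b".to_string()).is_err());

    // get() returns what was stored, or None if nothing was stored yet.
    assert_eq!(cached.get().map(String::as_str), Some("tenant-a"));
}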

View File

@ -15,4 +15,6 @@ pub trait TenantManager {
/// # Arguments
/// * `config`: The desired configuration for the new tenant.
async fn provision_tenant(&self, config: &TenantConfig) -> Result<(), ExecutorError>;
async fn get_tenant_config(&self) -> Option<TenantConfig>;
}
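
For readers implementing the trait elsewhere, a hedged sketch of a trivial implementer of the new read-back method; TenantConfig here is a stand-in type, not the crate's real definition:

use async_trait::async_trait;

#[derive(Clone, Debug, PartialEq)]
struct TenantConfig {
    name: String,
}

#[async_trait]
trait TenantManager {
    async fn get_tenant_config(&self) -> Option<TenantConfig>;
}

struct StaticTenantManager {
    config: Option<TenantConfig>,
}

#[async_trait]
impl TenantManager for StaticTenantManager {
    async fn get_tenant_config(&self) -> Option<TenantConfig> {
        // Hand back whatever was captured at construction time.
        self.config.clone()
    }
}

#[tokio::main]
async fn main() {
    let manager = StaticTenantManager {
        config: Some(TenantConfig { name: "test-tenant".into() }),
    };
    assert_eq!(
        manager.get_tenant_config().await,
        Some(TenantConfig { name: "test-tenant".into() })
    );
}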

View File

@ -1,12 +1,12 @@
use serde::Serialize;
use crate::modules::monitoring::kube_prometheus::types::{
AlertManagerAdditionalPromRules, AlertManagerChannelConfig,
AlertManagerAdditionalPromRules, AlertManagerChannelConfig, ServiceMonitor,
};
#[derive(Debug, Clone, Serialize)]
pub struct KubePrometheusConfig {
pub namespace: String,
pub namespace: Option<String>,
pub default_rules: bool,
pub windows_monitoring: bool,
pub alert_manager: bool,
@ -30,7 +30,7 @@ pub struct KubePrometheusConfig {
impl KubePrometheusConfig {
pub fn new() -> Self {
Self {
namespace: "monitoring".into(),
namespace: None,
default_rules: true,
windows_monitoring: false,
alert_manager: true,
@ -39,7 +39,7 @@ impl KubePrometheusConfig {
prometheus: true,
kubernetes_service_monitors: true,
kubernetes_api_server: false,
kubelet: false,
kubelet: true,
kube_controller_manager: false,
kube_etcd: false,
kube_proxy: false,
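
With namespace now an Option that starts out as None, callers have to resolve it before rendering the chart. A small hypothetical helper (not from the crate) showing the fallback applied later in this PR, where configure_with_topology defaults to "monitoring" when no tenant config is available:

fn resolve_namespace(namespace: &Option<String>) -> String {
    // Mirrors the fallback in configure_with_topology further down in this PR.
    namespace.clone().unwrap_or_else(|| "monitoring".to_string())
}

fn main() {
    assert_eq!(resolve_namespace(&None), "monitoring");
    assert_eq!(resolve_namespace(&Some("test-tenant".into())), "test-tenant");
}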

View File

@ -12,7 +12,8 @@ use crate::modules::{
helm::chart::HelmChartScore,
monitoring::kube_prometheus::types::{
AlertGroup, AlertManager, AlertManagerAdditionalPromRules, AlertManagerConfig,
AlertManagerRoute, AlertManagerValues, PrometheusConfig,
AlertManagerRoute, AlertManagerSpec, AlertManagerValues, ConfigReloader, Limits,
PrometheusConfig, Requests, Resources,
},
};
@ -36,8 +37,47 @@ pub fn kube_prometheus_helm_chart_score(
let node_exporter = config.node_exporter.to_string();
let prometheus_operator = config.prometheus_operator.to_string();
let prometheus = config.prometheus.to_string();
let resource_limit = Resources {
limits: Limits {
memory: "100Mi".to_string(),
cpu: "100m".to_string(),
},
requests: Requests {
memory: "100Mi".to_string(),
cpu: "100m".to_string(),
},
};
fn indent_lines(s: &str, spaces: usize) -> String {
let pad = " ".repeat(spaces);
s.lines()
.map(|line| format!("{pad}{line}"))
.collect::<Vec<_>>()
.join("\n")
}
fn resource_block(resource: &Resources, indent_level: usize) -> String {
let yaml = serde_yaml::to_string(resource).unwrap();
format!(
"{}resources:\n{}",
" ".repeat(indent_level),
indent_lines(&yaml, indent_level + 2)
)
}
let resource_section = resource_block(&resource_limit, 2);
let mut values = format!(
r#"
prometheus:
enabled: {prometheus}
prometheusSpec:
resources:
requests:
cpu: 100m
memory: 500Mi
limits:
cpu: 200m
memory: 1000Mi
defaultRules:
create: {default_rules}
rules:
@ -77,31 +117,164 @@ defaultRules:
windows: true
windowsMonitoring:
enabled: {windows_monitoring}
resources:
requests:
cpu: 100m
memory: 150Mi
limits:
cpu: 200m
memory: 250Mi
grafana:
enabled: {grafana}
resources:
requests:
cpu: 100m
memory: 150Mi
limits:
cpu: 200m
memory: 250Mi
initChownData:
resources:
requests:
cpu: 10m
memory: 50Mi
limits:
cpu: 50m
memory: 100Mi
sidecar:
resources:
requests:
cpu: 10m
memory: 50Mi
limits:
cpu: 50m
memory: 100Mi
kubernetesServiceMonitors:
enabled: {kubernetes_service_monitors}
kubeApiServer:
enabled: {kubernetes_api_server}
resources:
requests:
cpu: 100m
memory: 150Mi
limits:
cpu: 200m
memory: 250Mi
kubelet:
enabled: {kubelet}
resources:
requests:
cpu: 100m
memory: 150Mi
limits:
cpu: 200m
memory: 250Mi
kubeControllerManager:
enabled: {kube_controller_manager}
resources:
requests:
cpu: 100m
memory: 150Mi
limits:
cpu: 200m
memory: 250Mi
coreDns:
enabled: {core_dns}
resources:
requests:
cpu: 100m
memory: 150Mi
limits:
cpu: 200m
memory: 250Mi
kubeEtcd:
enabled: {kube_etcd}
resources:
requests:
cpu: 100m
memory: 150Mi
limits:
cpu: 200m
memory: 250Mi
kubeScheduler:
enabled: {kube_scheduler}
resources:
requests:
cpu: 100m
memory: 150Mi
limits:
cpu: 200m
memory: 250Mi
kubeProxy:
enabled: {kube_proxy}
resources:
requests:
cpu: 100m
memory: 150Mi
limits:
cpu: 200m
memory: 250Mi
kubeStateMetrics:
enabled: {kube_state_metrics}
kube-state-metrics:
resources:
requests:
cpu: 100m
memory: 150Mi
limits:
cpu: 200m
memory: 250Mi
nodeExporter:
enabled: {node_exporter}
resources:
requests:
cpu: 100m
memory: 150Mi
limits:
cpu: 200m
memory: 250Mi
prometheus-node-exporter:
resources:
requests:
cpu: 100m
memory: 150Mi
limits:
cpu: 200m
memory: 250Mi
prometheusOperator:
enabled: {prometheus_operator}
resources:
requests:
cpu: 100m
memory: 150Mi
limits:
cpu: 100m
memory: 200Mi
prometheusConfigReloader:
resources:
requests:
cpu: 100m
memory: 150Mi
limits:
cpu: 100m
memory: 200Mi
admissionWebhooks:
deployment:
resources:
limits:
cpu: 10m
memory: 100Mi
requests:
cpu: 10m
memory: 100Mi
patch:
resources:
limits:
cpu: 10m
memory: 100Mi
requests:
cpu: 10m
memory: 100Mi
"#,
);
@ -160,6 +333,30 @@ prometheusOperator:
alertmanager: AlertManager {
enabled: config.alert_manager,
config: alert_manager_channel_config,
alertmanager_spec: AlertManagerSpec {
resources: Resources {
limits: Limits {
memory: "100Mi".to_string(),
cpu: "100m".to_string(),
},
requests: Requests {
memory: "100Mi".to_string(),
cpu: "100m".to_string(),
},
},
},
init_config_reloader: ConfigReloader {
resources: Resources {
limits: Limits {
memory: "100Mi".to_string(),
cpu: "100m".to_string(),
},
requests: Requests {
memory: "100Mi".to_string(),
cpu: "100m".to_string(),
},
},
},
},
};
@ -200,7 +397,7 @@ prometheusOperator:
debug!("full values.yaml: \n {:#}", values);
HelmChartScore {
namespace: Some(NonBlankString::from_str(&config.namespace).unwrap()),
namespace: Some(NonBlankString::from_str(&config.namespace.clone().unwrap()).unwrap()),
release_name: NonBlankString::from_str("kube-prometheus").unwrap(),
chart_name: NonBlankString::from_str(
"oci://ghcr.io/prometheus-community/charts/kube-prometheus-stack",

View File

@ -9,6 +9,7 @@ use crate::{
topology::{
HelmCommand, Topology,
oberservability::monitoring::{AlertReceiver, AlertRule, AlertingInterpret},
tenant::TenantManager,
},
};
@ -19,7 +20,7 @@ pub struct HelmPrometheusAlertingScore {
pub service_monitors: Vec<ServiceMonitor>,
}
impl<T: Topology + HelmCommand> Score<T> for HelmPrometheusAlertingScore {
impl<T: Topology + HelmCommand + TenantManager> Score<T> for HelmPrometheusAlertingScore {
fn create_interpret(&self) -> Box<dyn crate::interpret::Interpret<T>> {
let config = Arc::new(Mutex::new(KubePrometheusConfig::new()));
config
@ -27,7 +28,7 @@ impl<T: Topology + HelmCommand> Score<T> for HelmPrometheusAlertingScore {
.expect("couldn't lock config")
.additional_service_monitors = self.service_monitors.clone();
Box::new(AlertingInterpret {
sender: Prometheus { config },
sender: Prometheus::new(),
receivers: self.receivers.clone(),
rules: self.rules.clone(),
})
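
The Score impl now also requires TenantManager on the topology, which is what lets the Prometheus sender (next file) query tenant information when it picks a namespace. A sketch of the shape of that bound using stand-in traits, not the crate's real API:

trait Topology {}
trait HelmCommand {}
trait TenantManager {
    fn tenant_name(&self) -> Option<String>;
}

struct DemoTopology;
impl Topology for DemoTopology {}
impl HelmCommand for DemoTopology {}
impl TenantManager for DemoTopology {
    fn tenant_name(&self) -> Option<String> {
        Some("test-tenant".to_string())
    }
}

// Any code generic over T: Topology + HelmCommand + TenantManager can now
// reach tenant information, falling back to a default namespace otherwise.
fn target_namespace<T: Topology + HelmCommand + TenantManager>(topology: &T) -> String {
    topology.tenant_name().unwrap_or_else(|| "monitoring".to_string())
}

fn main() {
    assert_eq!(target_namespace(&DemoTopology), "test-tenant");
}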

View File

@ -1,7 +1,7 @@
use std::sync::{Arc, Mutex};
use async_trait::async_trait;
use log::debug;
use log::{debug, error};
use serde::Serialize;
use crate::{
@ -10,9 +10,10 @@ use crate::{
modules::monitoring::alert_rule::prometheus_alert_rule::AlertManagerRuleGroup,
score,
topology::{
HelmCommand, Topology,
HelmCommand, K8sAnywhereTopology, Topology,
installable::Installable,
oberservability::monitoring::{AlertReceiver, AlertRule, AlertSender},
tenant::TenantManager,
},
};
@ -33,7 +34,12 @@ impl AlertSender for Prometheus {
}
#[async_trait]
impl<T: Topology + HelmCommand> Installable<T> for Prometheus {
impl<T: Topology + HelmCommand + TenantManager> Installable<T> for Prometheus {
async fn configure(&self, _inventory: &Inventory, topology: &T) -> Result<(), InterpretError> {
self.configure_with_topology(topology).await;
Ok(())
}
async fn ensure_installed(
&self,
inventory: &Inventory,
@ -50,6 +56,23 @@ pub struct Prometheus {
}
impl Prometheus {
pub fn new() -> Self {
Self {
config: Arc::new(Mutex::new(KubePrometheusConfig::new())),
}
}
pub async fn configure_with_topology<T: TenantManager>(&self, topology: &T) {
let ns = topology
.get_tenant_config()
.await
.map(|cfg| cfg.name.clone())
.unwrap_or_else(|| "monitoring".to_string());
error!("This must be refactored, see comments in pr #74");
debug!("NS: {}", ns);
self.config.lock().unwrap().namespace = Some(ns);
}
pub async fn install_receiver(
&self,
prometheus_receiver: &dyn PrometheusReceiver,

View File

@ -1,4 +1,4 @@
use std::collections::BTreeMap;
use std::collections::{BTreeMap, HashMap};
use async_trait::async_trait;
use serde::Serialize;
@ -16,9 +16,17 @@ pub struct AlertManagerValues {
pub alertmanager: AlertManager,
}
#[derive(Debug, Clone, Serialize)]
#[serde(rename_all = "camelCase")]
pub struct AlertManager {
pub enabled: bool,
pub config: AlertManagerConfig,
pub alertmanager_spec: AlertManagerSpec,
pub init_config_reloader: ConfigReloader,
}
#[derive(Debug, Clone, Serialize)]
pub struct ConfigReloader {
pub resources: Resources,
}
#[derive(Debug, Clone, Serialize)]
@ -43,6 +51,30 @@ pub struct AlertManagerChannelConfig {
pub channel_receiver: Value,
}
#[derive(Debug, Clone, Serialize)]
#[serde(rename_all = "camelCase")]
pub struct AlertManagerSpec {
pub(crate) resources: Resources,
}
#[derive(Debug, Clone, Serialize)]
pub struct Resources {
pub limits: Limits,
pub requests: Requests,
}
#[derive(Debug, Clone, Serialize)]
pub struct Limits {
pub memory: String,
pub cpu: String,
}
#[derive(Debug, Clone, Serialize)]
pub struct Requests {
pub memory: String,
pub cpu: String,
}
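
Because AlertManager and AlertManagerSpec are tagged with rename_all = "camelCase", the snake_case fields above serialize as camelCase keys. A standalone sketch, assuming serde_yaml as used elsewhere in this PR; the demo struct is illustrative only:

use serde::Serialize;

#[derive(Serialize)]
#[serde(rename_all = "camelCase")]
struct Demo {
    alertmanager_spec: u32,
    init_config_reloader: u32,
}

fn main() {
    let yaml = serde_yaml::to_string(&Demo {
        alertmanager_spec: 1,
        init_config_reloader: 2,
    })
    .unwrap();
    // Keys come out as `alertmanagerSpec` and `initConfigReloader`,
    // the camelCase forms the structs above are aiming for.
    assert!(yaml.contains("alertmanagerSpec"));
    assert!(yaml.contains("initConfigReloader"));
}
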
#[derive(Debug, Clone, Serialize)]
pub struct AlertManagerAdditionalPromRules {
#[serde(flatten)]