Compare commits

..

12 Commits

Author SHA1 Message Date
14fc4345c1 feat: Initialize k8s tenant properly
All checks were successful
Run Check Script / check (push) Successful in 1m48s
Run Check Script / check (pull_request) Successful in 1m49s
2025-06-08 23:49:08 -04:00
8e472e4c65 feat: Add Default implementation for Harmony Id along with documentation.
Some checks failed
Run Check Script / check (push) Failing after 47s
Run Check Script / check (pull_request) Failing after 45s
This Id implementation is optimized for ease of use. Ids are prefixed with the unix epoch and suffixed with 7 alphanumeric characters. But Ids can also contain any String the user wants to pass it
2025-06-08 21:23:29 -04:00
ec17ccc246 feat: Add example-tenant (WIP)
All checks were successful
Run Check Script / check (push) Successful in 1m48s
Run Check Script / check (pull_request) Successful in 1m53s
2025-06-06 13:59:48 -04:00
5127f44ab3 docs: Add note about pod privilege escalation in ADR 011 Tenant
All checks were successful
Run Check Script / check (push) Successful in 1m47s
Run Check Script / check (pull_request) Successful in 1m46s
2025-06-06 13:56:40 -04:00
2ff70db0b1 wip: Tenant example project
All checks were successful
Run Check Script / check (push) Successful in 1m49s
Run Check Script / check (pull_request) Successful in 1m48s
2025-06-06 13:52:40 -04:00
e17ac1af83 Merge remote-tracking branch 'origin/master' into TenantManager_impl_k8s_anywhere
All checks were successful
Run Check Script / check (push) Successful in 1m48s
Run Check Script / check (pull_request) Successful in 1m47s
2025-06-04 16:14:21 -04:00
045954f8d3 start network policy
All checks were successful
Run Check Script / check (push) Successful in 1m50s
Run Check Script / check (pull_request) Successful in 1m46s
2025-05-29 18:06:16 -04:00
7c809bf18a Make k8stenantmanager a oncecell
All checks were successful
Run Check Script / check (push) Successful in 1m49s
Run Check Script / check (pull_request) Successful in 1m46s
2025-05-29 16:03:58 -04:00
6490e5e82a Hardcode some limits to protect the overall cluster
Some checks failed
Run Check Script / check (push) Failing after 42s
Run Check Script / check (pull_request) Failing after 47s
2025-05-29 15:49:46 -04:00
5e51f7490c Update request quota
Some checks failed
Run Check Script / check (push) Failing after 46s
2025-05-29 15:41:57 -04:00
97fba07f4e feat: adding kubernetes implentation of tenant manager
Some checks failed
Run Check Script / check (push) Failing after 43s
2025-05-29 14:35:58 -04:00
624e4330bb boilerplate
All checks were successful
Run Check Script / check (push) Successful in 1m47s
2025-05-29 13:36:30 -04:00
29 changed files with 656 additions and 485 deletions

24
Cargo.lock generated
View File

@@ -1070,6 +1070,21 @@ dependencies = [
"url",
]
[[package]]
name = "example-tenant"
version = "0.1.0"
dependencies = [
"cidr",
"env_logger",
"harmony",
"harmony_cli",
"harmony_macros",
"harmony_types",
"log",
"tokio",
"url",
]
[[package]]
name = "example-tui"
version = "0.1.0"
@@ -1409,12 +1424,14 @@ dependencies = [
"derive-new",
"directories",
"dockerfile_builder",
"dyn-clone",
"email_address",
"env_logger",
"fqdn",
"harmony_macros",
"harmony_types",
"helm-wrapper-rs",
"hex",
"http 1.3.1",
"inquire",
"k3d-rs",
@@ -1426,6 +1443,7 @@ dependencies = [
"non-blank-string-rs",
"opnsense-config",
"opnsense-config-xml",
"rand 0.9.1",
"reqwest 0.11.27",
"russh",
"rust-ipmi",
@@ -1550,6 +1568,12 @@ version = "0.3.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d231dfb89cfffdbc30e7fc41579ed6066ad03abda9e567ccafae602b97ec5024"
[[package]]
name = "hex"
version = "0.4.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7f24254aa9a54b5c858eaee2f5bccdb46aaf0e486a595ed5fd8f86ba55232a70"
[[package]]
name = "hex-literal"
version = "0.4.1"

View File

@@ -137,8 +137,9 @@ Our approach addresses both customer and team multi-tenancy requirements:
### Implementation Roadmap
1. **Phase 1**: Implement VPN access and manual tenant provisioning
2. **Phase 2**: Deploy TenantScore automation for namespace, RBAC, and NetworkPolicy management
3. **Phase 3**: Integrate Keycloak for centralized identity management
4. **Phase 4**: Add advanced monitoring and per-tenant observability
4. **Phase 3**: Work on privilege escalation from pods, audit for weaknesses, enforce security policies on pod runtimes
3. **Phase 4**: Integrate Keycloak for centralized identity management
4. **Phase 5**: Add advanced monitoring and per-tenant observability
### TenantScore Structure Preview
```rust

View File

@@ -0,0 +1,41 @@
apiVersion: networking.k8s.io/v1
kind: NetworkPolicy
metadata:
name: tenant-isolation-policy
namespace: testtenant
spec:
podSelector: {} # Selects all pods in the namespace
policyTypes:
- Ingress
- Egress
ingress:
- from:
- podSelector: {} # Allow from all pods in the same namespace
egress:
- to:
- podSelector: {} # Allow to all pods in the same namespace
- to:
- podSelector: {}
namespaceSelector:
matchLabels:
kubernetes.io/metadata.name: openshift-dns # Target the openshift-dns namespace
# Note, only opening port 53 is not enough, will have to dig deeper into this one eventually
# ports:
# - protocol: UDP
# port: 53
# - protocol: TCP
# port: 53
# Allow egress to public internet only
- to:
- ipBlock:
cidr: 0.0.0.0/0
except:
- 10.0.0.0/8 # RFC1918
- 172.16.0.0/12 # RFC1918
- 192.168.0.0/16 # RFC1918
- 169.254.0.0/16 # Link-local
- 127.0.0.0/8 # Loopback
- 224.0.0.0/4 # Multicast
- 240.0.0.0/4 # Reserved
- 100.64.0.0/10 # Carrier-grade NAT
- 0.0.0.0/8 # Reserved

View File

@@ -0,0 +1,95 @@
apiVersion: v1
kind: Namespace
metadata:
name: testtenant
---
apiVersion: v1
kind: Namespace
metadata:
name: testtenant2
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: test-web
namespace: testtenant
spec:
replicas: 1
selector:
matchLabels:
app: test-web
template:
metadata:
labels:
app: test-web
spec:
containers:
- name: nginx
image: nginxinc/nginx-unprivileged
ports:
- containerPort: 80
---
apiVersion: v1
kind: Service
metadata:
name: test-web
namespace: testtenant
spec:
selector:
app: test-web
ports:
- port: 80
targetPort: 8080
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: test-client
namespace: testtenant
spec:
replicas: 1
selector:
matchLabels:
app: test-client
template:
metadata:
labels:
app: test-client
spec:
containers:
- name: curl
image: curlimages/curl:latest
command: ["/bin/sh", "-c", "sleep 3600"]
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: test-web
namespace: testtenant2
spec:
replicas: 1
selector:
matchLabels:
app: test-web
template:
metadata:
labels:
app: test-web
spec:
containers:
- name: nginx
image: nginxinc/nginx-unprivileged
ports:
- containerPort: 80
---
apiVersion: v1
kind: Service
metadata:
name: test-web
namespace: testtenant2
spec:
selector:
app: test-web
ports:
- port: 80
targetPort: 8080

View File

@@ -2,7 +2,10 @@ use harmony::{
data::Version,
inventory::Inventory,
maestro::Maestro,
modules::lamp::{LAMPConfig, LAMPScore},
modules::{
lamp::{LAMPConfig, LAMPScore},
monitoring::monitoring_alerting::{AlertChannel, MonitoringAlertingStackScore},
},
topology::{K8sAnywhereTopology, Url},
};
@@ -40,7 +43,13 @@ async fn main() {
.await
.unwrap();
maestro.register_all(vec![Box::new(lamp_stack)]);
let url = url::Url::parse("https://discord.com/api/webhooks/dummy_channel/dummy_token")
.expect("invalid URL");
let mut monitoring_stack_score = MonitoringAlertingStackScore::new();
monitoring_stack_score.namespace = Some(lamp_stack.config.namespace.clone());
maestro.register_all(vec![Box::new(lamp_stack), Box::new(monitoring_stack_score)]);
// Here we bootstrap the CLI, this gives some nice features if you need them
harmony_cli::init(maestro, None).await.unwrap();
}

View File

@@ -1,12 +0,0 @@
[package]
name = "webhook_sender"
edition = "2024"
version.workspace = true
readme.workspace = true
license.workspace = true
[dependencies]
harmony = { version = "0.1.0", path = "../../harmony" }
harmony_cli = { version = "0.1.0", path = "../../harmony_cli" }
tokio.workspace = true
url.workspace = true

View File

@@ -1,23 +0,0 @@
use harmony::{
inventory::Inventory,
maestro::Maestro,
modules::monitoring::monitoring_alerting::MonitoringAlertingScore,
topology::{K8sAnywhereTopology, oberservability::K8sMonitorConfig},
};
#[tokio::main]
async fn main() {
let mut maestro = Maestro::<K8sAnywhereTopology>::initialize(
Inventory::autoload(),
K8sAnywhereTopology::new(),
)
.await
.unwrap();
let monitoring = MonitoringAlertingScore {
alert_channel_configs: None,
};
maestro.register_all(vec![Box::new(monitoring)]);
harmony_cli::init(maestro, None).await.unwrap();
}

View File

@@ -0,0 +1,18 @@
[package]
name = "example-tenant"
edition = "2024"
version.workspace = true
readme.workspace = true
license.workspace = true
publish = false
[dependencies]
harmony = { path = "../../harmony" }
harmony_cli = { path = "../../harmony_cli" }
harmony_types = { path = "../../harmony_types" }
cidr = { workspace = true }
tokio = { workspace = true }
harmony_macros = { path = "../../harmony_macros" }
log = { workspace = true }
env_logger = { workspace = true }
url = { workspace = true }

View File

@@ -0,0 +1,41 @@
use harmony::{
data::Id,
inventory::Inventory,
maestro::Maestro,
modules::tenant::TenantScore,
topology::{K8sAnywhereTopology, tenant::TenantConfig},
};
#[tokio::main]
async fn main() {
let tenant = TenantScore {
config: TenantConfig {
id: Id::default(),
name: "TestTenant".to_string(),
..Default::default()
},
};
let mut maestro = Maestro::<K8sAnywhereTopology>::initialize(
Inventory::autoload(),
K8sAnywhereTopology::new(),
)
.await
.unwrap();
maestro.register_all(vec![Box::new(tenant)]);
harmony_cli::init(maestro, None).await.unwrap();
}
// TODO write tests
// - Create Tenant with default config mostly, make sure namespace is created
// - deploy sample client/server app with nginx unprivileged and a service
// - exec in the client pod and validate the following
// - can reach internet
// - can reach server pod
// - can resolve dns queries to internet
// - can resolve dns queries to services
// - cannot reach services and pods in other namespaces
// - Create Tenant with specific cpu/ram/storage requests / limits and make sure they are enforced by trying to
// deploy a pod with lower requests/limits (accepted) and higher requests/limits (rejected)
// - Create TenantCredentials and make sure they give only access to the correct tenant

View File

@@ -6,6 +6,8 @@ readme.workspace = true
license.workspace = true
[dependencies]
rand = "0.9"
hex = "0.4"
libredfish = "0.1.1"
reqwest = { version = "0.11", features = ["blocking", "json"] }
russh = "0.45.0"

View File

@@ -1,5 +1,23 @@
use rand::distr::Alphanumeric;
use rand::distr::SampleString;
use std::time::SystemTime;
use std::time::UNIX_EPOCH;
use serde::{Deserialize, Serialize};
/// A unique identifier designed for ease of use.
///
/// You can pass it any String to use and Id, or you can use the default format with `Id::default()`
///
/// The default format looks like this
///
/// `462d4c_g2COgai`
///
/// The first part is the unix timesamp in hexadecimal which makes Id easily sorted by creation time.
/// Second part is a serie of 7 random characters.
///
/// **It is not meant to be very secure or unique**, it is suitable to generate up to 10 000 items per
/// second with a reasonable collision rate of 0,000014 % as calculated by this calculator : https://kevingal.com/apps/collision.html
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct Id {
value: String,
@@ -16,3 +34,20 @@ impl std::fmt::Display for Id {
f.write_str(&self.value)
}
}
impl Default for Id {
fn default() -> Self {
let start = SystemTime::now();
let since_the_epoch = start
.duration_since(UNIX_EPOCH)
.expect("Time went backwards");
let timestamp = since_the_epoch.as_secs();
let hex_timestamp = format!("{:x}", timestamp & 0xffffff);
let random_part: String = Alphanumeric.sample_string(&mut rand::rng(), 7);
let value = format!("{}_{}", hex_timestamp, random_part);
Self { value }
}
}

View File

@@ -1,4 +1,4 @@
use std::{process::Command, sync::Arc};
use std::{io::Error, process::Command, sync::Arc};
use async_trait::async_trait;
use inquire::Confirm;
@@ -10,21 +10,13 @@ use crate::{
interpret::{InterpretError, Outcome},
inventory::Inventory,
maestro::Maestro,
modules::{
k3d::K3DInstallationScore,
monitoring::kube_prometheus::kube_prometheus_helm_chart_score::kube_prometheus_helm_chart_score,
},
modules::k3d::K3DInstallationScore,
topology::LocalhostTopology,
};
use super::{
HelmCommand, K8sclient, Topology,
k8s::K8sClient,
oberservability::{
K8sMonitorConfig,
k8s::K8sMonitor,
monitoring::{AlertChannel, AlertChannelConfig, Monitor},
},
tenant::{
ResourceLimits, TenantConfig, TenantManager, TenantNetworkPolicy, k8s::K8sTenantManager,
},
@@ -45,7 +37,6 @@ enum K8sSource {
pub struct K8sAnywhereTopology {
k8s_state: OnceCell<Option<K8sState>>,
tenant_manager: OnceCell<K8sTenantManager>,
k8s_monitor: OnceCell<K8sMonitor>,
}
#[async_trait]
@@ -70,7 +61,6 @@ impl K8sAnywhereTopology {
Self {
k8s_state: OnceCell::new(),
tenant_manager: OnceCell::new(),
k8s_monitor: OnceCell::new(),
}
}
@@ -180,6 +170,22 @@ impl K8sAnywhereTopology {
Ok(Some(state))
}
async fn ensure_k8s_tenant_manager(&self) -> Result<(), String> {
if let Some(_) = self.tenant_manager.get() {
return Ok(());
}
self.tenant_manager
.get_or_try_init(async || -> Result<K8sTenantManager, String> {
let k8s_client = self.k8s_client().await?;
Ok(K8sTenantManager::new(k8s_client))
})
.await
.unwrap();
Ok(())
}
fn get_k8s_tenant_manager(&self) -> Result<&K8sTenantManager, ExecutorError> {
match self.tenant_manager.get() {
Some(t) => Ok(t),
@@ -188,30 +194,6 @@ impl K8sAnywhereTopology {
)),
}
}
async fn ensure_k8s_monitor(&self) -> Result<(), String> {
if let Some(_) = self.k8s_monitor.get() {
return Ok(());
}
self.k8s_monitor
.get_or_try_init(async || -> Result<K8sMonitor, String> {
let config = K8sMonitorConfig::cluster_monitor();
Ok(K8sMonitor { config })
})
.await
.unwrap();
Ok(())
}
fn get_k8s_monitor(&self) -> Result<&K8sMonitor, ExecutorError> {
match self.k8s_monitor.get() {
Some(k) => Ok(k),
None => Err(ExecutorError::UnexpectedError(
"K8sMonitor not available".to_string(),
)),
}
}
}
struct K8sAnywhereConfig {
@@ -251,7 +233,7 @@ impl Topology for K8sAnywhereTopology {
"No K8s client could be found or installed".to_string(),
))?;
self.ensure_k8s_monitor()
self.ensure_k8s_tenant_manager()
.await
.map_err(|e| InterpretError::new(e))?;
@@ -301,20 +283,3 @@ impl TenantManager for K8sAnywhereTopology {
.await
}
}
#[async_trait]
impl Monitor for K8sAnywhereTopology {
async fn provision_monitor<T: Topology + HelmCommand>(
&self,
inventory: &Inventory,
topology: &T,
alert_receivers: Option<Vec<Box<dyn AlertChannelConfig>>>,
) -> Result<Outcome, InterpretError> {
self.get_k8s_monitor()?
.provision_monitor(inventory, topology, alert_receivers)
.await
}
fn delete_monitor(&self) -> Result<Outcome, InterpretError> {
todo!()
}
}

View File

@@ -1,71 +0,0 @@
use std::sync::Arc;
use async_trait::async_trait;
use serde::Serialize;
use crate::score::Score;
use crate::topology::HelmCommand;
use crate::{
interpret::{InterpretError, Outcome},
inventory::Inventory,
topology::Topology,
};
use super::{
K8sMonitorConfig,
monitoring::{AlertChannel, AlertChannelConfig, Monitor},
};
#[derive(Debug, Clone, Serialize)]
pub struct K8sMonitor {
pub config: K8sMonitorConfig,
}
#[async_trait]
impl Monitor for K8sMonitor {
async fn provision_monitor<T: Topology + HelmCommand>(
&self,
inventory: &Inventory,
topology: &T,
alert_channels: Option<Vec<Box<dyn AlertChannelConfig>>>,
) -> Result<Outcome, InterpretError> {
if let Some(channels) = alert_channels {
let alert_channels = self.build_alert_channels(channels).await?;
for channel in alert_channels {
channel.register_alert_channel().await?;
}
}
let chart = self.config.chart.clone();
chart
.create_interpret()
.execute(inventory, topology)
.await?;
Ok(Outcome::success("installed monitor".to_string()))
}
fn delete_monitor(&self) -> Result<Outcome, InterpretError> {
todo!()
}
}
#[async_trait]
impl AlertChannelConfig for K8sMonitor {
async fn build_alert_channel(&self) -> Result<Box<dyn AlertChannel>, InterpretError> {
todo!()
}
}
impl K8sMonitor {
pub async fn build_alert_channels(
&self,
alert_channel_configs: Vec<Box<dyn AlertChannelConfig>>,
) -> Result<Vec<Box<dyn AlertChannel>>, InterpretError> {
let mut alert_channels = Vec::new();
for config in alert_channel_configs {
let channel = config.build_alert_channel().await?;
alert_channels.push(channel)
}
Ok(alert_channels)
}
}

View File

@@ -1,23 +1 @@
use serde::Serialize;
use crate::modules::{
helm::chart::HelmChartScore,
monitoring::kube_prometheus::kube_prometheus_helm_chart_score::kube_prometheus_helm_chart_score,
};
pub mod k8s;
pub mod monitoring;
#[derive(Debug, Clone, Serialize)]
pub struct K8sMonitorConfig {
//probably need to do something better here
pub chart: HelmChartScore,
}
impl K8sMonitorConfig {
pub fn cluster_monitor() -> Self {
Self {
chart: kube_prometheus_helm_chart_score(),
}
}
}

View File

@@ -1,39 +1,31 @@
use async_trait::async_trait;
use dyn_clone::DynClone;
use std::fmt::Debug;
use crate::executors::ExecutorError;
use std::fmt::Debug;
use url::Url;
use crate::interpret::InterpretError;
use crate::inventory::Inventory;
use crate::topology::HelmCommand;
use crate::{interpret::Outcome, topology::Topology};
/// Represents an entity responsible for collecting and organizing observability data
/// from various telemetry sources such as Prometheus or Datadog
/// from various telemetry sources
/// A `Monitor` abstracts the logic required to scrape, aggregate, and structure
/// monitoring data, enabling consistent processing regardless of the underlying data source.
#[async_trait]
pub trait Monitor {
async fn provision_monitor<T: Topology + HelmCommand>(
pub trait Monitor<T: Topology>: Debug + Send + Sync {
async fn deploy_monitor(
&self,
inventory: &Inventory,
topology: &T,
alert_receivers: Option<Vec<Box<dyn AlertChannelConfig>>>,
alert_receivers: Vec<AlertReceiver>,
) -> Result<Outcome, InterpretError>;
fn delete_monitor(&self) -> Result<Outcome, InterpretError>;
async fn delete_monitor(
&self,
topolgy: &T,
alert_receivers: Vec<AlertReceiver>,
) -> Result<Outcome, InterpretError>;
}
#[async_trait]
pub trait AlertChannel: Debug + Send + Sync {
async fn register_alert_channel(&self) -> Result<Outcome, ExecutorError>;
//async fn get_channel_id(&self) -> String;
pub struct AlertReceiver {
pub receiver_id: String,
}
#[async_trait]
pub trait AlertChannelConfig: Debug + Send + Sync + DynClone {
async fn build_alert_channel(&self) -> Result<Box<dyn AlertChannel>, InterpretError>;
}
dyn_clone::clone_trait_object!(AlertChannelConfig);

View File

@@ -71,6 +71,21 @@ impl TenantManager for K8sTenantManager {
}
);
let network_policy = json!({
"apiVersion": "networking.k8s.io/v1",
"kind": "NetworkPolicy",
"metadata": {
"name": format!("{}-network-policy", config.name),
},
"spec": {
"podSelector": {},
"egress": [],
"ingress": [],
"policyTypes": [
]
}
});
}
async fn update_tenant_resource_limits(

View File

@@ -27,6 +27,28 @@ pub struct TenantConfig {
pub labels_or_tags: HashMap<String, String>,
}
impl Default for TenantConfig {
fn default() -> Self {
let id = Id::default();
Self {
name: format!("tenant_{id}"),
id,
resource_limits: ResourceLimits {
cpu_request_cores: 4.0,
cpu_limit_cores: 4.0,
memory_request_gb: 4.0,
memory_limit_gb: 4.0,
storage_total_gb: 20.0,
},
network_policy: TenantNetworkPolicy {
default_inter_tenant_ingress: InterTenantIngressPolicy::DenyAll,
default_internet_egress: InternetEgressPolicy::AllowAll,
},
labels_or_tags: HashMap::new(),
}
}
}
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize, Default)]
pub struct ResourceLimits {
/// Requested/guaranteed CPU cores (e.g., 2.0).

View File

@@ -1,42 +0,0 @@
use url::Url;
#[derive(Debug, Clone)]
pub struct DiscordWebhookAlertChannel {
pub webhook_url: Url,
pub name: String,
pub send_resolved_notifications: bool,
}
//impl AlertChannelConfig for DiscordWebhookAlertChannel {
// fn build_alert_channel(&self) -> Box<dyn AlertChannel> {
// Box::new(DiscordWebhookAlertChannel {
// webhook_url: self.webhook_url.clone(),
// name: self.name.clone(),
// send_resolved_notifications: self.send_resolved_notifications.clone(),
// })
// }
// fn channel_type(&self) -> String {
// "discord".to_string()
// }
//}
//
//#[async_trait]
//impl AlertChannel for DiscordWebhookAlertChannel {
// async fn get_channel_id(&self) -> String {
// self.name.clone()
// }
//}
//
//impl PrometheusAlertChannel for DiscordWebhookAlertChannel {
// fn get_alert_channel_global_settings(&self) -> Option<AlertManagerChannelGlobalConfigs> {
// None
// }
//
// fn get_alert_channel_route(&self) -> AlertManagerChannelRoute {
// todo!()
// }
//
// fn get_alert_channel_receiver(&self) -> AlertManagerChannelReceiver {
// todo!()
// }
//}

View File

@@ -1 +0,0 @@
pub mod discord_alert_channel;

View File

@@ -1,6 +1,6 @@
use serde::Serialize;
use super::types::AlertManagerChannelConfig;
use super::monitoring_alerting::AlertChannel;
#[derive(Debug, Clone, Serialize)]
pub struct KubePrometheusConfig {
@@ -21,7 +21,7 @@ pub struct KubePrometheusConfig {
pub kube_proxy: bool,
pub kube_state_metrics: bool,
pub prometheus_operator: bool,
pub alert_channels: Vec<AlertManagerChannelConfig>,
pub alert_channel: Vec<AlertChannel>,
}
impl KubePrometheusConfig {
pub fn new() -> Self {
@@ -30,6 +30,7 @@ impl KubePrometheusConfig {
default_rules: true,
windows_monitoring: false,
alert_manager: true,
alert_channel: Vec::new(),
grafana: true,
node_exporter: false,
prometheus: true,
@@ -43,7 +44,6 @@ impl KubePrometheusConfig {
prometheus_operator: true,
core_dns: false,
kube_scheduler: false,
alert_channels: Vec::new(),
}
}
}

View File

@@ -0,0 +1,35 @@
use std::str::FromStr;
use non_blank_string_rs::NonBlankString;
use url::Url;
use crate::modules::helm::chart::HelmChartScore;
pub fn discord_alert_manager_score(
webhook_url: Url,
namespace: String,
name: String,
) -> HelmChartScore {
let values = format!(
r#"
environment:
- name: "DISCORD_WEBHOOK"
value: "{webhook_url}"
"#,
);
HelmChartScore {
namespace: Some(NonBlankString::from_str(&namespace).unwrap()),
release_name: NonBlankString::from_str(&name).unwrap(),
chart_name: NonBlankString::from_str(
"oci://hub.nationtech.io/library/alertmanager-discord",
)
.unwrap(),
chart_version: None,
values_overrides: None,
values_yaml: Some(values.to_string()),
create_namespace: true,
install_only: true,
repository: None,
}
}

View File

@@ -0,0 +1,55 @@
use async_trait::async_trait;
use serde_json::Value;
use url::Url;
use crate::{
interpret::{InterpretError, Outcome},
topology::K8sAnywhereTopology,
};
#[derive(Debug, Clone)]
pub struct DiscordWebhookConfig {
pub webhook_url: Url,
pub name: String,
pub send_resolved_notifications: bool,
}
pub trait DiscordWebhookReceiver {
fn deploy_discord_webhook_receiver(
&self,
_notification_adapter_id: &str,
) -> Result<Outcome, InterpretError>;
fn delete_discord_webhook_receiver(
&self,
_notification_adapter_id: &str,
) -> Result<Outcome, InterpretError>;
}
// trait used to generate alert manager config values impl<T: Topology + AlertManagerConfig> Monitor for KubePrometheus
pub trait AlertManagerConfig<T> {
fn get_alert_manager_config(&self) -> Result<Value, InterpretError>;
}
#[async_trait]
impl<T: DiscordWebhookReceiver> AlertManagerConfig<T> for DiscordWebhookConfig {
fn get_alert_manager_config(&self) -> Result<Value, InterpretError> {
todo!()
}
}
#[async_trait]
impl DiscordWebhookReceiver for K8sAnywhereTopology {
fn deploy_discord_webhook_receiver(
&self,
_notification_adapter_id: &str,
) -> Result<Outcome, InterpretError> {
todo!()
}
fn delete_discord_webhook_receiver(
&self,
_notification_adapter_id: &str,
) -> Result<Outcome, InterpretError> {
todo!()
}
}

View File

@@ -1,13 +1,12 @@
use super::config::KubePrometheusConfig;
use super::{config::KubePrometheusConfig, monitoring_alerting::AlertChannel};
use log::info;
use non_blank_string_rs::NonBlankString;
use std::str::FromStr;
use std::{collections::HashMap, str::FromStr};
use url::Url;
use crate::modules::helm::chart::HelmChartScore;
pub fn kube_prometheus_helm_chart_score() -> HelmChartScore {
let config = KubePrometheusConfig::new();
pub fn kube_prometheus_helm_chart_score(config: &KubePrometheusConfig) -> HelmChartScore {
//TODO this should be make into a rule with default formatting that can be easily passed as a vec
//to the overrides or something leaving the user to deal with formatting here seems bad
let default_rules = config.default_rules.to_string();
@@ -145,6 +144,67 @@ prometheus:
enabled: {prometheus}
"#,
);
let alertmanager_config = alert_manager_yaml_builder(&config);
values.push_str(&alertmanager_config);
fn alert_manager_yaml_builder(config: &KubePrometheusConfig) -> String {
let mut receivers = String::new();
let mut routes = String::new();
let mut global_configs = String::new();
let alert_manager = config.alert_manager;
for alert_channel in &config.alert_channel {
match alert_channel {
AlertChannel::Discord { name, .. } => {
let (receiver, route) = discord_alert_builder(name);
info!("discord receiver: {} \nroute: {}", receiver, route);
receivers.push_str(&receiver);
routes.push_str(&route);
}
AlertChannel::Slack {
slack_channel,
webhook_url,
} => {
let (receiver, route) = slack_alert_builder(slack_channel);
info!("slack receiver: {} \nroute: {}", receiver, route);
receivers.push_str(&receiver);
routes.push_str(&route);
let global_config = format!(
r#"
global:
slack_api_url: {webhook_url}"#
);
global_configs.push_str(&global_config);
}
AlertChannel::Smpt { .. } => todo!(),
}
}
info!("after alert receiver: {}", receivers);
info!("after alert routes: {}", routes);
let alertmanager_config = format!(
r#"
alertmanager:
enabled: {alert_manager}
config: {global_configs}
route:
group_by: ['job']
group_wait: 30s
group_interval: 5m
repeat_interval: 12h
routes:
{routes}
receivers:
- name: 'null'
{receivers}"#
);
info!("alert manager config: {}", alertmanager_config);
alertmanager_config
}
HelmChartScore {
namespace: Some(NonBlankString::from_str(&config.namespace).unwrap()),
release_name: NonBlankString::from_str("kube-prometheus").unwrap(),
@@ -160,102 +220,43 @@ prometheus:
repository: None,
}
}
// let alertmanager_config = alert_manager_yaml_builder(&config);
// values.push_str(&alertmanager_config);
//
// fn alert_manager_yaml_builder(config: &KubePrometheusConfig) -> String {
// let mut receivers = String::new();
// let mut routes = String::new();
// let mut global_configs = String::new();
// let alert_manager = config.alert_manager;
// for alert_channel in &config.alert_channel {
// match alert_channel {
// AlertChannel::Discord { name, .. } => {
// let (receiver, route) = discord_alert_builder(name);
// info!("discord receiver: {} \nroute: {}", receiver, route);
// receivers.push_str(&receiver);
// routes.push_str(&route);
// }
// AlertChannel::Slack {
// slack_channel,
// webhook_url,
// } => {
// let (receiver, route) = slack_alert_builder(slack_channel);
// info!("slack receiver: {} \nroute: {}", receiver, route);
// receivers.push_str(&receiver);
//
// routes.push_str(&route);
// let global_config = format!(
// r#"
// global:
// slack_api_url: {webhook_url}"#
// );
//
// global_configs.push_str(&global_config);
// }
// AlertChannel::Smpt { .. } => todo!(),
// }
// }
// info!("after alert receiver: {}", receivers);
// info!("after alert routes: {}", routes);
//
// let alertmanager_config = format!(
// r#"
//alertmanager:
// enabled: {alert_manager}
// config: {global_configs}
// route:
// group_by: ['job']
// group_wait: 30s
// group_interval: 5m
// repeat_interval: 12h
// routes:
//{routes}
// receivers:
// - name: 'null'
//{receivers}"#
// );
//
// info!("alert manager config: {}", alertmanager_config);
// alertmanager_config
// }
//fn discord_alert_builder(release_name: &String) -> (String, String) {
// let discord_receiver_name = format!("Discord-{}", release_name);
// let receiver = format!(
// r#"
// - name: '{discord_receiver_name}'
// webhook_configs:
// - url: 'http://{release_name}-alertmanager-discord:9094'
// send_resolved: true"#,
// );
// let route = format!(
// r#"
// - receiver: '{discord_receiver_name}'
// matchers:
// - alertname!=Watchdog
// continue: true"#,
// );
// (receiver, route)
//}
//
//fn slack_alert_builder(slack_channel: &String) -> (String, String) {
// let slack_receiver_name = format!("Slack-{}", slack_channel);
// let receiver = format!(
// r#"
// - name: '{slack_receiver_name}'
// slack_configs:
// - channel: '{slack_channel}'
// send_resolved: true
// title: '{{{{ .CommonAnnotations.title }}}}'
// text: '{{{{ .CommonAnnotations.description }}}}'"#,
// );
// let route = format!(
// r#"
// - receiver: '{slack_receiver_name}'
// matchers:
// - alertname!=Watchdog
// continue: true"#,
// );
// (receiver, route)
//}
fn discord_alert_builder(release_name: &String) -> (String, String) {
let discord_receiver_name = format!("Discord-{}", release_name);
let receiver = format!(
r#"
- name: '{discord_receiver_name}'
webhook_configs:
- url: 'http://{release_name}-alertmanager-discord:9094'
send_resolved: true"#,
);
let route = format!(
r#"
- receiver: '{discord_receiver_name}'
matchers:
- alertname!=Watchdog
continue: true"#,
);
(receiver, route)
}
fn slack_alert_builder(slack_channel: &String) -> (String, String) {
let slack_receiver_name = format!("Slack-{}", slack_channel);
let receiver = format!(
r#"
- name: '{slack_receiver_name}'
slack_configs:
- channel: '{slack_channel}'
send_resolved: true
title: '{{{{ .CommonAnnotations.title }}}}'
text: '{{{{ .CommonAnnotations.description }}}}'"#,
);
let route = format!(
r#"
- receiver: '{slack_receiver_name}'
matchers:
- alertname!=Watchdog
continue: true"#,
);
(receiver, route)
}

View File

@@ -1,85 +0,0 @@
//#[derive(Debug, Clone, Serialize)]
//pub struct KubePrometheusMonitorScore {
// pub kube_prometheus_config: KubePrometheusConfig,
// pub alert_channel_configs: Vec<dyn AlertChannelConfig>,
//}
//impl<T: Topology + Debug + HelmCommand + Monitor<T>> MonitorConfig<T>
// for KubePrometheusMonitorScore
//{
// fn build_monitor(&self) -> Box<dyn Monitor<T>> {
// Box::new(self.clone())
// }
//}
//impl<T: Topology + HelmCommand + Debug + Clone + 'static + Monitor<T>> Score<T>
// for KubePrometheusMonitorScore
//{
// fn create_interpret(&self) -> Box<dyn Interpret<T>> {
// Box::new(KubePrometheusMonitorInterpret {
// score: self.clone(),
// })
// }
//
// fn name(&self) -> String {
// "KubePrometheusMonitorScore".to_string()
// }
//}
//#[derive(Debug, Clone)]
//pub struct KubePrometheusMonitorInterpret {
// score: KubePrometheusMonitorScore,
//}
//#[async_trait]
//impl AlertChannelConfig for KubePrometheusMonitorInterpret {
// async fn build_alert_channel(
// &self,
// ) -> Box<dyn AlertChannel> {
// todo!()
// }
//}
//#[async_trait]
//impl<T: Topology + HelmCommand + Debug + Monitor<T>> Interpret<T>
// for KubePrometheusMonitorInterpret
//{
// async fn execute(
// &self,
// inventory: &Inventory,
// topology: &T,
// ) -> Result<Outcome, InterpretError> {
// let monitor = self.score.build_monitor();
//
// let mut alert_channels = Vec::new();
// //for config in self.score.alert_channel_configs {
// // alert_channels.push(self.build_alert_channel());
// //}
//
// monitor
// .deploy_monitor(inventory, topology, alert_channels)
// .await
// }
//
// fn get_name(&self) -> InterpretName {
// todo!()
// }
//
// fn get_version(&self) -> Version {
// todo!()
// }
//
// fn get_status(&self) -> InterpretStatus {
// todo!()
// }
//
// fn get_children(&self) -> Vec<Id> {
// todo!()
// }
//}
//#[async_trait]
//pub trait PrometheusAlertChannel {
// fn get_alert_channel_global_settings(&self) -> Option<AlertManagerChannelGlobalConfigs>;
// fn get_alert_channel_route(&self) -> AlertManagerChannelRoute;
// fn get_alert_channel_receiver(&self) -> AlertManagerChannelReceiver;
//}

View File

@@ -1,4 +0,0 @@
pub mod config;
pub mod kube_prometheus_helm_chart_score;
pub mod kube_prometheus_monitor;
pub mod types;

View File

@@ -1,14 +0,0 @@
use serde::Serialize;
#[derive(Debug, Clone, Serialize)]
pub struct AlertManagerChannelConfig {
pub global_configs: AlertManagerChannelGlobalConfigs,
pub route: AlertManagerChannelRoute,
pub receiver: AlertManagerChannelReceiver,
}
#[derive(Debug, Clone, Serialize)]
pub struct AlertManagerChannelGlobalConfigs {}
#[derive(Debug, Clone, Serialize)]
pub struct AlertManagerChannelReceiver {}
#[derive(Debug, Clone, Serialize)]
pub struct AlertManagerChannelRoute {}

View File

@@ -1,3 +1,5 @@
pub mod alert_channel;
pub mod kube_prometheus;
mod config;
mod discord_alert_manager;
pub mod discord_webhook_sender;
mod kube_prometheus;
pub mod monitoring_alerting;

View File

@@ -1,54 +1,146 @@
use async_trait::async_trait;
use email_address::EmailAddress;
use log::info;
use serde::Serialize;
use url::Url;
use crate::{
data::{Id, Version},
interpret::{Interpret, InterpretError, InterpretName, InterpretStatus, Outcome},
inventory::Inventory,
score::Score,
topology::{
HelmCommand, Topology,
oberservability::monitoring::{AlertChannelConfig, Monitor},
},
topology::{HelmCommand, Topology},
};
use super::{
config::KubePrometheusConfig, discord_alert_manager::discord_alert_manager_score,
kube_prometheus::kube_prometheus_helm_chart_score,
};
#[derive(Debug, Clone, Serialize)]
pub struct MonitoringAlertingScore {
#[serde(skip)]
pub alert_channel_configs: Option<Vec<Box<dyn AlertChannelConfig>>>,
pub enum AlertChannel {
Discord {
name: String,
webhook_url: Url,
},
Slack {
slack_channel: String,
webhook_url: Url,
},
//TODO test and implement in helm chart
//currently does not work
Smpt {
email_address: EmailAddress,
service_name: String,
},
}
impl<T: Topology + HelmCommand + Monitor> Score<T> for MonitoringAlertingScore {
#[derive(Debug, Clone, Serialize)]
pub struct MonitoringAlertingStackScore {
pub alert_channel: Vec<AlertChannel>,
pub namespace: Option<String>,
}
impl MonitoringAlertingStackScore {
pub fn new() -> Self {
Self {
alert_channel: Vec::new(),
namespace: None,
}
}
}
impl<T: Topology + HelmCommand> Score<T> for MonitoringAlertingStackScore {
fn create_interpret(&self) -> Box<dyn Interpret<T>> {
Box::new(MonitoringAlertingInterpret {
Box::new(MonitoringAlertingStackInterpret {
score: self.clone(),
})
}
fn name(&self) -> String {
"MonitoringAlertingScore".to_string()
format!("MonitoringAlertingStackScore")
}
}
#[derive(Debug)]
struct MonitoringAlertingInterpret {
score: MonitoringAlertingScore,
#[derive(Debug, Clone, Serialize)]
struct MonitoringAlertingStackInterpret {
score: MonitoringAlertingStackScore,
}
impl MonitoringAlertingStackInterpret {
async fn build_kube_prometheus_helm_chart_config(&self) -> KubePrometheusConfig {
let mut config = KubePrometheusConfig::new();
if let Some(ns) = &self.score.namespace {
config.namespace = ns.clone();
}
config.alert_channel = self.score.alert_channel.clone();
config
}
async fn deploy_kube_prometheus_helm_chart_score<T: Topology + HelmCommand>(
&self,
inventory: &Inventory,
topology: &T,
config: &KubePrometheusConfig,
) -> Result<Outcome, InterpretError> {
let helm_chart = kube_prometheus_helm_chart_score(config);
helm_chart
.create_interpret()
.execute(inventory, topology)
.await
}
async fn deploy_alert_channel_service<T: Topology + HelmCommand>(
&self,
inventory: &Inventory,
topology: &T,
config: &KubePrometheusConfig,
) -> Result<Outcome, InterpretError> {
//let mut outcomes = vec![];
//for channel in &self.score.alert_channel {
// let outcome = match channel {
// AlertChannel::Discord { .. } => {
// discord_alert_manager_score(config)
// .create_interpret()
// .execute(inventory, topology)
// .await
// }
// AlertChannel::Slack { .. } => Ok(Outcome::success(
// "No extra configs for slack alerting".to_string(),
// )),
// AlertChannel::Smpt { .. } => {
// todo!()
// }
// };
// outcomes.push(outcome);
//}
//for result in outcomes {
// result?;
//}
Ok(Outcome::success("All alert channels deployed".to_string()))
}
}
#[async_trait]
impl<T: Topology + HelmCommand + Monitor> Interpret<T> for MonitoringAlertingInterpret {
impl<T: Topology + HelmCommand> Interpret<T> for MonitoringAlertingStackInterpret {
async fn execute(
&self,
inventory: &Inventory,
topology: &T,
) -> Result<Outcome, InterpretError> {
topology
.provision_monitor(
inventory,
topology,
self.score.alert_channel_configs.clone(),
)
.await
let config = self.build_kube_prometheus_helm_chart_config().await;
info!("Built kube prometheus config");
info!("Installing kube prometheus chart");
self.deploy_kube_prometheus_helm_chart_score(inventory, topology, &config)
.await?;
info!("Installing alert channel service");
self.deploy_alert_channel_service(inventory, topology, &config)
.await?;
Ok(Outcome::success(format!(
"succesfully deployed monitoring and alerting stack"
)))
}
fn get_name(&self) -> InterpretName {

View File

@@ -14,7 +14,7 @@ use crate::{
#[derive(Debug, Serialize, Clone)]
pub struct TenantScore {
config: TenantConfig,
pub config: TenantConfig,
}
impl<T: Topology + TenantManager> Score<T> for TenantScore {