Compare commits

..

27 Commits

Author SHA1 Message Date
613def5e0b feat: deploys cluster monitoring stack from monitoring score on k8sanywhere topology
All checks were successful
Run Check Script / check (push) Successful in 1m46s
Run Check Script / check (pull_request) Successful in 1m47s
2025-06-11 15:06:39 -04:00
238d1f85e2 wip: impl k8sMonitor
Some checks failed
Run Check Script / check (push) Failing after 45s
Run Check Script / check (pull_request) Failing after 42s
2025-06-11 13:35:07 -04:00
dbc66f3d0c feat: set up basic structure for the concrete implementation of kube prometheus monitor, removed discord webhook receiver trait as the dependency is no longer required for prometheus to interact with discord
All checks were successful
Run Check Script / check (push) Successful in 1m47s
Run Check Script / check (pull_request) Successful in 1m49s
2025-06-06 16:41:17 -04:00
31e59937dc Merge pull request 'feat: Initial setup for monitoring and alerting' (#48) from feat/monitor into master
All checks were successful
Run Check Script / check (push) Successful in 1m50s
Reviewed-on: #48
Reviewed-by: johnride <jg@nationtech.io>
2025-06-03 18:17:13 +00:00
12eb4ae31f fix: cargo fmt
All checks were successful
Run Check Script / check (push) Successful in 1m47s
Run Check Script / check (pull_request) Successful in 1m47s
2025-06-02 16:20:49 -04:00
a2be9457b9 wip: removed AlertReceiverConfig
Some checks failed
Run Check Script / check (push) Failing after 44s
Run Check Script / check (pull_request) Failing after 44s
2025-06-02 16:11:36 -04:00
0d56fbc09d wip: applied comments in pr, changed naming of AlertChannel to AlertReceiver and added rust doc to Monitor for clarity
All checks were successful
Run Check Script / check (push) Successful in 1m49s
Run Check Script / check (pull_request) Successful in 1m47s
2025-06-02 14:44:43 -04:00
56dc1e93c1 fix: modified files in mod
All checks were successful
Run Check Script / check (push) Successful in 1m48s
Run Check Script / check (pull_request) Successful in 1m46s
2025-06-02 11:47:21 -04:00
691540fe64 wip: modified initial monitoring architecture based on pr review
Some checks failed
Run Check Script / check (push) Failing after 46s
Run Check Script / check (pull_request) Failing after 43s
2025-06-02 11:42:37 -04:00
7e3f1b1830 fix: cargo fmt
All checks were successful
Run Check Script / check (push) Successful in 1m45s
Run Check Script / check (pull_request) Successful in 1m45s
2025-05-30 13:59:29 -04:00
b631e8ccbb feat: Initial setup for monitoring and alerting
Some checks failed
Run Check Script / check (push) Failing after 43s
Run Check Script / check (pull_request) Failing after 45s
2025-05-30 13:21:38 -04:00
60f2f31d6c feat: Add TenantScore and TenantInterpret (#45)
All checks were successful
Run Check Script / check (push) Successful in 1m47s
Reviewed-on: #45
Co-authored-by: Jean-Gabriel Gill-Couture <jg@nationtech.io>
Co-committed-by: Jean-Gabriel Gill-Couture <jg@nationtech.io>
2025-05-30 13:13:43 +00:00
27f1a9dbdd feat: add more to the tenantmanager k8s impl (#46)
All checks were successful
Run Check Script / check (push) Successful in 1m55s
Co-authored-by: Willem <wrolleman@nationtech.io>
Reviewed-on: #46
Co-authored-by: Taha Hawa <taha@taha.dev>
Co-committed-by: Taha Hawa <taha@taha.dev>
2025-05-29 20:15:38 +00:00
e7917843bc Merge pull request 'feat: Add initial Tenant traits and data structures' (#43) from feat/tenant into master
Some checks failed
Run Check Script / check (push) Has been cancelled
Reviewed-on: #43
2025-05-29 15:51:33 +00:00
7cd541bdd8 chore: Fix pr comments, remove many YAGNI things
All checks were successful
Run Check Script / check (push) Successful in 1m46s
Run Check Script / check (pull_request) Successful in 1m46s
2025-05-29 11:47:25 -04:00
270dd49567 Merge pull request 'docs: Add CONTRIBUTING.md guide' (#44) from doc/contributor into master
All checks were successful
Run Check Script / check (push) Successful in 1m46s
Reviewed-on: #44
2025-05-29 14:48:18 +00:00
0187300473 docs: Add CONTRIBUTING.md guide
All checks were successful
Run Check Script / check (push) Successful in 1m46s
Run Check Script / check (pull_request) Successful in 1m47s
2025-05-29 10:47:38 -04:00
bf16566b4e wip: Clean up some unnecessary bits in the Tenant module and move manager to its own file
All checks were successful
Run Check Script / check (push) Successful in 1m48s
Run Check Script / check (pull_request) Successful in 1m46s
2025-05-29 07:25:45 -04:00
895fb02f4e feat: Add initial Tenant traits and data structures
All checks were successful
Run Check Script / check (push) Successful in 1m46s
Run Check Script / check (pull_request) Successful in 1m45s
2025-05-28 22:33:46 -04:00
88d6af9815 Merge pull request 'feat/basicCI' (#42) from feat/basicCI into master
All checks were successful
Run Check Script / check (push) Successful in 1m50s
Reviewed-on: #42
Reviewed-by: taha <taha@noreply.git.nationtech.io>
2025-05-28 19:42:19 +00:00
5aa9dc701f fix: Removed forgotten refactoring bits and formatting
All checks were successful
Run Check Script / check (push) Successful in 1m46s
Run Check Script / check (pull_request) Successful in 1m48s
2025-05-28 15:19:39 -04:00
f4ef895d2e feat: Add basic CI configuration
Some checks failed
Run Check Script / check (push) Failing after 51s
2025-05-28 14:40:19 -04:00
6e7148a945 Merge pull request 'adr: Add ADR on multi tenancy using namespace based customer isolation' (#41) from adr/multi-tenancy into master
Reviewed-on: #41
2025-05-26 20:26:36 +00:00
83453273c6 adr: Add ADR on multi tenancy using namespace based customer isolation 2025-05-26 11:56:45 -04:00
76ae5eb747 fix: make HelmRepository public (#39)
Co-authored-by: tahahawa <tahahawa@gmail.com>
Reviewed-on: #39
Reviewed-by: johnride <jg@nationtech.io>
2025-05-22 20:07:42 +00:00
9c51040f3b Merge pull request 'feat:added Slack notifications support' (#38) from feat/slack-notifs into master
Reviewed-on: #38
Reviewed-by: johnride <jg@nationtech.io>
2025-05-22 20:04:51 +00:00
19bd47a545 Merge pull request 'monitoringalerting' (#37) from monitoringalerting into master
Reviewed-on: #37
Reviewed-by: johnride <jg@nationtech.io>
2025-05-21 17:32:26 +00:00
35 changed files with 1071 additions and 306 deletions

View File

@@ -0,0 +1,14 @@
name: Run Check Script
on:
  push:
  pull_request:
jobs:
  check:
    runs-on: rust-cargo
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
      - name: Run check script
        run: bash check.sh

CONTRIBUTING.md Normal file
View File

@@ -0,0 +1,36 @@
# Contributing to the Harmony project
## Write small PRs
Aim for the smallest piece of work that is mergeable.
Mergeable means that:
- it does not break the build
- it moves the codebase one step forward
PRs can be many things; they do not have to be complete features.
### What a PR **should** be
- Introduce a new trait: this is the place to discuss the new trait, its design, and its implementation
- A new implementation of a trait: for example, a new concrete implementation of the LoadBalancer trait
- A new CI check: something that improves quality, robustness, or CI performance
- Documentation improvements
- Refactoring
- Bugfix
### What a PR **should not** be
- Large. Anything over 200 lines (excluding generated lines) should have a very good reason to be this large.
- A mix of refactoring, bug fixes, and new features.
- Introducing multiple new features or ideas at once.
- Multiple new implementations of a trait/functionality at once
The general idea is to keep PRs small and single-purpose.
## Commit message formatting
We follow the Conventional Commits guidelines:
https://www.conventionalcommits.org/en/v1.0.0/
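Examples from the commit history above (type prefix, colon, short description):
- feat: Add initial Tenant traits and data structures
- fix: make HelmRepository public
- docs: Add CONTRIBUTING.md guide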

View File

@@ -1,6 +1,6 @@
# Architecture Decision Record: \<Title\>
Name: \<Name\>
Initial Author: \<Name\>
Initial Date: \<Date\>

View File

@@ -1,6 +1,6 @@
# Architecture Decision Record: Helm and Kustomize Handling
Name: Taha Hawa
Initial Author: Taha Hawa
Initial Date: 2025-04-15

View File

@@ -1,7 +1,7 @@
# Architecture Decision Record: Monitoring and Alerting
Proposed by: Willem Rolleman
Date: April 28 2025
Initial Author : Willem Rolleman
Date : April 28 2025
## Status

View File

@@ -0,0 +1,160 @@
# Architecture Decision Record: Multi-Tenancy Strategy for Harmony Managed Clusters
Initial Author: Jean-Gabriel Gill-Couture
Initial Date: 2025-05-26
## Status
Proposed
## Context
Harmony manages production OKD/Kubernetes clusters that serve multiple clients with varying trust levels and operational requirements. We need a multi-tenancy strategy that provides:
1. **Strong isolation** between client workloads while maintaining operational simplicity
2. **Controlled API access** allowing clients self-service capabilities within defined boundaries
3. **Security-first approach** protecting both the cluster infrastructure and tenant data
4. **Harmony-native implementation** using our Score/Interpret pattern for automated tenant provisioning
5. **Scalable management** supporting both small trusted clients and larger enterprise customers
The official Kubernetes multi-tenancy documentation identifies two primary models: namespace-based isolation and virtual control planes per tenant. Given Harmony's focus on operational simplicity, provider-agnostic abstractions (ADR-003), and hexagonal architecture (ADR-002), we must choose an approach that balances security, usability, and maintainability.
Our clients represent a hybrid tenancy model:
- **Customer multi-tenancy**: Each client operates independently with no cross-tenant trust
- **Team multi-tenancy**: Individual clients may have multiple team members requiring coordinated access
- **API access requirement**: Unlike pure SaaS scenarios, clients need controlled Kubernetes API access for self-service operations
This ADR was heavily inspired by the official Kubernetes documentation on multi-tenancy: https://kubernetes.io/docs/concepts/security/multi-tenancy/
## Decision
Implement **namespace-based multi-tenancy** with the following architecture:
### 1. Network Security Model
- **Private cluster access**: Kubernetes API and OpenShift console accessible only via WireGuard VPN
- **No public exposure**: Control plane endpoints remain internal to prevent unauthorized access attempts
- **VPN-based authentication**: Initial access control through WireGuard client certificates
### 2. Tenant Isolation Strategy
- **Dedicated namespace per tenant**: Each client receives an isolated namespace with access limited only to the required resources and operations
- **Complete network isolation**: NetworkPolicies prevent cross-namespace communication while allowing full egress to public internet
- **Resource governance**: ResourceQuotas and LimitRanges enforce CPU, memory, and storage consumption limits
- **Storage access control**: Clients can create PersistentVolumeClaims but cannot directly manipulate PersistentVolumes or access other tenants' storage
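As an illustration of the network isolation described above, a default deny-all ingress policy could be expressed in the same `serde_json::json!` style that the `K8sTenantManager` implementation later in this diff uses for its Namespace and ResourceQuota objects. This is a hypothetical sketch, not part of the changeset; `config.name` is assumed to hold the tenant namespace.
```rust
// Hypothetical sketch: a default deny-all ingress NetworkPolicy for a tenant
// namespace, mirroring the json! style used by K8sTenantManager.
let deny_all_ingress = serde_json::json!({
    "apiVersion": "networking.k8s.io/v1",
    "kind": "NetworkPolicy",
    "metadata": {
        "name": "default-deny-ingress",
        "namespace": config.name,
    },
    "spec": {
        // An empty podSelector matches every pod in the namespace.
        "podSelector": {},
        // Listing Ingress with no ingress rules denies all inbound traffic;
        // egress is left unrestricted, matching the "full egress" bullet above.
        "policyTypes": ["Ingress"]
    }
});
```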
### 3. Access Control Framework
- **Principle of Least Privilege**: RBAC grants only necessary permissions within tenant namespace scope
- **Namespace-scoped**: Clients can create/modify/delete resources within their namespace
- **Cluster-level restrictions**: No access to cluster-wide resources, other namespaces, or sensitive cluster operations
- **Whitelisted operations**: Controlled self-service capabilities for ingress, secrets, configmaps, and workload management
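A namespace-scoped Role granting roughly this whitelist might look like the following sketch (hypothetical; the resource and verb lists are illustrative only and are not part of this changeset):
```rust
// Hypothetical sketch: a namespace-scoped Role for tenant self-service.
// Because it is a Role (not a ClusterRole), it grants nothing outside the
// tenant namespace, keeping cluster-wide resources off limits.
let tenant_role = serde_json::json!({
    "apiVersion": "rbac.authorization.k8s.io/v1",
    "kind": "Role",
    "metadata": { "name": "tenant-admin", "namespace": config.name },
    "rules": [{
        "apiGroups": ["", "apps", "networking.k8s.io"],
        "resources": [
            "pods", "deployments", "services", "ingresses",
            "secrets", "configmaps", "persistentvolumeclaims"
        ],
        "verbs": ["get", "list", "watch", "create", "update", "patch", "delete"]
    }]
});
```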
### 4. Identity Management Evolution
- **Phase 1**: Manual provisioning of VPN access and Kubernetes ServiceAccounts/Users
- **Phase 2**: Migration to Keycloak-based identity management (aligning with ADR-006) for centralized authentication and lifecycle management
### 5. Harmony Integration
- **TenantScore implementation**: Declarative tenant provisioning using Harmony's Score/Interpret pattern
- **Topology abstraction**: Tenant configuration abstracted from underlying Kubernetes implementation details
- **Automated deployment**: Complete tenant setup automated through Harmony's orchestration capabilities
## Rationale
### Network Security Through VPN Access
- **Defense in depth**: VPN requirement adds critical security layer preventing unauthorized cluster access
- **Simplified firewall rules**: No need for complex public endpoint protections or rate limiting
- **Audit capability**: VPN access provides clear audit trail of cluster connections
- **Aligns with enterprise practices**: Most enterprise customers already use VPN infrastructure
### Namespace Isolation vs Virtual Control Planes
Following Kubernetes official guidance, namespace isolation provides:
- **Lower resource overhead**: Virtual control planes require dedicated etcd, API server, and controller manager per tenant
- **Operational simplicity**: Single control plane to maintain, upgrade, and monitor
- **Cross-tenant service integration**: Enables future controlled cross-tenant communication if required
- **Proven stability**: Namespace-based isolation is well-tested and widely deployed
- **Cost efficiency**: Significantly lower infrastructure costs compared to dedicated control planes
### Hybrid Tenancy Model Suitability
Our approach addresses both customer and team multi-tenancy requirements:
- **Customer isolation**: Strong network and RBAC boundaries prevent cross-tenant interference
- **Team collaboration**: Multiple team members can share namespace access through group-based RBAC
- **Self-service balance**: Controlled API access enables client autonomy without compromising security
### Harmony Architecture Alignment
- **Provider agnostic**: TenantScore abstracts multi-tenancy concepts, enabling future support for other Kubernetes distributions
- **Hexagonal architecture**: Tenant management becomes an infrastructure capability accessed through well-defined ports
- **Declarative automation**: Tenant lifecycle fully managed through Harmony's Score execution model
## Consequences
### Positive Consequences
- **Strong security posture**: VPN + namespace isolation provides robust tenant separation
- **Operational efficiency**: Single cluster management with automated tenant provisioning
- **Client autonomy**: Self-service capabilities reduce operational support burden
- **Scalable architecture**: Can support hundreds of tenants per cluster without architectural changes
- **Future flexibility**: Foundation supports evolution to more sophisticated multi-tenancy models
- **Cost optimization**: Shared infrastructure maximizes resource utilization
### Negative Consequences
- **VPN operational overhead**: Requires VPN infrastructure management
- **Manual provisioning complexity**: Phase 1 manual user management creates administrative burden
- **Network policy dependency**: Requires a CNI with NetworkPolicy support (OVN-Kubernetes provides this and is the OKD/OpenShift default)
- **Cluster-wide resource limitations**: Some advanced Kubernetes features require cluster-wide access
- **Single point of failure**: Cluster outage affects all tenants simultaneously
### Migration Challenges
- **Legacy client integration**: Existing clients may need VPN client setup and credential migration
- **Monitoring complexity**: Per-tenant observability requires careful metric and log segmentation
- **Backup considerations**: Tenant data backup must respect isolation boundaries
## Alternatives Considered
### Alternative 1: Virtual Control Plane Per Tenant
**Pros**: Complete control plane isolation, full Kubernetes API access per tenant
**Cons**: 3-5x higher resource usage, complex cross-tenant networking, operational complexity scales linearly with tenants
**Rejected**: Resource overhead incompatible with cost-effective multi-tenancy goals
### Alternative 2: Dedicated Clusters Per Tenant
**Pros**: Maximum isolation, independent upgrade cycles, simplified security model
**Cons**: Exponential operational complexity, prohibitive costs, resource waste
**Rejected**: Operational overhead makes this approach unsustainable for multiple clients
### Alternative 3: Public API with Advanced Authentication
**Pros**: No VPN requirement, potentially simpler client access
**Cons**: Larger attack surface, complex rate limiting and DDoS protection, increased security monitoring requirements
**Rejected**: Risk/benefit analysis favors VPN-based access control
### Alternative 4: Service Mesh Based Isolation
**Pros**: Fine-grained traffic control, encryption, advanced observability
**Cons**: Significant operational complexity, performance overhead, steep learning curve
**Rejected**: Complexity overhead outweighs benefits for current requirements; remains option for future enhancement
## Additional Notes
### Implementation Roadmap
1. **Phase 1**: Implement VPN access and manual tenant provisioning
2. **Phase 2**: Deploy TenantScore automation for namespace, RBAC, and NetworkPolicy management
3. **Phase 3**: Integrate Keycloak for centralized identity management
4. **Phase 4**: Add advanced monitoring and per-tenant observability
### TenantScore Structure Preview
```rust
pub struct TenantScore {
    pub tenant_config: TenantConfig,
    pub resource_quotas: ResourceQuotaConfig,
    pub network_isolation: NetworkIsolationPolicy,
    pub storage_access: StorageAccessConfig,
    pub rbac_config: RBACConfig,
}
```
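Registering such a score would follow the same Maestro pattern used by the example binaries added in this changeset. A hypothetical usage sketch, assuming `tenant_score` is an already constructed `TenantScore`:
```rust
// Hypothetical usage sketch following the Maestro pattern from the examples
// in this changeset; `tenant_score` is assumed to already be constructed.
let mut maestro = Maestro::<K8sAnywhereTopology>::initialize(
    Inventory::autoload(),
    K8sAnywhereTopology::new(),
)
.await
.unwrap();
maestro.register_all(vec![Box::new(tenant_score)]);
harmony_cli::init(maestro, None).await.unwrap();
```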
### Future Enhancements
- **Cross-tenant service mesh**: For approved inter-tenant communication
- **Advanced monitoring**: Per-tenant Prometheus/Grafana instances
- **Backup automation**: Tenant-scoped backup policies
- **Cost allocation**: Detailed per-tenant resource usage tracking
This ADR establishes the foundation for secure, scalable multi-tenancy in Harmony-managed clusters while maintaining operational simplicity and cost effectiveness. A follow-up ADR will detail the Tenant abstraction and user management mechanisms within the Harmony framework.

View File

@@ -2,12 +2,7 @@ use harmony::{
data::Version,
inventory::Inventory,
maestro::Maestro,
modules::{
lamp::{LAMPConfig, LAMPScore},
monitoring::monitoring_alerting::{
AlertChannel, MonitoringAlertingStackScore, WebhookServiceType,
},
},
modules::lamp::{LAMPConfig, LAMPScore},
topology::{K8sAnywhereTopology, Url},
};
@@ -45,17 +40,7 @@ async fn main() {
.await
.unwrap();
let url = url::Url::parse("https://discord.com/api/webhooks/dummy_channel/dummy_token")
.expect("invalid URL");
let mut monitoring_stack_score = MonitoringAlertingStackScore::new();
monitoring_stack_score.namespace = Some(lamp_stack.config.namespace.clone());
monitoring_stack_score.alert_channel = Some(AlertChannel::WebHookUrl {
url: url,
webhook_service_type: WebhookServiceType::Discord,
});
maestro.register_all(vec![Box::new(lamp_stack), Box::new(monitoring_stack_score)]);
maestro.register_all(vec![Box::new(lamp_stack)]);
// Here we bootstrap the CLI, this gives some nice features if you need them
harmony_cli::init(maestro, None).await.unwrap();
}

View File

@@ -0,0 +1,12 @@
[package]
name = "webhook_sender"
edition = "2024"
version.workspace = true
readme.workspace = true
license.workspace = true
[dependencies]
harmony = { version = "0.1.0", path = "../../harmony" }
harmony_cli = { version = "0.1.0", path = "../../harmony_cli" }
tokio.workspace = true
url.workspace = true

View File

@@ -0,0 +1,23 @@
use harmony::{
    inventory::Inventory,
    maestro::Maestro,
    modules::monitoring::monitoring_alerting::MonitoringAlertingScore,
    topology::{K8sAnywhereTopology, oberservability::K8sMonitorConfig},
};

#[tokio::main]
async fn main() {
    let mut maestro = Maestro::<K8sAnywhereTopology>::initialize(
        Inventory::autoload(),
        K8sAnywhereTopology::new(),
    )
    .await
    .unwrap();
    let monitoring = MonitoringAlertingScore {
        alert_channel_configs: None,
    };
    maestro.register_all(vec![Box::new(monitoring)]);
    harmony_cli::init(maestro, None).await.unwrap();
}

View File

@@ -49,3 +49,4 @@ fqdn = { version = "0.4.6", features = [
"serde",
] }
temp-dir = "0.1.14"
dyn-clone = "1.0.19"

View File

@@ -1,6 +1,6 @@
use serde::{Deserialize, Serialize};
#[derive(Debug, Clone, Serialize, Deserialize)]
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct Id {
value: String,
}
@@ -10,3 +10,9 @@ impl Id {
Self { value }
}
}
impl std::fmt::Display for Id {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
f.write_str(&self.value)
}
}

View File

@@ -20,6 +20,7 @@ pub enum InterpretName {
Panic,
OPNSense,
K3dInstallation,
TenantInterpret,
}
impl std::fmt::Display for InterpretName {
@@ -35,6 +36,7 @@ impl std::fmt::Display for InterpretName {
InterpretName::Panic => f.write_str("Panic"),
InterpretName::OPNSense => f.write_str("OPNSense"),
InterpretName::K3dInstallation => f.write_str("K3dInstallation"),
InterpretName::TenantInterpret => f.write_str("Tenant"),
}
}
}

View File

@@ -6,14 +6,29 @@ use log::{info, warn};
use tokio::sync::OnceCell;
use crate::{
executors::ExecutorError,
interpret::{InterpretError, Outcome},
inventory::Inventory,
maestro::Maestro,
modules::k3d::K3DInstallationScore,
modules::{
k3d::K3DInstallationScore,
monitoring::kube_prometheus::kube_prometheus_helm_chart_score::kube_prometheus_helm_chart_score,
},
topology::LocalhostTopology,
};
use super::{HelmCommand, K8sclient, Topology, k8s::K8sClient};
use super::{
HelmCommand, K8sclient, Topology,
k8s::K8sClient,
oberservability::{
K8sMonitorConfig,
k8s::K8sMonitor,
monitoring::{AlertChannel, AlertChannelConfig, Monitor},
},
tenant::{
ResourceLimits, TenantConfig, TenantManager, TenantNetworkPolicy, k8s::K8sTenantManager,
},
};
struct K8sState {
client: Arc<K8sClient>,
@@ -21,6 +36,7 @@ struct K8sState {
message: String,
}
#[derive(Debug)]
enum K8sSource {
LocalK3d,
Kubeconfig,
@@ -28,6 +44,8 @@ enum K8sSource {
pub struct K8sAnywhereTopology {
k8s_state: OnceCell<Option<K8sState>>,
tenant_manager: OnceCell<K8sTenantManager>,
k8s_monitor: OnceCell<K8sMonitor>,
}
#[async_trait]
@@ -51,6 +69,8 @@ impl K8sAnywhereTopology {
pub fn new() -> Self {
Self {
k8s_state: OnceCell::new(),
tenant_manager: OnceCell::new(),
k8s_monitor: OnceCell::new(),
}
}
@@ -159,6 +179,39 @@ impl K8sAnywhereTopology {
Ok(Some(state))
}
fn get_k8s_tenant_manager(&self) -> Result<&K8sTenantManager, ExecutorError> {
match self.tenant_manager.get() {
Some(t) => Ok(t),
None => Err(ExecutorError::UnexpectedError(
"K8sTenantManager not available".to_string(),
)),
}
}
async fn ensure_k8s_monitor(&self) -> Result<(), String> {
if let Some(_) = self.k8s_monitor.get() {
return Ok(());
}
self.k8s_monitor
.get_or_try_init(async || -> Result<K8sMonitor, String> {
let config = K8sMonitorConfig::cluster_monitor();
Ok(K8sMonitor { config })
})
.await
.unwrap();
Ok(())
}
fn get_k8s_monitor(&self) -> Result<&K8sMonitor, ExecutorError> {
match self.k8s_monitor.get() {
Some(k) => Ok(k),
None => Err(ExecutorError::UnexpectedError(
"K8sMonitor not available".to_string(),
)),
}
}
}
struct K8sAnywhereConfig {
@@ -198,6 +251,10 @@ impl Topology for K8sAnywhereTopology {
"No K8s client could be found or installed".to_string(),
))?;
self.ensure_k8s_monitor()
.await
.map_err(|e| InterpretError::new(e))?;
match self.is_helm_available() {
Ok(()) => Ok(Outcome::success(format!(
"{} + helm available",
@@ -209,3 +266,55 @@ impl Topology for K8sAnywhereTopology {
}
impl HelmCommand for K8sAnywhereTopology {}
#[async_trait]
impl TenantManager for K8sAnywhereTopology {
async fn provision_tenant(&self, config: &TenantConfig) -> Result<(), ExecutorError> {
self.get_k8s_tenant_manager()?
.provision_tenant(config)
.await
}
async fn update_tenant_resource_limits(
&self,
tenant_name: &str,
new_limits: &ResourceLimits,
) -> Result<(), ExecutorError> {
self.get_k8s_tenant_manager()?
.update_tenant_resource_limits(tenant_name, new_limits)
.await
}
async fn update_tenant_network_policy(
&self,
tenant_name: &str,
new_policy: &TenantNetworkPolicy,
) -> Result<(), ExecutorError> {
self.get_k8s_tenant_manager()?
.update_tenant_network_policy(tenant_name, new_policy)
.await
}
async fn deprovision_tenant(&self, tenant_name: &str) -> Result<(), ExecutorError> {
self.get_k8s_tenant_manager()?
.deprovision_tenant(tenant_name)
.await
}
}
#[async_trait]
impl Monitor for K8sAnywhereTopology {
async fn provision_monitor<T: Topology + HelmCommand>(
&self,
inventory: &Inventory,
topology: &T,
alert_receivers: Option<Vec<Box<dyn AlertChannelConfig>>>,
) -> Result<Outcome, InterpretError> {
self.get_k8s_monitor()?
.provision_monitor(inventory, topology, alert_receivers)
.await
}
fn delete_monitor(&self) -> Result<Outcome, InterpretError> {
todo!()
}
}

View File

@@ -7,6 +7,12 @@ use serde::Serialize;
use super::{IpAddress, LogicalHost};
use crate::executors::ExecutorError;
impl std::fmt::Debug for dyn LoadBalancer {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
f.write_fmt(format_args!("LoadBalancer {}", self.get_ip()))
}
}
#[async_trait]
pub trait LoadBalancer: Send + Sync {
fn get_ip(&self) -> IpAddress;
@@ -32,11 +38,6 @@ pub trait LoadBalancer: Send + Sync {
}
}
impl std::fmt::Debug for dyn LoadBalancer {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
f.write_fmt(format_args!("LoadBalancer {}", self.get_ip()))
}
}
#[derive(Debug, PartialEq, Clone, Serialize)]
pub struct LoadBalancerService {
pub backend_servers: Vec<BackendServer>,

View File

@@ -3,6 +3,8 @@ mod host_binding;
mod http;
mod k8s_anywhere;
mod localhost;
pub mod oberservability;
pub mod tenant;
pub use k8s_anywhere::*;
pub use localhost::*;
pub mod k8s;

View File

@@ -0,0 +1,71 @@
use std::sync::Arc;
use async_trait::async_trait;
use serde::Serialize;
use crate::score::Score;
use crate::topology::HelmCommand;
use crate::{
interpret::{InterpretError, Outcome},
inventory::Inventory,
topology::Topology,
};
use super::{
K8sMonitorConfig,
monitoring::{AlertChannel, AlertChannelConfig, Monitor},
};
#[derive(Debug, Clone, Serialize)]
pub struct K8sMonitor {
pub config: K8sMonitorConfig,
}
#[async_trait]
impl Monitor for K8sMonitor {
async fn provision_monitor<T: Topology + HelmCommand>(
&self,
inventory: &Inventory,
topology: &T,
alert_channels: Option<Vec<Box<dyn AlertChannelConfig>>>,
) -> Result<Outcome, InterpretError> {
if let Some(channels) = alert_channels {
let alert_channels = self.build_alert_channels(channels).await?;
for channel in alert_channels {
channel.register_alert_channel().await?;
}
}
let chart = self.config.chart.clone();
chart
.create_interpret()
.execute(inventory, topology)
.await?;
Ok(Outcome::success("installed monitor".to_string()))
}
fn delete_monitor(&self) -> Result<Outcome, InterpretError> {
todo!()
}
}
#[async_trait]
impl AlertChannelConfig for K8sMonitor {
async fn build_alert_channel(&self) -> Result<Box<dyn AlertChannel>, InterpretError> {
todo!()
}
}
impl K8sMonitor {
pub async fn build_alert_channels(
&self,
alert_channel_configs: Vec<Box<dyn AlertChannelConfig>>,
) -> Result<Vec<Box<dyn AlertChannel>>, InterpretError> {
let mut alert_channels = Vec::new();
for config in alert_channel_configs {
let channel = config.build_alert_channel().await?;
alert_channels.push(channel)
}
Ok(alert_channels)
}
}

View File

@@ -0,0 +1,23 @@
use serde::Serialize;

use crate::modules::{
    helm::chart::HelmChartScore,
    monitoring::kube_prometheus::kube_prometheus_helm_chart_score::kube_prometheus_helm_chart_score,
};

pub mod k8s;
pub mod monitoring;

#[derive(Debug, Clone, Serialize)]
pub struct K8sMonitorConfig {
    //probably need to do something better here
    pub chart: HelmChartScore,
}

impl K8sMonitorConfig {
    pub fn cluster_monitor() -> Self {
        Self {
            chart: kube_prometheus_helm_chart_score(),
        }
    }
}

View File

@@ -0,0 +1,39 @@
use async_trait::async_trait;
use dyn_clone::DynClone;
use std::fmt::Debug;
use crate::executors::ExecutorError;
use crate::interpret::InterpretError;
use crate::inventory::Inventory;
use crate::topology::HelmCommand;
use crate::{interpret::Outcome, topology::Topology};
/// Represents an entity responsible for collecting and organizing observability data
/// from various telemetry sources such as Prometheus or Datadog
/// A `Monitor` abstracts the logic required to scrape, aggregate, and structure
/// monitoring data, enabling consistent processing regardless of the underlying data source.
#[async_trait]
pub trait Monitor {
    async fn provision_monitor<T: Topology + HelmCommand>(
        &self,
        inventory: &Inventory,
        topology: &T,
        alert_receivers: Option<Vec<Box<dyn AlertChannelConfig>>>,
    ) -> Result<Outcome, InterpretError>;

    fn delete_monitor(&self) -> Result<Outcome, InterpretError>;
}

#[async_trait]
pub trait AlertChannel: Debug + Send + Sync {
    async fn register_alert_channel(&self) -> Result<Outcome, ExecutorError>;
    //async fn get_channel_id(&self) -> String;
}

#[async_trait]
pub trait AlertChannelConfig: Debug + Send + Sync + DynClone {
    async fn build_alert_channel(&self) -> Result<Box<dyn AlertChannel>, InterpretError>;
}

dyn_clone::clone_trait_object!(AlertChannelConfig);

View File

@@ -0,0 +1,95 @@
use std::sync::Arc;
use crate::{executors::ExecutorError, topology::k8s::K8sClient};
use async_trait::async_trait;
use derive_new::new;
use k8s_openapi::api::core::v1::Namespace;
use serde_json::json;
use super::{ResourceLimits, TenantConfig, TenantManager, TenantNetworkPolicy};
#[derive(new)]
pub struct K8sTenantManager {
k8s_client: Arc<K8sClient>,
}
#[async_trait]
impl TenantManager for K8sTenantManager {
async fn provision_tenant(&self, config: &TenantConfig) -> Result<(), ExecutorError> {
let namespace = json!(
{
"apiVersion": "v1",
"kind": "Namespace",
"metadata": {
"labels": {
"harmony.nationtech.io/tenant.id": config.id,
"harmony.nationtech.io/tenant.name": config.name,
},
"name": config.name,
},
}
);
todo!("Validate that when tenant already exists (by id) that name has not changed");
let namespace: Namespace = serde_json::from_value(namespace).unwrap();
let resource_quota = json!(
{
"apiVersion": "v1",
"kind": "List",
"items": [
{
"apiVersion": "v1",
"kind": "ResourceQuota",
"metadata": {
"name": config.name,
"labels": {
"harmony.nationtech.io/tenant.id": config.id,
"harmony.nationtech.io/tenant.name": config.name,
},
"namespace": config.name,
},
"spec": {
"hard": {
"limits.cpu": format!("{:.0}",config.resource_limits.cpu_limit_cores),
"limits.memory": format!("{:.3}Gi", config.resource_limits.memory_limit_gb),
"requests.cpu": format!("{:.0}",config.resource_limits.cpu_request_cores),
"requests.memory": format!("{:.3}Gi", config.resource_limits.memory_request_gb),
"requests.storage": format!("{:.3}", config.resource_limits.storage_total_gb),
"pods": "20",
"services": "10",
"configmaps": "30",
"secrets": "30",
"persistentvolumeclaims": "15",
"services.loadbalancers": "2",
"services.nodeports": "5",
}
}
}
]
}
);
}
async fn update_tenant_resource_limits(
&self,
tenant_name: &str,
new_limits: &ResourceLimits,
) -> Result<(), ExecutorError> {
todo!()
}
async fn update_tenant_network_policy(
&self,
tenant_name: &str,
new_policy: &TenantNetworkPolicy,
) -> Result<(), ExecutorError> {
todo!()
}
async fn deprovision_tenant(&self, tenant_name: &str) -> Result<(), ExecutorError> {
todo!()
}
}

View File

@@ -0,0 +1,46 @@
use super::*;
use async_trait::async_trait;
use crate::executors::ExecutorError;
#[async_trait]
pub trait TenantManager {
    /// Provisions a new tenant based on the provided configuration.
    /// This operation should be idempotent; if a tenant with the same `config.name`
    /// already exists and matches the config, it will succeed without changes.
    /// If it exists but differs, it will be updated, or return an error if the update
    /// action is not supported.
    ///
    /// # Arguments
    /// * `config`: The desired configuration for the new tenant.
    async fn provision_tenant(&self, config: &TenantConfig) -> Result<(), ExecutorError>;

    /// Updates the resource limits for an existing tenant.
    ///
    /// # Arguments
    /// * `tenant_name`: The logical name of the tenant to update.
    /// * `new_limits`: The new set of resource limits to apply.
    async fn update_tenant_resource_limits(
        &self,
        tenant_name: &str,
        new_limits: &ResourceLimits,
    ) -> Result<(), ExecutorError>;

    /// Updates the high-level network isolation policy for an existing tenant.
    ///
    /// # Arguments
    /// * `tenant_name`: The logical name of the tenant to update.
    /// * `new_policy`: The new network policy to apply.
    async fn update_tenant_network_policy(
        &self,
        tenant_name: &str,
        new_policy: &TenantNetworkPolicy,
    ) -> Result<(), ExecutorError>;

    /// Decommissions an existing tenant, removing its isolated context and associated resources.
    /// This operation should be idempotent.
    ///
    /// # Arguments
    /// * `tenant_name`: The logical name of the tenant to deprovision.
    async fn deprovision_tenant(&self, tenant_name: &str) -> Result<(), ExecutorError>;
}

View File

@@ -0,0 +1,67 @@
pub mod k8s;
mod manager;
pub use manager::*;
use serde::{Deserialize, Serialize};
use std::collections::HashMap;
use crate::data::Id;
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] // Assuming serde for Scores
pub struct TenantConfig {
/// This will be used as the primary unique identifier for management operations and will never
/// change for the entire lifetime of the tenant
pub id: Id,
/// A human-readable name for the tenant (e.g., "client-alpha", "project-phoenix").
pub name: String,
/// Desired resource allocations and limits for the tenant.
pub resource_limits: ResourceLimits,
/// High-level network isolation policies for the tenant.
pub network_policy: TenantNetworkPolicy,
/// Key-value pairs for provider-specific tagging, labeling, or metadata.
/// Useful for billing, organization, or filtering within the provider's console.
pub labels_or_tags: HashMap<String, String>,
}
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize, Default)]
pub struct ResourceLimits {
/// Requested/guaranteed CPU cores (e.g., 2.0).
pub cpu_request_cores: f32,
/// Maximum CPU cores the tenant can burst to (e.g., 4.0).
pub cpu_limit_cores: f32,
/// Requested/guaranteed memory in Gigabytes (e.g., 8.0).
pub memory_request_gb: f32,
/// Maximum memory in Gigabytes tenant can burst to (e.g., 16.0).
pub memory_limit_gb: f32,
/// Total persistent storage allocation in Gigabytes across all volumes.
pub storage_total_gb: f32,
}
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct TenantNetworkPolicy {
/// Policy for ingress traffic originating from other tenants within the same Harmony-managed environment.
pub default_inter_tenant_ingress: InterTenantIngressPolicy,
/// Policy for egress traffic destined for the public internet.
pub default_internet_egress: InternetEgressPolicy,
}
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub enum InterTenantIngressPolicy {
/// Deny all traffic from other tenants by default.
DenyAll,
}
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub enum InternetEgressPolicy {
/// Allow all outbound traffic to the internet.
AllowAll,
/// Deny all outbound traffic to the internet by default.
DenyAll,
}

View File

@@ -23,7 +23,7 @@ pub struct HelmRepository {
force_update: bool,
}
impl HelmRepository {
pub(crate) fn new(name: String, url: Url, force_update: bool) -> Self {
pub fn new(name: String, url: Url, force_update: bool) -> Self {
Self {
name,
url,
@@ -104,7 +104,10 @@ impl HelmChartInterpret {
fn run_helm_command(args: &[&str]) -> Result<Output, InterpretError> {
let command_str = format!("helm {}", args.join(" "));
debug!("Got KUBECONFIG: `{}`", std::env::var("KUBECONFIG").unwrap());
debug!(
"Got KUBECONFIG: `{}`",
std::env::var("KUBECONFIG").unwrap_or("".to_string())
);
debug!("Running Helm command: `{}`", command_str);
let output = Command::new("helm")

View File

@@ -1,12 +1,9 @@
use async_trait::async_trait;
use log::debug;
use non_blank_string_rs::NonBlankString;
use serde::Serialize;
use std::collections::HashMap;
use std::env::temp_dir;
use std::ffi::OsStr;
use std::io::ErrorKind;
use std::path::{Path, PathBuf};
use std::path::PathBuf;
use std::process::{Command, Output};
use temp_dir::{self, TempDir};
use temp_file::TempFile;

View File

@@ -12,4 +12,5 @@ pub mod load_balancer;
pub mod monitoring;
pub mod okd;
pub mod opnsense;
pub mod tenant;
pub mod tftp;

View File

@@ -0,0 +1,42 @@
use url::Url;
#[derive(Debug, Clone)]
pub struct DiscordWebhookAlertChannel {
pub webhook_url: Url,
pub name: String,
pub send_resolved_notifications: bool,
}
//impl AlertChannelConfig for DiscordWebhookAlertChannel {
// fn build_alert_channel(&self) -> Box<dyn AlertChannel> {
// Box::new(DiscordWebhookAlertChannel {
// webhook_url: self.webhook_url.clone(),
// name: self.name.clone(),
// send_resolved_notifications: self.send_resolved_notifications.clone(),
// })
// }
// fn channel_type(&self) -> String {
// "discord".to_string()
// }
//}
//
//#[async_trait]
//impl AlertChannel for DiscordWebhookAlertChannel {
// async fn get_channel_id(&self) -> String {
// self.name.clone()
// }
//}
//
//impl PrometheusAlertChannel for DiscordWebhookAlertChannel {
// fn get_alert_channel_global_settings(&self) -> Option<AlertManagerChannelGlobalConfigs> {
// None
// }
//
// fn get_alert_channel_route(&self) -> AlertManagerChannelRoute {
// todo!()
// }
//
// fn get_alert_channel_receiver(&self) -> AlertManagerChannelReceiver {
// todo!()
// }
//}

View File

@@ -0,0 +1 @@
pub mod discord_alert_channel;

View File

@@ -1,46 +0,0 @@
use std::str::FromStr;
use non_blank_string_rs::NonBlankString;
use crate::modules::helm::chart::HelmChartScore;
use super::{config::KubePrometheusConfig, monitoring_alerting::AlertChannel};
fn get_discord_alert_manager_score(config: &KubePrometheusConfig) -> Option<HelmChartScore> {
let (url, name) = config.alert_channel.iter().find_map(|channel| {
if let AlertChannel::Discord { webhook_url, name } = channel {
Some((webhook_url, name))
} else {
None
}
})?;
let values = format!(
r#"
environment:
- name: "DISCORD_WEBHOOK"
value: "{url}"
"#,
);
Some(HelmChartScore {
namespace: Some(NonBlankString::from_str(&config.namespace).unwrap()),
release_name: NonBlankString::from_str(&name).unwrap(),
chart_name: NonBlankString::from_str("oci://hub.nationtech.io/library/alertmanager-discord")
.unwrap(),
chart_version: None,
values_overrides: None,
values_yaml: Some(values.to_string()),
create_namespace: true,
install_only: true,
repository: None,
})
}
pub fn discord_alert_manager_score(config: &KubePrometheusConfig) -> HelmChartScore {
if let Some(chart) = get_discord_alert_manager_score(config) {
chart
} else {
panic!("Expected discord alert manager helm chart");
}
}

View File

@@ -1,7 +1,6 @@
use serde::Serialize;
use super::monitoring_alerting::AlertChannel;
use super::types::AlertManagerChannelConfig;
#[derive(Debug, Clone, Serialize)]
pub struct KubePrometheusConfig {
@@ -22,7 +21,7 @@ pub struct KubePrometheusConfig {
pub kube_proxy: bool,
pub kube_state_metrics: bool,
pub prometheus_operator: bool,
pub alert_channel: Vec<AlertChannel>,
pub alert_channels: Vec<AlertManagerChannelConfig>,
}
impl KubePrometheusConfig {
pub fn new() -> Self {
@@ -31,7 +30,6 @@ impl KubePrometheusConfig {
default_rules: true,
windows_monitoring: false,
alert_manager: true,
alert_channel: Vec::new(),
grafana: true,
node_exporter: false,
prometheus: true,
@@ -45,6 +43,7 @@ impl KubePrometheusConfig {
prometheus_operator: true,
core_dns: false,
kube_scheduler: false,
alert_channels: Vec::new(),
}
}
}

View File

@@ -1,12 +1,13 @@
use super::{config::KubePrometheusConfig, monitoring_alerting::AlertChannel};
use super::config::KubePrometheusConfig;
use log::info;
use non_blank_string_rs::NonBlankString;
use std::{collections::HashMap, str::FromStr};
use url::Url;
use std::str::FromStr;
use crate::modules::helm::chart::HelmChartScore;
pub fn kube_prometheus_helm_chart_score(config: &KubePrometheusConfig) -> HelmChartScore {
pub fn kube_prometheus_helm_chart_score() -> HelmChartScore {
let config = KubePrometheusConfig::new();
//TODO this should be make into a rule with default formatting that can be easily passed as a vec
//to the overrides or something leaving the user to deal with formatting here seems bad
let default_rules = config.default_rules.to_string();
@@ -144,67 +145,6 @@ prometheus:
enabled: {prometheus}
"#,
);
let alertmanager_config = alert_manager_yaml_builder(&config);
values.push_str(&alertmanager_config);
fn alert_manager_yaml_builder(config: &KubePrometheusConfig) -> String {
let mut receivers = String::new();
let mut routes = String::new();
let mut global_configs = String::new();
let alert_manager = config.alert_manager;
for alert_channel in &config.alert_channel {
match alert_channel {
AlertChannel::Discord { name, .. } => {
let (receiver, route) = discord_alert_builder(name);
info!("discord receiver: {} \nroute: {}", receiver, route);
receivers.push_str(&receiver);
routes.push_str(&route);
}
AlertChannel::Slack {
slack_channel,
webhook_url,
} => {
let (receiver, route) = slack_alert_builder(slack_channel);
info!("slack receiver: {} \nroute: {}", receiver, route);
receivers.push_str(&receiver);
routes.push_str(&route);
let global_config = format!(
r#"
global:
slack_api_url: {webhook_url}"#
);
global_configs.push_str(&global_config);
}
AlertChannel::Smpt { .. } => todo!(),
}
}
info!("after alert receiver: {}", receivers);
info!("after alert routes: {}", routes);
let alertmanager_config = format!(
r#"
alertmanager:
enabled: {alert_manager}
config: {global_configs}
route:
group_by: ['job']
group_wait: 30s
group_interval: 5m
repeat_interval: 12h
routes:
{routes}
receivers:
- name: 'null'
{receivers}"#
);
info!("alert manager config: {}", alertmanager_config);
alertmanager_config
}
HelmChartScore {
namespace: Some(NonBlankString::from_str(&config.namespace).unwrap()),
release_name: NonBlankString::from_str("kube-prometheus").unwrap(),
@@ -220,43 +160,102 @@ alertmanager:
repository: None,
}
}
// let alertmanager_config = alert_manager_yaml_builder(&config);
// values.push_str(&alertmanager_config);
//
// fn alert_manager_yaml_builder(config: &KubePrometheusConfig) -> String {
// let mut receivers = String::new();
// let mut routes = String::new();
// let mut global_configs = String::new();
// let alert_manager = config.alert_manager;
// for alert_channel in &config.alert_channel {
// match alert_channel {
// AlertChannel::Discord { name, .. } => {
// let (receiver, route) = discord_alert_builder(name);
// info!("discord receiver: {} \nroute: {}", receiver, route);
// receivers.push_str(&receiver);
// routes.push_str(&route);
// }
// AlertChannel::Slack {
// slack_channel,
// webhook_url,
// } => {
// let (receiver, route) = slack_alert_builder(slack_channel);
// info!("slack receiver: {} \nroute: {}", receiver, route);
// receivers.push_str(&receiver);
//
// routes.push_str(&route);
// let global_config = format!(
// r#"
// global:
// slack_api_url: {webhook_url}"#
// );
//
// global_configs.push_str(&global_config);
// }
// AlertChannel::Smpt { .. } => todo!(),
// }
// }
// info!("after alert receiver: {}", receivers);
// info!("after alert routes: {}", routes);
//
// let alertmanager_config = format!(
// r#"
//alertmanager:
// enabled: {alert_manager}
// config: {global_configs}
// route:
// group_by: ['job']
// group_wait: 30s
// group_interval: 5m
// repeat_interval: 12h
// routes:
//{routes}
// receivers:
// - name: 'null'
//{receivers}"#
// );
//
// info!("alert manager config: {}", alertmanager_config);
// alertmanager_config
// }
fn discord_alert_builder(release_name: &String) -> (String, String) {
let discord_receiver_name = format!("Discord-{}", release_name);
let receiver = format!(
r#"
- name: '{discord_receiver_name}'
webhook_configs:
- url: 'http://{release_name}-alertmanager-discord:9094'
send_resolved: true"#,
);
let route = format!(
r#"
- receiver: '{discord_receiver_name}'
matchers:
- alertname!=Watchdog
continue: true"#,
);
(receiver, route)
}
fn slack_alert_builder(slack_channel: &String) -> (String, String) {
let slack_receiver_name = format!("Slack-{}", slack_channel);
let receiver = format!(
r#"
- name: '{slack_receiver_name}'
slack_configs:
- channel: '{slack_channel}'
send_resolved: true
title: '{{{{ .CommonAnnotations.title }}}}'
text: '{{{{ .CommonAnnotations.description }}}}'"#,
);
let route = format!(
r#"
- receiver: '{slack_receiver_name}'
matchers:
- alertname!=Watchdog
continue: true"#,
);
(receiver, route)
}
//fn discord_alert_builder(release_name: &String) -> (String, String) {
// let discord_receiver_name = format!("Discord-{}", release_name);
// let receiver = format!(
// r#"
// - name: '{discord_receiver_name}'
// webhook_configs:
// - url: 'http://{release_name}-alertmanager-discord:9094'
// send_resolved: true"#,
// );
// let route = format!(
// r#"
// - receiver: '{discord_receiver_name}'
// matchers:
// - alertname!=Watchdog
// continue: true"#,
// );
// (receiver, route)
//}
//
//fn slack_alert_builder(slack_channel: &String) -> (String, String) {
// let slack_receiver_name = format!("Slack-{}", slack_channel);
// let receiver = format!(
// r#"
// - name: '{slack_receiver_name}'
// slack_configs:
// - channel: '{slack_channel}'
// send_resolved: true
// title: '{{{{ .CommonAnnotations.title }}}}'
// text: '{{{{ .CommonAnnotations.description }}}}'"#,
// );
// let route = format!(
// r#"
// - receiver: '{slack_receiver_name}'
// matchers:
// - alertname!=Watchdog
// continue: true"#,
// );
// (receiver, route)
//}

View File

@@ -0,0 +1,85 @@
//#[derive(Debug, Clone, Serialize)]
//pub struct KubePrometheusMonitorScore {
// pub kube_prometheus_config: KubePrometheusConfig,
// pub alert_channel_configs: Vec<dyn AlertChannelConfig>,
//}
//impl<T: Topology + Debug + HelmCommand + Monitor<T>> MonitorConfig<T>
// for KubePrometheusMonitorScore
//{
// fn build_monitor(&self) -> Box<dyn Monitor<T>> {
// Box::new(self.clone())
// }
//}
//impl<T: Topology + HelmCommand + Debug + Clone + 'static + Monitor<T>> Score<T>
// for KubePrometheusMonitorScore
//{
// fn create_interpret(&self) -> Box<dyn Interpret<T>> {
// Box::new(KubePrometheusMonitorInterpret {
// score: self.clone(),
// })
// }
//
// fn name(&self) -> String {
// "KubePrometheusMonitorScore".to_string()
// }
//}
//#[derive(Debug, Clone)]
//pub struct KubePrometheusMonitorInterpret {
// score: KubePrometheusMonitorScore,
//}
//#[async_trait]
//impl AlertChannelConfig for KubePrometheusMonitorInterpret {
// async fn build_alert_channel(
// &self,
// ) -> Box<dyn AlertChannel> {
// todo!()
// }
//}
//#[async_trait]
//impl<T: Topology + HelmCommand + Debug + Monitor<T>> Interpret<T>
// for KubePrometheusMonitorInterpret
//{
// async fn execute(
// &self,
// inventory: &Inventory,
// topology: &T,
// ) -> Result<Outcome, InterpretError> {
// let monitor = self.score.build_monitor();
//
// let mut alert_channels = Vec::new();
// //for config in self.score.alert_channel_configs {
// // alert_channels.push(self.build_alert_channel());
// //}
//
// monitor
// .deploy_monitor(inventory, topology, alert_channels)
// .await
// }
//
// fn get_name(&self) -> InterpretName {
// todo!()
// }
//
// fn get_version(&self) -> Version {
// todo!()
// }
//
// fn get_status(&self) -> InterpretStatus {
// todo!()
// }
//
// fn get_children(&self) -> Vec<Id> {
// todo!()
// }
//}
//#[async_trait]
//pub trait PrometheusAlertChannel {
// fn get_alert_channel_global_settings(&self) -> Option<AlertManagerChannelGlobalConfigs>;
// fn get_alert_channel_route(&self) -> AlertManagerChannelRoute;
// fn get_alert_channel_receiver(&self) -> AlertManagerChannelReceiver;
//}

View File

@@ -0,0 +1,4 @@
pub mod config;
pub mod kube_prometheus_helm_chart_score;
pub mod kube_prometheus_monitor;
pub mod types;

View File

@@ -0,0 +1,14 @@
use serde::Serialize;
#[derive(Debug, Clone, Serialize)]
pub struct AlertManagerChannelConfig {
    pub global_configs: AlertManagerChannelGlobalConfigs,
    pub route: AlertManagerChannelRoute,
    pub receiver: AlertManagerChannelReceiver,
}

#[derive(Debug, Clone, Serialize)]
pub struct AlertManagerChannelGlobalConfigs {}

#[derive(Debug, Clone, Serialize)]
pub struct AlertManagerChannelReceiver {}

#[derive(Debug, Clone, Serialize)]
pub struct AlertManagerChannelRoute {}

View File

@@ -1,4 +1,3 @@
mod kube_prometheus;
pub mod alert_channel;
pub mod kube_prometheus;
pub mod monitoring_alerting;
mod discord_alert_manager;
mod config;

View File

@@ -1,146 +1,54 @@
use async_trait::async_trait;
use email_address::EmailAddress;
use log::info;
use serde::Serialize;
use url::Url;
use crate::{
data::{Id, Version},
interpret::{Interpret, InterpretError, InterpretName, InterpretStatus, Outcome},
inventory::Inventory,
score::Score,
topology::{HelmCommand, Topology},
};
use super::{
config::KubePrometheusConfig, discord_alert_manager::discord_alert_manager_score,
kube_prometheus::kube_prometheus_helm_chart_score,
topology::{
HelmCommand, Topology,
oberservability::monitoring::{AlertChannelConfig, Monitor},
},
};
#[derive(Debug, Clone, Serialize)]
pub enum AlertChannel {
Discord {
name: String,
webhook_url: Url,
},
Slack {
slack_channel: String,
webhook_url: Url,
},
//TODO test and implement in helm chart
//currently does not work
Smpt {
email_address: EmailAddress,
service_name: String,
},
pub struct MonitoringAlertingScore {
#[serde(skip)]
pub alert_channel_configs: Option<Vec<Box<dyn AlertChannelConfig>>>,
}
#[derive(Debug, Clone, Serialize)]
pub struct MonitoringAlertingStackScore {
pub alert_channel: Vec<AlertChannel>,
pub namespace: Option<String>,
}
impl MonitoringAlertingStackScore {
pub fn new() -> Self {
Self {
alert_channel: Vec::new(),
namespace: None,
}
}
}
impl<T: Topology + HelmCommand> Score<T> for MonitoringAlertingStackScore {
impl<T: Topology + HelmCommand + Monitor> Score<T> for MonitoringAlertingScore {
fn create_interpret(&self) -> Box<dyn Interpret<T>> {
Box::new(MonitoringAlertingStackInterpret {
Box::new(MonitoringAlertingInterpret {
score: self.clone(),
})
}
fn name(&self) -> String {
format!("MonitoringAlertingStackScore")
"MonitoringAlertingScore".to_string()
}
}
#[derive(Debug, Clone, Serialize)]
struct MonitoringAlertingStackInterpret {
score: MonitoringAlertingStackScore,
}
impl MonitoringAlertingStackInterpret {
async fn build_kube_prometheus_helm_chart_config(&self) -> KubePrometheusConfig {
let mut config = KubePrometheusConfig::new();
if let Some(ns) = &self.score.namespace {
config.namespace = ns.clone();
}
config.alert_channel = self.score.alert_channel.clone();
config
}
async fn deploy_kube_prometheus_helm_chart_score<T: Topology + HelmCommand>(
&self,
inventory: &Inventory,
topology: &T,
config: &KubePrometheusConfig,
) -> Result<Outcome, InterpretError> {
let helm_chart = kube_prometheus_helm_chart_score(config);
helm_chart
.create_interpret()
.execute(inventory, topology)
.await
}
async fn deploy_alert_channel_service<T: Topology + HelmCommand>(
&self,
inventory: &Inventory,
topology: &T,
config: &KubePrometheusConfig,
) -> Result<Outcome, InterpretError> {
let mut outcomes = vec![];
for channel in &self.score.alert_channel {
let outcome = match channel {
AlertChannel::Discord { .. } => {
discord_alert_manager_score(config)
.create_interpret()
.execute(inventory, topology)
.await
}
AlertChannel::Slack { .. } => Ok(Outcome::success(
"No extra configs for slack alerting".to_string(),
)),
AlertChannel::Smpt { .. } => {
todo!()
}
};
outcomes.push(outcome);
}
for result in outcomes {
result?;
}
Ok(Outcome::success("All alert channels deployed".to_string()))
}
#[derive(Debug)]
struct MonitoringAlertingInterpret {
score: MonitoringAlertingScore,
}
#[async_trait]
impl<T: Topology + HelmCommand> Interpret<T> for MonitoringAlertingStackInterpret {
impl<T: Topology + HelmCommand + Monitor> Interpret<T> for MonitoringAlertingInterpret {
async fn execute(
&self,
inventory: &Inventory,
topology: &T,
) -> Result<Outcome, InterpretError> {
let config = self.build_kube_prometheus_helm_chart_config().await;
info!("Built kube prometheus config");
info!("Installing kube prometheus chart");
self.deploy_kube_prometheus_helm_chart_score(inventory, topology, &config)
.await?;
info!("Installing alert channel service");
self.deploy_alert_channel_service(inventory, topology, &config)
.await?;
Ok(Outcome::success(format!(
"succesfully deployed monitoring and alerting stack"
)))
topology
.provision_monitor(
inventory,
topology,
self.score.alert_channel_configs.clone(),
)
.await
}
fn get_name(&self) -> InterpretName {

View File

@@ -0,0 +1,67 @@
use async_trait::async_trait;
use serde::Serialize;
use crate::{
data::{Id, Version},
interpret::{Interpret, InterpretError, InterpretName, InterpretStatus, Outcome},
inventory::Inventory,
score::Score,
topology::{
Topology,
tenant::{TenantConfig, TenantManager},
},
};
#[derive(Debug, Serialize, Clone)]
pub struct TenantScore {
config: TenantConfig,
}
impl<T: Topology + TenantManager> Score<T> for TenantScore {
fn create_interpret(&self) -> Box<dyn crate::interpret::Interpret<T>> {
Box::new(TenantInterpret {
tenant_config: self.config.clone(),
})
}
fn name(&self) -> String {
format!("{} TenantScore", self.config.name)
}
}
#[derive(Debug)]
pub struct TenantInterpret {
tenant_config: TenantConfig,
}
#[async_trait]
impl<T: Topology + TenantManager> Interpret<T> for TenantInterpret {
async fn execute(
&self,
_inventory: &Inventory,
topology: &T,
) -> Result<Outcome, InterpretError> {
topology.provision_tenant(&self.tenant_config).await?;
Ok(Outcome::success(format!(
"Successfully provisioned tenant {} with id {}",
self.tenant_config.name, self.tenant_config.id
)))
}
fn get_name(&self) -> InterpretName {
InterpretName::TenantInterpret
}
fn get_version(&self) -> Version {
todo!()
}
fn get_status(&self) -> InterpretStatus {
todo!()
}
fn get_children(&self) -> Vec<Id> {
todo!()
}
}