Compare commits

...

15 Commits

Author SHA1 Message Date
5c628b37b7 wip:added alertreceiver and alert rules which are built and added to the yaml before deploying prometheus, added a few dashboards to grafana. Trying to fix prometheus-server clusterrole/role/serviceaccount so that it can discover targets and kubernetes in a namespaced release where it does not have access to clusterrole 2025-07-09 15:09:38 -04:00
31661aaaf1 fix: prometheus deploys as namespaced resource without prometheus-server clusterrole and clusterrolebinding 2025-07-07 14:33:09 -04:00
2c208df143 fix: deploys by default in the application name namespace 2025-07-07 13:24:21 -04:00
1a6d72dc17 fix: uncommented example
2025-07-04 16:30:13 -04:00
df9e21807e fix: git conflict
2025-07-04 16:22:39 -04:00
b1bf4fd4d5 fix: cargo fmt
2025-07-04 16:14:47 -04:00
f702ecd8c9 fix: deploys a lighter weight prometheus and grafana which is limited to their respective namespaces 2025-07-04 16:13:41 -04:00
a19b52e690 fix: properly append YAML in correct places in argoapplication (#80)
Co-authored-by: tahahawa <tahahawa@gmail.com>
Reviewed-on: #80
2025-07-04 15:32:02 +00:00
b73f2e76d0 Merge pull request 'refact: Make RustWebappScore generic, it is now Application score and takes an application and list of features to attach to the application' (#81) from refact/application into master
Reviewed-on: #81
Reviewed-by: wjro <wrolleman@nationtech.io>
2025-07-04 14:31:38 +00:00
b4534c6ee0 refact: Make RustWebappScore generic, it is now Application score and takes an application and list of features to attach to the application
2025-07-04 10:27:16 -04:00
6149249a6c feat: create Argo interpret and kube client apply_yaml to install Argo Applications. Very messy implementation though, must be refactored/improved
2025-07-04 09:49:43 -04:00
d9935e20cb Merge pull request 'feat: harmony now defaults to using local k3d cluster. Also created OCICompliant: Application trait to make building images cleaner' (#76) from feat/oci into master
Reviewed-on: #76
2025-07-03 19:37:46 +00:00
7b0f3b79b1 Merge remote-tracking branch 'origin/master' into feat/oci
2025-07-03 15:36:52 -04:00
e6612245a5 Merge pull request 'feat/cd/localdeploymentdemo' (#79) from feat/cd/localdeploymentdemo into feat/oci
Reviewed-on: #79
2025-07-03 19:31:45 +00:00
b4f5b91a57 feat: WIP argocd_score (#78)
Co-authored-by: tahahawa <tahahawa@gmail.com>
Reviewed-on: #78
Reviewed-by: johnride <jg@nationtech.io>
Co-authored-by: Taha Hawa <taha@taha.dev>
Co-committed-by: Taha Hawa <taha@taha.dev>
2025-07-03 19:30:00 +00:00
38 changed files with 2374 additions and 91 deletions

View File

@@ -3,8 +3,18 @@ use std::{path::PathBuf, sync::Arc};
 use harmony::{
     inventory::Inventory,
     maestro::Maestro,
-    modules::application::{
-        RustWebFramework, RustWebapp, RustWebappScore, features::ContinuousDelivery,
+    modules::{
+        application::{
+            ApplicationScore, RustWebFramework, RustWebapp,
+            features::{ContinuousDelivery, PrometheusMonitoring},
+        },
+        monitoring::{
+            alert_channel::discord_alert_channel::DiscordWebhook,
+            alert_rule::prometheus_alert_rule::AlertManagerRuleGroup,
+        },
+        prometheus::alerts::k8s::{
+            pod::pod_in_failed_state, pvc::high_pvc_fill_rate_over_two_days,
+        },
     },
     topology::{K8sAnywhereTopology, Url},
 };
@@ -12,18 +22,34 @@ use harmony::{
 #[tokio::main]
 async fn main() {
     env_logger::init();
-    let application = RustWebapp {
+    let application = Arc::new(RustWebapp {
         name: "harmony-example-rust-webapp".to_string(),
+        domain: Url::Url(url::Url::parse("https://rustapp.harmony.example.com").unwrap()),
         project_root: PathBuf::from("./examples/rust/webapp"),
         framework: Some(RustWebFramework::Leptos),
-    };
+    });
-    // TODO RustWebappScore should simply take a RustWebApp as config
-    let app = RustWebappScore {
-        name: "Example Rust Webapp".to_string(),
-        domain: Url::Url(url::Url::parse("https://rustapp.harmony.example.com").unwrap()),
-        features: vec![Box::new(ContinuousDelivery {
-            application: Arc::new(application.clone()),
-        })],
+    let pod_failed = pod_in_failed_state();
+    let pod_failed_2 = pod_in_failed_state();
+    let pod_failed_3 = pod_in_failed_state();
+    let additional_rules = AlertManagerRuleGroup::new("pod-alerts", vec![pod_failed]);
+    let additional_rules_2 = AlertManagerRuleGroup::new("pod-alerts-2", vec![pod_failed_2, pod_failed_3]);
+    let app = ApplicationScore {
+        features: vec![
+            //Box::new(ContinuousDelivery {
+            //    application: application.clone(),
+            //}),
+            Box::new(PrometheusMonitoring {
+                application: application.clone(),
+                alert_receivers: vec![Box::new(DiscordWebhook {
+                    name: "dummy-discord".to_string(),
+                    url: Url::Url(url::Url::parse("https://discord.doesnt.exist.com").unwrap()),
+                })],
+                alert_rules: vec![Box::new(additional_rules), Box::new(additional_rules_2)],
+            }),
+            // TODO add monitoring, backups, multisite ha, etc
+        ],
         application,
     };

View File

@@ -13,7 +13,7 @@ reqwest = { version = "0.11", features = ["blocking", "json"] }
russh = "0.45.0" russh = "0.45.0"
rust-ipmi = "0.1.1" rust-ipmi = "0.1.1"
semver = "1.0.23" semver = "1.0.23"
serde = { version = "1.0.209", features = ["derive"] } serde = { version = "1.0.209", features = ["derive", "rc"] }
serde_json = "1.0.127" serde_json = "1.0.127"
tokio.workspace = true tokio.workspace = true
derive-new.workspace = true derive-new.workspace = true

View File

@@ -22,6 +22,7 @@ pub enum InterpretName {
     K3dInstallation,
     TenantInterpret,
     Application,
+    ArgoCD,
 }

 impl std::fmt::Display for InterpretName {
@@ -39,6 +40,7 @@ impl std::fmt::Display for InterpretName {
             InterpretName::K3dInstallation => f.write_str("K3dInstallation"),
             InterpretName::TenantInterpret => f.write_str("Tenant"),
             InterpretName::Application => f.write_str("Application"),
+            InterpretName::ArgoCD => f.write_str("ArgoCD"),
         }
     }
 }

View File

@@ -0,0 +1,59 @@
////////////////////
/// Working idea (design sketch only, nothing below is wired up yet)
///
use std::any::TypeId;

trait ScoreWithDep<T> {
    fn create_interpret(&self) -> Box<dyn Interpret<T>>;
    fn name(&self) -> String;
    // Force T to impl Installer<TypeId> or something like that
    fn get_dependencies(&self) -> Vec<TypeId>;
}

struct PrometheusAlertScore;

impl<T> ScoreWithDep<T> for PrometheusAlertScore {
    fn create_interpret(&self) -> Box<dyn Interpret<T>> {
        todo!()
    }
    fn name(&self) -> String {
        todo!()
    }
    fn get_dependencies(&self) -> Vec<TypeId> {
        // We have to find a way to constrain this so that, at compile time, we are only
        // allowed to return TypeIds for types which can be installed by T.
        //
        // This means, for example, that T must implement HelmCommand if the
        // `impl<T: HelmCommand> Installable<T> for KubePrometheus` calls for HelmCommand.
        vec![TypeId::of::<KubePrometheus>()]
    }
}

trait Installable {}

struct KubePrometheus;
impl Installable for KubePrometheus {}

struct Maestro<T> {
    topology: T,
}

impl<T> Maestro<T> {
    // Assumes T exposes ensure_dependency_ready, as TopologyWithDep does below.
    fn execute_score(&self, score: &dyn ScoreWithDep<T>) {
        score.get_dependencies().iter().for_each(|dep| {
            self.topology.ensure_dependency_ready(*dep);
        });
    }
}

struct TopologyWithDep {}

impl TopologyWithDep {
    fn ensure_dependency_ready(&self, type_id: TypeId) -> Result<(), String> {
        // Look up the installer registered for `type_id` and run it; left as a
        // todo in this sketch.
        todo!()
    }
}
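
The constraint the sketch asks for ("only TypeIds for types which can be installed by T") could plausibly be expressed with a generic marker trait instead of runtime TypeIds. A hypothetical sketch, not part of this change, with all names invented:

use std::marker::PhantomData;

/// Hypothetical: a topology that knows how to install dependency `D`.
trait Installs<D> {
    fn ensure_ready(&self) -> Result<(), String>;
}

/// Hypothetical marker type for the kube-prometheus dependency.
struct KubePrometheusDep;

/// Constructing this score for a topology `T` fails to compile unless
/// `T: Installs<KubePrometheusDep>`, which is the guarantee the comment asks for.
struct PrometheusAlertScore<T: Installs<KubePrometheusDep>> {
    _topology: PhantomData<T>,
}

impl<T: Installs<KubePrometheusDep>> PrometheusAlertScore<T> {
    fn ensure_dependencies(topology: &T) -> Result<(), String> {
        topology.ensure_ready()
    }
}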

View File

@@ -4,8 +4,6 @@ use k8s_openapi::{
     ClusterResourceScope, NamespaceResourceScope,
     api::{apps::v1::Deployment, core::v1::Pod},
 };
-use kube::runtime::conditions;
-use kube::runtime::wait::await_condition;
 use kube::{
     Client, Config, Error, Resource,
     api::{Api, AttachParams, ListParams, Patch, PatchParams, ResourceExt},
@@ -13,6 +11,11 @@ use kube::{
     core::ErrorResponse,
     runtime::reflector::Lookup,
 };
+use kube::{api::DynamicObject, runtime::conditions};
+use kube::{
+    api::{ApiResource, GroupVersionKind},
+    runtime::wait::await_condition,
+};
 use log::{debug, error, trace};
 use serde::de::DeserializeOwned;
 use similar::{DiffableStr, TextDiff};
@@ -239,6 +242,54 @@ impl K8sClient {
         Ok(result)
     }
+
+    pub async fn apply_yaml_many(
+        &self,
+        yaml: &Vec<serde_yaml::Value>,
+        ns: Option<&str>,
+    ) -> Result<(), Error> {
+        for y in yaml.iter() {
+            self.apply_yaml(y, ns).await?;
+        }
+        Ok(())
+    }
+
+    pub async fn apply_yaml(
+        &self,
+        yaml: &serde_yaml::Value,
+        ns: Option<&str>,
+    ) -> Result<(), Error> {
+        let obj: DynamicObject = serde_yaml::from_value(yaml.clone()).expect("TODO do not unwrap");
+        let name = obj.metadata.name.as_ref().expect("YAML must have a name");
+        let namespace = obj
+            .metadata
+            .namespace
+            .as_ref()
+            .expect("YAML must have a namespace");
+
+        // Define the API resource type. For now the GVK is hardcoded to Argo's
+        // Application CRD; the plural name 'applications' comes from the CRD definition.
+        error!("This only supports the hardcoded argocd Application GVK, very wrong");
+        let gvk = GroupVersionKind::gvk("argoproj.io", "v1alpha1", "Application");
+        let api_resource = ApiResource::from_gvk_with_plural(&gvk, "applications");
+
+        // Create a dynamic API client for this resource type.
+        let api: Api<DynamicObject> =
+            Api::namespaced_with(self.client.clone(), namespace, &api_resource);
+
+        // Apply the object with Server-Side Apply: this creates the resource
+        // if it doesn't exist, or updates it if it does.
+        println!(
+            "Applying Argo Application '{}' in namespace '{}'...",
+            name, namespace
+        );
+        let patch_params = PatchParams::apply("harmony"); // Use a unique field manager name
+        let result = api.patch(name, &patch_params, &Patch::Apply(&obj)).await?;
+
+        println!("Successfully applied '{}'.", result.name_any());
+
+        Ok(())
+    }
+
     pub(crate) async fn from_kubeconfig(path: &str) -> Option<K8sClient> {
         let k = match Kubeconfig::read_from(path) {
             Ok(k) => k,
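
The error! above flags the hardcoded GVK; one way to generalize it (an untested sketch, not part of this change) is to read the GVK from the object's own TypeMeta. Note that ApiResource::from_gvk only guesses the plural name, so an API-discovery lookup would be the robust fix for arbitrary CRDs:

fn gvk_of(obj: &DynamicObject) -> Option<GroupVersionKind> {
    let types = obj.types.as_ref()?;
    let (group, version) = match types.api_version.split_once('/') {
        Some((group, version)) => (group, version),
        // Core-group resources such as "v1" Pods have no '/' in apiVersion.
        None => ("", types.api_version.as_str()),
    };
    Some(GroupVersionKind::gvk(group, version, &types.kind))
}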

View File

@@ -246,7 +246,7 @@ pub struct K8sAnywhereConfig {
     ///
     /// default: true
     pub use_local_k3d: bool,
-    harmony_profile: String,
+    pub harmony_profile: String,
 }

 impl K8sAnywhereConfig {
impl K8sAnywhereConfig { impl K8sAnywhereConfig {

View File

@@ -1,3 +1,5 @@
+use std::any::Any;
+
 use async_trait::async_trait;
 use log::debug;
@@ -9,7 +11,7 @@ use crate::{
 };

 #[async_trait]
-pub trait AlertSender: Send + Sync + std::fmt::Debug {
+pub trait AlertSender: Any + Send + Sync + std::fmt::Debug {
     fn name(&self) -> String;
 }

View File

@@ -3,7 +3,6 @@ use serde::Serialize;
 use crate::topology::Topology;

-use super::Application;
-
 /// An ApplicationFeature provided by harmony, such as Backups, Monitoring, MultisiteAvailability,
 /// ContinuousIntegration, ContinuousDelivery
 #[async_trait]

View File

@@ -0,0 +1,357 @@
use std::{backtrace, collections::HashMap};
use k8s_openapi::{Metadata, NamespaceResourceScope, Resource};
use log::debug;
use serde::Serialize;
use serde_yaml::Value;
use url::Url;
use crate::modules::application::features::CDApplicationConfig;
#[derive(Clone, Debug, Serialize)]
#[serde(rename_all = "camelCase")]
pub struct Helm {
pub pass_credentials: Option<bool>,
pub parameters: Vec<Value>,
pub file_parameters: Vec<Value>,
pub release_name: Option<String>,
pub value_files: Vec<String>,
pub ignore_missing_value_files: Option<bool>,
pub values: Option<String>,
pub values_object: Option<Value>,
pub skip_crds: Option<bool>,
pub skip_schema_validation: Option<bool>,
pub version: Option<String>,
pub kube_version: Option<String>,
pub api_versions: Vec<String>,
pub namespace: Option<String>,
}
#[derive(Clone, Debug, Serialize)]
#[serde(rename_all = "camelCase")]
pub struct Source {
pub repo_url: Url,
pub target_revision: Option<String>,
pub chart: String,
pub helm: Helm,
}
#[derive(Clone, Debug, Serialize)]
#[serde(rename_all = "camelCase")]
pub struct Automated {
pub prune: bool,
pub self_heal: bool,
pub allow_empty: bool,
}
#[derive(Clone, Debug, Serialize)]
#[serde(rename_all = "camelCase")]
pub struct Backoff {
pub duration: String,
pub factor: u32,
pub max_duration: String,
}
#[derive(Clone, Debug, Serialize)]
#[serde(rename_all = "camelCase")]
pub struct Retry {
pub limit: u32,
pub backoff: Backoff,
}
#[derive(Clone, Debug, Serialize)]
#[serde(rename_all = "camelCase")]
pub struct SyncPolicy {
pub automated: Automated,
pub sync_options: Vec<String>,
pub retry: Retry,
}
#[derive(Clone, Debug, Serialize)]
#[serde(rename_all = "camelCase")]
pub struct ArgoApplication {
pub name: String,
pub namespace: Option<String>,
pub project: String,
pub source: Source,
pub sync_policy: SyncPolicy,
pub revision_history_limit: u32,
}
impl Default for ArgoApplication {
fn default() -> Self {
Self {
name: Default::default(),
namespace: Default::default(),
project: Default::default(),
source: Source {
repo_url: Url::parse("http://asdf").expect("Couldn't parse to URL"),
target_revision: None,
chart: "".to_string(),
helm: Helm {
pass_credentials: None,
parameters: vec![],
file_parameters: vec![],
release_name: None,
value_files: vec![],
ignore_missing_value_files: None,
values: None,
values_object: None,
skip_crds: None,
skip_schema_validation: None,
version: None,
kube_version: None,
api_versions: vec![],
namespace: None,
},
},
sync_policy: SyncPolicy {
automated: Automated {
prune: false,
self_heal: false,
allow_empty: false,
},
sync_options: vec![],
retry: Retry {
limit: 5,
backoff: Backoff {
duration: "5s".to_string(),
factor: 2,
max_duration: "3m".to_string(),
},
},
},
revision_history_limit: 10,
}
}
}
impl From<CDApplicationConfig> for ArgoApplication {
fn from(value: CDApplicationConfig) -> Self {
Self {
name: value.name,
namespace: Some(value.namespace),
project: "default".to_string(),
source: Source {
repo_url: Url::parse(value.helm_chart_repo_url.to_string().as_str())
.expect("couldn't convert to URL"),
target_revision: None,
chart: value.helm_chart_name,
helm: Helm {
pass_credentials: None,
parameters: vec![],
file_parameters: vec![],
release_name: None,
value_files: vec![],
ignore_missing_value_files: None,
values: None,
values_object: Some(value.values_overrides),
skip_crds: None,
skip_schema_validation: None,
version: None,
kube_version: None,
api_versions: vec![],
namespace: None,
},
},
sync_policy: SyncPolicy {
automated: Automated {
prune: false,
self_heal: false,
allow_empty: true,
},
sync_options: vec![],
retry: Retry {
limit: 5,
backoff: Backoff {
duration: "5s".to_string(),
factor: 2,
max_duration: "3m".to_string(),
},
},
},
..Self::default()
}
}
}
impl ArgoApplication {
pub fn to_yaml(&self) -> serde_yaml::Value {
        let name = &self.name;
        let namespace = self.namespace.as_deref().unwrap_or("argocd");
        let project = &self.project;

        let yaml_str = format!(
            r#"
apiVersion: argoproj.io/v1alpha1
kind: Application
metadata:
  name: {name}
  # You'll usually want to add your resources to the argocd namespace.
  namespace: {namespace}
spec:
  # The project the application belongs to.
  project: {project}
  # Destination cluster and namespace to deploy the application
  destination:
    # cluster API URL
    server: https://kubernetes.default.svc
    # or cluster name
    # name: in-cluster
    # The namespace will only be set for namespace-scoped resources that have not set a value for .metadata.namespace
    namespace: {namespace}
"#
        );
let mut yaml_value: Value =
serde_yaml::from_str(yaml_str.as_str()).expect("couldn't parse string to YAML");
let mut spec = yaml_value
.get_mut("spec")
.expect("couldn't get spec from yaml")
.as_mapping_mut()
.expect("couldn't unwrap spec as mutable mapping");
let source =
serde_yaml::to_value(&self.source).expect("couldn't serialize source to value");
let sync_policy = serde_yaml::to_value(&self.sync_policy)
.expect("couldn't serialize sync_policy to value");
let revision_history_limit = serde_yaml::to_value(&self.revision_history_limit)
.expect("couldn't serialize revision_history_limit to value");
spec.insert(
serde_yaml::to_value("source").expect("string to value failed"),
source,
);
spec.insert(
serde_yaml::to_value("syncPolicy").expect("string to value failed"),
sync_policy,
);
spec.insert(
serde_yaml::to_value("revisionHistoryLimit")
.expect("couldn't convert str to yaml value"),
revision_history_limit,
);
debug!("spec: {}", serde_yaml::to_string(spec).unwrap());
debug!(
"entire yaml_value: {}",
serde_yaml::to_string(&yaml_value).unwrap()
);
yaml_value
}
}
#[cfg(test)]
mod tests {
use url::Url;
use crate::modules::application::features::{
ArgoApplication, Automated, Backoff, Helm, Retry, Source, SyncPolicy,
};
#[test]
fn test_argo_application_to_yaml_happy_path() {
let app = ArgoApplication {
name: "test".to_string(),
namespace: Some("test-ns".to_string()),
project: "test-project".to_string(),
source: Source {
repo_url: Url::parse("http://test").unwrap(),
target_revision: None,
chart: "test-chart".to_string(),
helm: Helm {
pass_credentials: None,
parameters: vec![],
file_parameters: vec![],
release_name: Some("test-release-neame".to_string()),
value_files: vec![],
ignore_missing_value_files: None,
values: None,
values_object: None,
skip_crds: None,
skip_schema_validation: None,
version: None,
kube_version: None,
api_versions: vec![],
namespace: None,
},
},
sync_policy: SyncPolicy {
automated: Automated {
prune: false,
self_heal: false,
allow_empty: false,
},
sync_options: vec![],
retry: Retry {
limit: 5,
backoff: Backoff {
duration: "5s".to_string(),
factor: 2,
max_duration: "3m".to_string(),
},
},
},
revision_history_limit: 10,
};
        let expected_yaml_output = r#"apiVersion: argoproj.io/v1alpha1
kind: Application
metadata:
  name: test
  namespace: test-ns
spec:
  project: test-project
  destination:
    server: https://kubernetes.default.svc
    namespace: test-ns
  source:
    repoUrl: http://test/
    targetRevision: null
    chart: test-chart
    helm:
      passCredentials: null
      parameters: []
      fileParameters: []
      releaseName: test-release-name
      valueFiles: []
      ignoreMissingValueFiles: null
      values: null
      valuesObject: null
      skipCrds: null
      skipSchemaValidation: null
      version: null
      kubeVersion: null
      apiVersions: []
      namespace: null
  syncPolicy:
    automated:
      prune: false
      selfHeal: false
      allowEmpty: false
    syncOptions: []
    retry:
      limit: 5
      backoff:
        duration: 5s
        factor: 2
        maxDuration: 3m
  revisionHistoryLimit: 10"#;
assert_eq!(
expected_yaml_output.trim(),
serde_yaml::to_string(&app.clone().to_yaml())
.unwrap()
.trim()
);
}
}
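
Taken together with K8sClient::apply_yaml from earlier in this diff, usage could look like the following sketch (inside an async context; `client` is an assumed, already-connected K8sClient):

let app = ArgoApplication {
    name: "demo".to_string(),
    namespace: Some("argocd".to_string()),
    ..ArgoApplication::default()
};
// to_yaml() emits apiVersion/kind/metadata, which is what apply_yaml expects.
client.apply_yaml(&app.to_yaml(), None).await?;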

View File

@@ -2,7 +2,7 @@ use std::{io::Write, process::Command, sync::Arc};
 use async_trait::async_trait;
 use log::{error, info};
-use serde_json::Value;
+use serde_yaml::Value;
 use tempfile::NamedTempFile;

 use crate::{
@@ -10,11 +10,14 @@ use crate::{
     data::Version,
     inventory::Inventory,
     modules::{
-        application::{Application, ApplicationFeature, HelmPackage, OCICompliant},
+        application::{
+            Application, ApplicationFeature, HelmPackage, OCICompliant,
+            features::{ArgoApplication, ArgoHelmScore},
+        },
         helm::chart::HelmChartScore,
     },
     score::Score,
-    topology::{DeploymentTarget, HelmCommand, MultiTargetTopology, Topology, Url},
+    topology::{DeploymentTarget, HelmCommand, K8sclient, MultiTargetTopology, Topology, Url},
 };
 /// ContinuousDelivery in Harmony provides this functionality :
@@ -139,7 +142,7 @@ impl<A: OCICompliant + HelmPackage> ContinuousDelivery<A> {
 #[async_trait]
 impl<
     A: OCICompliant + HelmPackage + Clone + 'static,
-    T: Topology + HelmCommand + MultiTargetTopology + 'static,
+    T: Topology + HelmCommand + MultiTargetTopology + K8sclient + 'static,
 > ApplicationFeature<T> for ContinuousDelivery<A>
 {
     async fn ensure_installed(&self, topology: &T) -> Result<(), String> {
@@ -150,12 +153,16 @@ impl<
"TODO reverse helm chart packaging and docker image build. I put helm package first for faster iterations" "TODO reverse helm chart packaging and docker image build. I put helm package first for faster iterations"
); );
// TODO Write CI/CD workflow files
// we can autotedect the CI type using the remote url (default to github action for github
// url, etc..)
// Or ask for it when unknown
let helm_chart = self.application.build_push_helm_package(&image).await?; let helm_chart = self.application.build_push_helm_package(&image).await?;
info!("Pushed new helm chart {helm_chart}"); info!("Pushed new helm chart {helm_chart}");
// let image = self.application.build_push_oci_image().await?; let image = self.application.build_push_oci_image().await?;
// info!("Pushed new docker image {image}"); info!("Pushed new docker image {image}");
error!("uncomment above");
info!("Installing ContinuousDelivery feature"); info!("Installing ContinuousDelivery feature");
// TODO this is a temporary hack for demo purposes, the deployment target should be driven // TODO this is a temporary hack for demo purposes, the deployment target should be driven
@@ -178,29 +185,33 @@ impl<
             }
             target => {
                 info!("Deploying to target {target:?}");
-                let cd_server = HelmChartScore {
-                    namespace: todo!(
-                        "ArgoCD Helm chart with proper understanding of Tenant, see how Will did it for Monitoring for now"
-                    ),
-                    release_name: todo!("argocd helm chart whatever"),
-                    chart_name: todo!(),
-                    chart_version: todo!(),
-                    values_overrides: todo!(),
-                    values_yaml: todo!(),
-                    create_namespace: todo!(),
-                    install_only: todo!(),
-                    repository: todo!(),
+                let score = ArgoHelmScore {
+                    namespace: "harmonydemo-staging".to_string(),
+                    openshift: true,
+                    domain: "argo.harmonydemo.apps.st.mcd".to_string(),
+                    argo_apps: vec![ArgoApplication::from(CDApplicationConfig {
+                        // helm pull oci://hub.nationtech.io/harmony/harmony-example-rust-webapp-chart/harmony-example-rust-webapp-chart --version 0.1.0
+                        version: Version::from("0.1.0").unwrap(),
+                        helm_chart_repo_url: Url::Url(url::Url::parse("oci://hub.nationtech.io/harmony/harmony-example-rust-webapp-chart/harmony-example-rust-webapp-chart").unwrap()),
+                        helm_chart_name: "harmony-example-rust-webapp-chart".to_string(),
+                        values_overrides: Value::Null,
+                        name: "harmony-demo-rust-webapp".to_string(),
+                        namespace: "harmonydemo-staging".to_string(),
+                    })],
                 };
-                let interpret = cd_server.create_interpret();
-                interpret.execute(&Inventory::empty(), topology);
+                score
+                    .create_interpret()
+                    .execute(&Inventory::empty(), topology)
+                    .await
+                    .unwrap();
             }
         };
todo!("1. Create ArgoCD score that installs argo using helm chart, see if Taha's already done it todo!("1. Create ArgoCD score that installs argo using helm chart, see if Taha's already done it
- [X] Package app (docker image, helm chart) - [X] Package app (docker image, helm chart)
- [X] Push to registry - [X] Push to registry
- [ ] Push only if staging or prod - [X] Push only if staging or prod
- [ ] Deploy to local k3d when target is local - [X] Deploy to local k3d when target is local
- [ ] Poke Argo - [ ] Poke Argo
- [ ] Ensure app is up") - [ ] Ensure app is up")
} }
@@ -212,9 +223,12 @@ impl<
 /// For now this is entirely bound to K8s / ArgoCD, will have to be revisited when we support
 /// more CD systems
 pub struct CDApplicationConfig {
-    version: Version,
-    helm_chart_url: Url,
-    values_overrides: Value,
+    pub version: Version,
+    pub helm_chart_repo_url: Url,
+    pub helm_chart_name: String,
+    pub values_overrides: Value,
+    pub name: String,
+    pub namespace: String,
 }

 pub trait ContinuousDeliveryApplication {
View File

@@ -2,7 +2,7 @@ use async_trait::async_trait;
 use log::info;

 use crate::{
-    modules::application::{Application, ApplicationFeature},
+    modules::application::ApplicationFeature,
     topology::{K8sclient, Topology},
 };

File diff suppressed because it is too large

View File

@@ -6,3 +6,9 @@ pub use monitoring::*;
 mod continuous_delivery;
 pub use continuous_delivery::*;
+mod helm_argocd_score;
+pub use helm_argocd_score::*;
+mod argo_types;
+pub use argo_types::*;

View File

@@ -1,19 +1,53 @@
+use std::sync::Arc;
+
 use async_trait::async_trait;
 use log::info;

 use crate::{
-    modules::application::{Application, ApplicationFeature},
-    topology::{HelmCommand, Topology},
+    inventory::Inventory,
+    modules::{
+        application::{Application, ApplicationFeature},
+        monitoring::{
+            application_monitoring::k8s_application_monitoring_score::ApplicationPrometheusMonitoringScore,
+            kube_prometheus::types::{NamespaceSelector, ServiceMonitor},
+            prometheus::prometheus::Prometheus,
+        },
+    },
+    score::Score,
+    topology::{oberservability::monitoring::{AlertReceiver, AlertRule, AlertSender}, tenant::TenantManager, HelmCommand, K8sclient, Topology},
 };

-#[derive(Debug, Default, Clone)]
-pub struct Monitoring {}
+#[derive(Debug, Clone)]
+pub struct PrometheusMonitoring {
+    pub application: Arc<dyn Application>,
+    pub alert_receivers: Vec<Box<dyn AlertReceiver<Prometheus>>>,
+    pub alert_rules: Vec<Box<dyn AlertRule<Prometheus>>>,
+}

 #[async_trait]
-impl<T: Topology + HelmCommand + 'static> ApplicationFeature<T> for Monitoring {
-    async fn ensure_installed(&self, _topology: &T) -> Result<(), String> {
+impl<T: Topology + HelmCommand + 'static + TenantManager> ApplicationFeature<T> for PrometheusMonitoring {
+    async fn ensure_installed(&self, topology: &T) -> Result<(), String> {
         info!("Ensuring monitoring is available for application");
-        todo!("create and execute k8s prometheus score, depends on Will's work")
+        let ns = self.application.name();
+
+        let mut service_monitor = ServiceMonitor::default();
+        service_monitor.name = ns.clone();
+        service_monitor.namespace = ns.clone();
+        service_monitor.namespace_selector = Some(NamespaceSelector {
+            any: true,
+            match_names: vec![ns.clone()],
+        });
+
+        let alerting_score = ApplicationPrometheusMonitoringScore {
+            namespace: ns,
+            receivers: self.alert_receivers.clone(),
+            rules: self.alert_rules.clone(),
+            service_monitors: vec![service_monitor],
+        };
+        alerting_score
+            .create_interpret()
+            .execute(&Inventory::empty(), topology)
+            .await
+            .unwrap();
+        Ok(())
     }

     fn name(&self) -> String {
         "Monitoring".to_string()

View File

@@ -23,13 +23,13 @@ pub trait Application: std::fmt::Debug + Send + Sync {
 }

 #[derive(Debug)]
-pub struct ApplicationInterpret<T: Topology + std::fmt::Debug> {
+pub struct ApplicationInterpret<A: Application, T: Topology + std::fmt::Debug> {
     features: Vec<Box<dyn ApplicationFeature<T>>>,
-    application: Arc<Box<dyn Application>>,
+    application: Arc<A>,
 }

 #[async_trait]
-impl<T: Topology + std::fmt::Debug> Interpret<T> for ApplicationInterpret<T> {
+impl<A: Application, T: Topology + std::fmt::Debug> Interpret<T> for ApplicationInterpret<A, T> {
     async fn execute(
         &self,
         _inventory: &Inventory,

View File

@@ -19,23 +19,30 @@ use crate::{
 use super::{Application, ApplicationFeature, ApplicationInterpret, HelmPackage, OCICompliant};

 #[derive(Debug, Serialize, Clone)]
-pub struct RustWebappScore<T: Topology + Clone + Serialize> {
-    pub name: String,
-    pub domain: Url,
+pub struct ApplicationScore<A: Application + Serialize, T: Topology + Clone + Serialize>
+where
+    Arc<A>: Serialize + Clone,
+{
     pub features: Vec<Box<dyn ApplicationFeature<T>>>,
-    pub application: RustWebapp,
+    pub application: Arc<A>,
 }

-impl<T: Topology + std::fmt::Debug + Clone + Serialize + 'static> Score<T> for RustWebappScore<T> {
+impl<
+    A: Application + Serialize + Clone + 'static,
+    T: Topology + std::fmt::Debug + Clone + Serialize + 'static,
+> Score<T> for ApplicationScore<A, T>
+where
+    Arc<A>: Serialize,
+{
     fn create_interpret(&self) -> Box<dyn crate::interpret::Interpret<T>> {
         Box::new(ApplicationInterpret {
             features: self.features.clone(),
-            application: Arc::new(Box::new(self.application.clone())),
+            application: self.application.clone(),
         })
     }

     fn name(&self) -> String {
-        format!("{}-RustWebapp", self.name)
+        format!("Application: {}", self.application.name())
     }
 }
@@ -47,6 +54,7 @@ pub enum RustWebFramework {
 #[derive(Debug, Clone, Serialize)]
 pub struct RustWebapp {
     pub name: String,
+    pub domain: Url,
     /// The path to the root of the Rust project to be containerized.
     pub project_root: PathBuf,
     pub framework: Option<RustWebFramework>,

View File

@@ -220,7 +220,6 @@ impl<T: Topology + HelmCommand> Interpret<T> for HelmChartInterpret {
             yaml_path,
             Some(&helm_options),
         );
-
         let status = match res {
             Ok(status) => status,
             Err(err) => return Err(InterpretError::new(err.to_string())),

View File

@@ -1,14 +1,22 @@
+use std::any::Any;
+
 use async_trait::async_trait;
 use serde::Serialize;
 use serde_yaml::{Mapping, Value};

 use crate::{
     interpret::{InterpretError, Outcome},
-    modules::monitoring::kube_prometheus::{
-        prometheus::{Prometheus, PrometheusReceiver},
-        types::{AlertChannelConfig, AlertManagerChannelConfig},
+    modules::monitoring::{
+        kube_prometheus::{
+            prometheus::{KubePrometheus, KubePrometheusReceiver},
+            types::{AlertChannelConfig, AlertManagerChannelConfig},
+        },
+        prometheus::prometheus::{Prometheus, PrometheusReceiver},
+    },
+    topology::{
+        Url,
+        oberservability::monitoring::{AlertReceiver, AlertSender},
     },
-    topology::{Url, oberservability::monitoring::AlertReceiver},
 };
 #[derive(Debug, Clone, Serialize)]
@@ -37,6 +45,26 @@ impl PrometheusReceiver for DiscordWebhook {
     }
 }

+#[async_trait]
+impl AlertReceiver<KubePrometheus> for DiscordWebhook {
+    async fn install(&self, sender: &KubePrometheus) -> Result<Outcome, InterpretError> {
+        sender.install_receiver(self).await
+    }
+    fn clone_box(&self) -> Box<dyn AlertReceiver<KubePrometheus>> {
+        Box::new(self.clone())
+    }
+}
+
+#[async_trait]
+impl KubePrometheusReceiver for DiscordWebhook {
+    fn name(&self) -> String {
+        self.name.clone()
+    }
+    async fn configure_receiver(&self) -> AlertManagerChannelConfig {
+        self.get_config().await
+    }
+}
+
 #[async_trait]
 impl AlertChannelConfig for DiscordWebhook {
     async fn get_config(&self) -> AlertManagerChannelConfig {

View File

@@ -4,9 +4,12 @@ use serde_yaml::{Mapping, Value};
 use crate::{
     interpret::{InterpretError, Outcome},
-    modules::monitoring::kube_prometheus::{
-        prometheus::{Prometheus, PrometheusReceiver},
-        types::{AlertChannelConfig, AlertManagerChannelConfig},
+    modules::monitoring::{
+        kube_prometheus::{
+            prometheus::{KubePrometheus, KubePrometheusReceiver},
+            types::{AlertChannelConfig, AlertManagerChannelConfig},
+        },
+        prometheus::prometheus::{Prometheus, PrometheusReceiver},
     },
     topology::{Url, oberservability::monitoring::AlertReceiver},
 };
@@ -36,6 +39,25 @@ impl PrometheusReceiver for WebhookReceiver {
         self.get_config().await
     }
 }

+#[async_trait]
+impl AlertReceiver<KubePrometheus> for WebhookReceiver {
+    async fn install(&self, sender: &KubePrometheus) -> Result<Outcome, InterpretError> {
+        sender.install_receiver(self).await
+    }
+    fn clone_box(&self) -> Box<dyn AlertReceiver<KubePrometheus>> {
+        Box::new(self.clone())
+    }
+}
+
+#[async_trait]
+impl KubePrometheusReceiver for WebhookReceiver {
+    fn name(&self) -> String {
+        self.name.clone()
+    }
+    async fn configure_receiver(&self) -> AlertManagerChannelConfig {
+        self.get_config().await
+    }
+}
+
 #[async_trait]
 impl AlertChannelConfig for WebhookReceiver {

View File

@@ -5,13 +5,26 @@ use serde::Serialize;
 use crate::{
     interpret::{InterpretError, Outcome},
-    modules::monitoring::kube_prometheus::{
-        prometheus::{Prometheus, PrometheusRule},
-        types::{AlertGroup, AlertManagerAdditionalPromRules},
+    modules::monitoring::{
+        kube_prometheus::{
+            prometheus::{KubePrometheus, KubePrometheusRule},
+            types::{AlertGroup, AlertManagerAdditionalPromRules},
+        },
+        prometheus::prometheus::{Prometheus, PrometheusRule},
     },
     topology::oberservability::monitoring::AlertRule,
 };

+#[async_trait]
+impl AlertRule<KubePrometheus> for AlertManagerRuleGroup {
+    async fn install(&self, sender: &KubePrometheus) -> Result<Outcome, InterpretError> {
+        sender.install_rule(&self).await
+    }
+    fn clone_box(&self) -> Box<dyn AlertRule<KubePrometheus>> {
+        Box::new(self.clone())
+    }
+}
+
 #[async_trait]
 impl AlertRule<Prometheus> for AlertManagerRuleGroup {
     async fn install(&self, sender: &Prometheus) -> Result<Outcome, InterpretError> {
@@ -41,6 +54,25 @@ impl PrometheusRule for AlertManagerRuleGroup {
         }
     }
 }

+#[async_trait]
+impl KubePrometheusRule for AlertManagerRuleGroup {
+    fn name(&self) -> String {
+        self.name.clone()
+    }
+    async fn configure_rule(&self) -> AlertManagerAdditionalPromRules {
+        let mut additional_prom_rules = BTreeMap::new();
+        additional_prom_rules.insert(
+            self.name.clone(),
+            AlertGroup {
+                groups: vec![self.clone()],
+            },
+        );
+        AlertManagerAdditionalPromRules {
+            rules: additional_prom_rules,
+        }
+    }
+}
+
 impl AlertManagerRuleGroup {
     pub fn new(name: &str, rules: Vec<PrometheusAlertRule>) -> AlertManagerRuleGroup {

View File

@@ -0,0 +1,45 @@
use std::sync::{Arc, Mutex};
use log::debug;
use serde::Serialize;
use crate::{
modules::monitoring::{
kube_prometheus::types::ServiceMonitor,
prometheus::{prometheus::Prometheus, prometheus_config::HelmPrometheusConfig},
},
score::Score,
topology::{
oberservability::monitoring::{AlertReceiver, AlertRule, AlertingInterpret}, tenant::TenantManager, HelmCommand, K8sclient, Topology
},
};
#[derive(Clone, Debug, Serialize)]
pub struct ApplicationPrometheusMonitoringScore {
pub namespace: String,
pub receivers: Vec<Box<dyn AlertReceiver<Prometheus>>>,
pub rules: Vec<Box<dyn AlertRule<Prometheus>>>,
pub service_monitors: Vec<ServiceMonitor>,
}
impl<T: Topology + HelmCommand + TenantManager> Score<T> for ApplicationPrometheusMonitoringScore {
fn create_interpret(&self) -> Box<dyn crate::interpret::Interpret<T>> {
let config = Arc::new(Mutex::new(HelmPrometheusConfig::new()));
config
.try_lock()
.expect("couldn't lock config")
.additional_service_monitors = self.service_monitors.clone();
let ns = self.namespace.clone();
config.try_lock().expect("couldn't lock config").namespace = Some(ns.clone());
debug!("set namespace to {}", ns);
Box::new(AlertingInterpret {
sender: Prometheus { config },
receivers: self.receivers.clone(),
rules: self.rules.clone(),
})
}
fn name(&self) -> String {
"ApplicationPrometheusMonitoringScore".to_string()
}
}

View File

@@ -0,0 +1 @@
pub mod k8s_application_monitoring_score;

View File

@@ -0,0 +1,64 @@
use std::str::FromStr;
use non_blank_string_rs::NonBlankString;
use crate::modules::helm::chart::HelmChartScore;
pub fn grafana_helm_chart_score(ns: &str) -> HelmChartScore {
let values = format!(
r#"
rbac:
namespaced: true
datasources:
datasources.yaml:
apiVersion: 1
datasources:
- name: Prometheus
type: prometheus
access: proxy
url: http://prometheus-server.{ns}.svc.cluster.local
isDefault: true
dashboardProviders:
dashboardproviders.yaml:
apiVersion: 1
providers:
- name: 'default'
orgId: 1
folder: ''
type: file
disableDeletion: false
updateIntervalSeconds: 10
allowUiUpdates: true
editable: true
options:
path: /var/lib/grafana/dashboards/default
dashboards:
default:
compute-usage:
url: https://grafana.com/api/dashboards/315/revisions/1/download
pod-health:
url: https://grafana.com/api/dashboards/15758/revisions/1/download
namespace-resources:
url: https://grafana.com/api/dashboards/9809/revisions/1/download
namespace-resources-vs-quotas:
url: https://grafana.com/api/dashboards/17044/revisions/1/download
persistent-volume-usage:
url: https://grafana.com/api/dashboards/7685/revisions/1/download
"#,
ns = ns
);
HelmChartScore {
namespace: Some(NonBlankString::from_str(ns).unwrap()),
release_name: NonBlankString::from_str("grafana").unwrap(),
chart_name: NonBlankString::from_str("oci://ghcr.io/grafana/helm-charts/grafana").unwrap(),
chart_version: None,
values_overrides: None,
values_yaml: Some(values),
create_namespace: true,
install_only: false,
repository: None,
}
}
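
Like the other chart scores in this diff, this one is meant to be run through its interpret. A minimal usage sketch, assuming a topology value that implements HelmCommand and an empty inventory:

let outcome = grafana_helm_chart_score("monitoring")
    .create_interpret()
    .execute(&Inventory::empty(), &topology)
    .await?;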

View File

@@ -0,0 +1 @@
pub mod helm_grafana;

View File

@@ -0,0 +1 @@
pub mod helm;

View File

@@ -68,6 +68,9 @@ pub fn kube_prometheus_helm_chart_score(
     let mut values = format!(
         r#"
+global:
+  rbac:
+    create: false
 prometheus:
   enabled: {prometheus}
   prometheusSpec:
@@ -242,7 +245,7 @@ prometheus-node-exporter:
         cpu: 200m
         memory: 250Mi
 prometheusOperator:
-  enabled: {prometheus_operator}
+  enabled: false
   resources:
     requests:
       cpu: 100m

View File

@@ -2,7 +2,7 @@ use std::sync::{Arc, Mutex};
 use serde::Serialize;

-use super::{helm::config::KubePrometheusConfig, prometheus::Prometheus};
+use super::{helm::config::KubePrometheusConfig, prometheus::KubePrometheus};
 use crate::{
     modules::monitoring::kube_prometheus::types::ServiceMonitor,
     score::Score,
@@ -15,8 +15,8 @@ use crate::{

 #[derive(Clone, Debug, Serialize)]
 pub struct HelmPrometheusAlertingScore {
-    pub receivers: Vec<Box<dyn AlertReceiver<Prometheus>>>,
-    pub rules: Vec<Box<dyn AlertRule<Prometheus>>>,
+    pub receivers: Vec<Box<dyn AlertReceiver<KubePrometheus>>>,
+    pub rules: Vec<Box<dyn AlertRule<KubePrometheus>>>,
     pub service_monitors: Vec<ServiceMonitor>,
 }
@@ -28,7 +28,7 @@ impl<T: Topology + HelmCommand + TenantManager> Score<T> for HelmPrometheusAlert
             .expect("couldn't lock config")
             .additional_service_monitors = self.service_monitors.clone();
         Box::new(AlertingInterpret {
-            sender: Prometheus::new(),
+            sender: KubePrometheus::new(),
             receivers: self.receivers.clone(),
             rules: self.rules.clone(),
         })

View File

@@ -10,7 +10,7 @@ use crate::{
     modules::monitoring::alert_rule::prometheus_alert_rule::AlertManagerRuleGroup,
     score,
     topology::{
-        HelmCommand, K8sAnywhereTopology, Topology,
+        HelmCommand, Topology,
         installable::Installable,
         oberservability::monitoring::{AlertReceiver, AlertRule, AlertSender},
         tenant::TenantManager,
@@ -27,14 +27,14 @@ use super::{
 };

 #[async_trait]
-impl AlertSender for Prometheus {
+impl AlertSender for KubePrometheus {
     fn name(&self) -> String {
         "HelmKubePrometheus".to_string()
     }
 }

 #[async_trait]
-impl<T: Topology + HelmCommand + TenantManager> Installable<T> for Prometheus {
+impl<T: Topology + HelmCommand + TenantManager> Installable<T> for KubePrometheus {
     async fn configure(&self, _inventory: &Inventory, topology: &T) -> Result<(), InterpretError> {
         self.configure_with_topology(topology).await;
         Ok(())
@@ -51,11 +51,11 @@ impl<T: Topology + HelmCommand + TenantManager> Installable<T> for KubePrometheus {
 }

 #[derive(Debug)]
-pub struct Prometheus {
+pub struct KubePrometheus {
     pub config: Arc<Mutex<KubePrometheusConfig>>,
 }

-impl Prometheus {
+impl KubePrometheus {
     pub fn new() -> Self {
         Self {
             config: Arc::new(Mutex::new(KubePrometheusConfig::new())),
@@ -75,7 +75,7 @@ impl KubePrometheus {
     pub async fn install_receiver(
         &self,
-        prometheus_receiver: &dyn PrometheusReceiver,
+        prometheus_receiver: &dyn KubePrometheusReceiver,
     ) -> Result<Outcome, InterpretError> {
         let prom_receiver = prometheus_receiver.configure_receiver().await;
         debug!(
@@ -120,12 +120,12 @@ impl KubePrometheus {
 }

 #[async_trait]
-pub trait PrometheusReceiver: Send + Sync + std::fmt::Debug {
+pub trait KubePrometheusReceiver: Send + Sync + std::fmt::Debug {
     fn name(&self) -> String;
     async fn configure_receiver(&self) -> AlertManagerChannelConfig;
 }

-impl Serialize for Box<dyn AlertReceiver<Prometheus>> {
+impl Serialize for Box<dyn AlertReceiver<KubePrometheus>> {
     fn serialize<S>(&self, _serializer: S) -> Result<S::Ok, S::Error>
     where
         S: serde::Serializer,
@@ -134,19 +134,19 @@ impl Serialize for Box<dyn AlertReceiver<KubePrometheus>> {
     }
 }

-impl Clone for Box<dyn AlertReceiver<Prometheus>> {
+impl Clone for Box<dyn AlertReceiver<KubePrometheus>> {
     fn clone(&self) -> Self {
         self.clone_box()
     }
 }

 #[async_trait]
-pub trait PrometheusRule: Send + Sync + std::fmt::Debug {
+pub trait KubePrometheusRule: Send + Sync + std::fmt::Debug {
     fn name(&self) -> String;
     async fn configure_rule(&self) -> AlertManagerAdditionalPromRules;
 }

-impl Serialize for Box<dyn AlertRule<Prometheus>> {
+impl Serialize for Box<dyn AlertRule<KubePrometheus>> {
     fn serialize<S>(&self, _serializer: S) -> Result<S::Ok, S::Error>
     where
         S: serde::Serializer,
@@ -155,7 +155,7 @@ impl Serialize for Box<dyn AlertRule<KubePrometheus>> {
     }
 }

-impl Clone for Box<dyn AlertRule<Prometheus>> {
+impl Clone for Box<dyn AlertRule<KubePrometheus>> {
     fn clone(&self) -> Self {
         self.clone_box()
     }

View File

@@ -211,8 +211,10 @@ pub struct Selector {
 pub struct ServiceMonitor {
     pub name: String,
+    pub namespace: String,
     // # Additional labels to set used for the ServiceMonitorSelector. Together with standard labels from the chart
-    pub additional_labels: Option<Mapping>,
+    pub additional_labels: Option<HashMap<String, String>>,

     // # Service label for use in assembling a job name of the form <label value>-<port>
     // # If no label is specified, the service name is used.
@@ -240,7 +242,7 @@ pub struct ServiceMonitor {
     // any: bool,
     // # Explicit list of namespace names to select
     // matchNames: Vec,
-    pub namespace_selector: Option<Mapping>,
+    pub namespace_selector: Option<NamespaceSelector>,

     // # Endpoints of the selected service to be monitored
     pub endpoints: Vec<ServiceMonitorEndpoint>,
@@ -250,10 +252,18 @@ pub struct ServiceMonitor {
     pub fallback_scrape_protocol: Option<String>,
 }

+#[derive(Debug, Serialize, Clone)]
+#[serde(rename_all = "camelCase")]
+pub struct NamespaceSelector {
+    pub any: bool,
+    pub match_names: Vec<String>,
+}
+
 impl Default for ServiceMonitor {
     fn default() -> Self {
         Self {
             name: Default::default(),
+            namespace: Default::default(),
             additional_labels: Default::default(),
             job_label: Default::default(),
             target_labels: Default::default(),
View File

@@ -1,4 +1,7 @@
 pub mod alert_channel;
 pub mod alert_rule;
+pub mod application_monitoring;
+pub mod grafana;
 pub mod kube_prometheus;
 pub mod ntfy;
+pub mod prometheus;

View File

@@ -0,0 +1,2 @@
pub mod prometheus_helm;
pub mod types;

View File

@@ -0,0 +1,155 @@
use std::collections::BTreeMap;
use std::str::FromStr;
use std::sync::{Arc, Mutex};
use log::debug;
use non_blank_string_rs::NonBlankString;
use serde_yaml::{Mapping, Value};
use crate::modules::helm::chart::HelmChartScore;
use crate::modules::monitoring::kube_prometheus::types::{
AlertGroup, AlertManager, AlertManagerConfig, AlertManagerRoute, AlertManagerSpec,
ConfigReloader, Limits, Requests, Resources,
};
use crate::modules::monitoring::prometheus::helm::types::{
AlertFile, EnabledConfig, KsmRbacConfig, KubeStateMetricsConfig, LabelSelector, Monitor,
Prometheus, PrometheusHelmValues, RbacConfig, ServerConfig, ServerRbacConfig,
};
use crate::modules::monitoring::prometheus::prometheus_config::HelmPrometheusConfig;
pub fn prometheus_helm_chart_score(config: Arc<Mutex<HelmPrometheusConfig>>) -> HelmChartScore {
let config = config.lock().unwrap();
let ns = config.namespace.clone().unwrap();
let rbac_config = RbacConfig { create: false };
let ksm_config = KubeStateMetricsConfig {
enabled: true,
rbac: KsmRbacConfig {
use_cluster_role: false,
},
prometheus: Prometheus {
monitor: Monitor { enabled: true },
},
};
let mut selector_labels = BTreeMap::new();
selector_labels.insert("kubernetes.io/metadata.name".to_string(), ns.clone());
let mut kube_state_metrics_labels = BTreeMap::new();
kube_state_metrics_labels.insert(
"app.kubernetes.io/name".to_string(),
"kube-state-metrics".to_string(),
);
let selector = LabelSelector {
match_labels: selector_labels,
};
let server_config = ServerConfig {
namespaces: vec![ns.clone()],
use_existing_cluster_role_name: false,
};
let mut null_receiver = Mapping::new();
null_receiver.insert(
Value::String("receiver".to_string()),
Value::String("default-receiver".to_string()),
);
null_receiver.insert(
Value::String("matchers".to_string()),
Value::Sequence(vec![Value::String("alertname!=Watchdog".to_string())]),
);
null_receiver.insert(Value::String("continue".to_string()), Value::Bool(true));
let mut alert_manager_channel_config = AlertManagerConfig {
global: Mapping::new(),
route: AlertManagerRoute {
routes: vec![Value::Mapping(null_receiver)],
},
receivers: vec![serde_yaml::from_str("name: 'default-receiver'").unwrap()],
};
for receiver in config.alert_receiver_configs.iter() {
if let Some(global) = receiver.channel_global_config.clone() {
alert_manager_channel_config
.global
.insert(global.0, global.1);
}
alert_manager_channel_config
.route
.routes
.push(receiver.channel_route.clone());
alert_manager_channel_config
.receivers
.push(receiver.channel_receiver.clone());
}
let alert_manager_values = AlertManager {
enabled: config.alert_manager,
config: alert_manager_channel_config,
alertmanager_spec: AlertManagerSpec {
resources: Resources {
limits: Limits {
memory: "100Mi".to_string(),
cpu: "100m".to_string(),
},
requests: Requests {
memory: "100Mi".to_string(),
cpu: "100m".to_string(),
},
},
},
init_config_reloader: ConfigReloader {
resources: Resources {
limits: Limits {
memory: "100Mi".to_string(),
cpu: "100m".to_string(),
},
requests: Requests {
memory: "100Mi".to_string(),
cpu: "100m".to_string(),
},
},
},
};
let mut result: BTreeMap<String, AlertFile> = BTreeMap::new();
for rule in config.alert_rules.clone().iter() {
for (name, group) in &rule.rules {
result
.entry("alerting_rules.yml".to_string())
.and_modify(|e| e.groups.extend(group.groups.clone()))
.or_insert(AlertFile {
groups: group.groups.clone(),
});
}
}
let final_values = PrometheusHelmValues {
rbac: rbac_config,
kube_state_metrics: ksm_config,
server: server_config,
alertmanager: alert_manager_values,
server_files: result,
additional_service_monitors: config.additional_service_monitors.clone(),
prometheus_node_exporter: EnabledConfig { enabled: false },
prometheus_pushgateway: EnabledConfig { enabled: false },
};
let values_yaml =
serde_yaml::to_string(&final_values).expect("Failed to serialize final Helm values");
debug!("full values.yaml: \n{}", values_yaml);
HelmChartScore {
namespace: Some(NonBlankString::from_str(&ns).unwrap()),
release_name: NonBlankString::from_str("prometheus").unwrap(),
chart_name: NonBlankString::from_str(
"oci://ghcr.io/prometheus-community/charts/prometheus",
)
.unwrap(),
chart_version: None,
values_overrides: None,
values_yaml: Some(values_yaml),
create_namespace: true,
install_only: true,
repository: None,
}
}
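
Note that this function unwraps config.namespace, so callers must set it before building the score. A usage sketch:

let config = Arc::new(Mutex::new(HelmPrometheusConfig::new()));
config.lock().unwrap().namespace = Some("my-app".to_string());
let score = prometheus_helm_chart_score(config);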

View File

@@ -0,0 +1,94 @@
use std::collections::BTreeMap;
use serde::Serialize;
use crate::modules::monitoring::{
    alert_rule::prometheus_alert_rule::AlertManagerRuleGroup,
    kube_prometheus::types::{
        AlertGroup, AlertManager, AlertManagerAdditionalPromRules, AlertManagerValues,
        ServiceMonitor,
    },
};
#[derive(Debug, Clone, Serialize)]
pub struct RuleFilesConfig {
#[serde(rename = "ruleFiles")]
pub files: BTreeMap<String, AlertGroup>,
}
#[derive(Serialize, Debug)]
#[serde(rename_all = "camelCase")]
pub struct PrometheusHelmValues {
pub rbac: RbacConfig,
#[serde(rename = "kube-state-metrics")]
pub kube_state_metrics: KubeStateMetricsConfig,
pub server: ServerConfig,
pub alertmanager: AlertManager, // You already have this
#[serde(rename = "serverFiles")]
pub server_files: BTreeMap<String, AlertFile>, // You already have this
pub additional_service_monitors: Vec<ServiceMonitor>, // You already have this
#[serde(rename = "prometheus-node-exporter")]
pub prometheus_node_exporter: EnabledConfig,
#[serde(rename = "prometheus-pushgateway")]
pub prometheus_pushgateway: EnabledConfig,
}
#[derive(Serialize, Debug, Clone)]
pub struct AlertFile {
pub groups: Vec<AlertManagerRuleGroup>,
}
#[derive(Serialize, Debug)]
#[serde(rename_all = "camelCase")]
pub struct RbacConfig {
pub create: bool,
}
#[derive(Serialize, Debug)]
#[serde(rename_all = "camelCase")]
pub struct KubeStateMetricsConfig {
pub enabled: bool,
pub rbac: KsmRbacConfig,
pub prometheus: Prometheus,
}
#[derive(Serialize, Debug)]
#[serde(rename_all = "camelCase")]
pub struct Prometheus {
    pub monitor: Monitor,
}
#[derive(Serialize, Debug)]
#[serde(rename_all = "camelCase")]
pub struct Monitor {
    pub enabled: bool,
}
#[derive(Serialize, Debug)]
#[serde(rename_all = "camelCase")]
pub struct KsmRbacConfig {
pub use_cluster_role: bool,
}
#[derive(Serialize, Debug)]
#[serde(rename_all = "camelCase")]
pub struct ServerConfig {
pub namespaces: Vec<String>,
pub use_existing_cluster_role_name: bool,
}
#[derive(Serialize, Debug)]
#[serde(rename_all = "camelCase")]
pub struct ServerRbacConfig {
pub create: bool,
pub use_cluster_role: bool,
pub namespaced: bool,
}
#[derive(Serialize, Debug, Clone)]
#[serde(rename_all = "camelCase")]
pub struct LabelSelector {
#[serde(rename = "matchLabels")]
pub match_labels: BTreeMap<String, String>,
}
#[derive(Serialize, Debug)]
pub struct EnabledConfig {
pub enabled: bool,
}

View File

@@ -0,0 +1,3 @@
pub mod helm;
pub mod prometheus;
pub mod prometheus_config;

View File

@@ -0,0 +1,189 @@
use std::sync::{Arc, Mutex};
use async_trait::async_trait;
use log::{debug, error};
use serde::Serialize;
use crate::{
interpret::{InterpretError, Outcome},
inventory::Inventory,
modules::monitoring::{
alert_rule::prometheus_alert_rule::AlertManagerRuleGroup,
grafana::helm::helm_grafana::grafana_helm_chart_score,
kube_prometheus::types::{AlertManagerAdditionalPromRules, AlertManagerChannelConfig},
},
score::Score,
topology::{
HelmCommand, K8sclient, Topology,
installable::Installable,
oberservability::monitoring::{AlertReceiver, AlertRule, AlertSender},
tenant::TenantManager,
},
};
use super::{
helm::prometheus_helm::prometheus_helm_chart_score, prometheus_config::HelmPrometheusConfig,
};
#[derive(Debug)]
pub struct Prometheus {
pub config: Arc<Mutex<HelmPrometheusConfig>>,
}
#[async_trait]
impl AlertSender for Prometheus {
fn name(&self) -> String {
"Prometheus".to_string()
}
}
impl Prometheus {
pub fn new() -> Self {
Self {
config: Arc::new(Mutex::new(HelmPrometheusConfig::new())),
}
}
pub async fn configure_with_topology<T: TenantManager>(&self, topology: &T) {
if let Some(cfg) = topology.get_tenant_config().await {
debug!("Overriding namespace with tenant config: {}", cfg.name);
self.config.lock().unwrap().namespace = Some(cfg.name.clone());
} else {
debug!("No tenant config found; keeping existing namespace.");
}
error!("This must be refactored, see comments in pr #74");
}
pub async fn install_receiver(
&self,
prometheus_receiver: &dyn PrometheusReceiver,
) -> Result<Outcome, InterpretError> {
let prom_receiver = prometheus_receiver.configure_receiver().await;
debug!(
"adding alert receiver to prometheus config: {:#?}",
&prom_receiver
);
let mut config = self.config.lock().unwrap();
config.alert_receiver_configs.push(prom_receiver);
let prom_receiver_name = prometheus_receiver.name();
debug!("installed alert receiver {}", &prom_receiver_name);
Ok(Outcome::success(format!(
"Sucessfully installed receiver {}",
prom_receiver_name
)))
}
pub async fn install_rule(
&self,
prometheus_rule: &AlertManagerRuleGroup,
) -> Result<Outcome, InterpretError> {
let prometheus_rule = prometheus_rule.configure_rule().await;
let mut config = self.config.lock().unwrap();
config.alert_rules.push(prometheus_rule.clone());
Ok(Outcome::success(format!(
"Successfully installed alert rule: {:#?},",
prometheus_rule
)))
}
pub async fn install_prometheus<T: Topology + HelmCommand + Send + Sync>(
&self,
inventory: &Inventory,
topology: &T,
) -> Result<Outcome, InterpretError> {
prometheus_helm_chart_score(self.config.clone())
.create_interpret()
.execute(inventory, topology)
.await
}
pub async fn install_grafana<T: Topology + HelmCommand + Send + Sync>(
&self,
inventory: &Inventory,
topology: &T,
) -> Result<Outcome, InterpretError> {
let namespace = {
let config = self.config.lock().unwrap();
config.namespace.clone()
};
if let Some(ns) = namespace.as_deref() {
grafana_helm_chart_score(ns)
.create_interpret()
.execute(inventory, topology)
.await
} else {
            Err(InterpretError::new(
                "could not install grafana, missing namespace".to_string(),
            ))
}
}
}
#[async_trait]
impl<T: Topology + HelmCommand + TenantManager> Installable<T> for Prometheus {
async fn configure(&self, _inventory: &Inventory, topology: &T) -> Result<(), InterpretError> {
self.configure_with_topology(topology).await;
Ok(())
}
async fn ensure_installed(
&self,
inventory: &Inventory,
topology: &T,
) -> Result<(), InterpretError> {
self.install_prometheus(inventory, topology).await?;
let install_grafana = {
let config = self.config.lock().unwrap();
config.grafana
};
if install_grafana {
self.install_grafana(inventory, topology).await?;
}
Ok(())
}
}
#[async_trait]
pub trait PrometheusReceiver: Send + Sync + std::fmt::Debug {
fn name(&self) -> String;
async fn configure_receiver(&self) -> AlertManagerChannelConfig;
}
impl Serialize for Box<dyn AlertReceiver<Prometheus>> {
fn serialize<S>(&self, _serializer: S) -> Result<S::Ok, S::Error>
where
S: serde::Serializer,
{
todo!()
}
}
impl Clone for Box<dyn AlertReceiver<Prometheus>> {
fn clone(&self) -> Self {
self.clone_box()
}
}
#[async_trait]
pub trait PrometheusRule: Send + Sync + std::fmt::Debug {
fn name(&self) -> String;
async fn configure_rule(&self) -> AlertManagerAdditionalPromRules;
}
impl Serialize for Box<dyn AlertRule<Prometheus>> {
fn serialize<S>(&self, _serializer: S) -> Result<S::Ok, S::Error>
where
S: serde::Serializer,
{
todo!()
}
}
impl Clone for Box<dyn AlertRule<Prometheus>> {
fn clone(&self) -> Self {
self.clone_box()
}
}

View File

@@ -0,0 +1,32 @@
use crate::modules::monitoring::kube_prometheus::types::{
AlertManagerAdditionalPromRules, AlertManagerChannelConfig, ServiceMonitor,
};
#[derive(Debug)]
pub struct HelmPrometheusConfig {
pub namespace: Option<String>,
pub alert_manager: bool,
pub node_exporter: bool,
pub kube_state_metrics: bool,
pub grafana: bool,
pub prometheus_pushgateway: bool,
pub alert_receiver_configs: Vec<AlertManagerChannelConfig>,
pub alert_rules: Vec<AlertManagerAdditionalPromRules>,
pub additional_service_monitors: Vec<ServiceMonitor>,
}
impl HelmPrometheusConfig {
pub fn new() -> Self {
Self {
namespace: None,
alert_manager: true,
node_exporter: false,
kube_state_metrics: false,
grafana: true,
prometheus_pushgateway: false,
alert_receiver_configs: vec![],
alert_rules: vec![],
additional_service_monitors: vec![],
}
}
}

View File

@@ -1 +1,2 @@
 pub mod pvc;
+pub mod pod;

View File

@@ -0,0 +1,38 @@
use crate::modules::monitoring::alert_rule::prometheus_alert_rule::PrometheusAlertRule;
pub fn pod_in_failed_state() -> PrometheusAlertRule {
PrometheusAlertRule::new(
"PodInFailedState",
// This expression checks for any pod where the status phase is 'Failed' and the value is 1 (true).
"kube_pod_status_phase{phase=\"Failed\"} == 1",
)
.for_duration("1m") // Fire if the pod is in this state for 1 minute.
.label("severity", "critical") // A failed pod is a critical issue.
.annotation(
"summary",
"Pod {{ $labels.pod }} in namespace {{ $labels.namespace }} has failed.",
)
.annotation(
"description",
"The pod {{ $labels.pod }} in namespace {{ $labels.namespace }} has entered the 'Failed' state. This is a terminal error and the pod will not be automatically restarted. Please check the pod logs to diagnose the issue.",
)
}
pub fn pod_restarting_frequently() -> PrometheusAlertRule {
PrometheusAlertRule::new(
"PodRestartingFrequently",
// This expression calculates the increase in the restart count over the last 30 minutes.
// Alert if a container has restarted more than 5 times.
"increase(kube_pod_container_status_restarts_total[30m]) > 5",
)
.for_duration("15m") // The condition must persist for 15 minutes to avoid alerts for minor flaps.
.label("severity", "critical") // A crash-looping pod is effectively down.
.annotation(
"summary",
"Container {{ $labels.container }} in pod {{ $labels.pod }} is restarting frequently.",
)
.annotation(
"description",
"The container '{{ $labels.container }}' in pod '{{ $labels.pod }}' (namespace '{{ $labels.namespace }}') has restarted more than 5 times in the last 30 minutes. The pod is likely in a CrashLoopBackOff state.",
)
}
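
Both helpers return a PrometheusAlertRule, so they compose directly with AlertManagerRuleGroup::new as used in the example main.rs at the top of this diff; for instance:

let pod_alerts = AlertManagerRuleGroup::new(
    "pod-alerts",
    vec![pod_in_failed_state(), pod_restarting_frequently()],
);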