Compare commits
15 Commits
feat/cd/lo
...
feat/monit
| Author | SHA1 | Date | |
|---|---|---|---|
| 5c628b37b7 | |||
| 31661aaaf1 | |||
| 2c208df143 | |||
| 1a6d72dc17 | |||
| df9e21807e | |||
| b1bf4fd4d5 | |||
| f702ecd8c9 | |||
| a19b52e690 | |||
| b73f2e76d0 | |||
| b4534c6ee0 | |||
| 6149249a6c | |||
| d9935e20cb | |||
| 7b0f3b79b1 | |||
| e6612245a5 | |||
| b4f5b91a57 |
@@ -3,8 +3,18 @@ use std::{path::PathBuf, sync::Arc};
|
|||||||
use harmony::{
|
use harmony::{
|
||||||
inventory::Inventory,
|
inventory::Inventory,
|
||||||
maestro::Maestro,
|
maestro::Maestro,
|
||||||
modules::application::{
|
modules::{
|
||||||
RustWebFramework, RustWebapp, RustWebappScore, features::ContinuousDelivery,
|
application::{
|
||||||
|
ApplicationScore, RustWebFramework, RustWebapp,
|
||||||
|
features::{ContinuousDelivery, PrometheusMonitoring},
|
||||||
|
},
|
||||||
|
monitoring::{
|
||||||
|
alert_channel::discord_alert_channel::DiscordWebhook,
|
||||||
|
alert_rule::prometheus_alert_rule::AlertManagerRuleGroup,
|
||||||
|
},
|
||||||
|
prometheus::alerts::k8s::{
|
||||||
|
pod::pod_in_failed_state, pvc::high_pvc_fill_rate_over_two_days,
|
||||||
|
},
|
||||||
},
|
},
|
||||||
topology::{K8sAnywhereTopology, Url},
|
topology::{K8sAnywhereTopology, Url},
|
||||||
};
|
};
|
||||||
@@ -12,18 +22,34 @@ use harmony::{
|
|||||||
#[tokio::main]
|
#[tokio::main]
|
||||||
async fn main() {
|
async fn main() {
|
||||||
env_logger::init();
|
env_logger::init();
|
||||||
let application = RustWebapp {
|
let application = Arc::new(RustWebapp {
|
||||||
name: "harmony-example-rust-webapp".to_string(),
|
name: "harmony-example-rust-webapp".to_string(),
|
||||||
|
domain: Url::Url(url::Url::parse("https://rustapp.harmony.example.com").unwrap()),
|
||||||
project_root: PathBuf::from("./examples/rust/webapp"),
|
project_root: PathBuf::from("./examples/rust/webapp"),
|
||||||
framework: Some(RustWebFramework::Leptos),
|
framework: Some(RustWebFramework::Leptos),
|
||||||
};
|
});
|
||||||
// TODO RustWebappScore should simply take a RustWebApp as config
|
|
||||||
let app = RustWebappScore {
|
let pod_failed = pod_in_failed_state();
|
||||||
name: "Example Rust Webapp".to_string(),
|
let pod_failed_2 = pod_in_failed_state();
|
||||||
domain: Url::Url(url::Url::parse("https://rustapp.harmony.example.com").unwrap()),
|
let pod_failed_3 = pod_in_failed_state();
|
||||||
features: vec![Box::new(ContinuousDelivery {
|
|
||||||
application: Arc::new(application.clone()),
|
let additional_rules = AlertManagerRuleGroup::new("pod-alerts", vec![pod_failed]);
|
||||||
})],
|
let additional_rules_2 = AlertManagerRuleGroup::new("pod-alerts-2", vec![pod_failed_2, pod_failed_3]);
|
||||||
|
let app = ApplicationScore {
|
||||||
|
features: vec![
|
||||||
|
//Box::new(ContinuousDelivery {
|
||||||
|
// application: application.clone(),
|
||||||
|
//}),
|
||||||
|
Box::new(PrometheusMonitoring {
|
||||||
|
application: application.clone(),
|
||||||
|
alert_receivers: vec![Box::new(DiscordWebhook {
|
||||||
|
name: "dummy-discord".to_string(),
|
||||||
|
url: Url::Url(url::Url::parse("https://discord.doesnt.exist.com").unwrap()),
|
||||||
|
})],
|
||||||
|
alert_rules: vec![Box::new(additional_rules), Box::new(additional_rules_2)],
|
||||||
|
}),
|
||||||
|
// TODO add monitoring, backups, multisite ha, etc
|
||||||
|
],
|
||||||
application,
|
application,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|||||||
@@ -13,7 +13,7 @@ reqwest = { version = "0.11", features = ["blocking", "json"] }
|
|||||||
russh = "0.45.0"
|
russh = "0.45.0"
|
||||||
rust-ipmi = "0.1.1"
|
rust-ipmi = "0.1.1"
|
||||||
semver = "1.0.23"
|
semver = "1.0.23"
|
||||||
serde = { version = "1.0.209", features = ["derive"] }
|
serde = { version = "1.0.209", features = ["derive", "rc"] }
|
||||||
serde_json = "1.0.127"
|
serde_json = "1.0.127"
|
||||||
tokio.workspace = true
|
tokio.workspace = true
|
||||||
derive-new.workspace = true
|
derive-new.workspace = true
|
||||||
|
|||||||
@@ -22,6 +22,7 @@ pub enum InterpretName {
|
|||||||
K3dInstallation,
|
K3dInstallation,
|
||||||
TenantInterpret,
|
TenantInterpret,
|
||||||
Application,
|
Application,
|
||||||
|
ArgoCD,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl std::fmt::Display for InterpretName {
|
impl std::fmt::Display for InterpretName {
|
||||||
@@ -39,6 +40,7 @@ impl std::fmt::Display for InterpretName {
|
|||||||
InterpretName::K3dInstallation => f.write_str("K3dInstallation"),
|
InterpretName::K3dInstallation => f.write_str("K3dInstallation"),
|
||||||
InterpretName::TenantInterpret => f.write_str("Tenant"),
|
InterpretName::TenantInterpret => f.write_str("Tenant"),
|
||||||
InterpretName::Application => f.write_str("Application"),
|
InterpretName::Application => f.write_str("Application"),
|
||||||
|
InterpretName::ArgoCD => f.write_str("ArgoCD"),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
59
harmony/src/domain/score_with_dep.rs
Normal file
59
harmony/src/domain/score_with_dep.rs
Normal file
@@ -0,0 +1,59 @@
|
|||||||
|
////////////////////
|
||||||
|
/// Working idea
|
||||||
|
///
|
||||||
|
///
|
||||||
|
trait ScoreWithDep<T> {
|
||||||
|
fn create_interpret(&self) -> Box<dyn Interpret<T>>;
|
||||||
|
fn name(&self) -> String;
|
||||||
|
fn get_dependencies(&self) -> Vec<TypeId>; // Force T to impl Installer<TypeId> or something
|
||||||
|
// like that
|
||||||
|
}
|
||||||
|
|
||||||
|
struct PrometheusAlertScore;
|
||||||
|
|
||||||
|
impl <T> ScoreWithDep<T> for PrometheusAlertScore {
|
||||||
|
fn create_interpret(&self) -> Box<dyn Interpret<T>> {
|
||||||
|
todo!()
|
||||||
|
}
|
||||||
|
|
||||||
|
fn name(&self) -> String {
|
||||||
|
todo!()
|
||||||
|
}
|
||||||
|
|
||||||
|
fn get_dependencies(&self) -> Vec<TypeId> {
|
||||||
|
// We have to find a way to constrait here so at compile time we are only allowed to return
|
||||||
|
// TypeId for types which can be installed by T
|
||||||
|
//
|
||||||
|
// This means, for example that T must implement HelmCommand if the impl <T: HelmCommand> Installable<T> for
|
||||||
|
// KubePrometheus calls for HelmCommand.
|
||||||
|
vec![TypeId::of::<KubePrometheus>()]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
trait Installable{}
|
||||||
|
|
||||||
|
struct KubePrometheus;
|
||||||
|
|
||||||
|
impl Installable for KubePrometheus;
|
||||||
|
|
||||||
|
|
||||||
|
struct Maestro<T> {
|
||||||
|
topology: T
|
||||||
|
}
|
||||||
|
|
||||||
|
impl <T>Maestro<T> {
|
||||||
|
fn execute_store(&self, score: ScoreWithDep<T>) {
|
||||||
|
score.get_dependencies().iter().for_each(|dep| {
|
||||||
|
self.topology.ensure_dependency_ready(dep);
|
||||||
|
});
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
struct TopologyWithDep {
|
||||||
|
}
|
||||||
|
|
||||||
|
impl TopologyWithDep {
|
||||||
|
fn ensure_dependency_ready(&self, type_id: TypeId) -> Result<(), String> {
|
||||||
|
self.installer
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -4,8 +4,6 @@ use k8s_openapi::{
|
|||||||
ClusterResourceScope, NamespaceResourceScope,
|
ClusterResourceScope, NamespaceResourceScope,
|
||||||
api::{apps::v1::Deployment, core::v1::Pod},
|
api::{apps::v1::Deployment, core::v1::Pod},
|
||||||
};
|
};
|
||||||
use kube::runtime::conditions;
|
|
||||||
use kube::runtime::wait::await_condition;
|
|
||||||
use kube::{
|
use kube::{
|
||||||
Client, Config, Error, Resource,
|
Client, Config, Error, Resource,
|
||||||
api::{Api, AttachParams, ListParams, Patch, PatchParams, ResourceExt},
|
api::{Api, AttachParams, ListParams, Patch, PatchParams, ResourceExt},
|
||||||
@@ -13,6 +11,11 @@ use kube::{
|
|||||||
core::ErrorResponse,
|
core::ErrorResponse,
|
||||||
runtime::reflector::Lookup,
|
runtime::reflector::Lookup,
|
||||||
};
|
};
|
||||||
|
use kube::{api::DynamicObject, runtime::conditions};
|
||||||
|
use kube::{
|
||||||
|
api::{ApiResource, GroupVersionKind},
|
||||||
|
runtime::wait::await_condition,
|
||||||
|
};
|
||||||
use log::{debug, error, trace};
|
use log::{debug, error, trace};
|
||||||
use serde::de::DeserializeOwned;
|
use serde::de::DeserializeOwned;
|
||||||
use similar::{DiffableStr, TextDiff};
|
use similar::{DiffableStr, TextDiff};
|
||||||
@@ -239,6 +242,54 @@ impl K8sClient {
|
|||||||
Ok(result)
|
Ok(result)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub async fn apply_yaml_many(
|
||||||
|
&self,
|
||||||
|
yaml: &Vec<serde_yaml::Value>,
|
||||||
|
ns: Option<&str>,
|
||||||
|
) -> Result<(), Error> {
|
||||||
|
for y in yaml.iter() {
|
||||||
|
self.apply_yaml(y, ns).await?;
|
||||||
|
}
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
pub async fn apply_yaml(
|
||||||
|
&self,
|
||||||
|
yaml: &serde_yaml::Value,
|
||||||
|
ns: Option<&str>,
|
||||||
|
) -> Result<(), Error> {
|
||||||
|
let obj: DynamicObject = serde_yaml::from_value(yaml.clone()).expect("TODO do not unwrap");
|
||||||
|
let name = obj.metadata.name.as_ref().expect("YAML must have a name");
|
||||||
|
let namespace = obj
|
||||||
|
.metadata
|
||||||
|
.namespace
|
||||||
|
.as_ref()
|
||||||
|
.expect("YAML must have a namespace");
|
||||||
|
|
||||||
|
// 4. Define the API resource type using the GVK from the object.
|
||||||
|
// The plural name 'applications' is taken from your CRD definition.
|
||||||
|
error!("This only supports argocd application harcoded, very rrrong");
|
||||||
|
let gvk = GroupVersionKind::gvk("argoproj.io", "v1alpha1", "Application");
|
||||||
|
let api_resource = ApiResource::from_gvk_with_plural(&gvk, "applications");
|
||||||
|
|
||||||
|
// 5. Create a dynamic API client for this resource type.
|
||||||
|
let api: Api<DynamicObject> =
|
||||||
|
Api::namespaced_with(self.client.clone(), namespace, &api_resource);
|
||||||
|
|
||||||
|
// 6. Apply the object to the cluster using Server-Side Apply.
|
||||||
|
// This will create the resource if it doesn't exist, or update it if it does.
|
||||||
|
println!(
|
||||||
|
"Applying Argo Application '{}' in namespace '{}'...",
|
||||||
|
name, namespace
|
||||||
|
);
|
||||||
|
let patch_params = PatchParams::apply("harmony"); // Use a unique field manager name
|
||||||
|
let result = api.patch(name, &patch_params, &Patch::Apply(&obj)).await?;
|
||||||
|
|
||||||
|
println!("Successfully applied '{}'.", result.name_any());
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
pub(crate) async fn from_kubeconfig(path: &str) -> Option<K8sClient> {
|
pub(crate) async fn from_kubeconfig(path: &str) -> Option<K8sClient> {
|
||||||
let k = match Kubeconfig::read_from(path) {
|
let k = match Kubeconfig::read_from(path) {
|
||||||
Ok(k) => k,
|
Ok(k) => k,
|
||||||
|
|||||||
@@ -246,7 +246,7 @@ pub struct K8sAnywhereConfig {
|
|||||||
///
|
///
|
||||||
/// default: true
|
/// default: true
|
||||||
pub use_local_k3d: bool,
|
pub use_local_k3d: bool,
|
||||||
harmony_profile: String,
|
pub harmony_profile: String,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl K8sAnywhereConfig {
|
impl K8sAnywhereConfig {
|
||||||
|
|||||||
@@ -1,3 +1,5 @@
|
|||||||
|
use std::any::Any;
|
||||||
|
|
||||||
use async_trait::async_trait;
|
use async_trait::async_trait;
|
||||||
use log::debug;
|
use log::debug;
|
||||||
|
|
||||||
@@ -9,7 +11,7 @@ use crate::{
|
|||||||
};
|
};
|
||||||
|
|
||||||
#[async_trait]
|
#[async_trait]
|
||||||
pub trait AlertSender: Send + Sync + std::fmt::Debug {
|
pub trait AlertSender: Any + Send + Sync + std::fmt::Debug {
|
||||||
fn name(&self) -> String;
|
fn name(&self) -> String;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -3,7 +3,6 @@ use serde::Serialize;
|
|||||||
|
|
||||||
use crate::topology::Topology;
|
use crate::topology::Topology;
|
||||||
|
|
||||||
use super::Application;
|
|
||||||
/// An ApplicationFeature provided by harmony, such as Backups, Monitoring, MultisiteAvailability,
|
/// An ApplicationFeature provided by harmony, such as Backups, Monitoring, MultisiteAvailability,
|
||||||
/// ContinuousIntegration, ContinuousDelivery
|
/// ContinuousIntegration, ContinuousDelivery
|
||||||
#[async_trait]
|
#[async_trait]
|
||||||
|
|||||||
357
harmony/src/modules/application/features/argo_types.rs
Normal file
357
harmony/src/modules/application/features/argo_types.rs
Normal file
@@ -0,0 +1,357 @@
|
|||||||
|
use std::{backtrace, collections::HashMap};
|
||||||
|
|
||||||
|
use k8s_openapi::{Metadata, NamespaceResourceScope, Resource};
|
||||||
|
use log::debug;
|
||||||
|
use serde::Serialize;
|
||||||
|
use serde_yaml::Value;
|
||||||
|
use url::Url;
|
||||||
|
|
||||||
|
use crate::modules::application::features::CDApplicationConfig;
|
||||||
|
|
||||||
|
#[derive(Clone, Debug, Serialize)]
|
||||||
|
#[serde(rename_all = "camelCase")]
|
||||||
|
pub struct Helm {
|
||||||
|
pub pass_credentials: Option<bool>,
|
||||||
|
pub parameters: Vec<Value>,
|
||||||
|
pub file_parameters: Vec<Value>,
|
||||||
|
pub release_name: Option<String>,
|
||||||
|
pub value_files: Vec<String>,
|
||||||
|
pub ignore_missing_value_files: Option<bool>,
|
||||||
|
pub values: Option<String>,
|
||||||
|
pub values_object: Option<Value>,
|
||||||
|
pub skip_crds: Option<bool>,
|
||||||
|
pub skip_schema_validation: Option<bool>,
|
||||||
|
pub version: Option<String>,
|
||||||
|
pub kube_version: Option<String>,
|
||||||
|
pub api_versions: Vec<String>,
|
||||||
|
pub namespace: Option<String>,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Clone, Debug, Serialize)]
|
||||||
|
#[serde(rename_all = "camelCase")]
|
||||||
|
pub struct Source {
|
||||||
|
pub repo_url: Url,
|
||||||
|
pub target_revision: Option<String>,
|
||||||
|
pub chart: String,
|
||||||
|
pub helm: Helm,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Clone, Debug, Serialize)]
|
||||||
|
#[serde(rename_all = "camelCase")]
|
||||||
|
pub struct Automated {
|
||||||
|
pub prune: bool,
|
||||||
|
pub self_heal: bool,
|
||||||
|
pub allow_empty: bool,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Clone, Debug, Serialize)]
|
||||||
|
#[serde(rename_all = "camelCase")]
|
||||||
|
pub struct Backoff {
|
||||||
|
pub duration: String,
|
||||||
|
pub factor: u32,
|
||||||
|
pub max_duration: String,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Clone, Debug, Serialize)]
|
||||||
|
#[serde(rename_all = "camelCase")]
|
||||||
|
pub struct Retry {
|
||||||
|
pub limit: u32,
|
||||||
|
pub backoff: Backoff,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Clone, Debug, Serialize)]
|
||||||
|
#[serde(rename_all = "camelCase")]
|
||||||
|
pub struct SyncPolicy {
|
||||||
|
pub automated: Automated,
|
||||||
|
pub sync_options: Vec<String>,
|
||||||
|
pub retry: Retry,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Clone, Debug, Serialize)]
|
||||||
|
#[serde(rename_all = "camelCase")]
|
||||||
|
pub struct ArgoApplication {
|
||||||
|
pub name: String,
|
||||||
|
pub namespace: Option<String>,
|
||||||
|
pub project: String,
|
||||||
|
pub source: Source,
|
||||||
|
pub sync_policy: SyncPolicy,
|
||||||
|
pub revision_history_limit: u32,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Default for ArgoApplication {
|
||||||
|
fn default() -> Self {
|
||||||
|
Self {
|
||||||
|
name: Default::default(),
|
||||||
|
namespace: Default::default(),
|
||||||
|
project: Default::default(),
|
||||||
|
source: Source {
|
||||||
|
repo_url: Url::parse("http://asdf").expect("Couldn't parse to URL"),
|
||||||
|
target_revision: None,
|
||||||
|
chart: "".to_string(),
|
||||||
|
helm: Helm {
|
||||||
|
pass_credentials: None,
|
||||||
|
parameters: vec![],
|
||||||
|
file_parameters: vec![],
|
||||||
|
release_name: None,
|
||||||
|
value_files: vec![],
|
||||||
|
ignore_missing_value_files: None,
|
||||||
|
values: None,
|
||||||
|
values_object: None,
|
||||||
|
skip_crds: None,
|
||||||
|
skip_schema_validation: None,
|
||||||
|
version: None,
|
||||||
|
kube_version: None,
|
||||||
|
api_versions: vec![],
|
||||||
|
namespace: None,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
sync_policy: SyncPolicy {
|
||||||
|
automated: Automated {
|
||||||
|
prune: false,
|
||||||
|
self_heal: false,
|
||||||
|
allow_empty: false,
|
||||||
|
},
|
||||||
|
sync_options: vec![],
|
||||||
|
retry: Retry {
|
||||||
|
limit: 5,
|
||||||
|
backoff: Backoff {
|
||||||
|
duration: "5s".to_string(),
|
||||||
|
factor: 2,
|
||||||
|
max_duration: "3m".to_string(),
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
revision_history_limit: 10,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl From<CDApplicationConfig> for ArgoApplication {
|
||||||
|
fn from(value: CDApplicationConfig) -> Self {
|
||||||
|
Self {
|
||||||
|
name: value.name,
|
||||||
|
namespace: Some(value.namespace),
|
||||||
|
project: "default".to_string(),
|
||||||
|
source: Source {
|
||||||
|
repo_url: Url::parse(value.helm_chart_repo_url.to_string().as_str())
|
||||||
|
.expect("couldn't convert to URL"),
|
||||||
|
target_revision: None,
|
||||||
|
chart: value.helm_chart_name,
|
||||||
|
helm: Helm {
|
||||||
|
pass_credentials: None,
|
||||||
|
parameters: vec![],
|
||||||
|
file_parameters: vec![],
|
||||||
|
release_name: None,
|
||||||
|
value_files: vec![],
|
||||||
|
ignore_missing_value_files: None,
|
||||||
|
values: None,
|
||||||
|
values_object: Some(value.values_overrides),
|
||||||
|
skip_crds: None,
|
||||||
|
skip_schema_validation: None,
|
||||||
|
version: None,
|
||||||
|
kube_version: None,
|
||||||
|
api_versions: vec![],
|
||||||
|
namespace: None,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
sync_policy: SyncPolicy {
|
||||||
|
automated: Automated {
|
||||||
|
prune: false,
|
||||||
|
self_heal: false,
|
||||||
|
allow_empty: true,
|
||||||
|
},
|
||||||
|
sync_options: vec![],
|
||||||
|
retry: Retry {
|
||||||
|
limit: 5,
|
||||||
|
backoff: Backoff {
|
||||||
|
duration: "5s".to_string(),
|
||||||
|
factor: 2,
|
||||||
|
max_duration: "3m".to_string(),
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
..Self::default()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl ArgoApplication {
|
||||||
|
pub fn to_yaml(&self) -> serde_yaml::Value {
|
||||||
|
let name = &self.name;
|
||||||
|
let namespace = if let Some(ns) = self.namespace.as_ref() {
|
||||||
|
&ns
|
||||||
|
} else {
|
||||||
|
"argocd"
|
||||||
|
};
|
||||||
|
let project = &self.project;
|
||||||
|
let source = &self.source;
|
||||||
|
|
||||||
|
let yaml_str = format!(
|
||||||
|
r#"
|
||||||
|
apiVersion: argoproj.io/v1alpha1
|
||||||
|
kind: Application
|
||||||
|
metadata:
|
||||||
|
name: {name}
|
||||||
|
# You'll usually want to add your resources to the argocd namespace.
|
||||||
|
namespace: {namespace}
|
||||||
|
spec:
|
||||||
|
# The project the application belongs to.
|
||||||
|
project: {project}
|
||||||
|
|
||||||
|
# Destination cluster and namespace to deploy the application
|
||||||
|
destination:
|
||||||
|
# cluster API URL
|
||||||
|
server: https://kubernetes.default.svc
|
||||||
|
# or cluster name
|
||||||
|
# name: in-cluster
|
||||||
|
# The namespace will only be set for namespace-scoped resources that have not set a value for .metadata.namespace
|
||||||
|
namespace: {namespace}
|
||||||
|
|
||||||
|
"#
|
||||||
|
);
|
||||||
|
|
||||||
|
let mut yaml_value: Value =
|
||||||
|
serde_yaml::from_str(yaml_str.as_str()).expect("couldn't parse string to YAML");
|
||||||
|
|
||||||
|
let mut spec = yaml_value
|
||||||
|
.get_mut("spec")
|
||||||
|
.expect("couldn't get spec from yaml")
|
||||||
|
.as_mapping_mut()
|
||||||
|
.expect("couldn't unwrap spec as mutable mapping");
|
||||||
|
|
||||||
|
let source =
|
||||||
|
serde_yaml::to_value(&self.source).expect("couldn't serialize source to value");
|
||||||
|
let sync_policy = serde_yaml::to_value(&self.sync_policy)
|
||||||
|
.expect("couldn't serialize sync_policy to value");
|
||||||
|
let revision_history_limit = serde_yaml::to_value(&self.revision_history_limit)
|
||||||
|
.expect("couldn't serialize revision_history_limit to value");
|
||||||
|
|
||||||
|
spec.insert(
|
||||||
|
serde_yaml::to_value("source").expect("string to value failed"),
|
||||||
|
source,
|
||||||
|
);
|
||||||
|
spec.insert(
|
||||||
|
serde_yaml::to_value("syncPolicy").expect("string to value failed"),
|
||||||
|
sync_policy,
|
||||||
|
);
|
||||||
|
spec.insert(
|
||||||
|
serde_yaml::to_value("revisionHistoryLimit")
|
||||||
|
.expect("couldn't convert str to yaml value"),
|
||||||
|
revision_history_limit,
|
||||||
|
);
|
||||||
|
|
||||||
|
debug!("spec: {}", serde_yaml::to_string(spec).unwrap());
|
||||||
|
debug!(
|
||||||
|
"entire yaml_value: {}",
|
||||||
|
serde_yaml::to_string(&yaml_value).unwrap()
|
||||||
|
);
|
||||||
|
|
||||||
|
yaml_value
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[cfg(test)]
|
||||||
|
mod tests {
|
||||||
|
use url::Url;
|
||||||
|
|
||||||
|
use crate::modules::application::features::{
|
||||||
|
ArgoApplication, Automated, Backoff, Helm, Retry, Source, SyncPolicy,
|
||||||
|
};
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_argo_application_to_yaml_happy_path() {
|
||||||
|
let app = ArgoApplication {
|
||||||
|
name: "test".to_string(),
|
||||||
|
namespace: Some("test-ns".to_string()),
|
||||||
|
project: "test-project".to_string(),
|
||||||
|
source: Source {
|
||||||
|
repo_url: Url::parse("http://test").unwrap(),
|
||||||
|
target_revision: None,
|
||||||
|
chart: "test-chart".to_string(),
|
||||||
|
helm: Helm {
|
||||||
|
pass_credentials: None,
|
||||||
|
parameters: vec![],
|
||||||
|
file_parameters: vec![],
|
||||||
|
release_name: Some("test-release-neame".to_string()),
|
||||||
|
value_files: vec![],
|
||||||
|
ignore_missing_value_files: None,
|
||||||
|
values: None,
|
||||||
|
values_object: None,
|
||||||
|
skip_crds: None,
|
||||||
|
skip_schema_validation: None,
|
||||||
|
version: None,
|
||||||
|
kube_version: None,
|
||||||
|
api_versions: vec![],
|
||||||
|
namespace: None,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
sync_policy: SyncPolicy {
|
||||||
|
automated: Automated {
|
||||||
|
prune: false,
|
||||||
|
self_heal: false,
|
||||||
|
allow_empty: false,
|
||||||
|
},
|
||||||
|
sync_options: vec![],
|
||||||
|
retry: Retry {
|
||||||
|
limit: 5,
|
||||||
|
backoff: Backoff {
|
||||||
|
duration: "5s".to_string(),
|
||||||
|
factor: 2,
|
||||||
|
max_duration: "3m".to_string(),
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
revision_history_limit: 10,
|
||||||
|
};
|
||||||
|
|
||||||
|
let expected_yaml_output = r#"apiVersion: argoproj.io/v1alpha1
|
||||||
|
kind: Application
|
||||||
|
metadata:
|
||||||
|
name: test
|
||||||
|
namespace: test-ns
|
||||||
|
spec:
|
||||||
|
project: test-project
|
||||||
|
destination:
|
||||||
|
server: https://kubernetes.default.svc
|
||||||
|
namespace: test-ns
|
||||||
|
source:
|
||||||
|
repoUrl: http://test/
|
||||||
|
targetRevision: null
|
||||||
|
chart: test-chart
|
||||||
|
helm:
|
||||||
|
passCredentials: null
|
||||||
|
parameters: []
|
||||||
|
fileParameters: []
|
||||||
|
releaseName: test-release-neame
|
||||||
|
valueFiles: []
|
||||||
|
ignoreMissingValueFiles: null
|
||||||
|
values: null
|
||||||
|
valuesObject: null
|
||||||
|
skipCrds: null
|
||||||
|
skipSchemaValidation: null
|
||||||
|
version: null
|
||||||
|
kubeVersion: null
|
||||||
|
apiVersions: []
|
||||||
|
namespace: null
|
||||||
|
syncPolicy:
|
||||||
|
automated:
|
||||||
|
prune: false
|
||||||
|
selfHeal: false
|
||||||
|
allowEmpty: false
|
||||||
|
syncOptions: []
|
||||||
|
retry:
|
||||||
|
limit: 5
|
||||||
|
backoff:
|
||||||
|
duration: 5s
|
||||||
|
factor: 2
|
||||||
|
maxDuration: 3m
|
||||||
|
revisionHistoryLimit: 10"#;
|
||||||
|
|
||||||
|
assert_eq!(
|
||||||
|
expected_yaml_output.trim(),
|
||||||
|
serde_yaml::to_string(&app.clone().to_yaml())
|
||||||
|
.unwrap()
|
||||||
|
.trim()
|
||||||
|
);
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -2,7 +2,7 @@ use std::{io::Write, process::Command, sync::Arc};
|
|||||||
|
|
||||||
use async_trait::async_trait;
|
use async_trait::async_trait;
|
||||||
use log::{error, info};
|
use log::{error, info};
|
||||||
use serde_json::Value;
|
use serde_yaml::Value;
|
||||||
use tempfile::NamedTempFile;
|
use tempfile::NamedTempFile;
|
||||||
|
|
||||||
use crate::{
|
use crate::{
|
||||||
@@ -10,11 +10,14 @@ use crate::{
|
|||||||
data::Version,
|
data::Version,
|
||||||
inventory::Inventory,
|
inventory::Inventory,
|
||||||
modules::{
|
modules::{
|
||||||
application::{Application, ApplicationFeature, HelmPackage, OCICompliant},
|
application::{
|
||||||
|
Application, ApplicationFeature, HelmPackage, OCICompliant,
|
||||||
|
features::{ArgoApplication, ArgoHelmScore},
|
||||||
|
},
|
||||||
helm::chart::HelmChartScore,
|
helm::chart::HelmChartScore,
|
||||||
},
|
},
|
||||||
score::Score,
|
score::Score,
|
||||||
topology::{DeploymentTarget, HelmCommand, MultiTargetTopology, Topology, Url},
|
topology::{DeploymentTarget, HelmCommand, K8sclient, MultiTargetTopology, Topology, Url},
|
||||||
};
|
};
|
||||||
|
|
||||||
/// ContinuousDelivery in Harmony provides this functionality :
|
/// ContinuousDelivery in Harmony provides this functionality :
|
||||||
@@ -139,7 +142,7 @@ impl<A: OCICompliant + HelmPackage> ContinuousDelivery<A> {
|
|||||||
#[async_trait]
|
#[async_trait]
|
||||||
impl<
|
impl<
|
||||||
A: OCICompliant + HelmPackage + Clone + 'static,
|
A: OCICompliant + HelmPackage + Clone + 'static,
|
||||||
T: Topology + HelmCommand + MultiTargetTopology + 'static,
|
T: Topology + HelmCommand + MultiTargetTopology + K8sclient + 'static,
|
||||||
> ApplicationFeature<T> for ContinuousDelivery<A>
|
> ApplicationFeature<T> for ContinuousDelivery<A>
|
||||||
{
|
{
|
||||||
async fn ensure_installed(&self, topology: &T) -> Result<(), String> {
|
async fn ensure_installed(&self, topology: &T) -> Result<(), String> {
|
||||||
@@ -150,12 +153,16 @@ impl<
|
|||||||
"TODO reverse helm chart packaging and docker image build. I put helm package first for faster iterations"
|
"TODO reverse helm chart packaging and docker image build. I put helm package first for faster iterations"
|
||||||
);
|
);
|
||||||
|
|
||||||
|
// TODO Write CI/CD workflow files
|
||||||
|
// we can autotedect the CI type using the remote url (default to github action for github
|
||||||
|
// url, etc..)
|
||||||
|
// Or ask for it when unknown
|
||||||
|
|
||||||
let helm_chart = self.application.build_push_helm_package(&image).await?;
|
let helm_chart = self.application.build_push_helm_package(&image).await?;
|
||||||
info!("Pushed new helm chart {helm_chart}");
|
info!("Pushed new helm chart {helm_chart}");
|
||||||
|
|
||||||
// let image = self.application.build_push_oci_image().await?;
|
let image = self.application.build_push_oci_image().await?;
|
||||||
// info!("Pushed new docker image {image}");
|
info!("Pushed new docker image {image}");
|
||||||
error!("uncomment above");
|
|
||||||
|
|
||||||
info!("Installing ContinuousDelivery feature");
|
info!("Installing ContinuousDelivery feature");
|
||||||
// TODO this is a temporary hack for demo purposes, the deployment target should be driven
|
// TODO this is a temporary hack for demo purposes, the deployment target should be driven
|
||||||
@@ -178,29 +185,33 @@ impl<
|
|||||||
}
|
}
|
||||||
target => {
|
target => {
|
||||||
info!("Deploying to target {target:?}");
|
info!("Deploying to target {target:?}");
|
||||||
let cd_server = HelmChartScore {
|
let score = ArgoHelmScore {
|
||||||
namespace: todo!(
|
namespace: "harmonydemo-staging".to_string(),
|
||||||
"ArgoCD Helm chart with proper understanding of Tenant, see how Will did it for Monitoring for now"
|
openshift: true,
|
||||||
),
|
domain: "argo.harmonydemo.apps.st.mcd".to_string(),
|
||||||
release_name: todo!("argocd helm chart whatever"),
|
argo_apps: vec![ArgoApplication::from(CDApplicationConfig {
|
||||||
chart_name: todo!(),
|
// helm pull oci://hub.nationtech.io/harmony/harmony-example-rust-webapp-chart/harmony-example-rust-webapp-chart --version 0.1.0
|
||||||
chart_version: todo!(),
|
version: Version::from("0.1.0").unwrap(),
|
||||||
values_overrides: todo!(),
|
helm_chart_repo_url: Url::Url(url::Url::parse("oci://hub.nationtech.io/harmony/harmony-example-rust-webapp-chart/harmony-example-rust-webapp-chart").unwrap()),
|
||||||
values_yaml: todo!(),
|
helm_chart_name: "harmony-example-rust-webapp-chart".to_string(),
|
||||||
create_namespace: todo!(),
|
values_overrides: Value::Null,
|
||||||
install_only: todo!(),
|
name: "harmony-demo-rust-webapp".to_string(),
|
||||||
repository: todo!(),
|
namespace: "harmonydemo-staging".to_string(),
|
||||||
|
})],
|
||||||
};
|
};
|
||||||
let interpret = cd_server.create_interpret();
|
score
|
||||||
interpret.execute(&Inventory::empty(), topology);
|
.create_interpret()
|
||||||
|
.execute(&Inventory::empty(), topology)
|
||||||
|
.await
|
||||||
|
.unwrap();
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
todo!("1. Create ArgoCD score that installs argo using helm chart, see if Taha's already done it
|
todo!("1. Create ArgoCD score that installs argo using helm chart, see if Taha's already done it
|
||||||
- [X] Package app (docker image, helm chart)
|
- [X] Package app (docker image, helm chart)
|
||||||
- [X] Push to registry
|
- [X] Push to registry
|
||||||
- [ ] Push only if staging or prod
|
- [X] Push only if staging or prod
|
||||||
- [ ] Deploy to local k3d when target is local
|
- [X] Deploy to local k3d when target is local
|
||||||
- [ ] Poke Argo
|
- [ ] Poke Argo
|
||||||
- [ ] Ensure app is up")
|
- [ ] Ensure app is up")
|
||||||
}
|
}
|
||||||
@@ -212,9 +223,12 @@ impl<
|
|||||||
/// For now this is entirely bound to K8s / ArgoCD, will have to be revisited when we support
|
/// For now this is entirely bound to K8s / ArgoCD, will have to be revisited when we support
|
||||||
/// more CD systems
|
/// more CD systems
|
||||||
pub struct CDApplicationConfig {
|
pub struct CDApplicationConfig {
|
||||||
version: Version,
|
pub version: Version,
|
||||||
helm_chart_url: Url,
|
pub helm_chart_repo_url: Url,
|
||||||
values_overrides: Value,
|
pub helm_chart_name: String,
|
||||||
|
pub values_overrides: Value,
|
||||||
|
pub name: String,
|
||||||
|
pub namespace: String,
|
||||||
}
|
}
|
||||||
|
|
||||||
pub trait ContinuousDeliveryApplication {
|
pub trait ContinuousDeliveryApplication {
|
||||||
|
|||||||
@@ -2,7 +2,7 @@ use async_trait::async_trait;
|
|||||||
use log::info;
|
use log::info;
|
||||||
|
|
||||||
use crate::{
|
use crate::{
|
||||||
modules::application::{Application, ApplicationFeature},
|
modules::application::ApplicationFeature,
|
||||||
topology::{K8sclient, Topology},
|
topology::{K8sclient, Topology},
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|||||||
1002
harmony/src/modules/application/features/helm_argocd_score.rs
Normal file
1002
harmony/src/modules/application/features/helm_argocd_score.rs
Normal file
File diff suppressed because it is too large
Load Diff
@@ -6,3 +6,9 @@ pub use monitoring::*;
|
|||||||
|
|
||||||
mod continuous_delivery;
|
mod continuous_delivery;
|
||||||
pub use continuous_delivery::*;
|
pub use continuous_delivery::*;
|
||||||
|
|
||||||
|
mod helm_argocd_score;
|
||||||
|
pub use helm_argocd_score::*;
|
||||||
|
|
||||||
|
mod argo_types;
|
||||||
|
pub use argo_types::*;
|
||||||
|
|||||||
@@ -1,19 +1,53 @@
|
|||||||
|
use std::sync::Arc;
|
||||||
|
|
||||||
use async_trait::async_trait;
|
use async_trait::async_trait;
|
||||||
use log::info;
|
use log::info;
|
||||||
|
|
||||||
use crate::{
|
use crate::{
|
||||||
modules::application::{Application, ApplicationFeature},
|
inventory::Inventory,
|
||||||
topology::{HelmCommand, Topology},
|
modules::{
|
||||||
|
application::{Application, ApplicationFeature},
|
||||||
|
monitoring::{
|
||||||
|
application_monitoring::k8s_application_monitoring_score::ApplicationPrometheusMonitoringScore,
|
||||||
|
kube_prometheus::types::{NamespaceSelector, ServiceMonitor}, prometheus::prometheus::Prometheus,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
score::Score,
|
||||||
|
topology::{oberservability::monitoring::{AlertReceiver, AlertRule, AlertSender}, tenant::TenantManager, HelmCommand, K8sclient, Topology},
|
||||||
};
|
};
|
||||||
|
|
||||||
#[derive(Debug, Default, Clone)]
|
#[derive(Debug, Clone)]
|
||||||
pub struct Monitoring {}
|
pub struct PrometheusMonitoring {
|
||||||
|
pub application: Arc<dyn Application>,
|
||||||
|
pub alert_receivers: Vec<Box<dyn AlertReceiver<Prometheus>>>,
|
||||||
|
pub alert_rules: Vec<Box<dyn AlertRule<Prometheus>>>,
|
||||||
|
}
|
||||||
|
|
||||||
#[async_trait]
|
#[async_trait]
|
||||||
impl<T: Topology + HelmCommand + 'static> ApplicationFeature<T> for Monitoring {
|
impl<T: Topology + HelmCommand + 'static + TenantManager> ApplicationFeature<T> for PrometheusMonitoring {
|
||||||
async fn ensure_installed(&self, _topology: &T) -> Result<(), String> {
|
async fn ensure_installed(&self, topology: &T) -> Result<(), String> {
|
||||||
info!("Ensuring monitoring is available for application");
|
info!("Ensuring monitoring is available for application");
|
||||||
todo!("create and execute k8s prometheus score, depends on Will's work")
|
let ns = self.application.name();
|
||||||
|
let mut service_monitor = ServiceMonitor::default();
|
||||||
|
service_monitor.name = ns.clone();
|
||||||
|
service_monitor.namespace = ns.clone();
|
||||||
|
service_monitor.namespace_selector = Some(NamespaceSelector {
|
||||||
|
any: true,
|
||||||
|
match_names: vec![ns.clone()],
|
||||||
|
});
|
||||||
|
let alerting_score = ApplicationPrometheusMonitoringScore {
|
||||||
|
namespace: ns,
|
||||||
|
receivers: self.alert_receivers.clone(),
|
||||||
|
rules: self.alert_rules.clone(),
|
||||||
|
service_monitors: vec![service_monitor],
|
||||||
|
};
|
||||||
|
|
||||||
|
alerting_score
|
||||||
|
.create_interpret()
|
||||||
|
.execute(&Inventory::empty(), topology)
|
||||||
|
.await
|
||||||
|
.unwrap();
|
||||||
|
Ok(())
|
||||||
}
|
}
|
||||||
fn name(&self) -> String {
|
fn name(&self) -> String {
|
||||||
"Monitoring".to_string()
|
"Monitoring".to_string()
|
||||||
|
|||||||
@@ -23,13 +23,13 @@ pub trait Application: std::fmt::Debug + Send + Sync {
|
|||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Debug)]
|
#[derive(Debug)]
|
||||||
pub struct ApplicationInterpret<T: Topology + std::fmt::Debug> {
|
pub struct ApplicationInterpret<A: Application, T: Topology + std::fmt::Debug> {
|
||||||
features: Vec<Box<dyn ApplicationFeature<T>>>,
|
features: Vec<Box<dyn ApplicationFeature<T>>>,
|
||||||
application: Arc<Box<dyn Application>>,
|
application: Arc<A>,
|
||||||
}
|
}
|
||||||
|
|
||||||
#[async_trait]
|
#[async_trait]
|
||||||
impl<T: Topology + std::fmt::Debug> Interpret<T> for ApplicationInterpret<T> {
|
impl<A: Application, T: Topology + std::fmt::Debug> Interpret<T> for ApplicationInterpret<A, T> {
|
||||||
async fn execute(
|
async fn execute(
|
||||||
&self,
|
&self,
|
||||||
_inventory: &Inventory,
|
_inventory: &Inventory,
|
||||||
|
|||||||
@@ -19,23 +19,30 @@ use crate::{
|
|||||||
use super::{Application, ApplicationFeature, ApplicationInterpret, HelmPackage, OCICompliant};
|
use super::{Application, ApplicationFeature, ApplicationInterpret, HelmPackage, OCICompliant};
|
||||||
|
|
||||||
#[derive(Debug, Serialize, Clone)]
|
#[derive(Debug, Serialize, Clone)]
|
||||||
pub struct RustWebappScore<T: Topology + Clone + Serialize> {
|
pub struct ApplicationScore<A: Application + Serialize, T: Topology + Clone + Serialize>
|
||||||
pub name: String,
|
where
|
||||||
pub domain: Url,
|
Arc<A>: Serialize + Clone,
|
||||||
|
{
|
||||||
pub features: Vec<Box<dyn ApplicationFeature<T>>>,
|
pub features: Vec<Box<dyn ApplicationFeature<T>>>,
|
||||||
pub application: RustWebapp,
|
pub application: Arc<A>,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<T: Topology + std::fmt::Debug + Clone + Serialize + 'static> Score<T> for RustWebappScore<T> {
|
impl<
|
||||||
|
A: Application + Serialize + Clone + 'static,
|
||||||
|
T: Topology + std::fmt::Debug + Clone + Serialize + 'static,
|
||||||
|
> Score<T> for ApplicationScore<A, T>
|
||||||
|
where
|
||||||
|
Arc<A>: Serialize,
|
||||||
|
{
|
||||||
fn create_interpret(&self) -> Box<dyn crate::interpret::Interpret<T>> {
|
fn create_interpret(&self) -> Box<dyn crate::interpret::Interpret<T>> {
|
||||||
Box::new(ApplicationInterpret {
|
Box::new(ApplicationInterpret {
|
||||||
features: self.features.clone(),
|
features: self.features.clone(),
|
||||||
application: Arc::new(Box::new(self.application.clone())),
|
application: self.application.clone(),
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
fn name(&self) -> String {
|
fn name(&self) -> String {
|
||||||
format!("{}-RustWebapp", self.name)
|
format!("Application: {}", self.application.name())
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -47,6 +54,7 @@ pub enum RustWebFramework {
|
|||||||
#[derive(Debug, Clone, Serialize)]
|
#[derive(Debug, Clone, Serialize)]
|
||||||
pub struct RustWebapp {
|
pub struct RustWebapp {
|
||||||
pub name: String,
|
pub name: String,
|
||||||
|
pub domain: Url,
|
||||||
/// The path to the root of the Rust project to be containerized.
|
/// The path to the root of the Rust project to be containerized.
|
||||||
pub project_root: PathBuf,
|
pub project_root: PathBuf,
|
||||||
pub framework: Option<RustWebFramework>,
|
pub framework: Option<RustWebFramework>,
|
||||||
|
|||||||
@@ -220,7 +220,6 @@ impl<T: Topology + HelmCommand> Interpret<T> for HelmChartInterpret {
|
|||||||
yaml_path,
|
yaml_path,
|
||||||
Some(&helm_options),
|
Some(&helm_options),
|
||||||
);
|
);
|
||||||
|
|
||||||
let status = match res {
|
let status = match res {
|
||||||
Ok(status) => status,
|
Ok(status) => status,
|
||||||
Err(err) => return Err(InterpretError::new(err.to_string())),
|
Err(err) => return Err(InterpretError::new(err.to_string())),
|
||||||
|
|||||||
@@ -1,14 +1,22 @@
|
|||||||
|
use std::any::Any;
|
||||||
|
|
||||||
use async_trait::async_trait;
|
use async_trait::async_trait;
|
||||||
use serde::Serialize;
|
use serde::Serialize;
|
||||||
use serde_yaml::{Mapping, Value};
|
use serde_yaml::{Mapping, Value};
|
||||||
|
|
||||||
use crate::{
|
use crate::{
|
||||||
interpret::{InterpretError, Outcome},
|
interpret::{InterpretError, Outcome},
|
||||||
modules::monitoring::kube_prometheus::{
|
modules::monitoring::{
|
||||||
prometheus::{Prometheus, PrometheusReceiver},
|
kube_prometheus::{
|
||||||
types::{AlertChannelConfig, AlertManagerChannelConfig},
|
prometheus::{KubePrometheus, KubePrometheusReceiver},
|
||||||
|
types::{AlertChannelConfig, AlertManagerChannelConfig},
|
||||||
|
},
|
||||||
|
prometheus::prometheus::{Prometheus, PrometheusReceiver},
|
||||||
|
},
|
||||||
|
topology::{
|
||||||
|
Url,
|
||||||
|
oberservability::monitoring::{AlertReceiver, AlertSender},
|
||||||
},
|
},
|
||||||
topology::{Url, oberservability::monitoring::AlertReceiver},
|
|
||||||
};
|
};
|
||||||
|
|
||||||
#[derive(Debug, Clone, Serialize)]
|
#[derive(Debug, Clone, Serialize)]
|
||||||
@@ -37,6 +45,26 @@ impl PrometheusReceiver for DiscordWebhook {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[async_trait]
|
||||||
|
impl AlertReceiver<KubePrometheus> for DiscordWebhook {
|
||||||
|
async fn install(&self, sender: &KubePrometheus) -> Result<Outcome, InterpretError> {
|
||||||
|
sender.install_receiver(self).await
|
||||||
|
}
|
||||||
|
fn clone_box(&self) -> Box<dyn AlertReceiver<KubePrometheus>> {
|
||||||
|
Box::new(self.clone())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[async_trait]
|
||||||
|
impl KubePrometheusReceiver for DiscordWebhook {
|
||||||
|
fn name(&self) -> String {
|
||||||
|
self.name.clone()
|
||||||
|
}
|
||||||
|
async fn configure_receiver(&self) -> AlertManagerChannelConfig {
|
||||||
|
self.get_config().await
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
#[async_trait]
|
#[async_trait]
|
||||||
impl AlertChannelConfig for DiscordWebhook {
|
impl AlertChannelConfig for DiscordWebhook {
|
||||||
async fn get_config(&self) -> AlertManagerChannelConfig {
|
async fn get_config(&self) -> AlertManagerChannelConfig {
|
||||||
|
|||||||
@@ -4,9 +4,12 @@ use serde_yaml::{Mapping, Value};
|
|||||||
|
|
||||||
use crate::{
|
use crate::{
|
||||||
interpret::{InterpretError, Outcome},
|
interpret::{InterpretError, Outcome},
|
||||||
modules::monitoring::kube_prometheus::{
|
modules::monitoring::{
|
||||||
prometheus::{Prometheus, PrometheusReceiver},
|
kube_prometheus::{
|
||||||
types::{AlertChannelConfig, AlertManagerChannelConfig},
|
prometheus::{KubePrometheus, KubePrometheusReceiver},
|
||||||
|
types::{AlertChannelConfig, AlertManagerChannelConfig},
|
||||||
|
},
|
||||||
|
prometheus::prometheus::{Prometheus, PrometheusReceiver},
|
||||||
},
|
},
|
||||||
topology::{Url, oberservability::monitoring::AlertReceiver},
|
topology::{Url, oberservability::monitoring::AlertReceiver},
|
||||||
};
|
};
|
||||||
@@ -36,6 +39,25 @@ impl PrometheusReceiver for WebhookReceiver {
|
|||||||
self.get_config().await
|
self.get_config().await
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
#[async_trait]
|
||||||
|
impl AlertReceiver<KubePrometheus> for WebhookReceiver {
|
||||||
|
async fn install(&self, sender: &KubePrometheus) -> Result<Outcome, InterpretError> {
|
||||||
|
sender.install_receiver(self).await
|
||||||
|
}
|
||||||
|
fn clone_box(&self) -> Box<dyn AlertReceiver<KubePrometheus>> {
|
||||||
|
Box::new(self.clone())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[async_trait]
|
||||||
|
impl KubePrometheusReceiver for WebhookReceiver {
|
||||||
|
fn name(&self) -> String {
|
||||||
|
self.name.clone()
|
||||||
|
}
|
||||||
|
async fn configure_receiver(&self) -> AlertManagerChannelConfig {
|
||||||
|
self.get_config().await
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
#[async_trait]
|
#[async_trait]
|
||||||
impl AlertChannelConfig for WebhookReceiver {
|
impl AlertChannelConfig for WebhookReceiver {
|
||||||
|
|||||||
@@ -5,13 +5,26 @@ use serde::Serialize;
|
|||||||
|
|
||||||
use crate::{
|
use crate::{
|
||||||
interpret::{InterpretError, Outcome},
|
interpret::{InterpretError, Outcome},
|
||||||
modules::monitoring::kube_prometheus::{
|
modules::monitoring::{
|
||||||
prometheus::{Prometheus, PrometheusRule},
|
kube_prometheus::{
|
||||||
types::{AlertGroup, AlertManagerAdditionalPromRules},
|
prometheus::{KubePrometheus, KubePrometheusRule},
|
||||||
|
types::{AlertGroup, AlertManagerAdditionalPromRules},
|
||||||
|
},
|
||||||
|
prometheus::prometheus::{Prometheus, PrometheusRule},
|
||||||
},
|
},
|
||||||
topology::oberservability::monitoring::AlertRule,
|
topology::oberservability::monitoring::AlertRule,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
#[async_trait]
|
||||||
|
impl AlertRule<KubePrometheus> for AlertManagerRuleGroup {
|
||||||
|
async fn install(&self, sender: &KubePrometheus) -> Result<Outcome, InterpretError> {
|
||||||
|
sender.install_rule(&self).await
|
||||||
|
}
|
||||||
|
fn clone_box(&self) -> Box<dyn AlertRule<KubePrometheus>> {
|
||||||
|
Box::new(self.clone())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
#[async_trait]
|
#[async_trait]
|
||||||
impl AlertRule<Prometheus> for AlertManagerRuleGroup {
|
impl AlertRule<Prometheus> for AlertManagerRuleGroup {
|
||||||
async fn install(&self, sender: &Prometheus) -> Result<Outcome, InterpretError> {
|
async fn install(&self, sender: &Prometheus) -> Result<Outcome, InterpretError> {
|
||||||
@@ -41,6 +54,25 @@ impl PrometheusRule for AlertManagerRuleGroup {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
#[async_trait]
|
||||||
|
impl KubePrometheusRule for AlertManagerRuleGroup {
|
||||||
|
fn name(&self) -> String {
|
||||||
|
self.name.clone()
|
||||||
|
}
|
||||||
|
async fn configure_rule(&self) -> AlertManagerAdditionalPromRules {
|
||||||
|
let mut additional_prom_rules = BTreeMap::new();
|
||||||
|
|
||||||
|
additional_prom_rules.insert(
|
||||||
|
self.name.clone(),
|
||||||
|
AlertGroup {
|
||||||
|
groups: vec![self.clone()],
|
||||||
|
},
|
||||||
|
);
|
||||||
|
AlertManagerAdditionalPromRules {
|
||||||
|
rules: additional_prom_rules,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
impl AlertManagerRuleGroup {
|
impl AlertManagerRuleGroup {
|
||||||
pub fn new(name: &str, rules: Vec<PrometheusAlertRule>) -> AlertManagerRuleGroup {
|
pub fn new(name: &str, rules: Vec<PrometheusAlertRule>) -> AlertManagerRuleGroup {
|
||||||
|
|||||||
@@ -0,0 +1,45 @@
|
|||||||
|
use std::sync::{Arc, Mutex};
|
||||||
|
|
||||||
|
use log::debug;
|
||||||
|
use serde::Serialize;
|
||||||
|
|
||||||
|
use crate::{
|
||||||
|
modules::monitoring::{
|
||||||
|
kube_prometheus::types::ServiceMonitor,
|
||||||
|
prometheus::{prometheus::Prometheus, prometheus_config::HelmPrometheusConfig},
|
||||||
|
},
|
||||||
|
score::Score,
|
||||||
|
topology::{
|
||||||
|
oberservability::monitoring::{AlertReceiver, AlertRule, AlertingInterpret}, tenant::TenantManager, HelmCommand, K8sclient, Topology
|
||||||
|
},
|
||||||
|
};
|
||||||
|
|
||||||
|
#[derive(Clone, Debug, Serialize)]
|
||||||
|
pub struct ApplicationPrometheusMonitoringScore {
|
||||||
|
pub namespace: String,
|
||||||
|
pub receivers: Vec<Box<dyn AlertReceiver<Prometheus>>>,
|
||||||
|
pub rules: Vec<Box<dyn AlertRule<Prometheus>>>,
|
||||||
|
pub service_monitors: Vec<ServiceMonitor>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<T: Topology + HelmCommand + TenantManager> Score<T> for ApplicationPrometheusMonitoringScore {
|
||||||
|
fn create_interpret(&self) -> Box<dyn crate::interpret::Interpret<T>> {
|
||||||
|
let config = Arc::new(Mutex::new(HelmPrometheusConfig::new()));
|
||||||
|
config
|
||||||
|
.try_lock()
|
||||||
|
.expect("couldn't lock config")
|
||||||
|
.additional_service_monitors = self.service_monitors.clone();
|
||||||
|
let ns = self.namespace.clone();
|
||||||
|
|
||||||
|
config.try_lock().expect("couldn't lock config").namespace = Some(ns.clone());
|
||||||
|
debug!("set namespace to {}", ns);
|
||||||
|
Box::new(AlertingInterpret {
|
||||||
|
sender: Prometheus { config },
|
||||||
|
receivers: self.receivers.clone(),
|
||||||
|
rules: self.rules.clone(),
|
||||||
|
})
|
||||||
|
}
|
||||||
|
fn name(&self) -> String {
|
||||||
|
"ApplicationPrometheusMonitoringScore".to_string()
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -0,0 +1 @@
|
|||||||
|
pub mod k8s_application_monitoring_score;
|
||||||
64
harmony/src/modules/monitoring/grafana/helm/helm_grafana.rs
Normal file
64
harmony/src/modules/monitoring/grafana/helm/helm_grafana.rs
Normal file
@@ -0,0 +1,64 @@
|
|||||||
|
use std::str::FromStr;
|
||||||
|
use non_blank_string_rs::NonBlankString;
|
||||||
|
use crate::modules::helm::chart::HelmChartScore;
|
||||||
|
|
||||||
|
pub fn grafana_helm_chart_score(ns: &str) -> HelmChartScore {
|
||||||
|
let values = format!(
|
||||||
|
r#"
|
||||||
|
rbac:
|
||||||
|
namespaced: true
|
||||||
|
|
||||||
|
datasources:
|
||||||
|
datasources.yaml:
|
||||||
|
apiVersion: 1
|
||||||
|
datasources:
|
||||||
|
- name: Prometheus
|
||||||
|
type: prometheus
|
||||||
|
access: proxy
|
||||||
|
url: http://prometheus-server.{ns}.svc.cluster.local
|
||||||
|
isDefault: true
|
||||||
|
|
||||||
|
dashboardProviders:
|
||||||
|
dashboardproviders.yaml:
|
||||||
|
apiVersion: 1
|
||||||
|
providers:
|
||||||
|
- name: 'default'
|
||||||
|
orgId: 1
|
||||||
|
folder: ''
|
||||||
|
type: file
|
||||||
|
disableDeletion: false
|
||||||
|
updateIntervalSeconds: 10
|
||||||
|
allowUiUpdates: true
|
||||||
|
editable: true
|
||||||
|
options:
|
||||||
|
path: /var/lib/grafana/dashboards/default
|
||||||
|
|
||||||
|
dashboards:
|
||||||
|
default:
|
||||||
|
compute-usage:
|
||||||
|
url: https://grafana.com/api/dashboards/315/revisions/1/download
|
||||||
|
pod-health:
|
||||||
|
url: https://grafana.com/api/dashboards/15758/revisions/1/download
|
||||||
|
namespace-resources:
|
||||||
|
url: https://grafana.com/api/dashboards/9809/revisions/1/download
|
||||||
|
namespace-resources-vs-quotas:
|
||||||
|
url: https://grafana.com/api/dashboards/17044/revisions/1/download
|
||||||
|
persistent-volume-usage:
|
||||||
|
url: https://grafana.com/api/dashboards/7685/revisions/1/download
|
||||||
|
"#,
|
||||||
|
ns = ns
|
||||||
|
);
|
||||||
|
|
||||||
|
HelmChartScore {
|
||||||
|
namespace: Some(NonBlankString::from_str(ns).unwrap()),
|
||||||
|
release_name: NonBlankString::from_str("grafana").unwrap(),
|
||||||
|
chart_name: NonBlankString::from_str("oci://ghcr.io/grafana/helm-charts/grafana").unwrap(),
|
||||||
|
chart_version: None,
|
||||||
|
values_overrides: None,
|
||||||
|
values_yaml: Some(values),
|
||||||
|
create_namespace: true,
|
||||||
|
install_only: false,
|
||||||
|
repository: None,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
1
harmony/src/modules/monitoring/grafana/helm/mod.rs
Normal file
1
harmony/src/modules/monitoring/grafana/helm/mod.rs
Normal file
@@ -0,0 +1 @@
|
|||||||
|
pub mod helm_grafana;
|
||||||
1
harmony/src/modules/monitoring/grafana/mod.rs
Normal file
1
harmony/src/modules/monitoring/grafana/mod.rs
Normal file
@@ -0,0 +1 @@
|
|||||||
|
pub mod helm;
|
||||||
@@ -68,6 +68,9 @@ pub fn kube_prometheus_helm_chart_score(
|
|||||||
|
|
||||||
let mut values = format!(
|
let mut values = format!(
|
||||||
r#"
|
r#"
|
||||||
|
global:
|
||||||
|
rbac:
|
||||||
|
create: false
|
||||||
prometheus:
|
prometheus:
|
||||||
enabled: {prometheus}
|
enabled: {prometheus}
|
||||||
prometheusSpec:
|
prometheusSpec:
|
||||||
@@ -242,7 +245,7 @@ prometheus-node-exporter:
|
|||||||
cpu: 200m
|
cpu: 200m
|
||||||
memory: 250Mi
|
memory: 250Mi
|
||||||
prometheusOperator:
|
prometheusOperator:
|
||||||
enabled: {prometheus_operator}
|
enabled: false
|
||||||
resources:
|
resources:
|
||||||
requests:
|
requests:
|
||||||
cpu: 100m
|
cpu: 100m
|
||||||
|
|||||||
@@ -2,7 +2,7 @@ use std::sync::{Arc, Mutex};
|
|||||||
|
|
||||||
use serde::Serialize;
|
use serde::Serialize;
|
||||||
|
|
||||||
use super::{helm::config::KubePrometheusConfig, prometheus::Prometheus};
|
use super::{helm::config::KubePrometheusConfig, prometheus::KubePrometheus};
|
||||||
use crate::{
|
use crate::{
|
||||||
modules::monitoring::kube_prometheus::types::ServiceMonitor,
|
modules::monitoring::kube_prometheus::types::ServiceMonitor,
|
||||||
score::Score,
|
score::Score,
|
||||||
@@ -15,8 +15,8 @@ use crate::{
|
|||||||
|
|
||||||
#[derive(Clone, Debug, Serialize)]
|
#[derive(Clone, Debug, Serialize)]
|
||||||
pub struct HelmPrometheusAlertingScore {
|
pub struct HelmPrometheusAlertingScore {
|
||||||
pub receivers: Vec<Box<dyn AlertReceiver<Prometheus>>>,
|
pub receivers: Vec<Box<dyn AlertReceiver<KubePrometheus>>>,
|
||||||
pub rules: Vec<Box<dyn AlertRule<Prometheus>>>,
|
pub rules: Vec<Box<dyn AlertRule<KubePrometheus>>>,
|
||||||
pub service_monitors: Vec<ServiceMonitor>,
|
pub service_monitors: Vec<ServiceMonitor>,
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -28,7 +28,7 @@ impl<T: Topology + HelmCommand + TenantManager> Score<T> for HelmPrometheusAlert
|
|||||||
.expect("couldn't lock config")
|
.expect("couldn't lock config")
|
||||||
.additional_service_monitors = self.service_monitors.clone();
|
.additional_service_monitors = self.service_monitors.clone();
|
||||||
Box::new(AlertingInterpret {
|
Box::new(AlertingInterpret {
|
||||||
sender: Prometheus::new(),
|
sender: KubePrometheus::new(),
|
||||||
receivers: self.receivers.clone(),
|
receivers: self.receivers.clone(),
|
||||||
rules: self.rules.clone(),
|
rules: self.rules.clone(),
|
||||||
})
|
})
|
||||||
|
|||||||
@@ -10,7 +10,7 @@ use crate::{
|
|||||||
modules::monitoring::alert_rule::prometheus_alert_rule::AlertManagerRuleGroup,
|
modules::monitoring::alert_rule::prometheus_alert_rule::AlertManagerRuleGroup,
|
||||||
score,
|
score,
|
||||||
topology::{
|
topology::{
|
||||||
HelmCommand, K8sAnywhereTopology, Topology,
|
HelmCommand, Topology,
|
||||||
installable::Installable,
|
installable::Installable,
|
||||||
oberservability::monitoring::{AlertReceiver, AlertRule, AlertSender},
|
oberservability::monitoring::{AlertReceiver, AlertRule, AlertSender},
|
||||||
tenant::TenantManager,
|
tenant::TenantManager,
|
||||||
@@ -27,14 +27,14 @@ use super::{
|
|||||||
};
|
};
|
||||||
|
|
||||||
#[async_trait]
|
#[async_trait]
|
||||||
impl AlertSender for Prometheus {
|
impl AlertSender for KubePrometheus {
|
||||||
fn name(&self) -> String {
|
fn name(&self) -> String {
|
||||||
"HelmKubePrometheus".to_string()
|
"HelmKubePrometheus".to_string()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#[async_trait]
|
#[async_trait]
|
||||||
impl<T: Topology + HelmCommand + TenantManager> Installable<T> for Prometheus {
|
impl<T: Topology + HelmCommand + TenantManager> Installable<T> for KubePrometheus {
|
||||||
async fn configure(&self, _inventory: &Inventory, topology: &T) -> Result<(), InterpretError> {
|
async fn configure(&self, _inventory: &Inventory, topology: &T) -> Result<(), InterpretError> {
|
||||||
self.configure_with_topology(topology).await;
|
self.configure_with_topology(topology).await;
|
||||||
Ok(())
|
Ok(())
|
||||||
@@ -51,11 +51,11 @@ impl<T: Topology + HelmCommand + TenantManager> Installable<T> for Prometheus {
|
|||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Debug)]
|
#[derive(Debug)]
|
||||||
pub struct Prometheus {
|
pub struct KubePrometheus {
|
||||||
pub config: Arc<Mutex<KubePrometheusConfig>>,
|
pub config: Arc<Mutex<KubePrometheusConfig>>,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Prometheus {
|
impl KubePrometheus {
|
||||||
pub fn new() -> Self {
|
pub fn new() -> Self {
|
||||||
Self {
|
Self {
|
||||||
config: Arc::new(Mutex::new(KubePrometheusConfig::new())),
|
config: Arc::new(Mutex::new(KubePrometheusConfig::new())),
|
||||||
@@ -75,7 +75,7 @@ impl Prometheus {
|
|||||||
|
|
||||||
pub async fn install_receiver(
|
pub async fn install_receiver(
|
||||||
&self,
|
&self,
|
||||||
prometheus_receiver: &dyn PrometheusReceiver,
|
prometheus_receiver: &dyn KubePrometheusReceiver,
|
||||||
) -> Result<Outcome, InterpretError> {
|
) -> Result<Outcome, InterpretError> {
|
||||||
let prom_receiver = prometheus_receiver.configure_receiver().await;
|
let prom_receiver = prometheus_receiver.configure_receiver().await;
|
||||||
debug!(
|
debug!(
|
||||||
@@ -120,12 +120,12 @@ impl Prometheus {
|
|||||||
}
|
}
|
||||||
|
|
||||||
#[async_trait]
|
#[async_trait]
|
||||||
pub trait PrometheusReceiver: Send + Sync + std::fmt::Debug {
|
pub trait KubePrometheusReceiver: Send + Sync + std::fmt::Debug {
|
||||||
fn name(&self) -> String;
|
fn name(&self) -> String;
|
||||||
async fn configure_receiver(&self) -> AlertManagerChannelConfig;
|
async fn configure_receiver(&self) -> AlertManagerChannelConfig;
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Serialize for Box<dyn AlertReceiver<Prometheus>> {
|
impl Serialize for Box<dyn AlertReceiver<KubePrometheus>> {
|
||||||
fn serialize<S>(&self, _serializer: S) -> Result<S::Ok, S::Error>
|
fn serialize<S>(&self, _serializer: S) -> Result<S::Ok, S::Error>
|
||||||
where
|
where
|
||||||
S: serde::Serializer,
|
S: serde::Serializer,
|
||||||
@@ -134,19 +134,19 @@ impl Serialize for Box<dyn AlertReceiver<Prometheus>> {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Clone for Box<dyn AlertReceiver<Prometheus>> {
|
impl Clone for Box<dyn AlertReceiver<KubePrometheus>> {
|
||||||
fn clone(&self) -> Self {
|
fn clone(&self) -> Self {
|
||||||
self.clone_box()
|
self.clone_box()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#[async_trait]
|
#[async_trait]
|
||||||
pub trait PrometheusRule: Send + Sync + std::fmt::Debug {
|
pub trait KubePrometheusRule: Send + Sync + std::fmt::Debug {
|
||||||
fn name(&self) -> String;
|
fn name(&self) -> String;
|
||||||
async fn configure_rule(&self) -> AlertManagerAdditionalPromRules;
|
async fn configure_rule(&self) -> AlertManagerAdditionalPromRules;
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Serialize for Box<dyn AlertRule<Prometheus>> {
|
impl Serialize for Box<dyn AlertRule<KubePrometheus>> {
|
||||||
fn serialize<S>(&self, _serializer: S) -> Result<S::Ok, S::Error>
|
fn serialize<S>(&self, _serializer: S) -> Result<S::Ok, S::Error>
|
||||||
where
|
where
|
||||||
S: serde::Serializer,
|
S: serde::Serializer,
|
||||||
@@ -155,7 +155,7 @@ impl Serialize for Box<dyn AlertRule<Prometheus>> {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Clone for Box<dyn AlertRule<Prometheus>> {
|
impl Clone for Box<dyn AlertRule<KubePrometheus>> {
|
||||||
fn clone(&self) -> Self {
|
fn clone(&self) -> Self {
|
||||||
self.clone_box()
|
self.clone_box()
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -211,8 +211,10 @@ pub struct Selector {
|
|||||||
pub struct ServiceMonitor {
|
pub struct ServiceMonitor {
|
||||||
pub name: String,
|
pub name: String,
|
||||||
|
|
||||||
|
pub namespace: String,
|
||||||
|
|
||||||
// # Additional labels to set used for the ServiceMonitorSelector. Together with standard labels from the chart
|
// # Additional labels to set used for the ServiceMonitorSelector. Together with standard labels from the chart
|
||||||
pub additional_labels: Option<Mapping>,
|
pub additional_labels: Option<HashMap<String, String>>,
|
||||||
|
|
||||||
// # Service label for use in assembling a job name of the form <label value>-<port>
|
// # Service label for use in assembling a job name of the form <label value>-<port>
|
||||||
// # If no label is specified, the service name is used.
|
// # If no label is specified, the service name is used.
|
||||||
@@ -240,7 +242,7 @@ pub struct ServiceMonitor {
|
|||||||
// any: bool,
|
// any: bool,
|
||||||
// # Explicit list of namespace names to select
|
// # Explicit list of namespace names to select
|
||||||
// matchNames: Vec,
|
// matchNames: Vec,
|
||||||
pub namespace_selector: Option<Mapping>,
|
pub namespace_selector: Option<NamespaceSelector>,
|
||||||
|
|
||||||
// # Endpoints of the selected service to be monitored
|
// # Endpoints of the selected service to be monitored
|
||||||
pub endpoints: Vec<ServiceMonitorEndpoint>,
|
pub endpoints: Vec<ServiceMonitorEndpoint>,
|
||||||
@@ -250,10 +252,18 @@ pub struct ServiceMonitor {
|
|||||||
pub fallback_scrape_protocol: Option<String>,
|
pub fallback_scrape_protocol: Option<String>,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[derive(Debug, Serialize, Clone)]
|
||||||
|
#[serde(rename_all = "camelCase")]
|
||||||
|
pub struct NamespaceSelector {
|
||||||
|
pub any: bool,
|
||||||
|
pub match_names: Vec<String>,
|
||||||
|
}
|
||||||
|
|
||||||
impl Default for ServiceMonitor {
|
impl Default for ServiceMonitor {
|
||||||
fn default() -> Self {
|
fn default() -> Self {
|
||||||
Self {
|
Self {
|
||||||
name: Default::default(),
|
name: Default::default(),
|
||||||
|
namespace: Default::default(),
|
||||||
additional_labels: Default::default(),
|
additional_labels: Default::default(),
|
||||||
job_label: Default::default(),
|
job_label: Default::default(),
|
||||||
target_labels: Default::default(),
|
target_labels: Default::default(),
|
||||||
|
|||||||
@@ -1,4 +1,7 @@
|
|||||||
pub mod alert_channel;
|
pub mod alert_channel;
|
||||||
pub mod alert_rule;
|
pub mod alert_rule;
|
||||||
|
pub mod application_monitoring;
|
||||||
|
pub mod grafana;
|
||||||
pub mod kube_prometheus;
|
pub mod kube_prometheus;
|
||||||
pub mod ntfy;
|
pub mod ntfy;
|
||||||
|
pub mod prometheus;
|
||||||
|
|||||||
2
harmony/src/modules/monitoring/prometheus/helm/mod.rs
Normal file
2
harmony/src/modules/monitoring/prometheus/helm/mod.rs
Normal file
@@ -0,0 +1,2 @@
|
|||||||
|
pub mod prometheus_helm;
|
||||||
|
pub mod types;
|
||||||
@@ -0,0 +1,155 @@
|
|||||||
|
use std::collections::BTreeMap;
|
||||||
|
use std::str::FromStr;
|
||||||
|
use std::sync::{Arc, Mutex};
|
||||||
|
|
||||||
|
use log::debug;
|
||||||
|
use non_blank_string_rs::NonBlankString;
|
||||||
|
use serde_yaml::{Mapping, Value};
|
||||||
|
|
||||||
|
use crate::modules::helm::chart::HelmChartScore;
|
||||||
|
use crate::modules::monitoring::kube_prometheus::types::{
|
||||||
|
AlertGroup, AlertManager, AlertManagerConfig, AlertManagerRoute, AlertManagerSpec,
|
||||||
|
ConfigReloader, Limits, Requests, Resources,
|
||||||
|
};
|
||||||
|
use crate::modules::monitoring::prometheus::helm::types::{
|
||||||
|
AlertFile, EnabledConfig, KsmRbacConfig, KubeStateMetricsConfig, LabelSelector, Monitor,
|
||||||
|
Prometheus, PrometheusHelmValues, RbacConfig, ServerConfig, ServerRbacConfig,
|
||||||
|
};
|
||||||
|
use crate::modules::monitoring::prometheus::prometheus_config::HelmPrometheusConfig;
|
||||||
|
|
||||||
|
pub fn prometheus_helm_chart_score(config: Arc<Mutex<HelmPrometheusConfig>>) -> HelmChartScore {
|
||||||
|
let config = config.lock().unwrap();
|
||||||
|
let ns = config.namespace.clone().unwrap();
|
||||||
|
|
||||||
|
let rbac_config = RbacConfig { create: false };
|
||||||
|
|
||||||
|
let ksm_config = KubeStateMetricsConfig {
|
||||||
|
enabled: true,
|
||||||
|
rbac: KsmRbacConfig {
|
||||||
|
use_cluster_role: false,
|
||||||
|
},
|
||||||
|
prometheus: Prometheus {
|
||||||
|
monitor: Monitor { enabled: true },
|
||||||
|
},
|
||||||
|
};
|
||||||
|
|
||||||
|
let mut selector_labels = BTreeMap::new();
|
||||||
|
selector_labels.insert("kubernetes.io/metadata.name".to_string(), ns.clone());
|
||||||
|
let mut kube_state_metrics_labels = BTreeMap::new();
|
||||||
|
kube_state_metrics_labels.insert(
|
||||||
|
"app.kubernetes.io/name".to_string(),
|
||||||
|
"kube-state-metrics".to_string(),
|
||||||
|
);
|
||||||
|
let selector = LabelSelector {
|
||||||
|
match_labels: selector_labels,
|
||||||
|
};
|
||||||
|
|
||||||
|
let server_config = ServerConfig {
|
||||||
|
namespaces: vec![ns.clone()],
|
||||||
|
use_existing_cluster_role_name: false,
|
||||||
|
};
|
||||||
|
|
||||||
|
let mut null_receiver = Mapping::new();
|
||||||
|
null_receiver.insert(
|
||||||
|
Value::String("receiver".to_string()),
|
||||||
|
Value::String("default-receiver".to_string()),
|
||||||
|
);
|
||||||
|
null_receiver.insert(
|
||||||
|
Value::String("matchers".to_string()),
|
||||||
|
Value::Sequence(vec![Value::String("alertname!=Watchdog".to_string())]),
|
||||||
|
);
|
||||||
|
null_receiver.insert(Value::String("continue".to_string()), Value::Bool(true));
|
||||||
|
|
||||||
|
let mut alert_manager_channel_config = AlertManagerConfig {
|
||||||
|
global: Mapping::new(),
|
||||||
|
route: AlertManagerRoute {
|
||||||
|
routes: vec![Value::Mapping(null_receiver)],
|
||||||
|
},
|
||||||
|
receivers: vec![serde_yaml::from_str("name: 'default-receiver'").unwrap()],
|
||||||
|
};
|
||||||
|
for receiver in config.alert_receiver_configs.iter() {
|
||||||
|
if let Some(global) = receiver.channel_global_config.clone() {
|
||||||
|
alert_manager_channel_config
|
||||||
|
.global
|
||||||
|
.insert(global.0, global.1);
|
||||||
|
}
|
||||||
|
alert_manager_channel_config
|
||||||
|
.route
|
||||||
|
.routes
|
||||||
|
.push(receiver.channel_route.clone());
|
||||||
|
alert_manager_channel_config
|
||||||
|
.receivers
|
||||||
|
.push(receiver.channel_receiver.clone());
|
||||||
|
}
|
||||||
|
let alert_manager_values = AlertManager {
|
||||||
|
enabled: config.alert_manager,
|
||||||
|
config: alert_manager_channel_config,
|
||||||
|
alertmanager_spec: AlertManagerSpec {
|
||||||
|
resources: Resources {
|
||||||
|
limits: Limits {
|
||||||
|
memory: "100Mi".to_string(),
|
||||||
|
cpu: "100m".to_string(),
|
||||||
|
},
|
||||||
|
requests: Requests {
|
||||||
|
memory: "100Mi".to_string(),
|
||||||
|
cpu: "100m".to_string(),
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
init_config_reloader: ConfigReloader {
|
||||||
|
resources: Resources {
|
||||||
|
limits: Limits {
|
||||||
|
memory: "100Mi".to_string(),
|
||||||
|
cpu: "100m".to_string(),
|
||||||
|
},
|
||||||
|
requests: Requests {
|
||||||
|
memory: "100Mi".to_string(),
|
||||||
|
cpu: "100m".to_string(),
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
};
|
||||||
|
|
||||||
|
let mut result: BTreeMap<String, AlertFile> = BTreeMap::new();
|
||||||
|
for rule in config.alert_rules.clone().iter() {
|
||||||
|
for (name, group) in &rule.rules {
|
||||||
|
result
|
||||||
|
.entry("alerting_rules.yml".to_string())
|
||||||
|
.and_modify(|e| e.groups.extend(group.groups.clone()))
|
||||||
|
.or_insert(AlertFile {
|
||||||
|
groups: group.groups.clone(),
|
||||||
|
});
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
let final_values = PrometheusHelmValues {
|
||||||
|
rbac: rbac_config,
|
||||||
|
kube_state_metrics: ksm_config,
|
||||||
|
server: server_config,
|
||||||
|
alertmanager: alert_manager_values,
|
||||||
|
server_files: result,
|
||||||
|
additional_service_monitors: config.additional_service_monitors.clone(),
|
||||||
|
prometheus_node_exporter: EnabledConfig { enabled: false },
|
||||||
|
prometheus_pushgateway: EnabledConfig { enabled: false },
|
||||||
|
};
|
||||||
|
|
||||||
|
let values_yaml =
|
||||||
|
serde_yaml::to_string(&final_values).expect("Failed to serialize final Helm values");
|
||||||
|
|
||||||
|
debug!("full values.yaml: \n{}", values_yaml);
|
||||||
|
|
||||||
|
HelmChartScore {
|
||||||
|
namespace: Some(NonBlankString::from_str(&ns).unwrap()),
|
||||||
|
release_name: NonBlankString::from_str("prometheus").unwrap(),
|
||||||
|
chart_name: NonBlankString::from_str(
|
||||||
|
"oci://ghcr.io/prometheus-community/charts/prometheus",
|
||||||
|
)
|
||||||
|
.unwrap(),
|
||||||
|
chart_version: None,
|
||||||
|
values_overrides: None,
|
||||||
|
values_yaml: Some(values_yaml),
|
||||||
|
create_namespace: true,
|
||||||
|
install_only: true,
|
||||||
|
repository: None,
|
||||||
|
}
|
||||||
|
}
|
||||||
94
harmony/src/modules/monitoring/prometheus/helm/types.rs
Normal file
94
harmony/src/modules/monitoring/prometheus/helm/types.rs
Normal file
@@ -0,0 +1,94 @@
|
|||||||
|
use std::collections::BTreeMap;
|
||||||
|
|
||||||
|
use serde::Serialize;
|
||||||
|
|
||||||
|
use crate::modules::monitoring::{alert_rule::prometheus_alert_rule::AlertManagerRuleGroup, kube_prometheus::types::{
|
||||||
|
AlertGroup, AlertManager, AlertManagerAdditionalPromRules, AlertManagerValues, ServiceMonitor
|
||||||
|
}};
|
||||||
|
|
||||||
|
#[derive(Debug, Clone, Serialize)]
|
||||||
|
pub struct RuleFilesConfig {
|
||||||
|
#[serde(rename = "ruleFiles")]
|
||||||
|
pub files: BTreeMap<String, AlertGroup>,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Serialize, Debug)]
|
||||||
|
#[serde(rename_all = "camelCase")]
|
||||||
|
pub struct PrometheusHelmValues {
|
||||||
|
pub rbac: RbacConfig,
|
||||||
|
#[serde(rename = "kube-state-metrics")]
|
||||||
|
pub kube_state_metrics: KubeStateMetricsConfig,
|
||||||
|
pub server: ServerConfig,
|
||||||
|
pub alertmanager: AlertManager, // You already have this
|
||||||
|
#[serde(rename = "serverFiles")]
|
||||||
|
pub server_files: BTreeMap<String, AlertFile>, // You already have this
|
||||||
|
pub additional_service_monitors: Vec<ServiceMonitor>, // You already have this
|
||||||
|
#[serde(rename = "prometheus-node-exporter")]
|
||||||
|
pub prometheus_node_exporter: EnabledConfig,
|
||||||
|
#[serde(rename = "prometheus-pushgateway")]
|
||||||
|
pub prometheus_pushgateway: EnabledConfig,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Serialize, Debug, Clone)]
|
||||||
|
pub struct AlertFile {
|
||||||
|
pub groups: Vec<AlertManagerRuleGroup>,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Serialize, Debug)]
|
||||||
|
#[serde(rename_all = "camelCase")]
|
||||||
|
pub struct RbacConfig {
|
||||||
|
pub create: bool,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Serialize, Debug)]
|
||||||
|
#[serde(rename_all = "camelCase")]
|
||||||
|
pub struct KubeStateMetricsConfig {
|
||||||
|
pub enabled: bool,
|
||||||
|
pub rbac: KsmRbacConfig,
|
||||||
|
pub prometheus: Prometheus,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Serialize, Debug)]
|
||||||
|
#[serde(rename_all = "camelCase")]
|
||||||
|
pub struct Prometheus {
|
||||||
|
pub monitor: Monitor
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Serialize, Debug)]
|
||||||
|
#[serde(rename_all = "camelCase")]
|
||||||
|
pub struct Monitor{
|
||||||
|
pub enabled: bool
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Serialize, Debug)]
|
||||||
|
#[serde(rename_all = "camelCase")]
|
||||||
|
pub struct KsmRbacConfig {
|
||||||
|
pub use_cluster_role: bool,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Serialize, Debug)]
|
||||||
|
#[serde(rename_all = "camelCase")]
|
||||||
|
pub struct ServerConfig {
|
||||||
|
pub namespaces: Vec<String>,
|
||||||
|
pub use_existing_cluster_role_name: bool,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Serialize, Debug)]
|
||||||
|
#[serde(rename_all = "camelCase")]
|
||||||
|
pub struct ServerRbacConfig {
|
||||||
|
pub create: bool,
|
||||||
|
pub use_cluster_role: bool,
|
||||||
|
pub namespaced: bool,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Serialize, Debug, Clone)]
|
||||||
|
#[serde(rename_all = "camelCase")]
|
||||||
|
pub struct LabelSelector {
|
||||||
|
#[serde(rename = "matchLabels")]
|
||||||
|
pub match_labels: BTreeMap<String, String>,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Serialize, Debug)]
|
||||||
|
pub struct EnabledConfig {
|
||||||
|
pub enabled: bool,
|
||||||
|
}
|
||||||
3
harmony/src/modules/monitoring/prometheus/mod.rs
Normal file
3
harmony/src/modules/monitoring/prometheus/mod.rs
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
pub mod helm;
|
||||||
|
pub mod prometheus;
|
||||||
|
pub mod prometheus_config;
|
||||||
189
harmony/src/modules/monitoring/prometheus/prometheus.rs
Normal file
189
harmony/src/modules/monitoring/prometheus/prometheus.rs
Normal file
@@ -0,0 +1,189 @@
|
|||||||
|
use std::sync::{Arc, Mutex};
|
||||||
|
|
||||||
|
use async_trait::async_trait;
|
||||||
|
use log::{debug, error};
|
||||||
|
use serde::Serialize;
|
||||||
|
|
||||||
|
use crate::{
|
||||||
|
interpret::{InterpretError, Outcome},
|
||||||
|
inventory::Inventory,
|
||||||
|
modules::monitoring::{
|
||||||
|
alert_rule::prometheus_alert_rule::AlertManagerRuleGroup,
|
||||||
|
grafana::helm::helm_grafana::grafana_helm_chart_score,
|
||||||
|
kube_prometheus::types::{AlertManagerAdditionalPromRules, AlertManagerChannelConfig},
|
||||||
|
},
|
||||||
|
score::Score,
|
||||||
|
topology::{
|
||||||
|
HelmCommand, K8sclient, Topology,
|
||||||
|
installable::Installable,
|
||||||
|
oberservability::monitoring::{AlertReceiver, AlertRule, AlertSender},
|
||||||
|
tenant::TenantManager,
|
||||||
|
},
|
||||||
|
};
|
||||||
|
|
||||||
|
use super::{
|
||||||
|
helm::prometheus_helm::prometheus_helm_chart_score, prometheus_config::HelmPrometheusConfig,
|
||||||
|
};
|
||||||
|
|
||||||
|
#[derive(Debug)]
|
||||||
|
pub struct Prometheus {
|
||||||
|
pub config: Arc<Mutex<HelmPrometheusConfig>>,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[async_trait]
|
||||||
|
impl AlertSender for Prometheus {
|
||||||
|
fn name(&self) -> String {
|
||||||
|
"Prometheus".to_string()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Prometheus {
|
||||||
|
pub fn new() -> Self {
|
||||||
|
Self {
|
||||||
|
config: Arc::new(Mutex::new(HelmPrometheusConfig::new())),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
pub async fn configure_with_topology<T: TenantManager>(&self, topology: &T) {
|
||||||
|
if let Some(cfg) = topology.get_tenant_config().await {
|
||||||
|
debug!("Overriding namespace with tenant config: {}", cfg.name);
|
||||||
|
self.config.lock().unwrap().namespace = Some(cfg.name.clone());
|
||||||
|
} else {
|
||||||
|
debug!("No tenant config found; keeping existing namespace.");
|
||||||
|
}
|
||||||
|
error!("This must be refactored, see comments in pr #74");
|
||||||
|
}
|
||||||
|
|
||||||
|
pub async fn install_receiver(
|
||||||
|
&self,
|
||||||
|
prometheus_receiver: &dyn PrometheusReceiver,
|
||||||
|
) -> Result<Outcome, InterpretError> {
|
||||||
|
let prom_receiver = prometheus_receiver.configure_receiver().await;
|
||||||
|
debug!(
|
||||||
|
"adding alert receiver to prometheus config: {:#?}",
|
||||||
|
&prom_receiver
|
||||||
|
);
|
||||||
|
let mut config = self.config.lock().unwrap();
|
||||||
|
|
||||||
|
config.alert_receiver_configs.push(prom_receiver);
|
||||||
|
let prom_receiver_name = prometheus_receiver.name();
|
||||||
|
debug!("installed alert receiver {}", &prom_receiver_name);
|
||||||
|
Ok(Outcome::success(format!(
|
||||||
|
"Sucessfully installed receiver {}",
|
||||||
|
prom_receiver_name
|
||||||
|
)))
|
||||||
|
}
|
||||||
|
|
||||||
|
pub async fn install_rule(
|
||||||
|
&self,
|
||||||
|
prometheus_rule: &AlertManagerRuleGroup,
|
||||||
|
) -> Result<Outcome, InterpretError> {
|
||||||
|
let prometheus_rule = prometheus_rule.configure_rule().await;
|
||||||
|
let mut config = self.config.lock().unwrap();
|
||||||
|
|
||||||
|
config.alert_rules.push(prometheus_rule.clone());
|
||||||
|
Ok(Outcome::success(format!(
|
||||||
|
"Successfully installed alert rule: {:#?},",
|
||||||
|
prometheus_rule
|
||||||
|
)))
|
||||||
|
}
|
||||||
|
|
||||||
|
pub async fn install_prometheus<T: Topology + HelmCommand + Send + Sync>(
|
||||||
|
&self,
|
||||||
|
inventory: &Inventory,
|
||||||
|
topology: &T,
|
||||||
|
) -> Result<Outcome, InterpretError> {
|
||||||
|
prometheus_helm_chart_score(self.config.clone())
|
||||||
|
.create_interpret()
|
||||||
|
.execute(inventory, topology)
|
||||||
|
.await
|
||||||
|
}
|
||||||
|
pub async fn install_grafana<T: Topology + HelmCommand + Send + Sync>(
|
||||||
|
&self,
|
||||||
|
inventory: &Inventory,
|
||||||
|
topology: &T,
|
||||||
|
) -> Result<Outcome, InterpretError> {
|
||||||
|
let namespace = {
|
||||||
|
let config = self.config.lock().unwrap();
|
||||||
|
config.namespace.clone()
|
||||||
|
};
|
||||||
|
|
||||||
|
if let Some(ns) = namespace.as_deref() {
|
||||||
|
grafana_helm_chart_score(ns)
|
||||||
|
.create_interpret()
|
||||||
|
.execute(inventory, topology)
|
||||||
|
.await
|
||||||
|
} else {
|
||||||
|
Err(InterpretError::new(format!(
|
||||||
|
"could not install grafana, missing namespace",
|
||||||
|
)))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
#[async_trait]
|
||||||
|
impl<T: Topology + HelmCommand + TenantManager> Installable<T> for Prometheus {
|
||||||
|
async fn configure(&self, _inventory: &Inventory, topology: &T) -> Result<(), InterpretError> {
|
||||||
|
self.configure_with_topology(topology).await;
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn ensure_installed(
|
||||||
|
&self,
|
||||||
|
inventory: &Inventory,
|
||||||
|
topology: &T,
|
||||||
|
) -> Result<(), InterpretError> {
|
||||||
|
self.install_prometheus(inventory, topology).await?;
|
||||||
|
|
||||||
|
let install_grafana = {
|
||||||
|
let config = self.config.lock().unwrap();
|
||||||
|
config.grafana
|
||||||
|
};
|
||||||
|
|
||||||
|
if install_grafana {
|
||||||
|
self.install_grafana(inventory, topology).await?;
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[async_trait]
|
||||||
|
pub trait PrometheusReceiver: Send + Sync + std::fmt::Debug {
|
||||||
|
fn name(&self) -> String;
|
||||||
|
async fn configure_receiver(&self) -> AlertManagerChannelConfig;
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Serialize for Box<dyn AlertReceiver<Prometheus>> {
|
||||||
|
fn serialize<S>(&self, _serializer: S) -> Result<S::Ok, S::Error>
|
||||||
|
where
|
||||||
|
S: serde::Serializer,
|
||||||
|
{
|
||||||
|
todo!()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Clone for Box<dyn AlertReceiver<Prometheus>> {
|
||||||
|
fn clone(&self) -> Self {
|
||||||
|
self.clone_box()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[async_trait]
|
||||||
|
pub trait PrometheusRule: Send + Sync + std::fmt::Debug {
|
||||||
|
fn name(&self) -> String;
|
||||||
|
async fn configure_rule(&self) -> AlertManagerAdditionalPromRules;
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Serialize for Box<dyn AlertRule<Prometheus>> {
|
||||||
|
fn serialize<S>(&self, _serializer: S) -> Result<S::Ok, S::Error>
|
||||||
|
where
|
||||||
|
S: serde::Serializer,
|
||||||
|
{
|
||||||
|
todo!()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Clone for Box<dyn AlertRule<Prometheus>> {
|
||||||
|
fn clone(&self) -> Self {
|
||||||
|
self.clone_box()
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -0,0 +1,32 @@
|
|||||||
|
use crate::modules::monitoring::kube_prometheus::types::{
|
||||||
|
AlertManagerAdditionalPromRules, AlertManagerChannelConfig, ServiceMonitor,
|
||||||
|
};
|
||||||
|
|
||||||
|
#[derive(Debug)]
|
||||||
|
pub struct HelmPrometheusConfig {
|
||||||
|
pub namespace: Option<String>,
|
||||||
|
pub alert_manager: bool,
|
||||||
|
pub node_exporter: bool,
|
||||||
|
pub kube_state_metrics: bool,
|
||||||
|
pub grafana: bool,
|
||||||
|
pub prometheus_pushgateway: bool,
|
||||||
|
pub alert_receiver_configs: Vec<AlertManagerChannelConfig>,
|
||||||
|
pub alert_rules: Vec<AlertManagerAdditionalPromRules>,
|
||||||
|
pub additional_service_monitors: Vec<ServiceMonitor>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl HelmPrometheusConfig {
|
||||||
|
pub fn new() -> Self {
|
||||||
|
Self {
|
||||||
|
namespace: None,
|
||||||
|
alert_manager: true,
|
||||||
|
node_exporter: false,
|
||||||
|
kube_state_metrics: false,
|
||||||
|
grafana: true,
|
||||||
|
prometheus_pushgateway: false,
|
||||||
|
alert_receiver_configs: vec![],
|
||||||
|
alert_rules: vec![],
|
||||||
|
additional_service_monitors: vec![],
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -1 +1,2 @@
|
|||||||
pub mod pvc;
|
pub mod pvc;
|
||||||
|
pub mod pod;
|
||||||
|
|||||||
38
harmony/src/modules/prometheus/alerts/k8s/pod.rs
Normal file
38
harmony/src/modules/prometheus/alerts/k8s/pod.rs
Normal file
@@ -0,0 +1,38 @@
|
|||||||
|
use crate::modules::monitoring::alert_rule::prometheus_alert_rule::PrometheusAlertRule;
|
||||||
|
|
||||||
|
pub fn pod_in_failed_state() -> PrometheusAlertRule {
|
||||||
|
PrometheusAlertRule::new(
|
||||||
|
"PodInFailedState",
|
||||||
|
// This expression checks for any pod where the status phase is 'Failed' and the value is 1 (true).
|
||||||
|
"kube_pod_status_phase{phase=\"Failed\"} == 1",
|
||||||
|
)
|
||||||
|
.for_duration("1m") // Fire if the pod is in this state for 1 minute.
|
||||||
|
.label("severity", "critical") // A failed pod is a critical issue.
|
||||||
|
.annotation(
|
||||||
|
"summary",
|
||||||
|
"Pod {{ $labels.pod }} in namespace {{ $labels.namespace }} has failed.",
|
||||||
|
)
|
||||||
|
.annotation(
|
||||||
|
"description",
|
||||||
|
"The pod {{ $labels.pod }} in namespace {{ $labels.namespace }} has entered the 'Failed' state. This is a terminal error and the pod will not be automatically restarted. Please check the pod logs to diagnose the issue.",
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn pod_restarting_frequently() -> PrometheusAlertRule {
|
||||||
|
PrometheusAlertRule::new(
|
||||||
|
"PodRestartingFrequently",
|
||||||
|
// This expression calculates the increase in the restart count over the last 30 minutes.
|
||||||
|
// Alert if a container has restarted more than 5 times.
|
||||||
|
"increase(kube_pod_container_status_restarts_total[30m]) > 5",
|
||||||
|
)
|
||||||
|
.for_duration("15m") // The condition must persist for 15 minutes to avoid alerts for minor flaps.
|
||||||
|
.label("severity", "critical") // A crash-looping pod is effectively down.
|
||||||
|
.annotation(
|
||||||
|
"summary",
|
||||||
|
"Container {{ $labels.container }} in pod {{ $labels.pod }} is restarting frequently.",
|
||||||
|
)
|
||||||
|
.annotation(
|
||||||
|
"description",
|
||||||
|
"The container '{{ $labels.container }}' in pod '{{ $labels.pod }}' (namespace '{{ $labels.namespace }}') has restarted more than 5 times in the last 30 minutes. The pod is likely in a CrashLoopBackOff state.",
|
||||||
|
)
|
||||||
|
}
|
||||||
Reference in New Issue
Block a user