WIP: added implementation to deploy crd-alertmanagerconfigs Co-authored-by: Ian Letourneau <letourneau.ian@gmail.com> Reviewed-on: #86 Co-authored-by: Willem <wrolleman@nationtech.io> Co-committed-by: Willem <wrolleman@nationtech.io>
56 lines
1.9 KiB
Rust
56 lines
1.9 KiB
Rust
use std::collections::HashMap;
|
|
|
|
use crate::modules::monitoring::alert_rule::prometheus_alert_rule::PrometheusAlertRule;
|
|
|
|
pub fn pod_failed() -> PrometheusAlertRule {
|
|
PrometheusAlertRule {
|
|
alert: "PodFailed".into(),
|
|
expr: "kube_pod_status_phase{phase=\"Failed\"} > 2".into(),
|
|
r#for: Some("2m".into()),
|
|
labels: HashMap::from([("severity".into(), "critical".into())]),
|
|
annotations: HashMap::from([
|
|
("summary".into(), "A pod has failed".into()),
|
|
(
|
|
"description".into(),
|
|
"One or more pods are in Failed phase.".into(),
|
|
),
|
|
]),
|
|
}
|
|
}
|
|
|
|
pub fn alert_container_restarting() -> PrometheusAlertRule {
|
|
PrometheusAlertRule {
|
|
alert: "ContainerRestarting".into(),
|
|
expr: "increase(kube_pod_container_status_restarts_total[5m]) > 3".into(),
|
|
r#for: Some("5m".into()),
|
|
labels: HashMap::from([("severity".into(), "warning".into())]),
|
|
annotations: HashMap::from([
|
|
(
|
|
"summary".into(),
|
|
"Container is restarting frequently".into(),
|
|
),
|
|
(
|
|
"description".into(),
|
|
"A container in this namespace has restarted more than 3 times in 5 minutes."
|
|
.into(),
|
|
),
|
|
]),
|
|
}
|
|
}
|
|
|
|
pub fn alert_pod_not_ready() -> PrometheusAlertRule {
|
|
PrometheusAlertRule {
|
|
alert: "PodNotReady".into(),
|
|
expr: "kube_pod_status_ready{condition=\"true\"} == 0".into(),
|
|
r#for: Some("2m".into()),
|
|
labels: HashMap::from([("severity".into(), "warning".into())]),
|
|
annotations: HashMap::from([
|
|
("summary".into(), "Pod is not ready".into()),
|
|
(
|
|
"description".into(),
|
|
"A pod in the namespace is not reporting Ready status.".into(),
|
|
),
|
|
]),
|
|
}
|
|
}
|