harmony/harmony/src/modules/prometheus/alerts/k8s/pod.rs
Willem 024084859e Monitor an application within a tenant (#86)
WIP: added implementation to deploy crd-alertmanagerconfigs
Co-authored-by: Ian Letourneau <letourneau.ian@gmail.com>
Reviewed-on: https://git.nationtech.io/NationTech/harmony/pulls/86
Co-authored-by: Willem <wrolleman@nationtech.io>
Co-committed-by: Willem <wrolleman@nationtech.io>
2025-08-04 21:42:01 +00:00

56 lines
1.9 KiB
Rust

use std::collections::HashMap;
use crate::modules::monitoring::alert_rule::prometheus_alert_rule::PrometheusAlertRule;
/// Builds the `PodFailed` alert rule, labelled with severity `critical`.
///
/// The rule selects `kube_pod_status_phase{phase="Failed"}` series whose value
/// is above zero and requires the condition to hold for `2m` before firing.
pub fn pod_failed() -> PrometheusAlertRule {
    PrometheusAlertRule {
        alert: "PodFailed".into(),
        // kube-state-metrics reports `kube_pod_status_phase` as a 0/1 gauge per
        // pod and phase, so a threshold of `> 2` could never match. `> 0`
        // selects every pod currently in the Failed phase, which is what the
        // description annotation below promises.
        expr: "kube_pod_status_phase{phase=\"Failed\"} > 0".into(),
        r#for: Some("2m".into()),
        labels: HashMap::from([("severity".into(), "critical".into())]),
        annotations: HashMap::from([
            ("summary".into(), "A pod has failed".into()),
            (
                "description".into(),
                "One or more pods are in Failed phase.".into(),
            ),
        ]),
    }
}
/// Builds the `ContainerRestarting` alert rule, labelled with severity `warning`.
///
/// The expression compares the 5-minute increase of
/// `kube_pod_container_status_restarts_total` against 3, and the rule must
/// hold for `5m`. The expression itself carries no namespace selector.
pub fn alert_container_restarting() -> PrometheusAlertRule {
    let mut labels = HashMap::new();
    labels.insert("severity".into(), "warning".into());

    let mut annotations = HashMap::new();
    annotations.insert(
        "summary".into(),
        "Container is restarting frequently".into(),
    );
    annotations.insert(
        "description".into(),
        "A container in this namespace has restarted more than 3 times in 5 minutes.".into(),
    );

    PrometheusAlertRule {
        alert: "ContainerRestarting".into(),
        expr: "increase(kube_pod_container_status_restarts_total[5m]) > 3".into(),
        r#for: Some("5m".into()),
        labels,
        annotations,
    }
}
/// Builds the `PodNotReady` alert rule, labelled with severity `warning`.
///
/// The expression matches `kube_pod_status_ready{condition="true"}` series
/// equal to 0, and the rule must hold for `2m`.
pub fn alert_pod_not_ready() -> PrometheusAlertRule {
    let labels = HashMap::from([("severity".into(), "warning".into())]);

    // Annotation texts are listed as plain string pairs and converted to the
    // map's owned key/value types in a single pass.
    let annotations = [
        ("summary", "Pod is not ready"),
        (
            "description",
            "A pod in the namespace is not reporting Ready status.",
        ),
    ]
    .iter()
    .map(|&(key, text)| (key.into(), text.into()))
    .collect();

    PrometheusAlertRule {
        alert: "PodNotReady".into(),
        expr: "kube_pod_status_ready{condition=\"true\"} == 0".into(),
        r#for: Some("2m".into()),
        labels,
        annotations,
    }
}