Merge branch 'master' into feat/applicationModule
All checks were successful
Run Check Script / check (pull_request) Successful in 1m50s
This commit is contained in:
commit 6684a35b33
@@ -19,13 +19,6 @@ jobs:
      - name: Build for Windows x86_64 GNU
        run: cargo build --release --bin harmony_composer --target x86_64-pc-windows-gnu

      - uses: actions/upload-artifact@v3
        with:
          name: binaries
          path: |
            target/x86_64-unknown-linux-gnu/release/harmony_composer
            target/x86_64-pc-windows-gnu/release/harmony_composer.exe

      - name: Setup log into hub.nationtech.io
        uses: docker/login-action@v3
        with:
@@ -35,6 +28,62 @@ jobs:

      # TODO: build ARM images and MacOS binaries (or other targets) too

      - name: Update snapshot-latest tag
        run: |
          git config user.name "Gitea CI"
          git config user.email "ci@nationtech.io"
          git tag -f snapshot-latest
          git push origin snapshot-latest --force

      - name: Install jq
        run: apt install -y jq # The current image includes apt lists so we don't have to apt update and rm /var/lib/apt... every time. But if the image is optimized it won't work anymore

      - name: Create or update release
        run: |
          # First, check if release exists and delete it if it does
          RELEASE_ID=$(curl -s -X GET \
            -H "Authorization: token ${{ secrets.GITEATOKEN }}" \
            "https://git.nationtech.io/api/v1/repos/nationtech/harmony/releases/tags/snapshot-latest" \
            | jq -r '.id // empty')

          if [ -n "$RELEASE_ID" ]; then
            # Delete existing release
            curl -X DELETE \
              -H "Authorization: token ${{ secrets.GITEATOKEN }}" \
              "https://git.nationtech.io/api/v1/repos/nationtech/harmony/releases/$RELEASE_ID"
          fi

          # Create new release
          RESPONSE=$(curl -X POST \
            -H "Authorization: token ${{ secrets.GITEATOKEN }}" \
            -H "Content-Type: application/json" \
            -d '{
              "tag_name": "snapshot-latest",
              "name": "Latest Snapshot",
              "body": "Automated snapshot build from master branch",
              "draft": false,
              "prerelease": true
            }' \
            "https://git.nationtech.io/api/v1/repos/nationtech/harmony/releases")

          echo "RELEASE_ID=$(echo $RESPONSE | jq -r '.id')" >> $GITHUB_ENV

      - name: Upload Linux binary
        run: |
          curl -X POST \
            -H "Authorization: token ${{ secrets.GITEATOKEN }}" \
            -H "Content-Type: application/octet-stream" \
            --data-binary "@target/x86_64-unknown-linux-gnu/release/harmony_composer" \
            "https://git.nationtech.io/api/v1/repos/nationtech/harmony/releases/${{ env.RELEASE_ID }}/assets?name=harmony_composer"

      - name: Upload Windows binary
        run: |
          curl -X POST \
            -H "Authorization: token ${{ secrets.GITEATOKEN }}" \
            -H "Content-Type: application/octet-stream" \
            --data-binary "@target/x86_64-pc-windows-gnu/release/harmony_composer.exe" \
            "https://git.nationtech.io/api/v1/repos/nationtech/harmony/releases/${{ env.RELEASE_ID }}/assets?name=harmony_composer.exe"

      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3
8 Cargo.lock (generated)
@@ -1161,6 +1161,7 @@ dependencies = [
 "harmony",
 "harmony_cli",
 "tokio",
 "url",
]

[[package]]
@@ -1576,6 +1577,7 @@ dependencies = [
 "serde-value",
 "serde_json",
 "serde_yaml",
 "similar",
 "temp-dir",
 "temp-file",
 "tokio",
@@ -4090,6 +4092,12 @@ dependencies = [
 "rand_core 0.6.4",
]

[[package]]
name = "similar"
version = "2.7.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bbbb5d9659141646ae647b42fe094daf6c6192d1620870b449d9557f748b2daa"

[[package]]
name = "simple_asn1"
version = "0.6.3"
47 Cargo.toml
@@ -20,34 +20,23 @@ readme = "README.md"
license = "GNU AGPL v3"

[workspace.dependencies]
log = "0.4.22"
env_logger = "0.11.5"
derive-new = "0.7.0"
async-trait = "0.1.82"
tokio = { version = "1.40.0", features = [
    "io-std",
    "fs",
    "macros",
    "rt-multi-thread",
] }
log = "0.4"
env_logger = "0.11"
derive-new = "0.7"
async-trait = "0.1"
tokio = { version = "1.40", features = ["io-std", "fs", "macros", "rt-multi-thread"] }
cidr = { features = ["serde"], version = "0.2" }
russh = "0.45.0"
russh-keys = "0.45.0"
rand = "0.8.5"
url = "2.5.4"
kube = "0.98.0"
k8s-openapi = { version = "0.24.0", features = ["v1_30"] }
serde_yaml = "0.9.34"
serde-value = "0.7.0"
http = "1.2.0"
inquire = "0.7.5"
convert_case = "0.8.0"
russh = "0.45"
russh-keys = "0.45"
rand = "0.8"
url = "2.5"
kube = "0.98"
k8s-openapi = { version = "0.24", features = ["v1_30"] }
serde_yaml = "0.9"
serde-value = "0.7"
http = "1.2"
inquire = "0.7"
convert_case = "0.8"
chrono = "0.4"

[workspace.dependencies.uuid]
version = "1.11.0"
features = [
    "v4",                # Lets you generate random UUIDs
    "fast-rng",          # Use a faster (but still sufficiently random) RNG
    "macro-diagnostics", # Enable better diagnostics for compile-time UUIDs
]
similar = "2"
uuid = { version = "1.11", features = [ "v4", "fast-rng", "macro-diagnostics" ] }
@@ -2,10 +2,7 @@ use harmony::{
    data::Version,
    inventory::Inventory,
    maestro::Maestro,
    modules::{
        lamp::{LAMPConfig, LAMPScore},
        monitoring::alert_channel::discord_alert_channel::DiscordWebhook,
    },
    modules::lamp::{LAMPConfig, LAMPScore},
    topology::{K8sAnywhereTopology, Url},
};

@@ -53,7 +50,7 @@ async fn main() {
    .await
    .unwrap();

    // maestro.register_all(vec![Box::new(lamp_stack)]);
    maestro.register_all(vec![Box::new(lamp_stack)]);
    // Here we bootstrap the CLI, this gives some nice features if you need them
    harmony_cli::init(maestro, None).await.unwrap();
}
@@ -9,3 +9,4 @@ license.workspace = true
harmony = { version = "0.1.0", path = "../../harmony" }
harmony_cli = { version = "0.1.0", path = "../../harmony_cli" }
tokio.workspace = true
url.workspace = true
@@ -1,12 +1,50 @@
use harmony::{
    inventory::Inventory, maestro::Maestro,
    modules::monitoring::kube_prometheus::helm_prometheus_alert_score::HelmPrometheusAlertingScore,
    topology::K8sAnywhereTopology,
    inventory::Inventory,
    maestro::Maestro,
    modules::{
        monitoring::{
            alert_channel::discord_alert_channel::DiscordWebhook,
            alert_rule::prometheus_alert_rule::AlertManagerRuleGroup,
            kube_prometheus::helm_prometheus_alert_score::HelmPrometheusAlertingScore,
        },
        prometheus::alerts::{
            infra::dell_server::{
                alert_global_storage_status_critical, alert_global_storage_status_non_recoverable,
                global_storage_status_degraded_non_critical,
            },
            k8s::pvc::high_pvc_fill_rate_over_two_days,
        },
    },
    topology::{K8sAnywhereTopology, Url},
};

#[tokio::main]
async fn main() {
    let alerting_score = HelmPrometheusAlertingScore { receivers: vec![] };
    let discord_receiver = DiscordWebhook {
        name: "test-discord".to_string(),
        url: Url::Url(url::Url::parse("https://discord.doesnt.exist.com").unwrap()),
    };

    let high_pvc_fill_rate_over_two_days_alert = high_pvc_fill_rate_over_two_days();
    let dell_system_storage_degraded = global_storage_status_degraded_non_critical();
    let alert_global_storage_status_critical = alert_global_storage_status_critical();
    let alert_global_storage_status_non_recoverable = alert_global_storage_status_non_recoverable();

    let additional_rules =
        AlertManagerRuleGroup::new("pvc-alerts", vec![high_pvc_fill_rate_over_two_days_alert]);
    let additional_rules2 = AlertManagerRuleGroup::new(
        "dell-server-alerts",
        vec![
            dell_system_storage_degraded,
            alert_global_storage_status_critical,
            alert_global_storage_status_non_recoverable,
        ],
    );

    let alerting_score = HelmPrometheusAlertingScore {
        receivers: vec![Box::new(discord_receiver)],
        rules: vec![Box::new(additional_rules), Box::new(additional_rules2)],
    };
    let mut maestro = Maestro::<K8sAnywhereTopology>::initialize(
        Inventory::autoload(),
        K8sAnywhereTopology::from_env(),
@@ -14,12 +52,6 @@ async fn main() {
    .await
    .unwrap();

    //let monitoring = MonitoringAlertingScore {
    //    alert_receivers: vec![],
    //    alert_rules: vec![],
    //    scrape_targets: vec![],
    //};
    //maestro.register_all(vec![Box::new(monitoring)]);
    maestro.register_all(vec![Box::new(alerting_score)]);
    harmony_cli::init(maestro, None).await.unwrap();
}
@@ -53,3 +53,4 @@ fqdn = { version = "0.4.6", features = [
] }
temp-dir = "0.1.14"
dyn-clone = "1.0.19"
similar.workspace = true
@@ -10,4 +10,6 @@ lazy_static! {
        std::env::var("HARMONY_REGISTRY_URL").unwrap_or_else(|_| "hub.nationtech.io".to_string());
    pub static ref REGISTRY_PROJECT: String =
        std::env::var("HARMONY_REGISTRY_PROJECT").unwrap_or_else(|_| "harmony".to_string());
    pub static ref DRY_RUN: bool =
        std::env::var("HARMONY_DRY_RUN").map_or(true, |value| value.parse().unwrap_or(true));
}
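Judging from the `map_or(true, ...)` parsing above, `HARMONY_DRY_RUN` defaults to `true`, and any unparseable value also falls back to `true`, so actually applying changes requires opting out explicitly. A minimal sketch of how this would be used from the shell (the `--example monitoring` name is an assumption for illustration, not taken from this diff):

```bash
# Dry-run is the default; these two invocations behave the same
cargo run --example monitoring
HARMONY_DRY_RUN=true cargo run --example monitoring

# Only an explicit, parseable "false" disables the dry-run diff output
HARMONY_DRY_RUN=false cargo run --example monitoring
```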
@@ -1,8 +1,12 @@
use async_trait::async_trait;

use crate::interpret::InterpretError;
use crate::{interpret::InterpretError, inventory::Inventory};

#[async_trait]
pub trait Installable {
    async fn ensure_installed(&self) -> Result<(), InterpretError>;
pub trait Installable<T>: Send + Sync {
    async fn ensure_installed(
        &self,
        inventory: &Inventory,
        topology: &T,
    ) -> Result<(), InterpretError>;
}
@@ -4,9 +4,11 @@ use kube::{
    Api, Client, Config, Error, Resource,
    api::{Patch, PatchParams},
    config::{KubeConfigOptions, Kubeconfig},
    core::ErrorResponse,
};
use log::{debug, error, trace};
use serde::de::DeserializeOwned;
use similar::TextDiff;

#[derive(new, Clone)]
pub struct K8sClient {
@@ -59,8 +61,79 @@ impl K8sClient {
            .name
            .as_ref()
            .expect("K8s Resource should have a name");
        api.patch(name, &patch_params, &Patch::Apply(resource))
            .await

        if *crate::config::DRY_RUN {
            match api.get(name).await {
                Ok(current) => {
                    trace!("Received current value {current:#?}");
                    // The resource exists, so we calculate and display a diff.
                    println!("\nPerforming dry-run for resource: '{}'", name);
                    let mut current_yaml = serde_yaml::to_value(&current)
                        .expect(&format!("Could not serialize current value : {current:#?}"));
                    if current_yaml.is_mapping() && current_yaml.get("status").is_some() {
                        let map = current_yaml.as_mapping_mut().unwrap();
                        let removed = map.remove_entry("status");
                        trace!("Removed status {:?}", removed);
                    } else {
                        trace!(
                            "Did not find status entry for current object {}/{}",
                            current.meta().namespace.as_ref().unwrap_or(&"".to_string()),
                            current.meta().name.as_ref().unwrap_or(&"".to_string())
                        );
                    }
                    let current_yaml = serde_yaml::to_string(&current_yaml)
                        .unwrap_or_else(|_| "Failed to serialize current resource".to_string());
                    let new_yaml = serde_yaml::to_string(resource)
                        .unwrap_or_else(|_| "Failed to serialize new resource".to_string());

                    if current_yaml == new_yaml {
                        println!("No changes detected.");
                        // Return the current resource state as there are no changes.
                        return Ok(current);
                    }

                    println!("Changes detected:");
                    let diff = TextDiff::from_lines(&current_yaml, &new_yaml);

                    // Iterate over the changes and print them in a git-like diff format.
                    for change in diff.iter_all_changes() {
                        let sign = match change.tag() {
                            similar::ChangeTag::Delete => "-",
                            similar::ChangeTag::Insert => "+",
                            similar::ChangeTag::Equal => " ",
                        };
                        print!("{}{}", sign, change);
                    }
                    // In a dry run, we return the new resource state that would have been applied.
                    Ok(resource.clone())
                }
                Err(Error::Api(ErrorResponse { code: 404, .. })) => {
                    // The resource does not exist, so the "diff" is the entire new resource.
                    println!("\nPerforming dry-run for new resource: '{}'", name);
                    println!(
                        "Resource does not exist. It would be created with the following content:"
                    );
                    let new_yaml = serde_yaml::to_string(resource)
                        .unwrap_or_else(|_| "Failed to serialize new resource".to_string());

                    // Print each line of the new resource with a '+' prefix.
                    for line in new_yaml.lines() {
                        println!("+{}", line);
                    }
                    // In a dry run, we return the new resource state that would have been created.
                    Ok(resource.clone())
                }
                Err(e) => {
                    // Another API error occurred.
                    error!("Failed to get resource '{}': {}", name, e);
                    Err(e)
                }
            }
        } else {
            return api
                .patch(name, &patch_params, &Patch::Apply(resource))
                .await;
        }
    }

    pub async fn apply_many<K>(&self, resource: &Vec<K>, ns: Option<&str>) -> Result<Vec<K>, Error>
@@ -1,4 +1,5 @@
use async_trait::async_trait;
use log::debug;

use crate::{
    data::{Id, Version},
@@ -7,16 +8,20 @@ use crate::{
    topology::{Topology, installable::Installable},
};

pub trait AlertSender: Send + Sync + std::fmt::Debug + Installable {}
#[async_trait]
pub trait AlertSender: Send + Sync + std::fmt::Debug {
    fn name(&self) -> String;
}

#[derive(Debug)]
pub struct AlertingInterpret<S: AlertSender> {
    pub sender: S,
    pub receivers: Vec<Box<dyn AlertReceiver<S>>>,
    pub rules: Vec<Box<dyn AlertRule<S>>>,
}

#[async_trait]
impl<S: AlertSender, T: Topology> Interpret<T> for AlertingInterpret<S> {
impl<S: AlertSender + Installable<T>, T: Topology> Interpret<T> for AlertingInterpret<S> {
    async fn execute(
        &self,
        inventory: &Inventory,
@@ -25,7 +30,15 @@ impl<S: AlertSender, T: Topology> Interpret<T> for AlertingInterpret<S> {
        for receiver in self.receivers.iter() {
            receiver.install(&self.sender).await?;
        }
        todo!()
        for rule in self.rules.iter() {
            debug!("installing rule: {:#?}", rule);
            rule.install(&self.sender).await?;
        }
        self.sender.ensure_installed(inventory, topology).await?;
        Ok(Outcome::success(format!(
            "successfully installed alert sender {}",
            self.sender.name()
        )))
    }

    fn get_name(&self) -> InterpretName {
@@ -47,12 +60,14 @@ impl<S: AlertSender, T: Topology> Interpret<T> for AlertingInterpret<S> {

#[async_trait]
pub trait AlertReceiver<S: AlertSender>: std::fmt::Debug + Send + Sync {
    async fn install(&self, sender: &S) -> Result<(), InterpretError>;
    async fn install(&self, sender: &S) -> Result<Outcome, InterpretError>;
    fn clone_box(&self) -> Box<dyn AlertReceiver<S>>;
}

#[async_trait]
pub trait AlertRule<S: AlertSender> {
    async fn install(&self, sender: &S) -> Result<(), InterpretError>;
pub trait AlertRule<S: AlertSender>: std::fmt::Debug + Send + Sync {
    async fn install(&self, sender: &S) -> Result<Outcome, InterpretError>;
    fn clone_box(&self) -> Box<dyn AlertRule<S>>;
}

#[async_trait]
@@ -138,6 +138,7 @@ impl K8sTenantManager {
            "kind": "NetworkPolicy",
            "metadata": {
                "name": format!("{}-network-policy", config.name),
                "namespace": self.get_namespace_name(config),
            },
            "spec": {
                "podSelector": {},
@@ -219,8 +220,29 @@ impl K8sTenantManager {
                })
            })
            .collect();
        let ports: Option<Vec<NetworkPolicyPort>> =
            c.1.as_ref().map(|spec| match &spec.data {
                super::PortSpecData::SinglePort(port) => vec![NetworkPolicyPort {
                    port: Some(IntOrString::Int(port.clone().into())),
                    ..Default::default()
                }],
                super::PortSpecData::PortRange(start, end) => vec![NetworkPolicyPort {
                    port: Some(IntOrString::Int(start.clone().into())),
                    end_port: Some(end.clone().into()),
                    protocol: None, // Not currently supported by Harmony
                }],

                super::PortSpecData::ListOfPorts(items) => items
                    .iter()
                    .map(|i| NetworkPolicyPort {
                        port: Some(IntOrString::Int(i.clone().into())),
                        ..Default::default()
                    })
                    .collect(),
            });
        let rule = serde_json::from_value::<NetworkPolicyIngressRule>(json!({
            "from": cidr_list
            "from": cidr_list,
            "ports": ports,
        }))
        .map_err(|e| {
            ExecutorError::ConfigurationError(format!(
@@ -13,5 +13,6 @@ pub mod load_balancer;
pub mod monitoring;
pub mod okd;
pub mod opnsense;
pub mod prometheus;
pub mod tenant;
pub mod tftp;
@@ -1,12 +1,17 @@
use async_trait::async_trait;
use serde::Serialize;
use serde_yaml::{Mapping, Value};

use crate::{
    interpret::InterpretError,
    modules::monitoring::kube_prometheus::prometheus::{Prometheus, PrometheusReceiver},
    interpret::{InterpretError, Outcome},
    modules::monitoring::kube_prometheus::{
        prometheus::{Prometheus, PrometheusReceiver},
        types::{AlertChannelConfig, AlertManagerChannelConfig},
    },
    topology::{Url, oberservability::monitoring::AlertReceiver},
};

#[derive(Debug)]
#[derive(Debug, Clone, Serialize)]
pub struct DiscordWebhook {
    pub name: String,
    pub url: Url,
@@ -14,7 +19,107 @@ pub struct DiscordWebhook {

#[async_trait]
impl AlertReceiver<Prometheus> for DiscordWebhook {
    async fn install(&self, sender: &Prometheus) -> Result<(), InterpretError> {
        sender.install_receiver(PrometheusReceiver {}).await
    async fn install(&self, sender: &Prometheus) -> Result<Outcome, InterpretError> {
        sender.install_receiver(self).await
    }
    fn clone_box(&self) -> Box<dyn AlertReceiver<Prometheus>> {
        Box::new(self.clone())
    }
}

#[async_trait]
impl PrometheusReceiver for DiscordWebhook {
    fn name(&self) -> String {
        self.name.clone()
    }
    async fn configure_receiver(&self) -> AlertManagerChannelConfig {
        self.get_config().await
    }
}

#[async_trait]
impl AlertChannelConfig for DiscordWebhook {
    async fn get_config(&self) -> AlertManagerChannelConfig {
        let channel_global_config = None;
        let channel_receiver = self.alert_channel_receiver().await;
        let channel_route = self.alert_channel_route().await;

        AlertManagerChannelConfig {
            channel_global_config,
            channel_receiver,
            channel_route,
        }
    }
}

impl DiscordWebhook {
    async fn alert_channel_route(&self) -> serde_yaml::Value {
        let mut route = Mapping::new();
        route.insert(
            Value::String("receiver".to_string()),
            Value::String(self.name.clone()),
        );
        route.insert(
            Value::String("matchers".to_string()),
            Value::Sequence(vec![Value::String("alertname!=Watchdog".to_string())]),
        );
        route.insert(Value::String("continue".to_string()), Value::Bool(true));
        Value::Mapping(route)
    }

    async fn alert_channel_receiver(&self) -> serde_yaml::Value {
        let mut receiver = Mapping::new();
        receiver.insert(
            Value::String("name".to_string()),
            Value::String(self.name.clone()),
        );

        let mut discord_config = Mapping::new();
        discord_config.insert(
            Value::String("webhook_url".to_string()),
            Value::String(self.url.to_string()),
        );

        receiver.insert(
            Value::String("discord_configs".to_string()),
            Value::Sequence(vec![Value::Mapping(discord_config)]),
        );

        Value::Mapping(receiver)
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    #[tokio::test]
    async fn discord_serialize_should_match() {
        let discord_receiver = DiscordWebhook {
            name: "test-discord".to_string(),
            url: Url::Url(url::Url::parse("https://discord.i.dont.exist.com").unwrap()),
        };

        let discord_receiver_receiver =
            serde_yaml::to_string(&discord_receiver.alert_channel_receiver().await).unwrap();
        println!("receiver \n{:#}", discord_receiver_receiver);
        let discord_receiver_receiver_yaml = r#"name: test-discord
discord_configs:
- webhook_url: https://discord.i.dont.exist.com/
"#
        .to_string();

        let discord_receiver_route =
            serde_yaml::to_string(&discord_receiver.alert_channel_route().await).unwrap();
        println!("route \n{:#}", discord_receiver_route);
        let discord_receiver_route_yaml = r#"receiver: test-discord
matchers:
- alertname!=Watchdog
continue: true
"#
        .to_string();

        assert_eq!(discord_receiver_receiver, discord_receiver_receiver_yaml);
        assert_eq!(discord_receiver_route, discord_receiver_route_yaml);
    }
}
@@ -1 +1,2 @@
pub mod discord_alert_channel;
pub mod webhook_receiver;
124 harmony/src/modules/monitoring/alert_channel/webhook_receiver.rs (new file)
@@ -0,0 +1,124 @@
use async_trait::async_trait;
use serde::Serialize;
use serde_yaml::{Mapping, Value};

use crate::{
    interpret::{InterpretError, Outcome},
    modules::monitoring::kube_prometheus::{
        prometheus::{Prometheus, PrometheusReceiver},
        types::{AlertChannelConfig, AlertManagerChannelConfig},
    },
    topology::{Url, oberservability::monitoring::AlertReceiver},
};

#[derive(Debug, Clone, Serialize)]
pub struct WebhookReceiver {
    pub name: String,
    pub url: Url,
}

#[async_trait]
impl AlertReceiver<Prometheus> for WebhookReceiver {
    async fn install(&self, sender: &Prometheus) -> Result<Outcome, InterpretError> {
        sender.install_receiver(self).await
    }
    fn clone_box(&self) -> Box<dyn AlertReceiver<Prometheus>> {
        Box::new(self.clone())
    }
}

#[async_trait]
impl PrometheusReceiver for WebhookReceiver {
    fn name(&self) -> String {
        self.name.clone()
    }
    async fn configure_receiver(&self) -> AlertManagerChannelConfig {
        self.get_config().await
    }
}

#[async_trait]
impl AlertChannelConfig for WebhookReceiver {
    async fn get_config(&self) -> AlertManagerChannelConfig {
        let channel_global_config = None;
        let channel_receiver = self.alert_channel_receiver().await;
        let channel_route = self.alert_channel_route().await;

        AlertManagerChannelConfig {
            channel_global_config,
            channel_receiver,
            channel_route,
        }
    }
}

impl WebhookReceiver {
    async fn alert_channel_route(&self) -> serde_yaml::Value {
        let mut route = Mapping::new();
        route.insert(
            Value::String("receiver".to_string()),
            Value::String(self.name.clone()),
        );
        route.insert(
            Value::String("matchers".to_string()),
            Value::Sequence(vec![Value::String("alertname!=Watchdog".to_string())]),
        );
        route.insert(Value::String("continue".to_string()), Value::Bool(true));
        Value::Mapping(route)
    }

    async fn alert_channel_receiver(&self) -> serde_yaml::Value {
        let mut receiver = Mapping::new();
        receiver.insert(
            Value::String("name".to_string()),
            Value::String(self.name.clone()),
        );

        let mut webhook_config = Mapping::new();
        webhook_config.insert(
            Value::String("url".to_string()),
            Value::String(self.url.to_string()),
        );

        receiver.insert(
            Value::String("webhook_configs".to_string()),
            Value::Sequence(vec![Value::Mapping(webhook_config)]),
        );

        Value::Mapping(receiver)
    }
}

#[cfg(test)]
mod tests {
    use super::*;
    #[tokio::test]
    async fn webhook_serialize_should_match() {
        let webhook_receiver = WebhookReceiver {
            name: "test-webhook".to_string(),
            url: Url::Url(url::Url::parse("https://webhook.i.dont.exist.com").unwrap()),
        };

        let webhook_receiver_receiver =
            serde_yaml::to_string(&webhook_receiver.alert_channel_receiver().await).unwrap();
        println!("receiver \n{:#}", webhook_receiver_receiver);
        let webhook_receiver_receiver_yaml = r#"name: test-webhook
webhook_configs:
- url: https://webhook.i.dont.exist.com/
"#
        .to_string();

        let webhook_receiver_route =
            serde_yaml::to_string(&webhook_receiver.alert_channel_route().await).unwrap();
        println!("route \n{:#}", webhook_receiver_route);
        let webhook_receiver_route_yaml = r#"receiver: test-webhook
matchers:
- alertname!=Watchdog
continue: true
"#
        .to_string();

        assert_eq!(webhook_receiver_receiver, webhook_receiver_receiver_yaml);
        assert_eq!(webhook_receiver_route, webhook_receiver_route_yaml);
    }
}
1 harmony/src/modules/monitoring/alert_rule/mod.rs (new file)
@@ -0,0 +1 @@
pub mod prometheus_alert_rule;
@@ -0,0 +1,99 @@
use std::collections::{BTreeMap, HashMap};

use async_trait::async_trait;
use serde::Serialize;

use crate::{
    interpret::{InterpretError, Outcome},
    modules::monitoring::kube_prometheus::{
        prometheus::{Prometheus, PrometheusRule},
        types::{AlertGroup, AlertManagerAdditionalPromRules},
    },
    topology::oberservability::monitoring::AlertRule,
};

#[async_trait]
impl AlertRule<Prometheus> for AlertManagerRuleGroup {
    async fn install(&self, sender: &Prometheus) -> Result<Outcome, InterpretError> {
        sender.install_rule(&self).await
    }
    fn clone_box(&self) -> Box<dyn AlertRule<Prometheus>> {
        Box::new(self.clone())
    }
}

#[async_trait]
impl PrometheusRule for AlertManagerRuleGroup {
    fn name(&self) -> String {
        self.name.clone()
    }
    async fn configure_rule(&self) -> AlertManagerAdditionalPromRules {
        let mut additional_prom_rules = BTreeMap::new();

        additional_prom_rules.insert(
            self.name.clone(),
            AlertGroup {
                groups: vec![self.clone()],
            },
        );
        AlertManagerAdditionalPromRules {
            rules: additional_prom_rules,
        }
    }
}

impl AlertManagerRuleGroup {
    pub fn new(name: &str, rules: Vec<PrometheusAlertRule>) -> AlertManagerRuleGroup {
        AlertManagerRuleGroup {
            name: name.to_string().to_lowercase(),
            rules,
        }
    }
}

#[derive(Debug, Clone, Serialize)]
/// Logical group of alert rules.
/// Evaluates to:
/// name:
///   groups:
///     - name: name
///       rules: PrometheusAlertRule
pub struct AlertManagerRuleGroup {
    pub name: String,
    pub rules: Vec<PrometheusAlertRule>,
}

#[derive(Debug, Clone, Serialize)]
pub struct PrometheusAlertRule {
    pub alert: String,
    pub expr: String,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub r#for: Option<String>,
    pub labels: HashMap<String, String>,
    pub annotations: HashMap<String, String>,
}

impl PrometheusAlertRule {
    pub fn new(alert_name: &str, expr: &str) -> Self {
        Self {
            alert: alert_name.into(),
            expr: expr.into(),
            r#for: Some("1m".into()),
            labels: HashMap::new(),
            annotations: HashMap::new(),
        }
    }
    pub fn for_duration(mut self, duration: &str) -> Self {
        self.r#for = Some(duration.into());
        self
    }
    pub fn label(mut self, key: &str, value: &str) -> Self {
        self.labels.insert(key.into(), value.into());
        self
    }

    pub fn annotation(mut self, key: &str, value: &str) -> Self {
        self.annotations.insert(key.into(), value.into());
        self
    }
}
@@ -1,5 +1,10 @@
use serde::Serialize;

use crate::modules::monitoring::{
    alert_rule::prometheus_alert_rule::AlertManagerRuleGroup,
    kube_prometheus::types::{AlertManagerAdditionalPromRules, AlertManagerChannelConfig},
};

#[derive(Debug, Clone, Serialize)]
pub struct KubePrometheusConfig {
    pub namespace: String,
@@ -19,6 +24,8 @@ pub struct KubePrometheusConfig {
    pub kube_proxy: bool,
    pub kube_state_metrics: bool,
    pub prometheus_operator: bool,
    pub alert_receiver_configs: Vec<AlertManagerChannelConfig>,
    pub alert_rules: Vec<AlertManagerAdditionalPromRules>,
}
impl KubePrometheusConfig {
    pub fn new() -> Self {
@@ -40,6 +47,8 @@ impl KubePrometheusConfig {
            prometheus_operator: true,
            core_dns: false,
            kube_scheduler: false,
            alert_receiver_configs: vec![],
            alert_rules: vec![],
        }
    }
}
@@ -1,17 +1,28 @@
use super::config::KubePrometheusConfig;
use log::debug;
use non_blank_string_rs::NonBlankString;
use std::str::FromStr;
use serde_yaml::{Mapping, Value};
use std::{
    collections::BTreeMap,
    str::FromStr,
    sync::{Arc, Mutex},
};

use crate::modules::helm::chart::HelmChartScore;
use crate::modules::{
    helm::chart::HelmChartScore,
    monitoring::kube_prometheus::types::{
        AlertGroup, AlertManager, AlertManagerAdditionalPromRules, AlertManagerConfig,
        AlertManagerRoute, AlertManagerValues,
    },
};

pub fn kube_prometheus_helm_chart_score() -> HelmChartScore {
    let config = KubePrometheusConfig::new();
pub fn kube_prometheus_helm_chart_score(
    config: Arc<Mutex<KubePrometheusConfig>>,
) -> HelmChartScore {
    let config = config.lock().unwrap();

    // TODO: this should be made into a rule with default formatting that can be easily passed as a vec
    // to the overrides or something; leaving the user to deal with formatting here seems bad
    let default_rules = config.default_rules.to_string();
    let windows_monitoring = config.windows_monitoring.to_string();
    let alert_manager = config.alert_manager.to_string();
    let grafana = config.grafana.to_string();
    let kubernetes_service_monitors = config.kubernetes_service_monitors.to_string();
    let kubernetes_api_server = config.kubernetes_api_server.to_string();
@@ -25,58 +36,8 @@ pub fn kube_prometheus_helm_chart_score() -> HelmChartScore {
    let node_exporter = config.node_exporter.to_string();
    let prometheus_operator = config.prometheus_operator.to_string();
    let prometheus = config.prometheus.to_string();
    let values = format!(
    let mut values = format!(
        r#"
additionalPrometheusRulesMap:
  pods-status-alerts:
    groups:
      - name: pods
        rules:
          - alert: "[CRIT] POD not healthy"
            expr: min_over_time(sum by (namespace, pod) (kube_pod_status_phase{{phase=~"Pending|Unknown|Failed"}})[15m:1m]) > 0
            for: 0m
            labels:
              severity: critical
            annotations:
              title: "[CRIT] POD not healthy : {{{{ $labels.pod }}}}"
              description: |
                A POD is in a non-ready state!
                - **Pod**: {{{{ $labels.pod }}}}
                - **Namespace**: {{{{ $labels.namespace }}}}
          - alert: "[CRIT] POD crash looping"
            expr: increase(kube_pod_container_status_restarts_total[5m]) > 3
            for: 0m
            labels:
              severity: critical
            annotations:
              title: "[CRIT] POD crash looping : {{{{ $labels.pod }}}}"
              description: |
                A POD is drowning in a crash loop!
                - **Pod**: {{{{ $labels.pod }}}}
                - **Namespace**: {{{{ $labels.namespace }}}}
                - **Instance**: {{{{ $labels.instance }}}}
  pvc-alerts:
    groups:
      - name: pvc-alerts
        rules:
          - alert: 'PVC Fill Over 95 Percent In 2 Days'
            expr: |
              (
                kubelet_volume_stats_used_bytes
                /
                kubelet_volume_stats_capacity_bytes
              ) > 0.95
              AND
              predict_linear(kubelet_volume_stats_used_bytes[2d], 2 * 24 * 60 * 60)
              /
              kubelet_volume_stats_capacity_bytes
              > 0.95
            for: 1m
            labels:
              severity: warning
            annotations:
              description: The PVC {{{{ $labels.persistentvolumeclaim }}}} in namespace {{{{ $labels.namespace }}}} is predicted to fill over 95% in less than 2 days.
              title: PVC {{{{ $labels.persistentvolumeclaim }}}} in namespace {{{{ $labels.namespace }}}} will fill over 95% in less than 2 days
defaultRules:
  create: {default_rules}
  rules:
@@ -144,6 +105,85 @@ prometheus:
  enabled: {prometheus}
"#,
    );

    // add required null receiver for prometheus alert manager
    let mut null_receiver = Mapping::new();
    null_receiver.insert(
        Value::String("receiver".to_string()),
        Value::String("null".to_string()),
    );
    null_receiver.insert(
        Value::String("matchers".to_string()),
        Value::Sequence(vec![Value::String("alertname!=Watchdog".to_string())]),
    );
    null_receiver.insert(Value::String("continue".to_string()), Value::Bool(true));

    // add alert channels
    let mut alert_manager_channel_config = AlertManagerConfig {
        global: Mapping::new(),
        route: AlertManagerRoute {
            routes: vec![Value::Mapping(null_receiver)],
        },
        receivers: vec![serde_yaml::from_str("name: 'null'").unwrap()],
    };
    for receiver in config.alert_receiver_configs.iter() {
        if let Some(global) = receiver.channel_global_config.clone() {
            alert_manager_channel_config
                .global
                .insert(global.0, global.1);
        }
        alert_manager_channel_config
            .route
            .routes
            .push(receiver.channel_route.clone());
        alert_manager_channel_config
            .receivers
            .push(receiver.channel_receiver.clone());
    }

    let alert_manager_values = AlertManagerValues {
        alertmanager: AlertManager {
            enabled: config.alert_manager,
            config: alert_manager_channel_config,
        },
    };

    let alert_manager_yaml =
        serde_yaml::to_string(&alert_manager_values).expect("Failed to serialize YAML");
    debug!("serialized alert manager: \n {:#}", alert_manager_yaml);
    values.push_str(&alert_manager_yaml);

    // format alert manager additional rules for the helm chart
    let mut merged_rules: BTreeMap<String, AlertGroup> = BTreeMap::new();

    for additional_rule in config.alert_rules.clone() {
        for (key, group) in additional_rule.rules {
            merged_rules.insert(key, group);
        }
    }

    let merged_rules = AlertManagerAdditionalPromRules {
        rules: merged_rules,
    };

    let mut alert_manager_additional_rules = serde_yaml::Mapping::new();
    let rules_value = serde_yaml::to_value(merged_rules).unwrap();

    alert_manager_additional_rules.insert(
        serde_yaml::Value::String("additionalPrometheusRulesMap".to_string()),
        rules_value,
    );

    let alert_manager_additional_rules_yaml =
        serde_yaml::to_string(&alert_manager_additional_rules).expect("Failed to serialize YAML");
    debug!(
        "alert_rules_yaml:\n{:#}",
        alert_manager_additional_rules_yaml
    );

    values.push_str(&alert_manager_additional_rules_yaml);
    debug!("full values.yaml: \n {:#}", values);

    HelmChartScore {
        namespace: Some(NonBlankString::from_str(&config.namespace).unwrap()),
        release_name: NonBlankString::from_str("kube-prometheus").unwrap(),
@@ -1,47 +1,33 @@
use std::sync::{Arc, Mutex};

use serde::Serialize;

use super::{helm::config::KubePrometheusConfig, prometheus::Prometheus};
use crate::{
    modules::monitoring::alert_channel::discord_alert_channel::DiscordWebhook,
    score::Score,
    topology::{
        HelmCommand, Topology,
        oberservability::monitoring::{AlertReceiver, AlertingInterpret},
        oberservability::monitoring::{AlertReceiver, AlertRule, AlertingInterpret},
    },
};

use super::prometheus::Prometheus;

#[derive(Clone, Debug, Serialize)]
pub struct HelmPrometheusAlertingScore {
    pub receivers: Vec<Box<dyn AlertReceiver<Prometheus>>>,
    pub rules: Vec<Box<dyn AlertRule<Prometheus>>>,
}

impl<T: Topology + HelmCommand> Score<T> for HelmPrometheusAlertingScore {
    fn create_interpret(&self) -> Box<dyn crate::interpret::Interpret<T>> {
        Box::new(AlertingInterpret {
            sender: Prometheus {},
            receivers: vec![Box::new(DiscordWebhook {
                url: todo!(),
                name: todo!(),
            })],
            sender: Prometheus {
                config: Arc::new(Mutex::new(KubePrometheusConfig::new())),
            },
            receivers: self.receivers.clone(),
            rules: self.rules.clone(),
        })
    }

    fn name(&self) -> String {
        "HelmPrometheusAlertingScore".to_string()
    }
}

impl Serialize for Box<dyn AlertReceiver<Prometheus>> {
    fn serialize<S>(&self, _serializer: S) -> Result<S::Ok, S::Error>
    where
        S: serde::Serializer,
    {
        todo!()
    }
}
impl Clone for Box<dyn AlertReceiver<Prometheus>> {
    fn clone(&self) -> Self {
        todo!()
    }
}
@@ -1,34 +1,139 @@
use std::sync::{Arc, Mutex};

use async_trait::async_trait;
use log::debug;
use serde::Serialize;

use crate::{
    interpret::InterpretError,
    topology::{installable::Installable, oberservability::monitoring::AlertSender},
    interpret::{InterpretError, Outcome},
    inventory::Inventory,
    modules::monitoring::alert_rule::prometheus_alert_rule::AlertManagerRuleGroup,
    score,
    topology::{
        HelmCommand, Topology,
        installable::Installable,
        oberservability::monitoring::{AlertReceiver, AlertRule, AlertSender},
    },
};

impl AlertSender for Prometheus {}
use score::Score;

use super::{
    helm::{
        config::KubePrometheusConfig, kube_prometheus_helm_chart::kube_prometheus_helm_chart_score,
    },
    types::{AlertManagerAdditionalPromRules, AlertManagerChannelConfig},
};

#[async_trait]
impl Installable for Prometheus {
    async fn ensure_installed(&self) -> Result<(), InterpretError> {
        todo!()
impl AlertSender for Prometheus {
    fn name(&self) -> String {
        "HelmKubePrometheus".to_string()
    }
}

#[async_trait]
impl<T: Topology + HelmCommand> Installable<T> for Prometheus {
    async fn ensure_installed(
        &self,
        inventory: &Inventory,
        topology: &T,
    ) -> Result<(), InterpretError> {
        self.install_prometheus(inventory, topology).await?;
        Ok(())
    }
}

#[derive(Debug)]
pub struct Prometheus;
pub struct Prometheus {
    pub config: Arc<Mutex<KubePrometheusConfig>>,
}

impl Prometheus {
    pub async fn install_receiver(
        &self,
        prometheus_receiver: PrometheusReceiver,
    ) -> Result<(), InterpretError> {
        prometheus_receiver: &dyn PrometheusReceiver,
    ) -> Result<Outcome, InterpretError> {
        let prom_receiver = prometheus_receiver.configure_receiver().await;
        debug!(
            "adding alert receiver to prometheus config: {:#?}",
            &prom_receiver
        );
        let mut config = self.config.lock().unwrap();

        config.alert_receiver_configs.push(prom_receiver);
        let prom_receiver_name = prometheus_receiver.name();
        debug!("installed alert receiver {}", &prom_receiver_name);
        Ok(Outcome::success(format!(
            "Successfully installed receiver {}",
            prom_receiver_name
        )))
    }

    pub async fn install_rule(
        &self,
        prometheus_rule: &AlertManagerRuleGroup,
    ) -> Result<Outcome, InterpretError> {
        let prometheus_rule = prometheus_rule.configure_rule().await;
        let mut config = self.config.lock().unwrap();

        config.alert_rules.push(prometheus_rule.clone());
        Ok(Outcome::success(format!(
            "Successfully installed alert rule: {:#?},",
            prometheus_rule
        )))
    }

    pub async fn install_prometheus<T: Topology + HelmCommand + Send + Sync>(
        &self,
        inventory: &Inventory,
        topology: &T,
    ) -> Result<Outcome, InterpretError> {
        kube_prometheus_helm_chart_score(self.config.clone())
            .create_interpret()
            .execute(inventory, topology)
            .await
    }
}

#[async_trait]
pub trait PrometheusReceiver: Send + Sync + std::fmt::Debug {
    fn name(&self) -> String;
    async fn configure_receiver(&self) -> AlertManagerChannelConfig;
}

impl Serialize for Box<dyn AlertReceiver<Prometheus>> {
    fn serialize<S>(&self, _serializer: S) -> Result<S::Ok, S::Error>
    where
        S: serde::Serializer,
    {
        todo!()
    }
}

pub struct PrometheusReceiver {}

impl PrometheusReceiver {
    fn get_prometheus_receiver_config(&self) {}
impl Clone for Box<dyn AlertReceiver<Prometheus>> {
    fn clone(&self) -> Self {
        self.clone_box()
    }
}

pub struct AlertChannelGlobalConfig {}
#[async_trait]
pub trait PrometheusRule: Send + Sync + std::fmt::Debug {
    fn name(&self) -> String;
    async fn configure_rule(&self) -> AlertManagerAdditionalPromRules;
}

impl Serialize for Box<dyn AlertRule<Prometheus>> {
    fn serialize<S>(&self, _serializer: S) -> Result<S::Ok, S::Error>
    where
        S: serde::Serializer,
    {
        todo!()
    }
}

impl Clone for Box<dyn AlertRule<Prometheus>> {
    fn clone(&self) -> Self {
        self.clone_box()
    }
}
@@ -1,12 +1,55 @@
use serde::Serialize;
use std::collections::BTreeMap;

#[derive(Serialize)]
pub struct AlertReceiverRoute {
    pub receiver: String,
    pub matchers: Vec<String>,
    #[serde(default)]
    pub r#continue: bool,
use async_trait::async_trait;
use serde::Serialize;
use serde_yaml::{Mapping, Sequence, Value};

use crate::modules::monitoring::alert_rule::prometheus_alert_rule::AlertManagerRuleGroup;

#[async_trait]
pub trait AlertChannelConfig {
    async fn get_config(&self) -> AlertManagerChannelConfig;
}
pub struct AlertChannelReceiver {
    pub name: String,

#[derive(Debug, Clone, Serialize)]
pub struct AlertManagerValues {
    pub alertmanager: AlertManager,
}
#[derive(Debug, Clone, Serialize)]
pub struct AlertManager {
    pub enabled: bool,
    pub config: AlertManagerConfig,
}

#[derive(Debug, Clone, Serialize)]
pub struct AlertManagerConfig {
    pub global: Mapping,
    pub route: AlertManagerRoute,
    pub receivers: Sequence,
}

#[derive(Debug, Clone, Serialize)]
pub struct AlertManagerRoute {
    pub routes: Sequence,
}

#[derive(Debug, Clone, Serialize)]
pub struct AlertManagerChannelConfig {
    /// Expecting an option that contains two values,
    /// if necessary for the alert channel:
    /// [ jira_api_url: <string> ]
    pub channel_global_config: Option<(Value, Value)>,
    pub channel_route: Value,
    pub channel_receiver: Value,
}

#[derive(Debug, Clone, Serialize)]
pub struct AlertManagerAdditionalPromRules {
    #[serde(flatten)]
    pub rules: BTreeMap<String, AlertGroup>,
}

#[derive(Debug, Clone, Serialize)]
pub struct AlertGroup {
    pub groups: Vec<AlertManagerRuleGroup>,
}
@@ -1,2 +1,3 @@
pub mod alert_channel;
pub mod alert_rule;
pub mod kube_prometheus;
40 harmony/src/modules/prometheus/alerts/infra/dell_server.rs (new file)
@@ -0,0 +1,40 @@
use crate::modules::monitoring::alert_rule::prometheus_alert_rule::PrometheusAlertRule;

pub fn global_storage_status_degraded_non_critical() -> PrometheusAlertRule {
    PrometheusAlertRule::new("GlobalStorageStatusNonCritical", "globalStorageStatus == 4")
        .for_duration("5m")
        .label("severity", "warning")
        .annotation(
            "description",
            "- **System**: {{ $labels.instance }}\n- **Status**: nonCritical\n- **Value**: {{ $value }}\n- **Job**: {{ $labels.job }}",
        )
        .annotation("title", "System storage status is in degraded state")
}

pub fn alert_global_storage_status_critical() -> PrometheusAlertRule {
    PrometheusAlertRule::new(
        "GlobalStorageStatus critical",
        "globalStorageStatus == 5",
    )
    .for_duration("5m")
    .label("severity", "warning")
    .annotation("title", "System storage status is critical at {{ $labels.instance }}")
    .annotation(
        "description",
        "- **System**: {{ $labels.instance }}\n- **Status**: Critical\n- **Value**: {{ $value }}\n- **Job**: {{ $labels.job }}",
    )
}

pub fn alert_global_storage_status_non_recoverable() -> PrometheusAlertRule {
    PrometheusAlertRule::new(
        "GlobalStorageStatus nonRecoverable",
        "globalStorageStatus == 6",
    )
    .for_duration("5m")
    .label("severity", "warning")
    .annotation("title", "System storage status is nonRecoverable at {{ $labels.instance }}")
    .annotation(
        "description",
        "- **System**: {{ $labels.instance }}\n- **Status**: nonRecoverable\n- **Value**: {{ $value }}\n- **Job**: {{ $labels.job }}",
    )
}
1 harmony/src/modules/prometheus/alerts/infra/mod.rs (new file)
@@ -0,0 +1 @@
pub mod dell_server;
1 harmony/src/modules/prometheus/alerts/k8s/mod.rs (new file)
@@ -0,0 +1 @@
pub mod pvc;
11 harmony/src/modules/prometheus/alerts/k8s/pvc.rs (new file)
@@ -0,0 +1,11 @@
use crate::modules::monitoring::alert_rule::prometheus_alert_rule::PrometheusAlertRule;

pub fn high_pvc_fill_rate_over_two_days() -> PrometheusAlertRule {
    PrometheusAlertRule::new(
        "PVC Fill Over 95 Percent In 2 Days",
        "(kubelet_volume_stats_used_bytes/kubelet_volume_stats_capacity_bytes) > 0.95 AND predict_linear(kubelet_volume_stats_used_bytes[2d], 2 * 24 * 60 * 60)/kubelet_volume_stats_capacity_bytes > 0.95",
    )
    .for_duration("1m")
    .label("severity", "warning")
    .annotation("summary", "The PVC {{ $labels.persistentvolumeclaim }} in namespace {{ $labels.namespace }} is predicted to fill over 95% in less than 2 days.")
    .annotation("description", "PVC {{ $labels.persistentvolumeclaim }} in namespace {{ $labels.namespace }} will fill over 95% in less than 2 days")
}
2 harmony/src/modules/prometheus/alerts/mod.rs (new file)
@@ -0,0 +1,2 @@
pub mod infra;
pub mod k8s;
1 harmony/src/modules/prometheus/mod.rs (new file)
@@ -0,0 +1 @@
pub mod alerts;
74 harmony_composer/README.md (new file)
@@ -0,0 +1,74 @@
# harmony_composer

`harmony_composer` is a command-line utility for compiling and bootstrapping deployments for the Harmony orchestration framework.

It's designed to simplify the build process by either compiling a Harmony project found in a local `harmony` directory or by bootstrapping a new deployment through auto-detection of the current project type.

## ⚡ Quick Install & Run (Linux x86-64)

You can download and run the latest snapshot build with a single command. This will place the binary in `~/.local/bin`, which should be in your `PATH` on most modern Linux distributions.

```bash
mkdir -p ~/.local/bin && \
curl -L https://git.nationtech.io/NationTech/harmony/releases/download/snapshot-latest/harmony_composer \
  -o ~/.local/bin/harmony_composer && \
chmod +x ~/.local/bin/harmony_composer && \
alias hc=~/.local/bin/harmony_composer && \
echo "\n\nharmony_composer installed successfully\!\n\nUse \`hc\` to run it.\n\nNote: this hc alias only works for the current shell session. Add 'alias hc=~/.local/bin/harmony_composer' to your '~/.bashrc' or '~/.zshrc' file to make it permanently available to your user."
```

Then you can start using it with either:

- `harmony_composer`, if `~/.local/bin` is in your `$PATH`
- the `hc` alias set up in your current shell session.
- If you want to make the `hc` command always available, add `alias hc=~/.local/bin/harmony_composer` to your shell profile: usually `~/.bashrc` for bash, `~/.zshrc` for zsh.

> ⚠️ Warning: Unstable Builds
> The snapshot-latest tag points to the latest build from the master branch. It is unstable, unsupported, and intended only for early testing of new features. Please do not use it in production environments.

## ⚙️ How It Works

`harmony_composer` requires either `cargo` or `docker` to be available on your system to compile the Harmony project.

- If `cargo` is found: it will be used to compile the project locally.
- If `cargo` is not found: it will automatically download and run the harmony_composer Docker image. This image is a self-contained build environment with the required Cargo binary and build targets for both Linux and Windows. A rough sketch of this fallback is shown below.
- If both `cargo` and `docker` are unavailable, `harmony_composer` will fail. Please install one of them.
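The sketch below is illustrative only: the actual detection logic lives inside `harmony_composer`, and the Docker image name is a guess pieced together from the `HARMONY_REGISTRY_URL` and `HARMONY_REGISTRY_PROJECT` defaults (`hub.nationtech.io`, `harmony`) seen elsewhere in this commit, not a confirmed published tag.

```bash
# Approximation of the documented cargo -> docker fallback; not the real implementation.
if command -v cargo >/dev/null 2>&1; then
    # cargo available: compile the Harmony project locally
    cargo build --release
elif command -v docker >/dev/null 2>&1; then
    # no cargo: run the self-contained build image instead
    # (image name is an assumption, see the lead-in above)
    docker run --rm -v "$PWD":/workspace -w /workspace \
        hub.nationtech.io/harmony/harmony_composer compile
else
    echo "error: neither cargo nor docker is available" >&2
    exit 1
fi
```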
## 📖 Basic Usage

Here are some common commands:

```bash
# Compile the repo's Harmony module
harmony_composer compile

# Run the check script on the project
harmony_composer check

# Run the repo's entire harmony deployment sequence
harmony_composer deploy

# Run the full check, compile, and deploy pipeline
harmony_composer all
```

For a full list of commands and their options, run:

```bash
harmony_composer --help
```

## 🏗️ Supported Architectures

The build system currently supports compiling for the following targets (example invocations follow the list):

- x86_64-unknown-linux-gnu
- x86_64-pc-windows-gnu
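The CI workflow in this commit builds both targets with plain `cargo build` invocations. Reproducing them locally should look roughly like this, assuming a Rust toolchain is installed (cross-compiling to Windows additionally needs the target installed plus a MinGW-w64 linker):

```bash
# Linux binary (the host target on x86_64 Linux)
cargo build --release --bin harmony_composer --target x86_64-unknown-linux-gnu

# Windows binary, as the CI job does it
rustup target add x86_64-pc-windows-gnu   # one-time setup
cargo build --release --bin harmony_composer --target x86_64-pc-windows-gnu
```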
More target architectures are planned. If your platform is not yet supported, please open a feature request in the main repository.

## 🔗 Main Project

This tool is a small part of the main Harmony project. For complete documentation, contribution guidelines, and license information, please refer to the main repository.