From 06a0c44c3cfa982f0ce585ad815caec4270208f7 Mon Sep 17 00:00:00 2001 From: Willem Date: Tue, 14 Oct 2025 15:53:42 -0400 Subject: [PATCH] wip: connected the thanos-datasource to grafana, need to complete connecting the openshift-userworkload-monitoring as well --- harmony/src/domain/topology/k8s.rs | 17 +- harmony/src/domain/topology/k8s_anywhere.rs | 194 ++++++++++++------ .../kube_prometheus/crd/crd_grafana.rs | 12 +- .../k8s_prometheus_alerting_score.rs | 16 +- 4 files changed, 165 insertions(+), 74 deletions(-) diff --git a/harmony/src/domain/topology/k8s.rs b/harmony/src/domain/topology/k8s.rs index 144533c..f1a783f 100644 --- a/harmony/src/domain/topology/k8s.rs +++ b/harmony/src/domain/topology/k8s.rs @@ -1,12 +1,20 @@ use derive_new::new; +use http::StatusCode; use k8s_openapi::{ ClusterResourceScope, NamespaceResourceScope, - api::{apps::v1::Deployment, core::v1::Pod}, + api::{ + apps::v1::Deployment, + authentication::v1::{TokenRequest, TokenRequestSpec, TokenRequestStatus}, + core::v1::{Pod, ServiceAccount}, + }, apimachinery::pkg::version::Info, }; use kube::{ Client, Config, Discovery, Error, Resource, - api::{Api, AttachParams, DeleteParams, ListParams, Patch, PatchParams, ResourceExt}, + api::{ + Api, AttachParams, DeleteParams, ListParams, ObjectMeta, Patch, PatchParams, PostParams, + ResourceExt, + }, config::{KubeConfigOptions, Kubeconfig}, core::ErrorResponse, runtime::reflector::Lookup, @@ -54,6 +62,11 @@ impl K8sClient { }) } + pub async fn service_account_api(&self, namespace: &str) -> Api { + let api: Api = Api::namespaced(self.client.clone(), namespace); + api + } + pub async fn get_apiserver_version(&self) -> Result { let client: Client = self.client.clone(); let version_info: Info = client.apiserver_version().await?; diff --git a/harmony/src/domain/topology/k8s_anywhere.rs b/harmony/src/domain/topology/k8s_anywhere.rs index efbe33f..cb37ece 100644 --- a/harmony/src/domain/topology/k8s_anywhere.rs +++ b/harmony/src/domain/topology/k8s_anywhere.rs @@ -1,8 +1,12 @@ -use std::{collections::BTreeMap, process::Command, sync::Arc}; +use std::{ + collections::{BTreeMap, HashMap}, + process::Command, + sync::Arc, +}; use async_trait::async_trait; use k8s_openapi::api::{ - authentication::v1::{TokenRequest, TokenRequestSpec}, + authentication::v1::{TokenRequest, TokenRequestSpec, TokenRequestStatus}, core::v1::{Secret, ServiceAccount}, rbac::v1::{ClusterRoleBinding, RoleRef, Subject}, }; @@ -150,39 +154,90 @@ impl Grafana for K8sAnywhereTopology { match_labels: label.clone(), match_expressions: vec![], }; - + debug!("getting client"); let client = self.k8s_client().await?; - let url = format!("{}:9091", self.get_domain("thanos-querier").await.unwrap()); - - let sa = self.build_service_account(); - //TODO finish this section - //needs apply Api or something - client.apply(&sa, Some(ns)).await?; - - let token_request =self.get_token_request(); - //this wont work needs a new function for apply secret - client.apply(&token_request, Some(ns)).await?; - - let clusterrolebinding = self.build_cluster_rolebinding(); - - client.apply(&clusterrolebinding, Some(ns)).await?; - - let secret = self.build_token_secret(); - - client.apply(&secret, Some(ns)).await?; - - let datasource = self.build_grafana_datasource(ns, &label_selector, &url); - - client.apply(&datasource, Some(ns)).await?; - - let dashboard = self.build_grafana_dashboard(ns, &label_selector); - - client.apply(&dashboard, Some(ns)).await?; + info!("creating grafanas crd"); let grafana = self.build_grafana(ns, &label); client.apply(&grafana, Some(ns)).await?; + client + .wait_until_deployment_ready( + "grafana-grafana-deployment".to_string(), + Some("grafana"), + Some(15), + ) + .await?; + + let sa_name = "grafana-grafana-sa"; + + debug!("creating token for sevice account {sa_name}"); + let token = self.create_service_account_token(sa_name, ns).await?; + + debug!("creating secret"); + let secret_name = "grafana-sa-secret"; + let secret = self.build_token_secret(secret_name, &token.token, ns).await; + + client.apply(&secret, Some(ns)).await?; + + debug!("creating grafana clusterrole binding"); + let clusterrolebinding = + self.build_cluster_rolebinding(sa_name, "cluster-monitoring-view", ns); + + client.apply(&clusterrolebinding, Some(ns)).await?; + + debug!("creating grafana datasource crd"); + + let token_str = format!("Bearer {}", token.token); + + let thanos_url = format!( + "https://{}", + self.get_domain("thanos-querier-openshift-monitoring") + .await + .unwrap() + ); + + let thanos_openshift_datasource = self.build_grafana_datasource( + "thanos-openshift-monitoring", + ns, + &label_selector, + &thanos_url, + token_str.clone(), + ); + + client.apply(&thanos_openshift_datasource, Some(ns)).await?; + + //TODO user workload datasource returns 503 -> need to figure out how to correctly add the + //userworkload thanos-ruler or prometheus-federate to the grafana datasource + //it may alrady be included in the overall monitoring stack + + let user_thanos_url = format!( + "https://{}", + self.get_domain( + "thanos-ruler-openshift-user-workload-monitoring.apps.ncd0.harmony.mcd" + ) + .await + .unwrap() + ); + + let thanos_openshift_userworkload_datasource = self.build_grafana_datasource( + "thanos-openshift-userworkload-monitoring", + ns, + &label_selector, + &user_thanos_url, + token_str.clone(), + ); + + client + .apply(&thanos_openshift_userworkload_datasource, Some(ns)) + .await?; + + debug!("creating grafana dashboard crd"); + let dashboard = self.build_grafana_dashboard(ns, &label_selector); + + client.apply(&dashboard, Some(ns)).await?; + debug!("creating grafana ingress"); let grafana_ingress = self.build_grafana_ingress(ns).await; grafana_ingress @@ -368,31 +423,36 @@ impl K8sAnywhereTopology { pub fn build_cluster_rolebinding( &self, + service_account_name: &str, + clusterrole_name: &str, ns: &str, - account_name: &str, - role: &str, ) -> ClusterRoleBinding { ClusterRoleBinding { metadata: ObjectMeta { - name: Some(format!("{}-view-binding", account_name)), + name: Some(format!("{}-view-binding", service_account_name)), ..Default::default() }, role_ref: RoleRef { api_group: "rbac.authorization.k8s.io".into(), kind: "ClusterRole".into(), - name: role.into(), + name: clusterrole_name.into(), }, subjects: Some(vec![Subject { kind: "ServiceAccount".into(), - name: account_name.into(), + name: service_account_name.into(), namespace: Some(ns.into()), ..Default::default() }]), } } - pub fn get_token_request(&self) -> TokenRequest { + pub fn get_token_request(&self, ns: &str) -> TokenRequest { + debug!("building token request"); TokenRequest { + metadata: ObjectMeta { + namespace: Some(ns.to_string()), + ..Default::default() + }, spec: TokenRequestSpec { audiences: vec!["https://kubernetes.default.svc".to_string()], expiration_seconds: Some(3600), @@ -402,15 +462,39 @@ impl K8sAnywhereTopology { } } - pub fn build_token_secret(&self, token: &str, ns: &str) -> Secret { + pub async fn create_service_account_token( + &self, + service_account_name: &str, + ns: &str, + ) -> Result { + debug!("creating service account token"); + let token_request = self.get_token_request(ns); + let client = self.k8s_client().await?; + let pp = PostParams::default(); + let token_requests_api = client.service_account_api(ns).await; + + let data = serde_json::to_vec(&token_request).unwrap(); + + let created_token_request = token_requests_api + .create_subresource::("token", service_account_name, &pp, data) + .await?; + + let status = created_token_request + .status + .ok_or_else(|| PreparationError::new("missing token request status".to_string()))?; + + Ok(status) + } + + pub async fn build_token_secret(&self, secret_name: &str, token: &str, ns: &str) -> Secret { Secret { metadata: ObjectMeta { - name: Some("grafana-credentials".into()), + name: Some(secret_name.into()), namespace: Some(ns.into()), ..Default::default() }, string_data: Some(std::collections::BTreeMap::from([( - "PROMETHEUS_TOKEN".into(), + secret_name.into(), format!("Bearer {}", token), )])), ..Default::default() @@ -419,39 +503,18 @@ impl K8sAnywhereTopology { fn build_grafana_datasource( &self, + name: &str, ns: &str, label_selector: &LabelSelector, url: &str, + token: String, ) -> GrafanaDatasource { let mut json_data = BTreeMap::new(); json_data.insert("timeInterval".to_string(), "5s".to_string()); - // - // let graf_data_source = GrafanaDatasource { - // metadata: ObjectMeta { - // name: Some(format!("grafana-datasource-{}", ns)), - // namespace: Some(ns.to_string()), - // ..Default::default() - // }, - // spec: GrafanaDatasourceSpec { - // instance_selector: label_selector.clone(), - // allow_cross_namespace_import: Some(false), - // datasource: GrafanaDatasourceConfig { - // access: "proxy".to_string(), - // database: Some("prometheus".to_string()), - // json_data: Some(json_data), - // //this is fragile - // name: format!("prometheus-{}-0", ns), - // r#type: "prometheus".to_string(), - // url: url.to_string(), - // //url: format!("http://prometheus-operated.{}.svc.cluster.local:9090", ns), - // }, - // }, - // }; - // graf_data_source GrafanaDatasource { metadata: ObjectMeta { - name: Some("thanos-prometheus".to_string()), + name: Some(name.to_string()), namespace: Some(ns.to_string()), ..Default::default() }, @@ -460,20 +523,21 @@ impl K8sAnywhereTopology { allow_cross_namespace_import: Some(true), datasource: GrafanaDatasourceConfig { access: "proxy".to_string(), - name: "OpenShift-Thanos".to_string(), + name: name.to_string(), r#type: "prometheus".to_string(), url: url.to_string(), database: None, json_data: Some(GrafanaDatasourceJsonData { time_interval: Some("60s".to_string()), http_header_name1: Some("Authorization".to_string()), + tls_skip_verify: Some(true), + oauth_pass_thru: Some(true), }), secure_json_data: Some(GrafanaDatasourceSecureJsonData { - http_header_value1: Some("Bearer eyJhbGc...".to_string()), + http_header_value1: Some(token), }), is_default: Some(false), editable: Some(true), - version: Some(1), }, }, } diff --git a/harmony/src/modules/monitoring/kube_prometheus/crd/crd_grafana.rs b/harmony/src/modules/monitoring/kube_prometheus/crd/crd_grafana.rs index 4134670..e58f4ca 100644 --- a/harmony/src/modules/monitoring/kube_prometheus/crd/crd_grafana.rs +++ b/harmony/src/modules/monitoring/kube_prometheus/crd/crd_grafana.rs @@ -132,6 +132,7 @@ pub struct GrafanaDatasourceSpec { #[serde(rename_all = "camelCase")] pub struct GrafanaDatasourceConfig { pub access: String, + #[serde(default, skip_serializing_if = "Option::is_none")] pub database: Option, pub name: String, pub r#type: String, @@ -149,9 +150,6 @@ pub struct GrafanaDatasourceConfig { #[serde(default, skip_serializing_if = "Option::is_none")] pub editable: Option, - - #[serde(default, skip_serializing_if = "Option::is_none")] - pub version: Option, } #[derive(Serialize, Deserialize, Debug, Clone, JsonSchema)] @@ -162,6 +160,14 @@ pub struct GrafanaDatasourceJsonData { #[serde(default, skip_serializing_if = "Option::is_none")] pub http_header_name1: Option, + + /// Disable TLS skip verification (false = verify) + #[serde(default, skip_serializing_if = "Option::is_none")] + pub tls_skip_verify: Option, + + /// Auth type - set to "forward" for OpenShift OAuth identity + #[serde(default, skip_serializing_if = "Option::is_none")] + pub oauth_pass_thru: Option, } #[derive(Serialize, Deserialize, Debug, Clone, JsonSchema)] diff --git a/harmony/src/modules/prometheus/k8s_prometheus_alerting_score.rs b/harmony/src/modules/prometheus/k8s_prometheus_alerting_score.rs index 2cb4ffb..f9e8531 100644 --- a/harmony/src/modules/prometheus/k8s_prometheus_alerting_score.rs +++ b/harmony/src/modules/prometheus/k8s_prometheus_alerting_score.rs @@ -12,7 +12,7 @@ use crate::modules::monitoring::kube_prometheus::crd::crd_alertmanager_config::C use crate::modules::monitoring::kube_prometheus::crd::crd_default_rules::build_default_application_rules; use crate::modules::monitoring::kube_prometheus::crd::crd_grafana::{ Grafana, GrafanaDashboard, GrafanaDashboardSpec, GrafanaDatasource, GrafanaDatasourceConfig, - GrafanaDatasourceSpec, GrafanaSpec, + GrafanaDatasourceJsonData, GrafanaDatasourceSpec, GrafanaSpec, }; use crate::modules::monitoring::kube_prometheus::crd::crd_prometheus_rules::{ PrometheusRule, PrometheusRuleSpec, RuleGroup, @@ -466,10 +466,15 @@ impl K8sPrometheusCRDAlertingInterpret { match_labels: label.clone(), match_expressions: vec![], }; - let mut json_data = BTreeMap::new(); - json_data.insert("timeInterval".to_string(), "5s".to_string()); + // let mut json_data = BTreeMap::new(); + // json_data.insert("timeInterval".to_string(), "5s".to_string()); let namespace = self.sender.namespace.clone(); - + let json_data = GrafanaDatasourceJsonData { + time_interval: Some("5s".to_string()), + http_header_name1: None, + tls_skip_verify: Some(true), + oauth_pass_thru: Some(true), + }; let json = build_default_dashboard(&namespace); let graf_data_source = GrafanaDatasource { @@ -495,6 +500,9 @@ impl K8sPrometheusCRDAlertingInterpret { "http://prometheus-operated.{}.svc.cluster.local:9090", self.sender.namespace.clone() ), + secure_json_data: None, + is_default: None, + editable: None, }, }, };