Compare commits
9 commits: 49370af176 ... doc/worker
| Author | SHA1 | Date |
|---|---|---|
| | 1802b10ddf | |
| | dd3f07e5b7 | |
| | cbbaae2ac8 | |
| | c84b2413ed | |
| | f83fd09f11 | |
| | c15bd53331 | |
| | 6e6f57e38c | |
| | 6f55f79281 | |
| | 19f87fdaf7 | |
README.md (69 changed lines)
@@ -36,48 +36,59 @@ These principles surface as simple, ergonomic Rust APIs that let teams focus on
## 2 · Quick Start

The snippet below spins up a complete **production-grade LAMP stack** with monitoring. Swap it for your own scores to deploy anything from microservices to machine-learning pipelines.
The snippet below spins up a complete **production-grade Rust + Leptos Webapp** with monitoring. Swap it for your own scores to deploy anything from microservices to machine-learning pipelines.

```rust
use harmony::{
    data::Version,
    inventory::Inventory,
    maestro::Maestro,
    modules::{
        lamp::{LAMPConfig, LAMPScore},
        monitoring::monitoring_alerting::MonitoringAlertingStackScore,
        application::{
            ApplicationScore, RustWebFramework, RustWebapp,
            features::{PackagingDeployment, rhob_monitoring::Monitoring},
        },
        monitoring::alert_channel::discord_alert_channel::DiscordWebhook,
    },
    topology::{K8sAnywhereTopology, Url},
    topology::K8sAnywhereTopology,
};
use harmony_macros::hurl;
use std::{path::PathBuf, sync::Arc};

#[tokio::main]
async fn main() {
    // 1. Describe what you want
    let lamp_stack = LAMPScore {
        name: "harmony-lamp-demo".into(),
        domain: Url::Url(url::Url::parse("https://lampdemo.example.com").unwrap()),
        php_version: Version::from("8.3.0").unwrap(),
        config: LAMPConfig {
            project_root: "./php".into(),
            database_size: "4Gi".into(),
            ..Default::default()
        },
    let application = Arc::new(RustWebapp {
        name: "harmony-example-leptos".to_string(),
        project_root: PathBuf::from(".."), // <== Your project root, usually .. if you use the standard `/harmony` folder
        framework: Some(RustWebFramework::Leptos),
        service_port: 8080,
    });

    // Define your Application deployment and the features you want
    let app = ApplicationScore {
        features: vec![
            Box::new(PackagingDeployment {
                application: application.clone(),
            }),
            Box::new(Monitoring {
                application: application.clone(),
                alert_receiver: vec![
                    Box::new(DiscordWebhook {
                        name: "test-discord".to_string(),
                        url: hurl!("https://discord.doesnt.exist.com"), // <== Get your discord webhook url
                    }),
                ],
            }),
        ],
        application,
    };

    // 2. Enhance with extra scores (monitoring, CI/CD, …)
    let mut monitoring = MonitoringAlertingStackScore::new();
    monitoring.namespace = Some(lamp_stack.config.namespace.clone());

    // 3. Run your scores on the desired topology & inventory
    harmony_cli::run(
        Inventory::autoload(), // auto-detect hardware / kube-config
        K8sAnywhereTopology::from_env(), // local k3d, CI, staging, prod…
        vec![
            Box::new(lamp_stack),
            Box::new(monitoring)
        ],
        None
    ).await.unwrap();
        Inventory::autoload(),
        K8sAnywhereTopology::from_env(), // <== Deploy to local automatically provisioned local k3d by default or connect to any kubernetes cluster
        vec![Box::new(app)],
        None,
    )
    .await
    .unwrap();
}
```
Binary file not shown (after: 8.3 KiB).
File diff suppressed because one or more lines are too long
@@ -225,3 +225,17 @@ Demo time
---

<img src="./Happy_swimmer.jpg" width="300"/>

---

# 🎼

Harmony : [https://git.nationtech.io/nationtech/harmony](https://git.nationtech.io/nationtech/harmony)

<img src="./qrcode_gitea_nationtech.png" width="120"/>

LinkedIn : [https://www.linkedin.com/in/jean-gabriel-gill-couture/](https://www.linkedin.com/in/jean-gabriel-gill-couture/)

Email : [jg@nationtech.io](mailto:jg@nationtech.io)
docs/doc-remove-worker-flag.md (new file, 56 lines)
@@ -0,0 +1,56 @@
## **Remove Worker flag from OKD Control Planes**

### **Context**
On OKD user-provisioned infrastructure (UPI), the control plane nodes can carry the node-role.kubernetes.io/worker label, which allows non-critical workloads to be scheduled on the control planes.

### **Observed Symptoms**
- After adding the HAProxy servers to the backend, each backend appears down
- Traffic is redirected to the control planes instead of the workers
- The router-default pods are incorrectly placed on the control planes rather than on the workers
- Pods are being scheduled on the control planes, causing cluster instability

```
ss -tlnp | grep 80
```
- shows the haproxy process listening on 0.0.0.0:80 on the control planes
- same problem for port 443
- In the rook-ceph namespace, some pods are deployed on control planes rather than on worker nodes

### **Cause**
- When installing with UPI, the roles (master, worker) are not managed by the Machine Config Operator, and the control planes are made schedulable by default.

### **Diagnostic**
Check the node labels:
```
oc get nodes --show-labels | grep control-plane
```
Inspect the kubelet configuration:

```
cat /etc/systemd/system/kubelet.service
```

Find the line:
```
--node-labels=node-role.kubernetes.io/control-plane,node-role.kubernetes.io/master,node-role.kubernetes.io/worker
```
→ the presence of the worker label confirms the problem.

Verify that the flag does not come from the MCO:
```
oc get machineconfig | grep rendered-master
```

**Solution:**
To make the control planes non-schedulable, patch the cluster Scheduler resource:

```
oc patch scheduler cluster --type merge -p '{"spec":{"mastersSchedulable":false}}'
```
After the patch is applied, existing workloads can be moved off the control planes by draining the nodes:

```
oc adm cordon <cp-node>
oc adm drain <cp-node> --ignore-daemonsets --delete-emptydir-data
```
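As a quick follow-up check (a sketch only; it assumes the standard `node-role.kubernetes.io/master:NoSchedule` taint and the default `openshift-ingress` router namespace):

```
oc describe node <cp-node> | grep Taints
# expected: node-role.kubernetes.io/master:NoSchedule

oc get pods -n openshift-ingress -o wide
# the router-default pods should now be scheduled on worker nodes
```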
@@ -16,16 +16,13 @@ use std::{path::PathBuf, sync::Arc};
async fn main() {
    let application = Arc::new(RustWebapp {
        name: "harmony-example-tryrust".to_string(),
        project_root: PathBuf::from("./tryrust.org"),
        project_root: PathBuf::from("./tryrust.org"), // <== Project root, in this case it is a
        // submodule
        framework: Some(RustWebFramework::Leptos),
        service_port: 8080,
    });

    let discord_receiver = DiscordWebhook {
        name: "test-discord".to_string(),
        url: hurl!("https://discord.doesnt.exist.com"),
    };

    // Define your Application deployment and the features you want
    let app = ApplicationScore {
        features: vec![
            Box::new(PackagingDeployment {
@@ -33,7 +30,10 @@ async fn main() {
            }),
            Box::new(Monitoring {
                application: application.clone(),
                alert_receiver: vec![Box::new(discord_receiver)],
                alert_receiver: vec![Box::new(DiscordWebhook {
                    name: "test-discord".to_string(),
                    url: hurl!("https://discord.doesnt.exist.com"),
                })],
            }),
        ],
        application,
@@ -41,7 +41,7 @@ async fn main() {

    harmony_cli::run(
        Inventory::autoload(),
        K8sAnywhereTopology::from_env(),
        K8sAnywhereTopology::from_env(), // <== Deploy to local automatically provisioned k3d by default or connect to any kubernetes cluster
        vec![Box::new(app)],
        None,
    )
@@ -1,13 +1,19 @@
use std::time::Duration;

use derive_new::new;
use k8s_openapi::{
    ClusterResourceScope, NamespaceResourceScope,
    api::{apps::v1::Deployment, core::v1::Pod},
    api::{
        apps::v1::Deployment,
        core::v1::{Pod, PodStatus},
    },
};
use kube::{
    Client, Config, Error, Resource,
    api::{Api, AttachParams, DeleteParams, ListParams, Patch, PatchParams, ResourceExt},
    config::{KubeConfigOptions, Kubeconfig},
    core::ErrorResponse,
    error::DiscoveryError,
    runtime::reflector::Lookup,
};
use kube::{api::DynamicObject, runtime::conditions};
@@ -19,7 +25,7 @@ use log::{debug, error, trace};
use serde::{Serialize, de::DeserializeOwned};
use serde_json::{Value, json};
use similar::TextDiff;
use tokio::io::AsyncReadExt;
use tokio::{io::AsyncReadExt, time::sleep};

#[derive(new, Clone)]
pub struct K8sClient {
@@ -153,6 +159,41 @@ impl K8sClient {
        }
    }

    pub async fn wait_for_pod_ready(
        &self,
        pod_name: &str,
        namespace: Option<&str>,
    ) -> Result<(), Error> {
        let mut elapsed = 0;
        let interval = 5; // seconds between checks
        let timeout_secs = 120;
        loop {
            let pod = self.get_pod(pod_name, namespace).await?;

            if let Some(p) = pod {
                if let Some(status) = p.status {
                    if let Some(phase) = status.phase {
                        if phase.to_lowercase() == "running" {
                            return Ok(());
                        }
                    }
                }
            }

            if elapsed >= timeout_secs {
                return Err(Error::Discovery(DiscoveryError::MissingResource(format!(
                    "'{}' in ns '{}' did not become ready within {}s",
                    pod_name,
                    namespace.unwrap(),
                    timeout_secs
                ))));
            }

            sleep(Duration::from_secs(interval)).await;
            elapsed += interval;
        }
    }

    /// Will execute a command in the first pod found that matches the specified label
    /// '{label}={name}'
    pub async fn exec_app_capture_output(
@@ -419,9 +460,12 @@ impl K8sClient {
            .as_str()
            .expect("couldn't get kind as str");

        let split: Vec<&str> = api_version.splitn(2, "/").collect();
        let g = split[0];
        let v = split[1];
        let mut it = api_version.splitn(2, '/');
        let first = it.next().unwrap();
        let (g, v) = match it.next() {
            Some(second) => (first, second),
            None => ("", first),
        };

        let gvk = GroupVersionKind::gvk(g, v, kind);
        let api_resource = ApiResource::from_gvk(&gvk);
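For orientation, a minimal call-site sketch for the new helper; the pod name, namespace, and the `harmony::topology::k8s::K8sClient` import path are illustrative assumptions based on the file layout and the OKD interpret added further below:

```rust
use std::sync::Arc;

use harmony::topology::k8s::K8sClient; // assumed public path for the client shown in this diff

/// Block until the given pod reports the `Running` phase, or fail after the helper's 120s timeout.
async fn wait_for_user_workload(client: Arc<K8sClient>) -> Result<(), kube::Error> {
    client
        .wait_for_pod_ready(
            "prometheus-user-workload-0",               // illustrative pod name
            Some("openshift-user-workload-monitoring"), // illustrative namespace
        )
        .await
}
```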
@@ -55,7 +55,8 @@ impl<T: Topology + K8sclient + HelmCommand + Ingress> Interpret<T> for ArgoInter
        topology: &T,
    ) -> Result<Outcome, InterpretError> {
        let k8s_client = topology.k8s_client().await?;
        let domain = topology.get_domain("argo").await?;
        let svc = format!("argo-{}", self.score.namespace.clone());
        let domain = topology.get_domain(&svc).await?;
        let helm_score =
            argo_helm_chart_score(&self.score.namespace, self.score.openshift, &domain);

@@ -66,14 +67,17 @@ impl<T: Topology + K8sclient + HelmCommand + Ingress> Interpret<T> for ArgoInter
            .await
            .unwrap();

        Ok(Outcome::success(format!(
            "ArgoCD installed with {} {}",
            self.argo_apps.len(),
            match self.argo_apps.len() {
                1 => "application",
                _ => "applications",
            }
        )))
        Ok(Outcome::success_with_details(
            format!(
                "ArgoCD {} {}",
                self.argo_apps.len(),
                match self.argo_apps.len() {
                    1 => "application",
                    _ => "applications",
                }
            ),
            vec![format!("argo application: http://{}", domain)],
        ))
    }

    fn get_name(&self) -> InterpretName {
@@ -156,6 +160,9 @@ global:
  ## Used for ingresses, certificates, SSO, notifications, etc.
  domain: {domain}

  securityContext:
    runAsUser: null

  # -- Runtime class name for all components
  runtimeClassName: ""

@@ -467,6 +474,13 @@ redis:
  # -- Redis name
  name: redis

  serviceAccount:
    create: true

  securityContext:
    runAsUser: null

  ## Redis image
  image:
    # -- Redis repository
@@ -141,7 +141,10 @@ impl<T: Topology + K8sclient> Interpret<T> for K8sIngressInterpret {
            InterpretStatus::SUCCESS => {
                let details = match &self.namespace {
                    Some(namespace) => {
                        vec![format!("{} ({namespace}): {}", self.service, self.host)]
                        vec![format!(
                            "{} ({namespace}): http://{}",
                            self.service, self.host
                        )]
                    }
                    None => vec![format!("{}: {}", self.service, self.host)],
                };

@@ -4,4 +4,5 @@ pub mod application_monitoring;
pub mod grafana;
pub mod kube_prometheus;
pub mod ntfy;
pub mod okd;
pub mod prometheus;
harmony/src/modules/monitoring/okd/enable_user_workload.rs (new file, 149 lines)
@@ -0,0 +1,149 @@
use std::{collections::BTreeMap, sync::Arc};

use crate::{
    data::Version,
    interpret::{Interpret, InterpretError, InterpretName, InterpretStatus, Outcome},
    inventory::Inventory,
    score::Score,
    topology::{K8sclient, Topology, k8s::K8sClient},
};
use async_trait::async_trait;
use harmony_types::id::Id;
use k8s_openapi::api::core::v1::ConfigMap;
use kube::api::ObjectMeta;
use serde::Serialize;

#[derive(Clone, Debug, Serialize)]
pub struct OpenshiftUserWorkloadMonitoring {}

impl<T: Topology + K8sclient> Score<T> for OpenshiftUserWorkloadMonitoring {
    fn name(&self) -> String {
        "OpenshiftUserWorkloadMonitoringScore".to_string()
    }

    fn create_interpret(&self) -> Box<dyn Interpret<T>> {
        Box::new(OpenshiftUserWorkloadMonitoringInterpret {})
    }
}

#[derive(Clone, Debug, Serialize)]
pub struct OpenshiftUserWorkloadMonitoringInterpret {}

#[async_trait]
impl<T: Topology + K8sclient> Interpret<T> for OpenshiftUserWorkloadMonitoringInterpret {
    async fn execute(
        &self,
        _inventory: &Inventory,
        topology: &T,
    ) -> Result<Outcome, InterpretError> {
        let client = topology.k8s_client().await.unwrap();
        self.update_cluster_monitoring_config_cm(&client).await?;
        self.update_user_workload_monitoring_config_cm(&client)
            .await?;
        self.verify_user_workload(&client).await?;
        Ok(Outcome::success(
            "successfully enabled user-workload-monitoring".to_string(),
        ))
    }

    fn get_name(&self) -> InterpretName {
        InterpretName::Custom("OpenshiftUserWorkloadMonitoring")
    }

    fn get_version(&self) -> Version {
        todo!()
    }

    fn get_status(&self) -> InterpretStatus {
        todo!()
    }

    fn get_children(&self) -> Vec<Id> {
        todo!()
    }
}

impl OpenshiftUserWorkloadMonitoringInterpret {
    pub async fn update_cluster_monitoring_config_cm(
        &self,
        client: &Arc<K8sClient>,
    ) -> Result<Outcome, InterpretError> {
        let mut data = BTreeMap::new();
        data.insert(
            "config.yaml".to_string(),
            r#"
enableUserWorkload: true
alertmanagerMain:
  enableUserAlertmanagerConfig: true
"#
            .to_string(),
        );

        let cm = ConfigMap {
            metadata: ObjectMeta {
                name: Some("cluster-monitoring-config".to_string()),
                namespace: Some("openshift-monitoring".to_string()),
                ..Default::default()
            },
            data: Some(data),
            ..Default::default()
        };
        client.apply(&cm, Some("openshift-monitoring")).await?;

        Ok(Outcome::success(
            "updated cluster-monitoring-config-map".to_string(),
        ))
    }

    pub async fn update_user_workload_monitoring_config_cm(
        &self,
        client: &Arc<K8sClient>,
    ) -> Result<Outcome, InterpretError> {
        let mut data = BTreeMap::new();
        data.insert(
            "config.yaml".to_string(),
            r#"
alertmanager:
  enabled: true
  enableAlertmanagerConfig: true
"#
            .to_string(),
        );
        let cm = ConfigMap {
            metadata: ObjectMeta {
                name: Some("user-workload-monitoring-config".to_string()),
                namespace: Some("openshift-user-workload-monitoring".to_string()),
                ..Default::default()
            },
            data: Some(data),
            ..Default::default()
        };
        client
            .apply(&cm, Some("openshift-user-workload-monitoring"))
            .await?;

        Ok(Outcome::success(
            "updated openshift-user-monitoring-config-map".to_string(),
        ))
    }

    pub async fn verify_user_workload(
        &self,
        client: &Arc<K8sClient>,
    ) -> Result<Outcome, InterpretError> {
        let namespace = "openshift-user-workload-monitoring";
        let alertmanager_name = "alertmanager-user-workload-0";
        let prometheus_name = "prometheus-user-workload-0";
        client
            .wait_for_pod_ready(alertmanager_name, Some(namespace))
            .await?;
        client
            .wait_for_pod_ready(prometheus_name, Some(namespace))
            .await?;

        Ok(Outcome::success(format!(
            "pods: {}, {} ready in ns: {}",
            alertmanager_name, prometheus_name, namespace
        )))
    }
}
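For reference, a minimal wiring sketch showing how this score could be run on its own; the module path is inferred from the file location and the `harmony_cli::run` call mirrors the README Quick Start, so treat both as assumptions rather than a confirmed API:

```rust
use harmony::{
    inventory::Inventory,
    modules::monitoring::okd::enable_user_workload::OpenshiftUserWorkloadMonitoring,
    topology::K8sAnywhereTopology,
};

#[tokio::main]
async fn main() {
    // Enable user-workload monitoring on the target cluster, then wait for the
    // alertmanager and prometheus pods, as implemented by the interpret above.
    harmony_cli::run(
        Inventory::autoload(),
        K8sAnywhereTopology::from_env(),
        vec![Box::new(OpenshiftUserWorkloadMonitoring {})],
        None,
    )
    .await
    .unwrap();
}
```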
harmony/src/modules/monitoring/okd/mod.rs (new file, 1 line)
@@ -0,0 +1 @@
pub mod enable_user_workload;
@@ -12,9 +12,6 @@ use std::process::Command;
use crate::modules::k8s::ingress::{K8sIngressScore, PathType};
use crate::modules::monitoring::kube_prometheus::crd::grafana_default_dashboard::build_default_dashboard;
use crate::modules::monitoring::kube_prometheus::crd::rhob_alertmanager_config::RHOBObservability;
use crate::modules::monitoring::kube_prometheus::crd::rhob_alertmanagers::{
    Alertmanager, AlertmanagerSpec,
};
use crate::modules::monitoring::kube_prometheus::crd::rhob_grafana::{
    Grafana, GrafanaDashboard, GrafanaDashboardSpec, GrafanaDatasource, GrafanaDatasourceConfig,
    GrafanaDatasourceSpec, GrafanaSpec,
@@ -25,13 +22,8 @@ use crate::modules::monitoring::kube_prometheus::crd::rhob_monitoring_stack::{
use crate::modules::monitoring::kube_prometheus::crd::rhob_prometheus_rules::{
    PrometheusRule, PrometheusRuleSpec, RuleGroup,
};
use crate::modules::monitoring::kube_prometheus::crd::rhob_prometheuses::{
    AlertmanagerEndpoints, LabelSelector, PrometheusSpec, PrometheusSpecAlerting,
};
use crate::modules::monitoring::kube_prometheus::crd::rhob_prometheuses::LabelSelector;

use crate::modules::monitoring::kube_prometheus::crd::rhob_role::{
    build_prom_role, build_prom_rolebinding, build_prom_service_account,
};
use crate::modules::monitoring::kube_prometheus::crd::rhob_service_monitor::{
    ServiceMonitor, ServiceMonitorSpec,
};