forked from NationTech/harmony
Compare commits: doc/worker...feat/gen-c (1 commit, de65f68739)
@@ -1,56 +0,0 @@

## **Remove Worker flag from OKD Control Planes**

### **Context**

On OKD user-provisioned infrastructure, the control plane nodes can carry the node-role.kubernetes.io/worker label, which allows non-critical workloads to be scheduled on the control planes.

### **Observed Symptoms**

- After adding the HAProxy servers to the backend, each backend appears down.
- Traffic is redirected to the control planes instead of the workers.
- The router-default pods are incorrectly placed on the control planes rather than on the workers (see the check below).
- Pods are being scheduled on the control planes, causing cluster instability.

```
ss -tlnp | grep 80
```

- Shows an haproxy process listening on 0.0.0.0:80 on the control planes.
- The same problem exists for port 443.
- In the rook-ceph namespace, certain pods are deployed on the control planes rather than on the worker nodes.
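One way to confirm where the ingress router pods actually landed (an added check, not part of the original note) is to list them with their node assignment:

```
oc get pods -n openshift-ingress -o wide
```

The NODE column should normally show worker nodes; control plane names here confirm the mis-scheduling.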
### **Cause**

- When installing with UPI, the node roles (master, worker) are not managed by the Machine Config Operator and the control planes are made schedulable by default.

### **Diagnostic**

Check the node labels:

```
oc get nodes --show-labels | grep control-plane
```

Inspect the kubelet configuration:

```
cat /etc/systemd/system/kubelet.service
```

Find the line:

```
--node-labels=node-role.kubernetes.io/control-plane,node-role.kubernetes.io/master,node-role.kubernetes.io/worker
```

→ the presence of the worker label confirms the problem.

Verify that the flag does not come from the MCO:

```
oc get machineconfig | grep rendered-master
```
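As an additional diagnostic (assumed here, not in the original note), the cluster Scheduler resource shows whether the control planes are currently allowed to take workloads:

```
oc get scheduler cluster -o jsonpath='{.spec.mastersSchedulable}'
```

On a cluster exhibiting this problem the field typically reports true.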
**Solution:**

To make the control planes unschedulable, patch the cluster Scheduler resource:

```
oc patch scheduler cluster --type merge -p '{"spec":{"mastersSchedulable":false}}'
```
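Once mastersSchedulable is set to false, the control plane nodes should carry the NoSchedule taint again. A quick way to confirm it (an added check, not from the original note):

```
oc describe node <cp-node> | grep -A1 Taints
```

The output should include node-role.kubernetes.io/master:NoSchedule.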
After the patch is applied, the workloads can be moved off by draining the nodes:

```
oc adm cordon <cp-node>
oc adm drain <cp-node> --ignore-daemonsets --delete-emptydir-data
```
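To confirm that the workloads actually left the control planes (a follow-up check, not part of the original note):

```
oc get pods -A -o wide | grep <cp-node>
oc get pods -n openshift-ingress -o wide
```

Apart from DaemonSets, static pods, and workloads that explicitly tolerate the master taint, nothing should remain scheduled on the control planes, and the router-default pods should now be running on worker nodes.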
@@ -194,3 +194,11 @@ impl From<String> for InterpretError {
         }
     }
 }
+
+impl From<serde_yaml::Error> for InterpretError {
+    fn from(value: serde_yaml::Error) -> Self {
+        Self {
+            msg: format!("InterpretError : {value}"),
+        }
+    }
+}
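The added impl is what lets serde_yaml failures propagate through the ? operator, as in build_cert_request_yaml further down. A minimal sketch of the pattern (the helper function is hypothetical; only InterpretError and the conversion above come from the commit):

```
use crate::interpret::InterpretError;

// Hypothetical helper: `?` relies on the From<serde_yaml::Error> impl
// above to turn the parse error into an InterpretError.
fn parse_manifest(raw: &str) -> Result<serde_yaml::Value, InterpretError> {
    let value: serde_yaml::Value = serde_yaml::from_str(raw)?;
    Ok(value)
}
```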
@@ -1,19 +1,13 @@
-use std::time::Duration;
-
 use derive_new::new;
 use k8s_openapi::{
     ClusterResourceScope, NamespaceResourceScope,
-    api::{
-        apps::v1::Deployment,
-        core::v1::{Pod, PodStatus},
-    },
+    api::{apps::v1::Deployment, core::v1::Pod},
 };
 use kube::{
     Client, Config, Error, Resource,
     api::{Api, AttachParams, DeleteParams, ListParams, Patch, PatchParams, ResourceExt},
     config::{KubeConfigOptions, Kubeconfig},
     core::ErrorResponse,
-    error::DiscoveryError,
     runtime::reflector::Lookup,
 };
 use kube::{api::DynamicObject, runtime::conditions};
@@ -25,7 +19,7 @@ use log::{debug, error, trace};
 use serde::{Serialize, de::DeserializeOwned};
 use serde_json::{Value, json};
 use similar::TextDiff;
-use tokio::{io::AsyncReadExt, time::sleep};
+use tokio::io::AsyncReadExt;
 
 #[derive(new, Clone)]
 pub struct K8sClient {
@@ -159,41 +153,6 @@ impl K8sClient {
         }
     }
 
-    pub async fn wait_for_pod_ready(
-        &self,
-        pod_name: &str,
-        namespace: Option<&str>,
-    ) -> Result<(), Error> {
-        let mut elapsed = 0;
-        let interval = 5; // seconds between checks
-        let timeout_secs = 120;
-        loop {
-            let pod = self.get_pod(pod_name, namespace).await?;
-
-            if let Some(p) = pod {
-                if let Some(status) = p.status {
-                    if let Some(phase) = status.phase {
-                        if phase.to_lowercase() == "running" {
-                            return Ok(());
-                        }
-                    }
-                }
-            }
-
-            if elapsed >= timeout_secs {
-                return Err(Error::Discovery(DiscoveryError::MissingResource(format!(
-                    "'{}' in ns '{}' did not become ready within {}s",
-                    pod_name,
-                    namespace.unwrap(),
-                    timeout_secs
-                ))));
-            }
-
-            sleep(Duration::from_secs(interval)).await;
-            elapsed += interval;
-        }
-    }
-
     /// Will execute a command in the first pod found that matches the specified label
     /// '{label}={name}'
     pub async fn exec_app_capture_output(
@@ -460,12 +419,9 @@ impl K8sClient {
             .as_str()
             .expect("couldn't get kind as str");
 
-        let mut it = api_version.splitn(2, '/');
-        let first = it.next().unwrap();
-        let (g, v) = match it.next() {
-            Some(second) => (first, second),
-            None => ("", first),
-        };
+        let split: Vec<&str> = api_version.splitn(2, "/").collect();
+        let g = split[0];
+        let v = split[1];
 
         let gvk = GroupVersionKind::gvk(g, v, kind);
         let api_resource = ApiResource::from_gvk(&gvk);
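For reference, a small standalone sketch (not from the repository) of what the new splitn call yields. Indexing split[1] assumes the apiVersion string contains both a group and a version separated by '/':

```
fn main() {
    // "apps/v1" splits into a group and a version.
    let split: Vec<&str> = "apps/v1".splitn(2, "/").collect();
    assert_eq!((split[0], split[1]), ("apps", "v1"));

    // Core-group resources expose apiVersion "v1" with no slash,
    // so only one element comes back and split[1] would be out of bounds.
    let core: Vec<&str> = "v1".splitn(2, "/").collect();
    assert_eq!(core.len(), 1);
}
```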
harmony/src/modules/cert_manager/gen_ca_cert.rs (new file, 106 lines)
@@ -0,0 +1,106 @@
+use std::sync::Arc;
+
+use async_trait::async_trait;
+use harmony_types::id::Id;
+use serde::Serialize;
+
+use crate::{
+    data::Version,
+    interpret::{Interpret, InterpretError, InterpretName, InterpretStatus, Outcome},
+    inventory::Inventory,
+    score::Score,
+    topology::{K8sclient, Topology, k8s::K8sClient},
+};
+
+#[derive(Clone, Serialize, Debug)]
+pub struct GenerateCaCertScore {
+    cluster_issuer_name: String,
+    dns_names: String,
+    operator_namespace: String,
+}
+
+impl<T: Topology + K8sclient> Score<T> for GenerateCaCertScore {
+    fn name(&self) -> String {
+        "GenerateCaCertScore".to_string()
+    }
+
+    fn create_interpret(&self) -> Box<dyn Interpret<T>> {
+        Box::new(GenerateCaCertIntepret {
+            score: self.clone(),
+        })
+    }
+}
+
+#[derive(Clone, Serialize, Debug)]
+pub struct GenerateCaCertIntepret {
+    score: GenerateCaCertScore,
+}
+
+#[async_trait]
+impl<T: Topology + K8sclient> Interpret<T> for GenerateCaCertIntepret {
+    async fn execute(
+        &self,
+        _inventory: &Inventory,
+        topology: &T,
+    ) -> Result<Outcome, InterpretError> {
+        let client = topology.k8s_client().await.unwrap();
+        let cert_yaml = self
+            .build_cert_request_yaml(&self.score.cluster_issuer_name, &self.score.dns_names)
+            .unwrap();
+        self.apply_cert_request(&client, cert_yaml, &self.score.operator_namespace)
+            .await?;
+        Ok(Outcome::success("created ca cert".to_string()))
+    }
+
+    fn get_name(&self) -> InterpretName {
+        InterpretName::Custom("GenerateCaCertInterpret")
+    }
+
+    fn get_version(&self) -> Version {
+        todo!()
+    }
+
+    fn get_status(&self) -> InterpretStatus {
+        todo!()
+    }
+
+    fn get_children(&self) -> Vec<Id> {
+        todo!()
+    }
+}
+
+impl GenerateCaCertIntepret {
+    pub fn build_cert_request_yaml(
+        &self,
+        cluster_issuer_name: &str,
+        dns_names: &str,
+    ) -> Result<serde_yaml::Value, InterpretError> {
+        let cert_yaml = format!(
+            r#"
+apiVersion: cert-manager.io/v1
+kind: Certificate
+metadata:
+  name: ingress-cert
+  namespace: openshift-ingress
+spec:
+  secretName: ingress-cert-tls
+  issuerRef:
+    name: {cluster_issuer_name}
+    kind: ClusterIssuer
+  dnsNames:
+    - "*.{dns_names}"
+"#
+        );
+        Ok(serde_yaml::to_value(cert_yaml)?)
+    }
+
+    pub async fn apply_cert_request(
+        &self,
+        client: &Arc<K8sClient>,
+        cert_yaml: serde_yaml::Value,
+        operator_namespace: &str,
+    ) -> Result<(), InterpretError> {
+        Ok(client
+            .apply_yaml(&cert_yaml, Some(operator_namespace))
+            .await?)
+    }
+}
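A side note on the serde_yaml calls used above, since the distinction is easy to miss: serde_yaml::to_value serializes the formatted string as a single YAML string scalar, while serde_yaml::from_str would parse it into a mapping. A standalone sketch of the difference (not from the commit):

```
fn main() {
    let raw = "kind: Certificate\nmetadata:\n  name: ingress-cert\n";

    // to_value wraps the whole string as one YAML scalar.
    let as_scalar: serde_yaml::Value = serde_yaml::to_value(raw).unwrap();
    assert!(as_scalar.is_string());

    // from_str parses it into a mapping with real keys.
    let as_doc: serde_yaml::Value = serde_yaml::from_str(raw).unwrap();
    assert!(as_doc.is_mapping());
    assert_eq!(as_doc["kind"].as_str(), Some("Certificate"));
}
```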
@@ -1,2 +1,3 @@
+mod gen_ca_cert;
 mod helm;
 pub use helm::*;
@@ -4,5 +4,4 @@ pub mod application_monitoring;
 pub mod grafana;
 pub mod kube_prometheus;
 pub mod ntfy;
-pub mod okd;
 pub mod prometheus;
|||||||
@@ -1,149 +0,0 @@
|
|||||||
use std::{collections::BTreeMap, sync::Arc};
|
|
||||||
|
|
||||||
use crate::{
|
|
||||||
data::Version,
|
|
||||||
interpret::{Interpret, InterpretError, InterpretName, InterpretStatus, Outcome},
|
|
||||||
inventory::Inventory,
|
|
||||||
score::Score,
|
|
||||||
topology::{K8sclient, Topology, k8s::K8sClient},
|
|
||||||
};
|
|
||||||
use async_trait::async_trait;
|
|
||||||
use harmony_types::id::Id;
|
|
||||||
use k8s_openapi::api::core::v1::ConfigMap;
|
|
||||||
use kube::api::ObjectMeta;
|
|
||||||
use serde::Serialize;
|
|
||||||
|
|
||||||
#[derive(Clone, Debug, Serialize)]
|
|
||||||
pub struct OpenshiftUserWorkloadMonitoring {}
|
|
||||||
|
|
||||||
impl<T: Topology + K8sclient> Score<T> for OpenshiftUserWorkloadMonitoring {
|
|
||||||
fn name(&self) -> String {
|
|
||||||
"OpenshiftUserWorkloadMonitoringScore".to_string()
|
|
||||||
}
|
|
||||||
|
|
||||||
fn create_interpret(&self) -> Box<dyn Interpret<T>> {
|
|
||||||
Box::new(OpenshiftUserWorkloadMonitoringInterpret {})
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
#[derive(Clone, Debug, Serialize)]
|
|
||||||
pub struct OpenshiftUserWorkloadMonitoringInterpret {}
|
|
||||||
|
|
||||||
#[async_trait]
|
|
||||||
impl<T: Topology + K8sclient> Interpret<T> for OpenshiftUserWorkloadMonitoringInterpret {
|
|
||||||
async fn execute(
|
|
||||||
&self,
|
|
||||||
_inventory: &Inventory,
|
|
||||||
topology: &T,
|
|
||||||
) -> Result<Outcome, InterpretError> {
|
|
||||||
let client = topology.k8s_client().await.unwrap();
|
|
||||||
self.update_cluster_monitoring_config_cm(&client).await?;
|
|
||||||
self.update_user_workload_monitoring_config_cm(&client)
|
|
||||||
.await?;
|
|
||||||
self.verify_user_workload(&client).await?;
|
|
||||||
Ok(Outcome::success(
|
|
||||||
"successfully enabled user-workload-monitoring".to_string(),
|
|
||||||
))
|
|
||||||
}
|
|
||||||
|
|
||||||
fn get_name(&self) -> InterpretName {
|
|
||||||
InterpretName::Custom("OpenshiftUserWorkloadMonitoring")
|
|
||||||
}
|
|
||||||
|
|
||||||
fn get_version(&self) -> Version {
|
|
||||||
todo!()
|
|
||||||
}
|
|
||||||
|
|
||||||
fn get_status(&self) -> InterpretStatus {
|
|
||||||
todo!()
|
|
||||||
}
|
|
||||||
|
|
||||||
fn get_children(&self) -> Vec<Id> {
|
|
||||||
todo!()
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
impl OpenshiftUserWorkloadMonitoringInterpret {
|
|
||||||
pub async fn update_cluster_monitoring_config_cm(
|
|
||||||
&self,
|
|
||||||
client: &Arc<K8sClient>,
|
|
||||||
) -> Result<Outcome, InterpretError> {
|
|
||||||
let mut data = BTreeMap::new();
|
|
||||||
data.insert(
|
|
||||||
"config.yaml".to_string(),
|
|
||||||
r#"
|
|
||||||
enableUserWorkload: true
|
|
||||||
alertmanagerMain:
|
|
||||||
enableUserAlertmanagerConfig: true
|
|
||||||
"#
|
|
||||||
.to_string(),
|
|
||||||
);
|
|
||||||
|
|
||||||
let cm = ConfigMap {
|
|
||||||
metadata: ObjectMeta {
|
|
||||||
name: Some("cluster-monitoring-config".to_string()),
|
|
||||||
namespace: Some("openshift-monitoring".to_string()),
|
|
||||||
..Default::default()
|
|
||||||
},
|
|
||||||
data: Some(data),
|
|
||||||
..Default::default()
|
|
||||||
};
|
|
||||||
client.apply(&cm, Some("openshift-monitoring")).await?;
|
|
||||||
|
|
||||||
Ok(Outcome::success(
|
|
||||||
"updated cluster-monitoring-config-map".to_string(),
|
|
||||||
))
|
|
||||||
}
|
|
||||||
|
|
||||||
pub async fn update_user_workload_monitoring_config_cm(
|
|
||||||
&self,
|
|
||||||
client: &Arc<K8sClient>,
|
|
||||||
) -> Result<Outcome, InterpretError> {
|
|
||||||
let mut data = BTreeMap::new();
|
|
||||||
data.insert(
|
|
||||||
"config.yaml".to_string(),
|
|
||||||
r#"
|
|
||||||
alertmanager:
|
|
||||||
enabled: true
|
|
||||||
enableAlertmanagerConfig: true
|
|
||||||
"#
|
|
||||||
.to_string(),
|
|
||||||
);
|
|
||||||
let cm = ConfigMap {
|
|
||||||
metadata: ObjectMeta {
|
|
||||||
name: Some("user-workload-monitoring-config".to_string()),
|
|
||||||
namespace: Some("openshift-user-workload-monitoring".to_string()),
|
|
||||||
..Default::default()
|
|
||||||
},
|
|
||||||
data: Some(data),
|
|
||||||
..Default::default()
|
|
||||||
};
|
|
||||||
client
|
|
||||||
.apply(&cm, Some("openshift-user-workload-monitoring"))
|
|
||||||
.await?;
|
|
||||||
|
|
||||||
Ok(Outcome::success(
|
|
||||||
"updated openshift-user-monitoring-config-map".to_string(),
|
|
||||||
))
|
|
||||||
}
|
|
||||||
|
|
||||||
pub async fn verify_user_workload(
|
|
||||||
&self,
|
|
||||||
client: &Arc<K8sClient>,
|
|
||||||
) -> Result<Outcome, InterpretError> {
|
|
||||||
let namespace = "openshift-user-workload-monitoring";
|
|
||||||
let alertmanager_name = "alertmanager-user-workload-0";
|
|
||||||
let prometheus_name = "prometheus-user-workload-0";
|
|
||||||
client
|
|
||||||
.wait_for_pod_ready(alertmanager_name, Some(namespace))
|
|
||||||
.await?;
|
|
||||||
client
|
|
||||||
.wait_for_pod_ready(prometheus_name, Some(namespace))
|
|
||||||
.await?;
|
|
||||||
|
|
||||||
Ok(Outcome::success(format!(
|
|
||||||
"pods: {}, {} ready in ns: {}",
|
|
||||||
alertmanager_name, prometheus_name, namespace
|
|
||||||
)))
|
|
||||||
}
|
|
||||||
}
|
|
||||||
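For reference, the manual equivalent of the check that verify_user_workload performed (assumed command, not from the commit):

```
oc -n openshift-user-workload-monitoring get pods
```

Once enableUserWorkload is set to true, the prometheus-user-workload and alertmanager-user-workload pods should appear in this namespace and reach the Running state.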
@@ -1 +0,0 @@
-pub mod enable_user_workload;