Compare commits

..

4 Commits

3 changed files with 55 additions and 61 deletions

View File

@@ -1,56 +0,0 @@
## **Remove Worker flag from OKD Control Planes**
### **Context**
On OKD user-provisioned infrastructure (UPI), the control plane nodes can carry the label node-role.kubernetes.io/worker, which allows non-critical workloads to be scheduled on the control planes.
### **Observed Symptoms**
- After adding the servers to the HAProxy backend, each backend appears as down
- Traffic is routed to the control planes instead of the workers
- The router-default pods are scheduled on the control planes rather than on the workers (see the check after this list)
- Workload pods are scheduled on the control planes, causing cluster instability
```
ss -tlnp | grep 80
```
- shows the haproxy process listening on 0.0.0.0:80 on the control planes
- the same applies to port 443
- In the rook-ceph namespace, certain pods are deployed on the control planes rather than on the worker nodes
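A quick way to confirm the placement symptoms is to list the affected pods together with the node they landed on (on OKD the router-default pods live in the openshift-ingress namespace; adjust names to your cluster):
```
# Show which nodes host the default ingress routers
oc -n openshift-ingress get pods -o wide | grep router-default
# Same check for the rook-ceph pods mentioned above
oc -n rook-ceph get pods -o wide
```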
### **Cause**
- With a UPI installation, the node roles (master, worker) are not managed by the Machine Config Operator, and the control planes are schedulable by default.
### **Diagnostic**
Check the node labels:
```
oc get nodes --show-labels | grep control-plane
```
Inspect the kubelet configuration on a control plane node:
```
cat /etc/systemd/system/kubelet.service
```
Look for the line:
```
--node-labels=node-role.kubernetes.io/control-plane,node-role.kubernetes.io/master,node-role.kubernetes.io/worker
```
→ The presence of the worker label confirms the problem.
Verify that the label does not come from the MCO:
```
oc get machineconfig | grep rendered-master
```
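If needed, the rendered master config can be inspected directly to confirm it does not inject the worker label; `<rendered-master-xxxx>` is a placeholder for the name returned by the previous command:
```
# Dump the rendered master MachineConfig and look for node-role settings
oc get machineconfig <rendered-master-xxxx> -o yaml | grep -i node-role
```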
### **Solution**
To make the control planes unschedulable, patch the cluster Scheduler resource:
```
oc patch scheduler cluster --type merge -p '{"spec":{"mastersSchedulable":false}}'
```
Once the patch is applied, the remaining workloads can be moved off the control planes by draining them:
```
oc adm cordon <cp-node>
oc adm drain <cp-node> --ignore-daemonsets --delete-emptydir-data
```
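As a possible follow-up (a sketch, not part of the original procedure), verify that the patch took effect and that the routers moved back to the workers; `<cp-node>` is a placeholder:
```
# The Scheduler resource should now report mastersSchedulable: false
oc get scheduler cluster -o yaml | grep mastersSchedulable
# The control plane should carry the NoSchedule taint again
oc describe node <cp-node> | grep -i taint
# Router pods should be rescheduled onto worker nodes
oc -n openshift-ingress get pods -o wide
# Lift the cordon once the drain has completed
oc adm uncordon <cp-node>
```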

View File

@@ -3,13 +3,11 @@ use std::time::Duration;
 use derive_new::new;
 use k8s_openapi::{
     ClusterResourceScope, NamespaceResourceScope,
-    api::{
-        apps::v1::Deployment,
-        core::v1::{Pod, PodStatus},
-    },
+    api::{apps::v1::Deployment, core::v1::Pod},
+    apimachinery::pkg::version::Info,
 };
 use kube::{
-    Client, Config, Error, Resource,
+    Client, Config, Discovery, Error, Resource,
     api::{Api, AttachParams, DeleteParams, ListParams, Patch, PatchParams, ResourceExt},
     config::{KubeConfigOptions, Kubeconfig},
     core::ErrorResponse,
@@ -59,6 +57,17 @@ impl K8sClient {
         })
     }
+
+    pub async fn get_apiserver_version(&self) -> Result<Info, Error> {
+        let client: Client = self.client.clone();
+        let version_info: Info = client.apiserver_version().await?;
+        Ok(version_info)
+    }
+
+    pub async fn discovery(&self) -> Result<Discovery, Error> {
+        let discovery: Discovery = Discovery::new(self.client.clone()).run().await?;
+        Ok(discovery)
+    }
 
     pub async fn get_resource_json_value(
         &self,
         name: &str,
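A minimal usage sketch of the two new helpers, assuming a `K8sClient` has already been constructed (the `dump_cluster_info` wrapper is hypothetical):
```rust
use k8s_openapi::apimachinery::pkg::version::Info;
use kube::{Discovery, Error};

// Sketch: print the server version and the discovered API groups using the
// new helpers; `client` is assumed to be an already-constructed K8sClient.
async fn dump_cluster_info(client: &K8sClient) -> Result<(), Error> {
    let version: Info = client.get_apiserver_version().await?;
    println!("apiserver version: {}", version.git_version);

    let discovery: Discovery = client.discovery().await?;
    for group in discovery.groups() {
        println!("api group: {}", group.name());
    }
    Ok(())
}
```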

View File

@@ -47,6 +47,13 @@ struct K8sState {
     message: String,
 }
 
+#[derive(Debug, Clone)]
+pub enum KubernetesDistribution {
+    OpenshiftFamily,
+    K3sFamily,
+    Default,
+}
+
 #[derive(Debug, Clone)]
 enum K8sSource {
     LocalK3d,
@@ -57,6 +64,7 @@ enum K8sSource {
 pub struct K8sAnywhereTopology {
     k8s_state: Arc<OnceCell<Option<K8sState>>>,
     tenant_manager: Arc<OnceCell<K8sTenantManager>>,
+    flavour: Arc<OnceCell<KubernetesDistribution>>,
     config: Arc<K8sAnywhereConfig>,
 }
@@ -162,6 +170,7 @@ impl K8sAnywhereTopology {
         Self {
             k8s_state: Arc::new(OnceCell::new()),
             tenant_manager: Arc::new(OnceCell::new()),
+            flavour: Arc::new(OnceCell::new()),
             config: Arc::new(K8sAnywhereConfig::from_env()),
         }
     }
@@ -170,10 +179,42 @@
         Self {
             k8s_state: Arc::new(OnceCell::new()),
             tenant_manager: Arc::new(OnceCell::new()),
+            flavour: Arc::new(OnceCell::new()),
             config: Arc::new(config),
         }
     }
+
+    pub async fn get_k8s_distribution(&self) -> Result<&KubernetesDistribution, PreparationError> {
+        self.flavour
+            .get_or_try_init(async || {
+                let client = self.k8s_client().await.unwrap();
+
+                let discovery = client.discovery().await.map_err(|e| {
+                    PreparationError::new(format!("Could not discover API groups: {}", e))
+                })?;
+
+                let version = client.get_apiserver_version().await.map_err(|e| {
+                    PreparationError::new(format!("Could not get server version: {}", e))
+                })?;
+
+                // OpenShift / OKD
+                if discovery
+                    .groups()
+                    .any(|g| g.name() == "project.openshift.io")
+                {
+                    return Ok(KubernetesDistribution::OpenshiftFamily);
+                }
+
+                // K3d / K3s
+                if version.git_version.contains("k3s") {
+                    return Ok(KubernetesDistribution::K3sFamily);
+                }
+
+                return Ok(KubernetesDistribution::Default);
+            })
+            .await
+    }
+
     async fn get_cluster_observability_operator_prometheus_application_score(
         &self,
         sender: RHOBObservability,
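A hypothetical call-site sketch showing how the detected distribution might be consumed downstream; `topology` stands in for a constructed K8sAnywhereTopology value:
```rust
// Branch on the detected distribution, e.g. to select distro-specific manifests.
match topology.get_k8s_distribution().await? {
    KubernetesDistribution::OpenshiftFamily => {
        // OpenShift / OKD: OpenShift-specific APIs such as project.openshift.io are available
    }
    KubernetesDistribution::K3sFamily => {
        // k3s / k3d: lightweight local defaults
    }
    KubernetesDistribution::Default => {
        // vanilla Kubernetes path
    }
}
```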