Compare commits
4 Commits
doc/worker...5f78300d78
| Author | SHA1 | Date |
|---|---|---|
| | 5f78300d78 | |
| | 2d3c32469c | |
| | 1cec398d4d | |
| | f073b7e5fb | |
@@ -1,56 +0,0 @@
## **Remove Worker flag from OKD Control Planes**

### **Context**

On OKD user-provisioned infrastructure, the control plane nodes can carry the label node-role.kubernetes.io/worker, which allows non-critical workloads to be scheduled on the control planes.

### **Observed Symptoms**

- After adding the HAProxy servers to the backend, each backend appears as down.
- Traffic is redirected to the control planes instead of the workers.
- The router-default pods are incorrectly scheduled on the control planes rather than on the workers.
- Pods are being scheduled on the control planes, causing cluster instability.

```
ss -tlnp | grep 80
```

- shows that the haproxy process is listening on 0.0.0.0:80 on the control planes (example output below)
- the same problem exists for port 443
- In the rook-ceph namespace, certain pods are deployed on the control planes rather than on the worker nodes.
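
On an affected control plane, the ss check above shows haproxy bound to port 80, along these lines (PID and socket details are illustrative):

```
LISTEN  0  4096  0.0.0.0:80  0.0.0.0:*  users:(("haproxy",pid=1234,fd=7))
```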

### **Cause**

- When installing with UPI, the roles (master, worker) are not managed by the Machine Config Operator, and the control planes are made schedulable by default.

### **Diagnostic**

Check the node labels:

```
oc get nodes --show-labels | grep control-plane
```
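
On an affected cluster, the worker role appears on the control plane nodes; the output looks roughly like this (node name, age, and version are illustrative; the label list is truncated):

```
master-0   Ready   control-plane,master,worker   120d   v1.29.6   ...,node-role.kubernetes.io/worker=,...
```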

Inspect the kubelet configuration:

```
cat /etc/systemd/system/kubelet.service
```

and find the line:

```
--node-labels=node-role.kubernetes.io/control-plane,node-role.kubernetes.io/master,node-role.kubernetes.io/worker
```

→ the presence of the worker label confirms the problem.

Verify that the label does not come from the MCO:

```
oc get machineconfig | grep rendered-master
```
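
If the MCO managed the label, it would show up in the rendered master configuration. Assuming a rendered-master name taken from the output above (the hash here is illustrative), it can be inspected with:

```
oc get machineconfig rendered-master-0123abcd -o yaml | grep node-role
```

Per the cause above, on UPI clusters the worker label is set on the kubelet command line rather than by the MCO, so this search should not report it.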

### **Solution**

To make the control planes non-schedulable, patch the cluster scheduler resource:

```
oc patch scheduler cluster --type merge -p '{"spec":{"mastersSchedulable":false}}'
```
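
To confirm that the patch was applied:

```
oc get scheduler cluster -o jsonpath='{.spec.mastersSchedulable}'
```

This should print false.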

After the patch is applied, the existing workloads can be moved off the control planes by draining the nodes:

```
oc adm cordon <cp-node>
oc adm drain <cp-node> --ignore-daemonsets --delete-emptydir-data
```
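
Once the evicted pods have been rescheduled on the workers, the node can be returned to service; with mastersSchedulable set to false, regular workloads will no longer land on it:

```
oc adm uncordon <cp-node>
```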
@@ -3,13 +3,11 @@ use std::time::Duration;
 use derive_new::new;
 use k8s_openapi::{
     ClusterResourceScope, NamespaceResourceScope,
-    api::{
-        apps::v1::Deployment,
-        core::v1::{Pod, PodStatus},
-    },
+    api::{apps::v1::Deployment, core::v1::Pod},
+    apimachinery::pkg::version::Info,
 };
 use kube::{
-    Client, Config, Error, Resource,
+    Client, Config, Discovery, Error, Resource,
     api::{Api, AttachParams, DeleteParams, ListParams, Patch, PatchParams, ResourceExt},
     config::{KubeConfigOptions, Kubeconfig},
     core::ErrorResponse,
@@ -59,6 +57,17 @@ impl K8sClient {
         })
     }
 
+    pub async fn get_apiserver_version(&self) -> Result<Info, Error> {
+        let client: Client = self.client.clone();
+        let version_info: Info = client.apiserver_version().await?;
+        Ok(version_info)
+    }
+
+    pub async fn discovery(&self) -> Result<Discovery, Error> {
+        let discovery: Discovery = Discovery::new(self.client.clone()).run().await?;
+        Ok(discovery)
+    }
+
     pub async fn get_resource_json_value(
         &self,
         name: &str,
@@ -47,6 +47,13 @@ struct K8sState {
     message: String,
 }
 
+#[derive(Debug, Clone)]
+pub enum KubernetesDistribution {
+    OpenshiftFamily,
+    K3sFamily,
+    Default,
+}
+
 #[derive(Debug, Clone)]
 enum K8sSource {
     LocalK3d,
@@ -57,6 +64,7 @@ enum K8sSource {
 pub struct K8sAnywhereTopology {
     k8s_state: Arc<OnceCell<Option<K8sState>>>,
     tenant_manager: Arc<OnceCell<K8sTenantManager>>,
+    flavour: Arc<OnceCell<KubernetesDistribution>>,
     config: Arc<K8sAnywhereConfig>,
 }
 
@@ -162,6 +170,7 @@ impl K8sAnywhereTopology {
         Self {
             k8s_state: Arc::new(OnceCell::new()),
             tenant_manager: Arc::new(OnceCell::new()),
+            flavour: Arc::new(OnceCell::new()),
             config: Arc::new(K8sAnywhereConfig::from_env()),
         }
     }
@@ -170,10 +179,42 @@ impl K8sAnywhereTopology {
         Self {
             k8s_state: Arc::new(OnceCell::new()),
             tenant_manager: Arc::new(OnceCell::new()),
+            flavour: Arc::new(OnceCell::new()),
             config: Arc::new(config),
         }
     }
 
+    pub async fn get_k8s_distribution(&self) -> Result<&KubernetesDistribution, PreparationError> {
+        self.flavour
+            .get_or_try_init(async || {
+                let client = self.k8s_client().await.unwrap();
+
+                let discovery = client.discovery().await.map_err(|e| {
+                    PreparationError::new(format!("Could not discover API groups: {}", e))
+                })?;
+
+                let version = client.get_apiserver_version().await.map_err(|e| {
+                    PreparationError::new(format!("Could not get server version: {}", e))
+                })?;
+
+                // OpenShift / OKD
+                if discovery
+                    .groups()
+                    .any(|g| g.name() == "project.openshift.io")
+                {
+                    return Ok(KubernetesDistribution::OpenshiftFamily);
+                }
+
+                // K3d / K3s
+                if version.git_version.contains("k3s") {
+                    return Ok(KubernetesDistribution::K3sFamily);
+                }
+
+                return Ok(KubernetesDistribution::Default);
+            })
+            .await
+    }
+
     async fn get_cluster_observability_operator_prometheus_application_score(
         &self,
         sender: RHOBObservability,