Compare commits
1 Commits
feat/worke
...
feat/unshe
| Author | SHA1 | Date | |
|---|---|---|---|
| f87e223d75 |
@@ -27,7 +27,7 @@ use kube::{
|
||||
};
|
||||
use log::{debug, error, trace, warn};
|
||||
use serde::{Serialize, de::DeserializeOwned};
|
||||
use serde_json::json;
|
||||
use serde_json::{json, Value};
|
||||
use similar::TextDiff;
|
||||
use tokio::{io::AsyncReadExt, time::sleep};
|
||||
use url::Url;
|
||||
@@ -64,6 +64,10 @@ impl K8sClient {
|
||||
})
|
||||
}
|
||||
|
||||
/// Applies `patch` to the resource identified by `gvk`.
///
/// Not implemented yet: calling this panics via `todo!`. The previous empty body did not
/// type-check against the declared `Result<(), Error>` return type.
///
/// # Panics
/// Always, until the implementation is written.
pub async fn patch_resource(&self, patch: Value, gvk: &GroupVersionKind) -> Result<(), Error> {
    // NOTE(review): the signature carries neither a resource name nor a namespace, so there is
    // not enough information here to address a concrete object — confirm the intended API
    // before implementing.
    todo!("patch_resource is not implemented (gvk: {gvk:?}, patch: {patch})")
}
|
||||
|
||||
pub async fn service_account_api(&self, namespace: &str) -> Api<ServiceAccount> {
|
||||
let api: Api<ServiceAccount> = Api::namespaced(self.client.clone(), namespace);
|
||||
api
|
||||
|
||||
@@ -1,21 +1,15 @@
|
||||
use async_trait::async_trait;
|
||||
use derive_new::new;
|
||||
use harmony_types::id::Id;
|
||||
use log::{debug, info};
|
||||
use log::info;
|
||||
use serde::Serialize;
|
||||
|
||||
use crate::{
|
||||
data::Version,
|
||||
hardware::PhysicalHost,
|
||||
infra::inventory::InventoryRepositoryFactory,
|
||||
interpret::{Interpret, InterpretError, InterpretName, InterpretStatus, Outcome},
|
||||
inventory::{HostRole, Inventory},
|
||||
modules::{
|
||||
dhcp::DhcpHostBindingScore, http::IPxeMacBootFileScore,
|
||||
inventory::DiscoverHostForRoleScore, okd::templates::BootstrapIpxeTpl,
|
||||
},
|
||||
inventory::Inventory,
|
||||
score::Score,
|
||||
topology::{HAClusterTopology, HostBinding},
|
||||
topology::HAClusterTopology,
|
||||
};
|
||||
|
||||
// -------------------------------------------------------------------------------------------------
|
||||
@@ -58,159 +52,6 @@ impl OKDSetup04WorkersInterpret {
|
||||
info!("[Workers] Rendering per-MAC PXE for workers and rebooting");
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Ensures that three physical hosts are discovered and available for the ControlPlane role.
|
||||
/// It will trigger discovery if not enough hosts are found.
|
||||
async fn get_nodes(
|
||||
&self,
|
||||
inventory: &Inventory,
|
||||
topology: &HAClusterTopology,
|
||||
) -> Result<Vec<PhysicalHost>, InterpretError> {
|
||||
const REQUIRED_HOSTS: usize = 2;
|
||||
let repo = InventoryRepositoryFactory::build().await?;
|
||||
let mut control_plane_hosts = repo.get_host_for_role(&HostRole::Worker).await?;
|
||||
|
||||
while control_plane_hosts.len() < REQUIRED_HOSTS {
|
||||
info!(
|
||||
"Discovery of {} control plane hosts in progress, current number {}",
|
||||
REQUIRED_HOSTS,
|
||||
control_plane_hosts.len()
|
||||
);
|
||||
// This score triggers the discovery agent for a specific role.
|
||||
DiscoverHostForRoleScore {
|
||||
role: HostRole::Worker,
|
||||
}
|
||||
.interpret(inventory, topology)
|
||||
.await?;
|
||||
control_plane_hosts = repo.get_host_for_role(&HostRole::Worker).await?;
|
||||
}
|
||||
|
||||
if control_plane_hosts.len() < REQUIRED_HOSTS {
|
||||
Err(InterpretError::new(format!(
|
||||
"OKD Requires at least {} control plane hosts, but only found {}. Cannot proceed.",
|
||||
REQUIRED_HOSTS,
|
||||
control_plane_hosts.len()
|
||||
)))
|
||||
} else {
|
||||
// Take exactly the number of required hosts to ensure consistency.
|
||||
Ok(control_plane_hosts
|
||||
.into_iter()
|
||||
.take(REQUIRED_HOSTS)
|
||||
.collect())
|
||||
}
|
||||
}
|
||||
|
||||
/// Configures DHCP host bindings for all control plane nodes.
|
||||
async fn configure_host_binding(
|
||||
&self,
|
||||
inventory: &Inventory,
|
||||
topology: &HAClusterTopology,
|
||||
nodes: &Vec<PhysicalHost>,
|
||||
) -> Result<(), InterpretError> {
|
||||
info!("[Worker] Configuring host bindings for worker nodes.");
|
||||
|
||||
// Ensure the topology definition matches the number of physical nodes found.
|
||||
if topology.control_plane.len() != nodes.len() {
|
||||
return Err(InterpretError::new(format!(
|
||||
"Mismatch between logical control plane hosts defined in topology ({}) and physical nodes found ({}).",
|
||||
topology.control_plane.len(),
|
||||
nodes.len()
|
||||
)));
|
||||
}
|
||||
|
||||
// Create a binding for each physical host to its corresponding logical host.
|
||||
let bindings: Vec<HostBinding> = topology
|
||||
.control_plane
|
||||
.iter()
|
||||
.zip(nodes.iter())
|
||||
.map(|(logical_host, physical_host)| {
|
||||
info!(
|
||||
"Creating binding: Logical Host '{}' -> Physical Host ID '{}'",
|
||||
logical_host.name, physical_host.id
|
||||
);
|
||||
HostBinding {
|
||||
logical_host: logical_host.clone(),
|
||||
physical_host: physical_host.clone(),
|
||||
}
|
||||
})
|
||||
.collect();
|
||||
|
||||
DhcpHostBindingScore {
|
||||
host_binding: bindings,
|
||||
domain: Some(topology.domain_name.clone()),
|
||||
}
|
||||
.interpret(inventory, topology)
|
||||
.await?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Renders and deploys a per-MAC iPXE boot file for each control plane node.
|
||||
async fn configure_ipxe(
|
||||
&self,
|
||||
inventory: &Inventory,
|
||||
topology: &HAClusterTopology,
|
||||
nodes: &Vec<PhysicalHost>,
|
||||
) -> Result<(), InterpretError> {
|
||||
info!("[Worker] Rendering per-MAC iPXE configurations.");
|
||||
|
||||
// The iPXE script content is the same for all control plane nodes,
|
||||
// pointing to the 'master.ign' ignition file.
|
||||
let content = BootstrapIpxeTpl {
|
||||
http_ip: &topology.http_server.get_ip().to_string(),
|
||||
scos_path: "scos",
|
||||
ignition_http_path: "okd_ignition_files",
|
||||
installation_device: "/dev/sda", // This might need to be configurable per-host in the future
|
||||
ignition_file_name: "worker.ign", // Worker nodes use the worker ignition file
|
||||
}
|
||||
.to_string();
|
||||
|
||||
debug!("[Worker] iPXE content template:\n{content}");
|
||||
|
||||
// Create and apply an iPXE boot file for each node.
|
||||
for node in nodes {
|
||||
let mac_address = node.get_mac_address();
|
||||
if mac_address.is_empty() {
|
||||
return Err(InterpretError::new(format!(
|
||||
"Physical host with ID '{}' has no MAC addresses defined.",
|
||||
node.id
|
||||
)));
|
||||
}
|
||||
info!(
|
||||
"[Worker] Applying iPXE config for node ID '{}' with MACs: {:?}",
|
||||
node.id, mac_address
|
||||
);
|
||||
|
||||
IPxeMacBootFileScore {
|
||||
mac_address,
|
||||
content: content.clone(),
|
||||
}
|
||||
.interpret(inventory, topology)
|
||||
.await?;
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Prompts the user to reboot the target control plane nodes.
|
||||
async fn reboot_targets(&self, nodes: &Vec<PhysicalHost>) -> Result<(), InterpretError> {
|
||||
let node_ids: Vec<String> = nodes.iter().map(|n| n.id.to_string()).collect();
|
||||
info!("[Worker] Requesting reboot for control plane nodes: {node_ids:?}",);
|
||||
|
||||
let confirmation = inquire::Confirm::new(
|
||||
&format!("Please reboot the {} worker nodes ({}) to apply their PXE configuration. Press enter when ready.", nodes.len(), node_ids.join(", ")),
|
||||
)
|
||||
.prompt()
|
||||
.map_err(|e| InterpretError::new(format!("User prompt failed: {e}")))?;
|
||||
|
||||
if !confirmation {
|
||||
return Err(InterpretError::new(
|
||||
"User aborted the operation.".to_string(),
|
||||
));
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
@@ -233,23 +74,10 @@ impl Interpret<HAClusterTopology> for OKDSetup04WorkersInterpret {
|
||||
|
||||
async fn execute(
|
||||
&self,
|
||||
inventory: &Inventory,
|
||||
topology: &HAClusterTopology,
|
||||
_inventory: &Inventory,
|
||||
_topology: &HAClusterTopology,
|
||||
) -> Result<Outcome, InterpretError> {
|
||||
self.render_and_reboot().await?;
|
||||
// 1. Ensure we have 2 physical hosts for the worker nodes.
|
||||
let nodes = self.get_nodes(inventory, topology).await?;
|
||||
|
||||
// 2. Create DHCP reservations for the worker nodes.
|
||||
self.configure_host_binding(inventory, topology, &nodes)
|
||||
.await?;
|
||||
|
||||
// 3. Create iPXE files for each worker node to boot from the worker ignition.
|
||||
self.configure_ipxe(inventory, topology, &nodes).await?;
|
||||
|
||||
// 4. Reboot the nodes to start the OS installation.
|
||||
self.reboot_targets(&nodes).await?;
|
||||
|
||||
Ok(Outcome::success("Workers provisioned".into()))
|
||||
}
|
||||
}
|
||||
|
||||
87
harmony/src/modules/okd/control_plane.rs
Normal file
87
harmony/src/modules/okd/control_plane.rs
Normal file
@@ -0,0 +1,87 @@
|
||||
use std::sync::Arc;
|
||||
|
||||
use async_trait::async_trait;
|
||||
use harmony_types::id::Id;
|
||||
use kube::api::GroupVersionKind;
|
||||
use serde::Serialize;
|
||||
use serde_json::json;
|
||||
|
||||
use crate::{
|
||||
data::Version,
|
||||
interpret::{Interpret, InterpretError, InterpretName, InterpretStatus, Outcome},
|
||||
inventory::Inventory,
|
||||
score::Score,
|
||||
topology::{K8sclient, Topology, k8s::K8sClient},
|
||||
};
|
||||
|
||||
#[derive(Debug, Clone, Serialize)]
|
||||
pub struct ControlPlaneConfig {}
|
||||
|
||||
impl<T: Topology + K8sclient> Score<T> for ControlPlaneConfig {
|
||||
fn name(&self) -> String {
|
||||
"ControlPlaneConfig".to_string()
|
||||
}
|
||||
|
||||
#[doc(hidden)]
|
||||
fn create_interpret(&self) -> Box<dyn Interpret<T>> {
|
||||
todo!()
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize)]
|
||||
pub struct ControlPlaneConfigInterpret {
|
||||
score: ControlPlaneConfig,
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl<T: Topology + K8sclient> Interpret<T> for ControlPlaneConfigInterpret {
|
||||
async fn execute(
|
||||
&self,
|
||||
inventory: &Inventory,
|
||||
topology: &T,
|
||||
) -> Result<Outcome, InterpretError> {
|
||||
let client = topology.k8s_client().await.unwrap();
|
||||
self.control_plane_unschedulable(&client).await
|
||||
}
|
||||
|
||||
fn get_name(&self) -> InterpretName {
|
||||
todo!()
|
||||
}
|
||||
|
||||
fn get_version(&self) -> Version {
|
||||
todo!()
|
||||
}
|
||||
|
||||
fn get_status(&self) -> InterpretStatus {
|
||||
todo!()
|
||||
}
|
||||
|
||||
fn get_children(&self) -> Vec<Id> {
|
||||
todo!()
|
||||
}
|
||||
}
|
||||
|
||||
impl ControlPlaneConfigInterpret {
|
||||
async fn control_plane_unschedulable(
|
||||
&self,
|
||||
client: &Arc<K8sClient>,
|
||||
) -> Result<Outcome, InterpretError> {
|
||||
let patch = json!({
|
||||
"spec": {
|
||||
"mastersSchedulable": false
|
||||
}
|
||||
});
|
||||
|
||||
let resource = GroupVersionKind {
|
||||
group: "config.openshift.io".to_string(),
|
||||
version: "v1".to_string(),
|
||||
kind: "Scheduler".to_string(),
|
||||
};
|
||||
|
||||
client.patch_resource(patch, &resource).await?;
|
||||
|
||||
Ok(Outcome::success(
|
||||
"control planes are no longer schedulable".to_string(),
|
||||
))
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user