Files
harmony/harmony/src/domain/topology/k8s.rs
Ian Letourneau 9d4e6acac0 fix(host_network): retrieve proper hostname and next available bond id (#182)
In order to query the current network state `NodeNetworkState` and to apply a `NodeNetworkConfigurationPolicy` for a given node, we first needed to find its hostname. As all we had was the UUID of a node.

We had different options available (e.g. updating the Harmony Inventory Agent to retrieve it, store it in the OKD installation pipeline on assignation, etc.). But for the sake of simplicity and for better flexibility (e.g. being able to run this score on a cluster that wasn't setup with Harmony), the `hostname` was retrieved directly in the cluster by running the equivalent of `kubectl get nodes -o yaml` and matching the nodes with the system UUID.

### Other changes
* Find the next available bond id for a node
* Apply a network config policy for a node (configuring a bond in our case)
* Adjust the CRDs for NMState

Note: to see a quick demo, watch the recording in NationTech/harmony#183
Reviewed-on: NationTech/harmony#182
Reviewed-by: johnride <jg@nationtech.io>
2025-11-05 23:38:24 +00:00

695 lines
24 KiB
Rust

use std::time::Duration;
use derive_new::new;
use k8s_openapi::{
ClusterResourceScope, NamespaceResourceScope,
api::{
apps::v1::Deployment,
core::v1::{Node, Pod, ServiceAccount},
},
apimachinery::pkg::version::Info,
};
use kube::{
Client, Config, Discovery, Error, Resource,
api::{
Api, AttachParams, DeleteParams, ListParams, ObjectList, Patch, PatchParams, ResourceExt,
},
config::{KubeConfigOptions, Kubeconfig},
core::ErrorResponse,
discovery::{ApiCapabilities, Scope},
error::DiscoveryError,
runtime::reflector::Lookup,
};
use kube::{api::DynamicObject, runtime::conditions};
use kube::{
api::{ApiResource, GroupVersionKind},
runtime::wait::await_condition,
};
use log::{debug, error, info, trace, warn};
use serde::{Serialize, de::DeserializeOwned};
use serde_json::json;
use similar::TextDiff;
use tokio::{io::AsyncReadExt, time::sleep};
use url::Url;
#[derive(new, Clone)]
pub struct K8sClient {
client: Client,
}
impl Serialize for K8sClient {
fn serialize<S>(&self, _serializer: S) -> Result<S::Ok, S::Error>
where
S: serde::Serializer,
{
todo!()
}
}
impl std::fmt::Debug for K8sClient {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
// This is a poor man's debug implementation for now as kube::Client does not provide much
// useful information
f.write_fmt(format_args!(
"K8sClient {{ kube client using default namespace {} }}",
self.client.default_namespace()
))
}
}
impl K8sClient {
pub async fn try_default() -> Result<Self, Error> {
Ok(Self {
client: Client::try_default().await?,
})
}
pub async fn service_account_api(&self, namespace: &str) -> Api<ServiceAccount> {
let api: Api<ServiceAccount> = Api::namespaced(self.client.clone(), namespace);
api
}
pub async fn get_apiserver_version(&self) -> Result<Info, Error> {
let client: Client = self.client.clone();
let version_info: Info = client.apiserver_version().await?;
Ok(version_info)
}
pub async fn discovery(&self) -> Result<Discovery, Error> {
let discovery: Discovery = Discovery::new(self.client.clone()).run().await?;
Ok(discovery)
}
pub async fn get_resource_json_value(
&self,
name: &str,
namespace: Option<&str>,
gvk: &GroupVersionKind,
) -> Result<DynamicObject, Error> {
let gvk = ApiResource::from_gvk(gvk);
let resource: Api<DynamicObject> = if let Some(ns) = namespace {
Api::namespaced_with(self.client.clone(), ns, &gvk)
} else {
Api::default_namespaced_with(self.client.clone(), &gvk)
};
resource.get(name).await
}
pub async fn get_deployment(
&self,
name: &str,
namespace: Option<&str>,
) -> Result<Option<Deployment>, Error> {
let deps: Api<Deployment> = if let Some(ns) = namespace {
debug!("getting namespaced deployment");
Api::namespaced(self.client.clone(), ns)
} else {
debug!("getting default namespace deployment");
Api::default_namespaced(self.client.clone())
};
debug!("getting deployment {} in ns {}", name, namespace.unwrap());
deps.get_opt(name).await
}
pub async fn get_pod(&self, name: &str, namespace: Option<&str>) -> Result<Option<Pod>, Error> {
let pods: Api<Pod> = if let Some(ns) = namespace {
Api::namespaced(self.client.clone(), ns)
} else {
Api::default_namespaced(self.client.clone())
};
pods.get_opt(name).await
}
pub async fn scale_deployment(
&self,
name: &str,
namespace: Option<&str>,
replicas: u32,
) -> Result<(), Error> {
let deployments: Api<Deployment> = if let Some(ns) = namespace {
Api::namespaced(self.client.clone(), ns)
} else {
Api::default_namespaced(self.client.clone())
};
let patch = json!({
"spec": {
"replicas": replicas
}
});
let pp = PatchParams::default();
let scale = Patch::Merge(&patch);
deployments.patch_scale(name, &pp, &scale).await?;
Ok(())
}
pub async fn delete_deployment(
&self,
name: &str,
namespace: Option<&str>,
) -> Result<(), Error> {
let deployments: Api<Deployment> = if let Some(ns) = namespace {
Api::namespaced(self.client.clone(), ns)
} else {
Api::default_namespaced(self.client.clone())
};
let delete_params = DeleteParams::default();
deployments.delete(name, &delete_params).await?;
Ok(())
}
pub async fn wait_until_deployment_ready(
&self,
name: &str,
namespace: Option<&str>,
timeout: Option<Duration>,
) -> Result<(), String> {
let api: Api<Deployment>;
if let Some(ns) = namespace {
api = Api::namespaced(self.client.clone(), ns);
} else {
api = Api::default_namespaced(self.client.clone());
}
let establish = await_condition(api, name, conditions::is_deployment_completed());
let timeout = timeout.unwrap_or(Duration::from_secs(120));
let res = tokio::time::timeout(timeout, establish).await;
if res.is_ok() {
Ok(())
} else {
Err("timed out while waiting for deployment".to_string())
}
}
pub async fn wait_for_pod_ready(
&self,
pod_name: &str,
namespace: Option<&str>,
) -> Result<(), Error> {
let mut elapsed = 0;
let interval = 5; // seconds between checks
let timeout_secs = 120;
loop {
let pod = self.get_pod(pod_name, namespace).await?;
if let Some(p) = pod {
if let Some(status) = p.status {
if let Some(phase) = status.phase {
if phase.to_lowercase() == "running" {
return Ok(());
}
}
}
}
if elapsed >= timeout_secs {
return Err(Error::Discovery(DiscoveryError::MissingResource(format!(
"'{}' in ns '{}' did not become ready within {}s",
pod_name,
namespace.unwrap(),
timeout_secs
))));
}
sleep(Duration::from_secs(interval)).await;
elapsed += interval;
}
}
/// Will execute a commond in the first pod found that matches the specified label
/// '{label}={name}'
pub async fn exec_app_capture_output(
&self,
name: String,
label: String,
namespace: Option<&str>,
command: Vec<&str>,
) -> Result<String, String> {
let api: Api<Pod>;
if let Some(ns) = namespace {
api = Api::namespaced(self.client.clone(), ns);
} else {
api = Api::default_namespaced(self.client.clone());
}
let pod_list = api
.list(&ListParams::default().labels(format!("{label}={name}").as_str()))
.await
.expect("couldn't get list of pods");
let res = api
.exec(
pod_list
.items
.first()
.expect("couldn't get pod")
.name()
.expect("couldn't get pod name")
.into_owned()
.as_str(),
command,
&AttachParams::default().stdout(true).stderr(true),
)
.await;
match res {
Err(e) => Err(e.to_string()),
Ok(mut process) => {
let status = process
.take_status()
.expect("Couldn't get status")
.await
.expect("Couldn't unwrap status");
if let Some(s) = status.status {
let mut stdout_buf = String::new();
if let Some(mut stdout) = process.stdout() {
stdout
.read_to_string(&mut stdout_buf)
.await
.map_err(|e| format!("Failed to get status stdout {e}"))?;
}
debug!("Status: {} - {:?}", s, status.details);
if s == "Success" {
Ok(stdout_buf)
} else {
Err(s)
}
} else {
Err("Couldn't get inner status of pod exec".to_string())
}
}
}
}
/// Will execute a command in the first pod found that matches the label `app.kubernetes.io/name={name}`
pub async fn exec_app(
&self,
name: String,
namespace: Option<&str>,
command: Vec<&str>,
) -> Result<(), String> {
let api: Api<Pod>;
if let Some(ns) = namespace {
api = Api::namespaced(self.client.clone(), ns);
} else {
api = Api::default_namespaced(self.client.clone());
}
let pod_list = api
.list(&ListParams::default().labels(format!("app.kubernetes.io/name={name}").as_str()))
.await
.expect("couldn't get list of pods");
let res = api
.exec(
pod_list
.items
.first()
.expect("couldn't get pod")
.name()
.expect("couldn't get pod name")
.into_owned()
.as_str(),
command,
&AttachParams::default(),
)
.await;
match res {
Err(e) => Err(e.to_string()),
Ok(mut process) => {
let status = process
.take_status()
.expect("Couldn't get status")
.await
.expect("Couldn't unwrap status");
if let Some(s) = status.status {
debug!("Status: {} - {:?}", s, status.details);
if s == "Success" { Ok(()) } else { Err(s) }
} else {
Err("Couldn't get inner status of pod exec".to_string())
}
}
}
}
/// Apply a resource in namespace
///
/// See `kubectl apply` for more information on the expected behavior of this function
pub async fn apply<K>(&self, resource: &K, namespace: Option<&str>) -> Result<K, Error>
where
K: Resource + Clone + std::fmt::Debug + DeserializeOwned + serde::Serialize,
<K as Resource>::Scope: ApplyStrategy<K>,
<K as kube::Resource>::DynamicType: Default,
{
debug!(
"Applying resource {:?} with ns {:?}",
resource.meta().name,
namespace
);
trace!(
"{:#}",
serde_json::to_value(resource).unwrap_or(serde_json::Value::Null)
);
let api: Api<K> =
<<K as Resource>::Scope as ApplyStrategy<K>>::get_api(&self.client, namespace);
// api.create(&PostParams::default(), &resource).await
let patch_params = PatchParams::apply("harmony");
let name = resource
.meta()
.name
.as_ref()
.expect("K8s Resource should have a name");
if *crate::config::DRY_RUN {
match api.get(name).await {
Ok(current) => {
trace!("Received current value {current:#?}");
// The resource exists, so we calculate and display a diff.
println!("\nPerforming dry-run for resource: '{name}'");
let mut current_yaml = serde_yaml::to_value(&current).unwrap_or_else(|_| {
panic!("Could not serialize current value : {current:#?}")
});
if current_yaml.is_mapping() && current_yaml.get("status").is_some() {
let map = current_yaml.as_mapping_mut().unwrap();
let removed = map.remove_entry("status");
trace!("Removed status {removed:?}");
} else {
trace!(
"Did not find status entry for current object {}/{}",
current.meta().namespace.as_ref().unwrap_or(&"".to_string()),
current.meta().name.as_ref().unwrap_or(&"".to_string())
);
}
let current_yaml = serde_yaml::to_string(&current_yaml)
.unwrap_or_else(|_| "Failed to serialize current resource".to_string());
let new_yaml = serde_yaml::to_string(resource)
.unwrap_or_else(|_| "Failed to serialize new resource".to_string());
if current_yaml == new_yaml {
println!("No changes detected.");
// Return the current resource state as there are no changes.
return Ok(current);
}
println!("Changes detected:");
let diff = TextDiff::from_lines(&current_yaml, &new_yaml);
// Iterate over the changes and print them in a git-like diff format.
for change in diff.iter_all_changes() {
let sign = match change.tag() {
similar::ChangeTag::Delete => "-",
similar::ChangeTag::Insert => "+",
similar::ChangeTag::Equal => " ",
};
print!("{sign}{change}");
}
// In a dry run, we return the new resource state that would have been applied.
Ok(resource.clone())
}
Err(Error::Api(ErrorResponse { code: 404, .. })) => {
// The resource does not exist, so the "diff" is the entire new resource.
println!("\nPerforming dry-run for new resource: '{name}'");
println!(
"Resource does not exist. It would be created with the following content:"
);
let new_yaml = serde_yaml::to_string(resource)
.unwrap_or_else(|_| "Failed to serialize new resource".to_string());
// Print each line of the new resource with a '+' prefix.
for line in new_yaml.lines() {
println!("+{line}");
}
// In a dry run, we return the new resource state that would have been created.
Ok(resource.clone())
}
Err(e) => {
// Another API error occurred.
error!("Failed to get resource '{name}': {e}");
Err(e)
}
}
} else {
return api
.patch(name, &patch_params, &Patch::Apply(resource))
.await;
}
}
pub async fn apply_many<K>(&self, resource: &[K], ns: Option<&str>) -> Result<Vec<K>, Error>
where
K: Resource + Clone + std::fmt::Debug + DeserializeOwned + serde::Serialize,
<K as Resource>::Scope: ApplyStrategy<K>,
<K as Resource>::DynamicType: Default,
{
let mut result = Vec::new();
for r in resource.iter() {
result.push(self.apply(r, ns).await?);
}
Ok(result)
}
pub async fn apply_yaml_many(
&self,
#[allow(clippy::ptr_arg)] yaml: &Vec<serde_yaml::Value>,
ns: Option<&str>,
) -> Result<(), Error> {
for y in yaml.iter() {
self.apply_yaml(y, ns).await?;
}
Ok(())
}
pub async fn apply_yaml(
&self,
yaml: &serde_yaml::Value,
ns: Option<&str>,
) -> Result<(), Error> {
let obj: DynamicObject = serde_yaml::from_value(yaml.clone()).expect("TODO do not unwrap");
let name = obj.metadata.name.as_ref().expect("YAML must have a name");
let api_version = yaml
.get("apiVersion")
.expect("couldn't get apiVersion from YAML")
.as_str()
.expect("couldn't get apiVersion as str");
let kind = yaml
.get("kind")
.expect("couldn't get kind from YAML")
.as_str()
.expect("couldn't get kind as str");
let mut it = api_version.splitn(2, '/');
let first = it.next().unwrap();
let (g, v) = match it.next() {
Some(second) => (first, second),
None => ("", first),
};
let gvk = GroupVersionKind::gvk(g, v, kind);
let api_resource = ApiResource::from_gvk(&gvk);
let namespace = match ns {
Some(n) => n,
None => obj
.metadata
.namespace
.as_ref()
.expect("YAML must have a namespace"),
};
// 5. Create a dynamic API client for this resource type.
let api: Api<DynamicObject> =
Api::namespaced_with(self.client.clone(), namespace, &api_resource);
// 6. Apply the object to the cluster using Server-Side Apply.
// This will create the resource if it doesn't exist, or update it if it does.
println!("Applying '{name}' in namespace '{namespace}'...",);
let patch_params = PatchParams::apply("harmony"); // Use a unique field manager name
let result = api.patch(name, &patch_params, &Patch::Apply(&obj)).await?;
println!("Successfully applied '{}'.", result.name_any());
Ok(())
}
/// Apply a resource from a URL
///
/// It is the equivalent of `kubectl apply -f <url>`
pub async fn apply_url(&self, url: Url, ns: Option<&str>) -> Result<(), Error> {
let patch_params = PatchParams::apply("harmony");
let discovery = kube::Discovery::new(self.client.clone()).run().await?;
let yaml = reqwest::get(url)
.await
.expect("Could not get URL")
.text()
.await
.expect("Could not get content from URL");
for doc in multidoc_deserialize(&yaml).expect("failed to parse YAML from file") {
let obj: DynamicObject =
serde_yaml::from_value(doc).expect("cannot apply without valid YAML");
let namespace = obj.metadata.namespace.as_deref().or(ns);
let type_meta = obj
.types
.as_ref()
.expect("cannot apply object without valid TypeMeta");
let gvk = GroupVersionKind::try_from(type_meta)
.expect("cannot apply object without valid GroupVersionKind");
let name = obj.name_any();
if let Some((ar, caps)) = discovery.resolve_gvk(&gvk) {
let api = get_dynamic_api(ar, caps, self.client.clone(), namespace, false);
trace!(
"Applying {}: \n{}",
gvk.kind,
serde_yaml::to_string(&obj).expect("Failed to serialize YAML")
);
let data: serde_json::Value =
serde_json::to_value(&obj).expect("Failed to serialize JSON");
let _r = api.patch(&name, &patch_params, &Patch::Apply(data)).await?;
debug!("applied {} {}", gvk.kind, name);
} else {
warn!("Cannot apply document for unknown {gvk:?}");
}
}
Ok(())
}
/// Gets a single named resource of a specific type `K`.
///
/// This function uses the `ApplyStrategy` trait to correctly determine
/// whether to look in a specific namespace or in the entire cluster.
///
/// Returns `Ok(None)` if the resource is not found (404).
pub async fn get_resource<K>(
&self,
name: &str,
namespace: Option<&str>,
) -> Result<Option<K>, Error>
where
K: Resource + Clone + std::fmt::Debug + DeserializeOwned,
<K as Resource>::Scope: ApplyStrategy<K>,
<K as kube::Resource>::DynamicType: Default,
{
let api: Api<K> =
<<K as Resource>::Scope as ApplyStrategy<K>>::get_api(&self.client, namespace);
api.get_opt(name).await
}
/// Lists all resources of a specific type `K`.
///
/// This function uses the `ApplyStrategy` trait to correctly determine
/// whether to list from a specific namespace or from the entire cluster.
pub async fn list_resources<K>(
&self,
namespace: Option<&str>,
list_params: Option<ListParams>,
) -> Result<ObjectList<K>, Error>
where
K: Resource + Clone + std::fmt::Debug + DeserializeOwned,
<K as Resource>::Scope: ApplyStrategy<K>,
<K as kube::Resource>::DynamicType: Default,
{
let api: Api<K> =
<<K as Resource>::Scope as ApplyStrategy<K>>::get_api(&self.client, namespace);
let list_params = list_params.unwrap_or_default();
api.list(&list_params).await
}
/// Fetches a list of all Nodes in the cluster.
pub async fn get_nodes(
&self,
list_params: Option<ListParams>,
) -> Result<ObjectList<Node>, Error> {
self.list_resources(None, list_params).await
}
pub async fn from_kubeconfig(path: &str) -> Option<K8sClient> {
let k = match Kubeconfig::read_from(path) {
Ok(k) => k,
Err(e) => {
error!("Failed to load kubeconfig from {path} : {e}");
return None;
}
};
Some(K8sClient::new(
Client::try_from(
Config::from_custom_kubeconfig(k, &KubeConfigOptions::default())
.await
.unwrap(),
)
.unwrap(),
))
}
}
fn get_dynamic_api(
resource: ApiResource,
capabilities: ApiCapabilities,
client: Client,
ns: Option<&str>,
all: bool,
) -> Api<DynamicObject> {
if capabilities.scope == Scope::Cluster || all {
Api::all_with(client, &resource)
} else if let Some(namespace) = ns {
Api::namespaced_with(client, namespace, &resource)
} else {
Api::default_namespaced_with(client, &resource)
}
}
fn multidoc_deserialize(data: &str) -> Result<Vec<serde_yaml::Value>, serde_yaml::Error> {
use serde::Deserialize;
let mut docs = vec![];
for de in serde_yaml::Deserializer::from_str(data) {
docs.push(serde_yaml::Value::deserialize(de)?);
}
Ok(docs)
}
pub trait ApplyStrategy<K: Resource> {
fn get_api(client: &Client, ns: Option<&str>) -> Api<K>;
}
/// Implementation for all resources that are cluster-scoped.
/// It will always use `Api::all` and ignore the namespace parameter.
impl<K> ApplyStrategy<K> for ClusterResourceScope
where
K: Resource<Scope = ClusterResourceScope>,
<K as kube::Resource>::DynamicType: Default,
{
fn get_api(client: &Client, _ns: Option<&str>) -> Api<K> {
Api::all(client.clone())
}
}
/// Implementation for all resources that are namespace-scoped.
/// It will use `Api::namespaced` if a namespace is provided, otherwise
/// it falls back to the default namespace configured in your kubeconfig.
impl<K> ApplyStrategy<K> for NamespaceResourceScope
where
K: Resource<Scope = NamespaceResourceScope>,
<K as kube::Resource>::DynamicType: Default,
{
fn get_api(client: &Client, ns: Option<&str>) -> Api<K> {
match ns {
Some(ns) => Api::namespaced(client.clone(), ns),
None => Api::default_namespaced(client.clone()),
}
}
}