harmony/fleet/harmony-fleet-deploy/src/agent.rs

//! `FleetAgentScore` — declarative deploy of a `harmony-fleet-agent`
//! instance.
//!
//! v1 supports the `Pod` target only — the in-cluster Pod form
//! factor the e2e harness deploys. Production today uses
//! `harmony::modules::fleet::FleetDeviceSetupScore` (VM/SSH/Pi
//! target) which is slated to move into this crate as
//! `FleetAgentTarget::Vm` in a follow-up; for now the public
//! [`FleetAgentTarget`] enum is single-variant so the migration
//! is additive.
//!
//! Composition: the `Pod` target Score interprets to two
//! [`K8sResourceScore`] applies in sequence — a `ConfigMap` carrying
//! `/etc/fleet-agent/config.toml`, then a `Deployment` running the
//! agent image and mounting the ConfigMap. No handrolled manifests
//! in test code; the harness composes this Score against a test
//! Topology.

use std::collections::BTreeMap;

use async_trait::async_trait;
use harmony::data::Version;
use harmony::interpret::{Interpret, InterpretError, InterpretName, InterpretStatus, Outcome};
use harmony::inventory::Inventory;
use harmony::modules::k8s::resource::K8sResourceScore;
use harmony::score::Score;
use harmony::topology::{K8sclient, Topology};
use harmony_types::id::Id;
use k8s_openapi::api::apps::v1::{Deployment, DeploymentSpec};
use k8s_openapi::api::core::v1::{
    ConfigMap, ConfigMapVolumeSource, Container, EnvVar, PodSpec, PodTemplateSpec, Volume,
    VolumeMount,
};
use k8s_openapi::apimachinery::pkg::apis::meta::v1::LabelSelector;
use kube::api::ObjectMeta;
use serde::Serialize;
use thiserror::Error;

/// Which form factor to deploy the agent in.
///
/// `Pod`: in-cluster Kubernetes Pod, used by the e2e harness and by
/// production deploys where agents live inside the same cluster as
/// the operator.
///
/// Future: `Vm { ssh: SshCredentials, … }` and `Bare { … }`. Today
/// the VM/SSH/RPi path is handled by `FleetDeviceSetupScore` in
/// `harmony::modules::fleet`; that work is in scope for a follow-up
/// PR that finishes the migration to this crate.
#[derive(Debug, Clone, Serialize)]
pub enum FleetAgentTarget {
    /// Run the agent inside the cluster: interpreted as a `ConfigMap`
    /// holding `config.toml` plus a single-replica `Deployment` that
    /// mounts it.
    Pod(PodTarget),
}

/// Concrete inputs for the `Pod` target.
///
/// `device_id`, the NATS settings and `labels` are rendered into the
/// agent's `config.toml`; `image`, `image_pull_policy` and `log_level`
/// are applied to the agent container.
#[derive(Debug, Clone, Serialize)]
pub struct PodTarget {
    /// Cross-boundary device identity the agent publishes under.
    /// Also embedded in the names and labels of the rendered
    /// `ConfigMap` and `Deployment`.
    pub device_id: String,
    /// Container image (`localhost/harmony-fleet-agent:e2e`, an
    /// internal registry path, …).
    pub image: String,
    /// `IfNotPresent` for k3d-sideloaded images, `Always` for
    /// registry-backed deploys.
    pub image_pull_policy: String,
    /// In-cluster NATS URL the agent connects to. Built from a
    /// `FleetNatsScore::in_cluster_url()` typically.
    pub nats_url: String,
    /// Static NATS user/pass — the v1 `UserPass` auth mode. Mirrors
    /// `FleetNatsAuth::UserPass`. Will be replaced by Zitadel JWT
    /// credentials once the callout layer joins this crate.
    pub nats_user: String,
    /// Password paired with `nats_user`. Written in clear text into
    /// the rendered `config.toml`, which is stored in a `ConfigMap`
    /// (not a `Secret`).
    pub nats_pass: String,
    /// `RUST_LOG` env value passed to the agent process.
    pub log_level: String,
    /// Routing labels published by the agent in every DeviceInfo
    /// heartbeat. The operator reflects them onto the `Device` CR
    /// for selector-based targeting.
    pub labels: BTreeMap<String, String>,
}

/// Declarative description of one `harmony-fleet-agent` deployment.
///
/// Interpreting the score applies the resources rendered for `target`
/// into `namespace`.
#[derive(Debug, Clone, Serialize)]
pub struct FleetAgentScore {
    /// Kubernetes namespace the agent's resources are applied to.
    pub namespace: String,
    /// Form factor to deploy, together with its concrete inputs.
    pub target: FleetAgentTarget,
}

impl FleetAgentScore {
    /// Convenience constructor for the in-cluster `Pod` form factor:
    /// wraps `target` in [`FleetAgentTarget::Pod`] and records the
    /// namespace the resources should be applied to.
    pub fn pod(namespace: impl Into<String>, target: PodTarget) -> Self {
        let namespace = namespace.into();
        let target = FleetAgentTarget::Pod(target);
        Self { namespace, target }
    }

    /// Name shared by the agent's ConfigMap and Deployment.
    ///
    /// The device id is appended to a fixed `fleet-agent-` prefix, so
    /// agents for different devices get distinct resource names and can
    /// live side by side in one namespace.
    pub fn resource_name(device_id: &str) -> String {
        const PREFIX: &str = "fleet-agent-";

        let mut name = String::with_capacity(PREFIX.len() + device_id.len());
        name.push_str(PREFIX);
        name.push_str(device_id);
        name
    }
}

/// Failures raised while applying the agent's resources, tagged with
/// the resource that failed and the device it belongs to.
///
/// Each message embeds the underlying error's `Display` output, so
/// flattening the error to a string keeps the cause visible.
#[derive(Debug, Error)]
pub enum FleetAgentError {
    /// Applying the agent's `ConfigMap` failed.
    #[error("applying ConfigMap for agent {device_id}: {source}")]
    ConfigMap {
        /// Device whose agent was being deployed.
        device_id: String,
        /// Error returned by the underlying resource apply.
        #[source]
        source: InterpretError,
    },
    /// Applying the agent's `Deployment` failed.
    #[error("applying Deployment for agent {device_id}: {source}")]
    Deployment {
        /// Device whose agent was being deployed.
        device_id: String,
        /// Error returned by the underlying resource apply.
        #[source]
        source: InterpretError,
    },
}

/// Flattens a [`FleetAgentError`] into the framework's error type so
/// `?` works inside interprets. Only the rendered message is carried
/// over; it already contains the text of the underlying cause.
impl From<FleetAgentError> for InterpretError {
    fn from(value: FleetAgentError) -> Self {
        let message = value.to_string();
        InterpretError::new(message)
    }
}

impl<T: Topology + K8sclient> Score<T> for FleetAgentScore {
    /// Builds the interpret that performs the deploy. The interpret
    /// owns its own copy of the score.
    fn create_interpret(&self) -> Box<dyn Interpret<T>> {
        let interpret = FleetAgentInterpret {
            score: self.clone(),
        };
        Box::new(interpret)
    }

    /// Human-readable score name: the form factor plus the device id.
    fn name(&self) -> String {
        // Exhaustive on purpose: a new target variant must pick its own
        // label here rather than fall through silently.
        let (form_factor, device_id) = match &self.target {
            FleetAgentTarget::Pod(pod) => ("pod", pod.device_id.as_str()),
        };
        format!("FleetAgentScore({form_factor}, {device_id})")
    }
}

/// Interpret created by `FleetAgentScore`'s `Score::create_interpret`.
/// Holds its own clone of the score it executes.
#[derive(Debug)]
struct FleetAgentInterpret {
    /// The score describing what to deploy and where.
    score: FleetAgentScore,
}

#[async_trait]
impl<T: Topology + K8sclient> Interpret<T> for FleetAgentInterpret {
    /// Applies the agent's resources for the configured target.
    ///
    /// For the `Pod` target the ConfigMap is applied first and the
    /// Deployment second; the first failure aborts the run and is
    /// reported as the matching [`FleetAgentError`] variant. On success
    /// the Deployment apply's message is returned, with the device id,
    /// namespace and image attached as details.
    async fn execute(
        &self,
        inventory: &Inventory,
        topology: &T,
    ) -> Result<Outcome, InterpretError> {
        let namespace = &self.score.namespace;

        match &self.score.target {
            FleetAgentTarget::Pod(pod) => {
                // ConfigMap goes first: the Deployment's volume refers
                // to it by name.
                let config_map = render_config_map(namespace, pod);
                K8sResourceScore::single(config_map, Some(namespace.clone()))
                    .interpret(inventory, topology)
                    .await
                    .map_err(|source| FleetAgentError::ConfigMap {
                        device_id: pod.device_id.clone(),
                        source,
                    })?;

                let deployment = render_deployment(namespace, pod);
                let applied = K8sResourceScore::single(deployment, Some(namespace.clone()))
                    .interpret(inventory, topology)
                    .await
                    .map_err(|source| FleetAgentError::Deployment {
                        device_id: pod.device_id.clone(),
                        source,
                    })?;

                let details = vec![
                    format!("agent device_id: {}", pod.device_id),
                    format!("agent namespace: {}", namespace),
                    format!("agent image:     {}", pod.image),
                ];
                Ok(Outcome::success_with_details(applied.message, details))
            }
        }
    }

    /// Fixed custom interpret name.
    fn get_name(&self) -> InterpretName {
        InterpretName::Custom("FleetAgentInterpret")
    }

    /// Version of this interpret; the literal is static, so a parse
    /// failure would be a programming error.
    fn get_version(&self) -> Version {
        Version::from("0.1.0").expect("static version literal")
    }

    /// Always reports `QUEUED`; no progress is tracked on the interpret.
    fn get_status(&self) -> InterpretStatus {
        InterpretStatus::QUEUED
    }

    /// This interpret exposes no child interprets.
    fn get_children(&self) -> Vec<Id> {
        Vec::new()
    }
}

// ---- pod manifest rendering ------------------------------------------------

/// Labels stamped on every resource rendered for one agent.
///
/// Returns four entries: the fixed application name, an instance label
/// of the form `agent-<device_id>`, the managing component, and the raw
/// device id.
fn common_labels(device_id: &str) -> BTreeMap<String, String> {
    let instance = format!("agent-{device_id}");
    let entries = [
        ("app.kubernetes.io/name", "harmony-fleet-agent"),
        ("app.kubernetes.io/instance", instance.as_str()),
        ("harmony.io/managed-by", "harmony-fleet-deploy"),
        ("harmony.io/device-id", device_id),
    ];

    entries
        .iter()
        .map(|&(key, value)| (key.to_string(), value.to_string()))
        .collect()
}

/// Quotes `raw` as a TOML basic string.
///
/// `"` and `\` are backslash-escaped, newline / carriage return / tab
/// use their short escapes, and any other control character is written
/// as `\uXXXX`, so the result is always a single well-formed TOML string
/// regardless of what the input contains.
fn toml_basic_string(raw: &str) -> String {
    let mut quoted = String::with_capacity(raw.len() + 2);
    quoted.push('"');
    for ch in raw.chars() {
        match ch {
            '"' => quoted.push_str("\\\""),
            '\\' => quoted.push_str("\\\\"),
            '\n' => quoted.push_str("\\n"),
            '\r' => quoted.push_str("\\r"),
            '\t' => quoted.push_str("\\t"),
            c if c.is_control() => quoted.push_str(&format!("\\u{:04X}", c as u32)),
            c => quoted.push(c),
        }
    }
    quoted.push('"');
    quoted
}

/// Renders `raw` as a TOML key.
///
/// Keys made only of ASCII letters, digits, `_` and `-` are emitted bare
/// (unchanged). Anything else — including keys containing `.` or `/`,
/// and the empty key — is emitted as a quoted key so it stays one flat
/// key instead of becoming invalid TOML or a nested table path.
fn toml_key(raw: &str) -> String {
    let is_bare = !raw.is_empty()
        && raw
            .chars()
            .all(|c| c.is_ascii_alphanumeric() || c == '_' || c == '-');
    if is_bare {
        raw.to_string()
    } else {
        toml_basic_string(raw)
    }
}

/// Builds the `ConfigMap` holding the agent's `config.toml`.
///
/// The document contains an `[agent]` table (device id, runtime
/// disabled), a `[credentials]` table (shared NATS user/pass), a
/// `[nats]` table (single URL) and, when `pod.labels` is non-empty, a
/// `[labels]` table. All interpolated values are escaped as TOML
/// strings and label keys are quoted when they are not valid bare keys,
/// so arbitrary input cannot break or inject into the document. For
/// inputs that need no escaping the output is unchanged.
///
/// The ConfigMap is named via [`FleetAgentScore::resource_name`] and
/// carries the common agent labels.
fn render_config_map(namespace: &str, pod: &PodTarget) -> ConfigMap {
    let name = FleetAgentScore::resource_name(&pod.device_id);

    // Optional trailing `[labels]` table; left empty when there is
    // nothing to publish so the document ends after `[nats]`.
    let mut labels_block = String::new();
    if !pod.labels.is_empty() {
        labels_block.push_str("\n[labels]\n");
        for (key, value) in &pod.labels {
            labels_block.push_str(&toml_key(key));
            labels_block.push_str(" = ");
            labels_block.push_str(&toml_basic_string(value));
            labels_block.push('\n');
        }
    }

    // NOTE(review): `nats_pass` ends up in a plain ConfigMap. Consider
    // moving credentials to a Secret once the agent can read them from
    // there — confirm with the agent's config loader.
    let toml = format!(
        r#"[agent]
device_id = {device_id}
runtime_enabled = false

[credentials]
type = "toml-shared"
nats_user = {nats_user}
nats_pass = {nats_pass}

[nats]
urls = [{nats_url}]
{labels_block}"#,
        device_id = toml_basic_string(&pod.device_id),
        nats_user = toml_basic_string(&pod.nats_user),
        nats_pass = toml_basic_string(&pod.nats_pass),
        nats_url = toml_basic_string(&pod.nats_url),
    );

    ConfigMap {
        metadata: ObjectMeta {
            name: Some(name),
            namespace: Some(namespace.to_string()),
            labels: Some(common_labels(&pod.device_id)),
            ..Default::default()
        },
        data: Some(BTreeMap::from([("config.toml".to_string(), toml)])),
        ..Default::default()
    }
}

/// Builds the single-replica `Deployment` that runs the agent.
///
/// The pod has one container named `agent`, started with
/// `--config /etc/fleet-agent/config.toml`, with `RUST_LOG` set from
/// `pod.log_level`. The ConfigMap named by
/// [`FleetAgentScore::resource_name`] is mounted read-only at
/// `/etc/fleet-agent`. The Deployment and its pod template carry the
/// full common label set; the selector matches on the instance label
/// only.
fn render_deployment(namespace: &str, pod: &PodTarget) -> Deployment {
    // Volume name shared by the pod-level volume and the container mount.
    const CONFIG_VOLUME: &str = "config";
    const INSTANCE_LABEL: &str = "app.kubernetes.io/instance";

    let resource_name = FleetAgentScore::resource_name(&pod.device_id);
    let all_labels = common_labels(&pod.device_id);

    // Only the instance label goes into the selector; the template
    // below still gets every label.
    let selector_labels: BTreeMap<String, String> = all_labels
        .get_key_value(INSTANCE_LABEL)
        .map(|(key, value)| (key.clone(), value.clone()))
        .into_iter()
        .collect();

    let log_env = EnvVar {
        name: "RUST_LOG".to_string(),
        value: Some(pod.log_level.clone()),
        ..Default::default()
    };

    let config_mount = VolumeMount {
        name: CONFIG_VOLUME.to_string(),
        mount_path: "/etc/fleet-agent".to_string(),
        read_only: Some(true),
        ..Default::default()
    };

    let agent = Container {
        name: "agent".to_string(),
        image: Some(pod.image.clone()),
        image_pull_policy: Some(pod.image_pull_policy.clone()),
        args: Some(vec![
            "--config".to_string(),
            "/etc/fleet-agent/config.toml".to_string(),
        ]),
        env: Some(vec![log_env]),
        volume_mounts: Some(vec![config_mount]),
        ..Default::default()
    };

    let config_volume = Volume {
        name: CONFIG_VOLUME.to_string(),
        config_map: Some(ConfigMapVolumeSource {
            name: resource_name.clone(),
            ..Default::default()
        }),
        ..Default::default()
    };

    let template = PodTemplateSpec {
        metadata: Some(ObjectMeta {
            labels: Some(all_labels.clone()),
            ..Default::default()
        }),
        spec: Some(PodSpec {
            containers: vec![agent],
            volumes: Some(vec![config_volume]),
            ..Default::default()
        }),
    };

    let spec = DeploymentSpec {
        replicas: Some(1),
        selector: LabelSelector {
            match_labels: Some(selector_labels),
            ..Default::default()
        },
        template,
        ..Default::default()
    };

    Deployment {
        metadata: ObjectMeta {
            name: Some(resource_name),
            namespace: Some(namespace.to_string()),
            labels: Some(all_labels),
            ..Default::default()
        },
        spec: Some(spec),
        ..Default::default()
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Namespace every rendering test renders into.
    const NAMESPACE: &str = "e2e-xxx";

    /// Fixture: a fully populated `PodTarget` with a single routing label.
    fn pod() -> PodTarget {
        let labels = BTreeMap::from([("e2e".to_string(), "true".to_string())]);
        PodTarget {
            device_id: "vm-device-00".into(),
            image: "localhost/harmony-fleet-agent:e2e".into(),
            image_pull_policy: "IfNotPresent".into(),
            nats_url: "nats://fleet-nats.e2e-xxx.svc.cluster.local:4222".into(),
            nats_user: "device".into(),
            nats_pass: "dev".into(),
            log_level: "info".into(),
            labels,
        }
    }

    /// The rendered config must disable the runtime and carry both the
    /// device id and the routing label.
    #[test]
    fn configmap_disables_runtime_so_pod_starts_without_podman() {
        let data = render_config_map(NAMESPACE, &pod())
            .data
            .expect("ConfigMap carries data");
        let toml = data
            .get("config.toml")
            .expect("config.toml entry is present");

        let expected_fragments = [
            "runtime_enabled = false",
            r#"device_id = "vm-device-00""#,
            r#"e2e = "true""#,
        ];
        for fragment in expected_fragments {
            assert!(toml.contains(fragment));
        }
    }

    /// The Deployment's own metadata must expose the device id and the
    /// managing component as labels.
    #[test]
    fn deployment_label_carries_device_id_and_managed_by() {
        let labels = render_deployment(NAMESPACE, &pod())
            .metadata
            .labels
            .expect("Deployment metadata carries labels");

        let expected_labels = [
            ("harmony.io/device-id", "vm-device-00"),
            ("harmony.io/managed-by", "harmony-fleet-deploy"),
        ];
        for (key, expected) in expected_labels {
            assert_eq!(labels.get(key).map(String::as_str), Some(expected));
        }
    }
}