Files
harmony/fleet/harmony-fleet-deploy/src/agent.rs
Jean-Gabriel Gill-Couture 020ebcb1f9 refactor(fleet): deploy-architecture cleanup per ADR-023 — Scores everywhere, deploy crate, principles in CLAUDE.md
The previous e2e harness handrolled k8s manifests in `stack.rs`,
bypassing the Score-Topology-Interpret machinery harmony exists to
provide. This commit:

1. **ADR-023** codifies the rules: deploy with Scores (not
   manifests), e2e uses the same Scores as production, one Score
   per component, deploy blocks on smoke-test success, deploy logic
   lives in `*-deploy` crates, topologies are compile-time,
   thiserror over anyhow. CLAUDE.md mirrors the principles.

2. **New `fleet/harmony-fleet-deploy` crate** is the canonical home
   for fleet-component Scores:
   - `FleetOperatorScore` + helm-chart generator + `install_crds`
     moved out of `harmony::modules::fleet::operator` (they should
     never have lived in `harmony` core). `FleetServerScore`
     (composite of NATS + operator + Zitadel + callout) moved too.
   - New `FleetNatsScore` (preset over `NatsHelmChartScore` with
     fleet's required values; v1 supports `UserPass` auth, callout
     mode reserved on the public API for PR 1.5).
   - New `FleetAgentScore` with `FleetAgentTarget::Pod`; `Vm`
     target is a future variant that absorbs `FleetDeviceSetupScore`.
   - `harmony-fleet-deploy` binary built on the existing
     `harmony_cli` crate — no new CLI scaffolding.

3. **Operator runtime binary trimmed**: `Install` and `Chart`
   subcommands removed; both jobs now belong to
   `harmony-fleet-deploy`. The runtime binary becomes leaner.

4. **E2E harness rewritten** as a thin Score composer:
   `harmony-fleet-e2e/src/stack.rs` deploys the stack via
   `FleetNatsScore` + `FleetAgentScore`. The inline NATS manifest
   factory and the bespoke agent Pod renderer are gone.
   - Bring-up runs once per test binary via `shared_stack` +
     `tokio::sync::OnceCell` (matches the `fleet_e2e_demo` pattern).
   - Stale `e2e-*` namespaces from prior runs get pruned at
     startup so the leaks the OnceCell creates don't compound.

5. **`thiserror` for the agent's `CommandServer`** — replaces the
   anyhow-based surface with typed `CommandError` /
   `CommandServerError`.

6. **Memory** captures eight load-bearing principles (saved to
   `~/.claude/projects/.../memory/`) so future sessions don't drift
   back into manifest-handrolling.

Verified: `cargo test -p harmony-fleet-e2e --test ping` green
end-to-end against k3d in 25s warm.
2026-05-18 22:54:50 -04:00

372 lines
12 KiB
Rust

//! `FleetAgentScore` — declarative deploy of a `harmony-fleet-agent`
//! instance.
//!
//! v1 supports the `Pod` target only — the in-cluster Pod form
//! factor the e2e harness deploys. Production today uses
//! `harmony::modules::fleet::FleetDeviceSetupScore` (VM/SSH/Pi
//! target) which is slated to move into this crate as
//! `FleetAgentTarget::Vm` in a follow-up; for now the public
//! [`FleetAgentTarget`] enum is single-variant so the migration
//! is additive.
//!
//! Composition: the `Pod` target Score interprets to two
//! [`K8sResourceScore`] applies in sequence — a `ConfigMap` carrying
//! `/etc/fleet-agent/config.toml`, then a `Deployment` running the
//! agent image and mounting the ConfigMap. No handrolled manifests
//! in test code; the harness composes this Score against a test
//! Topology.
use std::collections::BTreeMap;
use async_trait::async_trait;
use harmony::data::Version;
use harmony::interpret::{Interpret, InterpretError, InterpretName, InterpretStatus, Outcome};
use harmony::inventory::Inventory;
use harmony::modules::k8s::resource::K8sResourceScore;
use harmony::score::Score;
use harmony::topology::{K8sclient, Topology};
use harmony_types::id::Id;
use k8s_openapi::api::apps::v1::{Deployment, DeploymentSpec};
use k8s_openapi::api::core::v1::{
ConfigMap, ConfigMapVolumeSource, Container, EnvVar, PodSpec, PodTemplateSpec, Volume,
VolumeMount,
};
use k8s_openapi::apimachinery::pkg::apis::meta::v1::LabelSelector;
use kube::api::ObjectMeta;
use serde::Serialize;
use thiserror::Error;
/// Which form factor to deploy the agent in.
///
/// The enum currently has a single variant; the interpret for
/// [`FleetAgentScore`] matches on it to decide which resources to
/// render and apply.
///
/// `Pod`: in-cluster Kubernetes Pod, used by the e2e harness and by
/// production deploys where agents live inside the same cluster as
/// the operator.
///
/// Future: `Vm { ssh: SshCredentials, … }` and `Bare { … }`. Today
/// the VM/SSH/RPi path is handled by `FleetDeviceSetupScore` in
/// `harmony::modules::fleet`; that work is in scope for a follow-up
/// PR that finishes the migration to this crate.
#[derive(Debug, Clone, Serialize)]
pub enum FleetAgentTarget {
/// Deploy the agent as a `ConfigMap` plus a single-replica
/// `Deployment`, configured by the wrapped [`PodTarget`].
Pod(PodTarget),
}
/// Concrete inputs for the `Pod` target.
///
/// `device_id`, the NATS settings and `labels` are written into the
/// agent's `config.toml`; `image`, `image_pull_policy` and
/// `log_level` shape the agent container of the Deployment.
#[derive(Debug, Clone, Serialize)]
pub struct PodTarget {
/// Cross-boundary device identity the agent publishes under.
/// Also used to derive the ConfigMap/Deployment name
/// (`fleet-agent-{device_id}`) and the `harmony.io/device-id` label.
pub device_id: String,
/// Container image (`localhost/harmony-fleet-agent:e2e`, an
/// internal registry path, …).
pub image: String,
/// `IfNotPresent` for k3d-sideloaded images, `Always` for
/// registry-backed deploys. Passed through to the container's
/// image pull policy without validation in this file.
pub image_pull_policy: String,
/// In-cluster NATS URL the agent connects to. Built from a
/// `FleetNatsScore::in_cluster_url()` typically. Rendered as the
/// single entry of `[nats].urls`.
pub nats_url: String,
/// Static NATS user — the v1 `UserPass` auth mode. Mirrors
/// `FleetNatsAuth::UserPass`. Will be replaced by Zitadel JWT
/// credentials once the callout layer joins this crate.
pub nats_user: String,
/// Static NATS password paired with `nats_user`.
/// NOTE(review): rendered in clear text into the ConfigMap's
/// `config.toml`, not into a Kubernetes Secret.
pub nats_pass: String,
/// `RUST_LOG` env value passed to the agent process.
pub log_level: String,
/// Routing labels published by the agent in every DeviceInfo
/// heartbeat. The operator reflects them onto the `Device` CR
/// for selector-based targeting. Rendered as the `[labels]` table
/// of `config.toml`; the table is omitted when the map is empty.
pub labels: BTreeMap<String, String>,
}
/// Score describing one `harmony-fleet-agent` deployment.
///
/// Build it with [`FleetAgentScore::pod`]; interpreting it applies
/// the resources rendered for `target` into `namespace`.
#[derive(Debug, Clone, Serialize)]
pub struct FleetAgentScore {
/// Kubernetes namespace set on the rendered resources and passed
/// to each `K8sResourceScore` apply.
pub namespace: String,
/// Form factor and its inputs; see [`FleetAgentTarget`].
pub target: FleetAgentTarget,
}
impl FleetAgentScore {
    /// Creates a Score that deploys the agent as an in-cluster Pod
    /// workload in `namespace`, wrapping `target` in
    /// [`FleetAgentTarget::Pod`].
    pub fn pod(namespace: impl Into<String>, target: PodTarget) -> Self {
        let namespace = namespace.into();
        let target = FleetAgentTarget::Pod(target);
        Self { namespace, target }
    }

    /// Name shared by the agent's ConfigMap and Deployment:
    /// `fleet-agent-` followed by the device id. Embedding the device
    /// id keeps the resources of several devices apart within one
    /// namespace.
    pub fn resource_name(device_id: &str) -> String {
        let mut name = String::from("fleet-agent-");
        name.push_str(device_id);
        name
    }
}
/// Failures raised while interpreting a [`FleetAgentScore`].
///
/// Each variant names the step that failed, records the affected
/// device id and wraps the underlying [`InterpretError`]. The source
/// error's text is embedded in the `Display` output, so converting to
/// a plain message keeps the root cause.
#[derive(Debug, Error)]
pub enum FleetAgentError {
/// Applying the ConfigMap that carries `config.toml` failed.
#[error("applying ConfigMap for agent {device_id}: {source}")]
ConfigMap {
device_id: String,
#[source]
source: InterpretError,
},
/// Applying the agent Deployment failed.
#[error("applying Deployment for agent {device_id}: {source}")]
Deployment {
device_id: String,
#[source]
source: InterpretError,
},
}
impl From<FleetAgentError> for InterpretError {
fn from(value: FleetAgentError) -> Self {
InterpretError::new(value.to_string())
}
}
impl<T: Topology + K8sclient> Score<T> for FleetAgentScore {
    /// Returns an interpret that owns a clone of this Score.
    fn create_interpret(&self) -> Box<dyn Interpret<T>> {
        let interpret = FleetAgentInterpret {
            score: self.clone(),
        };
        Box::new(interpret)
    }

    /// Human-readable Score name: the target kind followed by the
    /// device id, e.g. `FleetAgentScore(pod, vm-device-00)`.
    fn name(&self) -> String {
        // Exhaustive on purpose: a new target variant must pick its own kind tag.
        let (kind, device_id) = match &self.target {
            FleetAgentTarget::Pod(pod) => ("pod", pod.device_id.as_str()),
        };
        format!("FleetAgentScore({kind}, {device_id})")
    }
}
/// Interpret produced by `FleetAgentScore::create_interpret`.
///
/// Holds its own clone of the Score it was created from.
#[derive(Debug)]
struct FleetAgentInterpret {
/// The Score whose namespace and target drive `execute`.
score: FleetAgentScore,
}
#[async_trait]
impl<T: Topology + K8sclient> Interpret<T> for FleetAgentInterpret {
    /// Applies the agent's resources for the configured target.
    ///
    /// For the `Pod` target the ConfigMap is applied first and the
    /// Deployment second; the first failure aborts the run and is
    /// reported as the matching [`FleetAgentError`] variant converted
    /// into an [`InterpretError`]. On success the Deployment apply's
    /// message is returned together with the device id, namespace and
    /// image as details.
    async fn execute(
        &self,
        inventory: &Inventory,
        topology: &T,
    ) -> Result<Outcome, InterpretError> {
        let namespace = &self.score.namespace;

        match &self.score.target {
            FleetAgentTarget::Pod(pod) => {
                // Step 1: the ConfigMap must exist before the Deployment mounts it.
                let config_map = render_config_map(namespace, pod);
                if let Err(source) =
                    K8sResourceScore::single(config_map, Some(namespace.clone()))
                        .interpret(inventory, topology)
                        .await
                {
                    let failure = FleetAgentError::ConfigMap {
                        device_id: pod.device_id.clone(),
                        source,
                    };
                    return Err(failure.into());
                }

                // Step 2: the Deployment running the agent image.
                let deployment = render_deployment(namespace, pod);
                let outcome = match K8sResourceScore::single(deployment, Some(namespace.clone()))
                    .interpret(inventory, topology)
                    .await
                {
                    Ok(outcome) => outcome,
                    Err(source) => {
                        let failure = FleetAgentError::Deployment {
                            device_id: pod.device_id.clone(),
                            source,
                        };
                        return Err(failure.into());
                    }
                };

                let details = vec![
                    format!("agent device_id: {}", pod.device_id),
                    format!("agent namespace: {}", namespace),
                    format!("agent image: {}", pod.image),
                ];
                Ok(Outcome::success_with_details(outcome.message, details))
            }
        }
    }

    /// Fixed custom interpret name.
    fn get_name(&self) -> InterpretName {
        InterpretName::Custom("FleetAgentInterpret")
    }

    /// Static interpret version, parsed from a literal.
    fn get_version(&self) -> Version {
        Version::from("0.1.0").expect("static version literal")
    }

    /// Always reports `QUEUED`; no progress is tracked here.
    fn get_status(&self) -> InterpretStatus {
        InterpretStatus::QUEUED
    }

    /// This interpret exposes no child interprets.
    fn get_children(&self) -> Vec<Id> {
        Vec::new()
    }
}
// ---- pod manifest rendering ------------------------------------------------
/// Labels stamped on every resource rendered for one agent.
///
/// Returns four entries: the fixed application name, an instance
/// label of the form `agent-{device_id}`, the managing tool, and the
/// raw device id.
fn common_labels(device_id: &str) -> BTreeMap<String, String> {
    let mut labels = BTreeMap::new();
    labels.insert(
        String::from("app.kubernetes.io/name"),
        String::from("harmony-fleet-agent"),
    );
    labels.insert(
        String::from("app.kubernetes.io/instance"),
        format!("agent-{device_id}"),
    );
    labels.insert(
        String::from("harmony.io/managed-by"),
        String::from("harmony-fleet-deploy"),
    );
    labels.insert(
        String::from("harmony.io/device-id"),
        String::from(device_id),
    );
    labels
}
/// Encodes `raw` as a TOML basic string, surrounding quotes included.
///
/// Quotes, backslashes and control characters are escaped so that an
/// arbitrary value can never terminate the string early or inject
/// extra keys into the rendered document.
fn toml_basic_string(raw: &str) -> String {
    let mut out = String::with_capacity(raw.len() + 2);
    out.push('"');
    for ch in raw.chars() {
        match ch {
            '"' => out.push_str("\\\""),
            '\\' => out.push_str("\\\\"),
            '\n' => out.push_str("\\n"),
            '\r' => out.push_str("\\r"),
            '\t' => out.push_str("\\t"),
            '\u{08}' => out.push_str("\\b"),
            '\u{0C}' => out.push_str("\\f"),
            // Remaining control characters have no short escape.
            c if c.is_control() => out.push_str(&format!("\\u{:04X}", c as u32)),
            c => out.push(c),
        }
    }
    out.push('"');
    out
}

/// Renders `key` as a TOML key: bare when it consists only of
/// `A-Za-z0-9_-`, otherwise as a quoted (escaped) key. Keys such as
/// `harmony.io/role` are not valid bare keys and must be quoted.
fn toml_key(key: &str) -> String {
    let is_bare = !key.is_empty()
        && key
            .chars()
            .all(|c| c.is_ascii_alphanumeric() || c == '_' || c == '-');
    if is_bare {
        key.to_string()
    } else {
        toml_basic_string(key)
    }
}

/// Builds the ConfigMap holding the agent's `config.toml`.
///
/// The document contains the `[agent]`, `[credentials]` and `[nats]`
/// tables, plus a `[labels]` table when `pod.labels` is non-empty.
/// All caller-supplied values are written as escaped TOML strings and
/// label keys are quoted when they are not valid bare keys; for inputs
/// without special characters the output is unchanged from the
/// unescaped rendering.
///
/// The ConfigMap is named via `FleetAgentScore::resource_name` and
/// carries the labels from `common_labels`.
///
/// NOTE(review): the NATS password is stored in ConfigMap data in
/// clear text rather than in a Secret.
fn render_config_map(namespace: &str, pod: &PodTarget) -> ConfigMap {
    let name = FleetAgentScore::resource_name(&pod.device_id);

    // The `[labels]` table is emitted only when there is something to put in it.
    let mut labels_block = String::new();
    if !pod.labels.is_empty() {
        labels_block.push_str("\n[labels]\n");
        for (key, value) in &pod.labels {
            labels_block.push_str(&toml_key(key));
            labels_block.push_str(" = ");
            labels_block.push_str(&toml_basic_string(value));
            labels_block.push('\n');
        }
    }

    // Placeholders receive already-quoted values, hence no literal
    // quotes around them in the template.
    let toml = format!(
        r#"[agent]
device_id = {device_id}
runtime_enabled = false
[credentials]
type = "toml-shared"
nats_user = {nats_user}
nats_pass = {nats_pass}
[nats]
urls = [{nats_url}]
{labels_block}"#,
        device_id = toml_basic_string(&pod.device_id),
        nats_user = toml_basic_string(&pod.nats_user),
        nats_pass = toml_basic_string(&pod.nats_pass),
        nats_url = toml_basic_string(&pod.nats_url),
    );

    ConfigMap {
        metadata: ObjectMeta {
            name: Some(name),
            namespace: Some(namespace.to_string()),
            labels: Some(common_labels(&pod.device_id)),
            ..Default::default()
        },
        data: Some(BTreeMap::from([("config.toml".to_string(), toml)])),
        ..Default::default()
    }
}
/// Builds the single-replica Deployment that runs the agent.
///
/// The one container starts the image with
/// `--config /etc/fleet-agent/config.toml`, gets `RUST_LOG` from
/// `pod.log_level`, and mounts the agent's ConfigMap read-only at
/// `/etc/fleet-agent`. The Deployment and its pod template carry the
/// full `common_labels` set, while the selector matches on the
/// `app.kubernetes.io/instance` label only.
fn render_deployment(namespace: &str, pod: &PodTarget) -> Deployment {
    let resource_name = FleetAgentScore::resource_name(&pod.device_id);
    let all_labels = common_labels(&pod.device_id);

    // Select on the instance label alone, copied out of the full set.
    let selector_labels: BTreeMap<String, String> = all_labels
        .get_key_value("app.kubernetes.io/instance")
        .map(|(key, value)| (key.clone(), value.clone()))
        .into_iter()
        .collect();

    let config_mount = VolumeMount {
        name: "config".to_string(),
        mount_path: "/etc/fleet-agent".to_string(),
        read_only: Some(true),
        ..Default::default()
    };
    let rust_log = EnvVar {
        name: "RUST_LOG".to_string(),
        value: Some(pod.log_level.clone()),
        ..Default::default()
    };
    let agent_container = Container {
        name: "agent".to_string(),
        image: Some(pod.image.clone()),
        image_pull_policy: Some(pod.image_pull_policy.clone()),
        args: Some(vec![
            "--config".to_string(),
            "/etc/fleet-agent/config.toml".to_string(),
        ]),
        env: Some(vec![rust_log]),
        volume_mounts: Some(vec![config_mount]),
        ..Default::default()
    };

    // The volume refers to the ConfigMap by the shared resource name.
    let config_volume = Volume {
        name: "config".to_string(),
        config_map: Some(ConfigMapVolumeSource {
            name: resource_name.clone(),
            ..Default::default()
        }),
        ..Default::default()
    };

    let pod_template = PodTemplateSpec {
        metadata: Some(ObjectMeta {
            labels: Some(all_labels.clone()),
            ..Default::default()
        }),
        spec: Some(PodSpec {
            containers: vec![agent_container],
            volumes: Some(vec![config_volume]),
            ..Default::default()
        }),
    };

    let deployment_spec = DeploymentSpec {
        replicas: Some(1),
        selector: LabelSelector {
            match_labels: Some(selector_labels),
            ..Default::default()
        },
        template: pod_template,
        ..Default::default()
    };

    Deployment {
        metadata: ObjectMeta {
            name: Some(resource_name),
            namespace: Some(namespace.to_string()),
            labels: Some(all_labels),
            ..Default::default()
        },
        spec: Some(deployment_spec),
        ..Default::default()
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Pod target fixture shared by the rendering tests.
    fn pod() -> PodTarget {
        let mut labels = BTreeMap::new();
        labels.insert(String::from("e2e"), String::from("true"));

        PodTarget {
            device_id: String::from("vm-device-00"),
            image: String::from("localhost/harmony-fleet-agent:e2e"),
            image_pull_policy: String::from("IfNotPresent"),
            nats_url: String::from("nats://fleet-nats.e2e-xxx.svc.cluster.local:4222"),
            nats_user: String::from("device"),
            nats_pass: String::from("dev"),
            log_level: String::from("info"),
            labels,
        }
    }

    /// The rendered `config.toml` must disable the runtime and carry
    /// the device id and routing labels.
    #[test]
    fn configmap_disables_runtime_so_pod_starts_without_podman() {
        let config_map = render_config_map("e2e-xxx", &pod());
        let mut data = config_map.data.unwrap();
        let toml = data.remove("config.toml").unwrap();

        let expected_fragments = [
            "runtime_enabled = false",
            r#"device_id = "vm-device-00""#,
            r#"e2e = "true""#,
        ];
        for fragment in expected_fragments {
            assert!(toml.contains(fragment));
        }
    }

    /// The Deployment metadata must expose the device id and the
    /// managing tool as labels.
    #[test]
    fn deployment_label_carries_device_id_and_managed_by() {
        let deployment = render_deployment("e2e-xxx", &pod());
        let labels = deployment.metadata.labels.unwrap();
        let label = |key: &str| labels.get(key).map(String::as_str);

        assert_eq!(label("harmony.io/device-id"), Some("vm-device-00"));
        assert_eq!(label("harmony.io/managed-by"), Some("harmony-fleet-deploy"));
    }
}