The previous e2e harness hand-rolled k8s manifests in `stack.rs`,
bypassing the Score-Topology-Interpret machinery that harmony exists
to provide. This commit:
1. **ADR-023** codifies the rules: deploy with Scores (not
manifests), e2e uses the same Scores as production, one Score
per component, deploy blocks on smoke-test success, deploy logic
lives in `*-deploy` crates, topologies are compile-time,
thiserror over anyhow. CLAUDE.md mirrors the principles.
2. **New `fleet/harmony-fleet-deploy` crate** is the canonical home
for fleet-component Scores:
- `FleetOperatorScore` + helm-chart generator + `install_crds`
moved out of `harmony::modules::fleet::operator` (they should
never have lived in `harmony` core). `FleetServerScore`
(composite of NATS + operator + Zitadel + callout) moved too.
- New `FleetNatsScore` (a preset over `NatsHelmChartScore` with
fleet's required values; v1 supports `UserPass` auth, while callout
mode is reserved on the public API for PR 1.5).
- New `FleetAgentScore` with `FleetAgentTarget::Pod`; the `Vm`
target is a future variant that will absorb `FleetDeviceSetupScore`.
- `harmony-fleet-deploy` binary built on the existing
`harmony_cli` crate — no new CLI scaffolding.
3. **Operator runtime binary trimmed**: `Install` and `Chart`
subcommands removed; both jobs now belong to
`harmony-fleet-deploy`. The runtime binary becomes leaner.
4. **E2E harness rewritten** as a thin Score composer:
`harmony-fleet-e2e/src/stack.rs` deploys the stack via
`FleetNatsScore` + `FleetAgentScore`. The inline NATS manifest
factory and the bespoke agent Pod renderer are gone.
- Bring-up runs once per test binary via `shared_stack` +
`tokio::sync::OnceCell` (matches the `fleet_e2e_demo` pattern).
- Stale `e2e-*` namespaces from prior runs get pruned at
startup so the leaks the OnceCell creates don't compound.
5. **`thiserror` for the agent's `CommandServer`** — replaces the
anyhow-based surface with typed `CommandError` /
`CommandServerError`.
6. **Memory** captures eight load-bearing principles (saved to
`~/.claude/projects/.../memory/`) so future sessions don't drift
back into manifest-handrolling.
Verified: `cargo test -p harmony-fleet-e2e --test ping` green
end-to-end against k3d in 25s warm.
372 lines
12 KiB
Rust
372 lines
12 KiB
Rust
//! `FleetAgentScore` — declarative deploy of a `harmony-fleet-agent`
|
|
//! instance.
|
|
//!
|
|
//! v1 supports the `Pod` target only — the in-cluster Pod form-
|
|
//! factor the e2e harness deploys. Production today uses
|
|
//! `harmony::modules::fleet::FleetDeviceSetupScore` (VM/SSH/Pi
|
|
//! target) which is slated to move into this crate as
|
|
//! `FleetAgentTarget::Vm` in a follow-up; for now the public
|
|
//! [`FleetAgentTarget`] enum is single-variant so the migration
|
|
//! is additive.
|
|
//!
|
|
//! Composition: the `Pod` target Score interprets to two
|
|
//! [`K8sResourceScore`] applies in sequence — a `ConfigMap` carrying
|
|
//! `/etc/fleet-agent/config.toml`, then a `Deployment` running the
|
|
//! agent image and mounting the ConfigMap. No handrolled manifests
|
|
//! in test code; the harness composes this Score against a test
|
|
//! Topology.
|
|
|
|
use std::collections::BTreeMap;
|
|
|
|
use async_trait::async_trait;
|
|
use harmony::data::Version;
|
|
use harmony::interpret::{Interpret, InterpretError, InterpretName, InterpretStatus, Outcome};
|
|
use harmony::inventory::Inventory;
|
|
use harmony::modules::k8s::resource::K8sResourceScore;
|
|
use harmony::score::Score;
|
|
use harmony::topology::{K8sclient, Topology};
|
|
use harmony_types::id::Id;
|
|
use k8s_openapi::api::apps::v1::{Deployment, DeploymentSpec};
|
|
use k8s_openapi::api::core::v1::{
|
|
ConfigMap, ConfigMapVolumeSource, Container, EnvVar, PodSpec, PodTemplateSpec, Volume,
|
|
VolumeMount,
|
|
};
|
|
use k8s_openapi::apimachinery::pkg::apis::meta::v1::LabelSelector;
|
|
use kube::api::ObjectMeta;
|
|
use serde::Serialize;
|
|
use thiserror::Error;
|
|
|
|
/// Which form factor to deploy the agent in.
|
|
///
|
|
/// `Pod`: in-cluster Kubernetes Pod, used by the e2e harness and by
|
|
/// production deploys where agents live inside the same cluster as
|
|
/// the operator.
|
|
///
|
|
/// Future: `Vm { ssh: SshCredentials, … }` and `Bare { … }`. Today
|
|
/// the VM/SSH/RPi path is handled by `FleetDeviceSetupScore` in
|
|
/// `harmony::modules::fleet`; that work is in scope for a follow-up
|
|
/// PR that finishes the migration to this crate.
|
|
#[derive(Debug, Clone, Serialize)]
|
|
pub enum FleetAgentTarget {
|
|
Pod(PodTarget),
|
|
}
|
|
|
|
/// Concrete inputs for the `Pod` target.
|
|
#[derive(Debug, Clone, Serialize)]
|
|
pub struct PodTarget {
|
|
/// Cross-boundary device identity the agent publishes under.
|
|
pub device_id: String,
|
|
/// Container image (`localhost/harmony-fleet-agent:e2e`, an
|
|
/// internal registry path, …).
|
|
pub image: String,
|
|
/// `IfNotPresent` for k3d-sideloaded images, `Always` for
|
|
/// registry-backed deploys.
|
|
pub image_pull_policy: String,
|
|
/// In-cluster NATS URL the agent connects to. Built from a
|
|
/// `FleetNatsScore::in_cluster_url()` typically.
|
|
pub nats_url: String,
|
|
/// Static NATS user/pass — the v1 `UserPass` auth mode. Mirrors
|
|
/// `FleetNatsAuth::UserPass`. Will be replaced by Zitadel JWT
|
|
/// credentials once the callout layer joins this crate.
|
|
pub nats_user: String,
|
|
pub nats_pass: String,
|
|
/// `RUST_LOG` env value passed to the agent process.
|
|
pub log_level: String,
|
|
/// Routing labels published by the agent in every DeviceInfo
|
|
/// heartbeat. The operator reflects them onto the `Device` CR
|
|
/// for selector-based targeting.
|
|
pub labels: BTreeMap<String, String>,
|
|
}
|
|
|
|
#[derive(Debug, Clone, Serialize)]
|
|
pub struct FleetAgentScore {
|
|
pub namespace: String,
|
|
pub target: FleetAgentTarget,
|
|
}
|
|
|
|
impl FleetAgentScore {
|
|
pub fn pod(namespace: impl Into<String>, target: PodTarget) -> Self {
|
|
Self {
|
|
namespace: namespace.into(),
|
|
target: FleetAgentTarget::Pod(target),
|
|
}
|
|
}
|
|
|
|
/// Resource name pattern used for both the ConfigMap and the
|
|
/// Deployment. Distinct per device id so multiple devices coexist
|
|
/// in one namespace.
|
|
pub fn resource_name(device_id: &str) -> String {
|
|
format!("fleet-agent-{device_id}")
|
|
}
|
|
}
|
|
|
|
#[derive(Debug, Error)]
|
|
pub enum FleetAgentError {
|
|
#[error("applying ConfigMap for agent {device_id}: {source}")]
|
|
ConfigMap {
|
|
device_id: String,
|
|
#[source]
|
|
source: InterpretError,
|
|
},
|
|
#[error("applying Deployment for agent {device_id}: {source}")]
|
|
Deployment {
|
|
device_id: String,
|
|
#[source]
|
|
source: InterpretError,
|
|
},
|
|
}
|
|
|
|
impl From<FleetAgentError> for InterpretError {
|
|
fn from(value: FleetAgentError) -> Self {
|
|
InterpretError::new(value.to_string())
|
|
}
|
|
}
|
|
|
|
impl<T: Topology + K8sclient> Score<T> for FleetAgentScore {
|
|
fn create_interpret(&self) -> Box<dyn Interpret<T>> {
|
|
Box::new(FleetAgentInterpret {
|
|
score: self.clone(),
|
|
})
|
|
}
|
|
|
|
fn name(&self) -> String {
|
|
match &self.target {
|
|
FleetAgentTarget::Pod(p) => format!("FleetAgentScore(pod, {})", p.device_id),
|
|
}
|
|
}
|
|
}
|
|
|
|
#[derive(Debug)]
|
|
struct FleetAgentInterpret {
|
|
score: FleetAgentScore,
|
|
}
|
|
|
|
#[async_trait]
|
|
impl<T: Topology + K8sclient> Interpret<T> for FleetAgentInterpret {
|
|
async fn execute(
|
|
&self,
|
|
inventory: &Inventory,
|
|
topology: &T,
|
|
) -> Result<Outcome, InterpretError> {
|
|
match &self.score.target {
|
|
FleetAgentTarget::Pod(pod) => {
|
|
let cm = render_config_map(&self.score.namespace, pod);
|
|
K8sResourceScore::single(cm, Some(self.score.namespace.clone()))
|
|
.interpret(inventory, topology)
|
|
.await
|
|
.map_err(|e| FleetAgentError::ConfigMap {
|
|
device_id: pod.device_id.clone(),
|
|
source: e,
|
|
})?;
|
|
|
|
let dep = render_deployment(&self.score.namespace, pod);
|
|
let outcome = K8sResourceScore::single(dep, Some(self.score.namespace.clone()))
|
|
.interpret(inventory, topology)
|
|
.await
|
|
.map_err(|e| FleetAgentError::Deployment {
|
|
device_id: pod.device_id.clone(),
|
|
source: e,
|
|
})?;
|
|
|
|
Ok(Outcome::success_with_details(
|
|
outcome.message,
|
|
vec![
|
|
format!("agent device_id: {}", pod.device_id),
|
|
format!("agent namespace: {}", self.score.namespace),
|
|
format!("agent image: {}", pod.image),
|
|
],
|
|
))
|
|
}
|
|
}
|
|
}
|
|
|
|
fn get_name(&self) -> InterpretName {
|
|
InterpretName::Custom("FleetAgentInterpret")
|
|
}
|
|
|
|
fn get_version(&self) -> Version {
|
|
Version::from("0.1.0").expect("static version literal")
|
|
}
|
|
|
|
fn get_status(&self) -> InterpretStatus {
|
|
InterpretStatus::QUEUED
|
|
}
|
|
|
|
fn get_children(&self) -> Vec<Id> {
|
|
vec![]
|
|
}
|
|
}
|
|
|
|
// ---- pod manifest rendering ------------------------------------------------
|
|
|
|
/// Labels stamped on every resource rendered for the agent of
/// `device_id`: the app name, a per-device instance name, the managing
/// tool, and the raw device id.
fn common_labels(device_id: &str) -> BTreeMap<String, String> {
    let instance = format!("agent-{device_id}");

    let mut labels = BTreeMap::new();
    labels.insert(
        String::from("app.kubernetes.io/name"),
        String::from("harmony-fleet-agent"),
    );
    labels.insert(String::from("app.kubernetes.io/instance"), instance);
    labels.insert(
        String::from("harmony.io/managed-by"),
        String::from("harmony-fleet-deploy"),
    );
    labels.insert(
        String::from("harmony.io/device-id"),
        String::from(device_id),
    );
    labels
}
|
|
|
|
fn render_config_map(namespace: &str, pod: &PodTarget) -> ConfigMap {
|
|
let name = FleetAgentScore::resource_name(&pod.device_id);
|
|
let labels_block = if pod.labels.is_empty() {
|
|
String::new()
|
|
} else {
|
|
let mut s = String::from("\n[labels]\n");
|
|
for (k, v) in &pod.labels {
|
|
s.push_str(&format!("{k} = \"{v}\"\n"));
|
|
}
|
|
s
|
|
};
|
|
let toml = format!(
|
|
r#"[agent]
|
|
device_id = "{device_id}"
|
|
runtime_enabled = false
|
|
|
|
[credentials]
|
|
type = "toml-shared"
|
|
nats_user = "{nats_user}"
|
|
nats_pass = "{nats_pass}"
|
|
|
|
[nats]
|
|
urls = ["{nats_url}"]
|
|
{labels_block}"#,
|
|
device_id = pod.device_id,
|
|
nats_user = pod.nats_user,
|
|
nats_pass = pod.nats_pass,
|
|
nats_url = pod.nats_url,
|
|
);
|
|
ConfigMap {
|
|
metadata: ObjectMeta {
|
|
name: Some(name),
|
|
namespace: Some(namespace.to_string()),
|
|
labels: Some(common_labels(&pod.device_id)),
|
|
..Default::default()
|
|
},
|
|
data: Some(BTreeMap::from([("config.toml".to_string(), toml)])),
|
|
..Default::default()
|
|
}
|
|
}
|
|
|
|
fn render_deployment(namespace: &str, pod: &PodTarget) -> Deployment {
|
|
let name = FleetAgentScore::resource_name(&pod.device_id);
|
|
let labels = common_labels(&pod.device_id);
|
|
|
|
let container = Container {
|
|
name: "agent".to_string(),
|
|
image: Some(pod.image.clone()),
|
|
image_pull_policy: Some(pod.image_pull_policy.clone()),
|
|
args: Some(vec![
|
|
"--config".to_string(),
|
|
"/etc/fleet-agent/config.toml".to_string(),
|
|
]),
|
|
env: Some(vec![EnvVar {
|
|
name: "RUST_LOG".to_string(),
|
|
value: Some(pod.log_level.clone()),
|
|
..Default::default()
|
|
}]),
|
|
volume_mounts: Some(vec![VolumeMount {
|
|
name: "config".to_string(),
|
|
mount_path: "/etc/fleet-agent".to_string(),
|
|
read_only: Some(true),
|
|
..Default::default()
|
|
}]),
|
|
..Default::default()
|
|
};
|
|
|
|
let volume = Volume {
|
|
name: "config".to_string(),
|
|
config_map: Some(ConfigMapVolumeSource {
|
|
name: name.clone(),
|
|
..Default::default()
|
|
}),
|
|
..Default::default()
|
|
};
|
|
|
|
let selector_labels: BTreeMap<String, String> = labels
|
|
.iter()
|
|
.filter(|(k, _)| *k == "app.kubernetes.io/instance")
|
|
.map(|(k, v)| (k.clone(), v.clone()))
|
|
.collect();
|
|
|
|
Deployment {
|
|
metadata: ObjectMeta {
|
|
name: Some(name),
|
|
namespace: Some(namespace.to_string()),
|
|
labels: Some(labels.clone()),
|
|
..Default::default()
|
|
},
|
|
spec: Some(DeploymentSpec {
|
|
replicas: Some(1),
|
|
selector: LabelSelector {
|
|
match_labels: Some(selector_labels),
|
|
..Default::default()
|
|
},
|
|
template: PodTemplateSpec {
|
|
metadata: Some(ObjectMeta {
|
|
labels: Some(labels),
|
|
..Default::default()
|
|
}),
|
|
spec: Some(PodSpec {
|
|
containers: vec![container],
|
|
volumes: Some(vec![volume]),
|
|
..Default::default()
|
|
}),
|
|
},
|
|
..Default::default()
|
|
}),
|
|
..Default::default()
|
|
}
|
|
}
|
|
|
|
#[cfg(test)]
mod tests {
    use super::*;

    /// Fixture: a `PodTarget` shaped like the one the e2e harness uses,
    /// with a single routing label.
    fn sample_pod() -> PodTarget {
        let mut labels = BTreeMap::new();
        labels.insert("e2e".to_string(), "true".to_string());

        PodTarget {
            device_id: "vm-device-00".to_string(),
            image: "localhost/harmony-fleet-agent:e2e".to_string(),
            image_pull_policy: "IfNotPresent".to_string(),
            nats_url: "nats://fleet-nats.e2e-xxx.svc.cluster.local:4222".to_string(),
            nats_user: "device".to_string(),
            nats_pass: "dev".to_string(),
            log_level: "info".to_string(),
            labels,
        }
    }

    /// The rendered config must disable the runtime and carry the device
    /// id and routing labels.
    #[test]
    fn configmap_disables_runtime_so_pod_starts_without_podman() {
        let mut data = render_config_map("e2e-xxx", &sample_pod()).data.unwrap();
        let toml = data.remove("config.toml").unwrap();

        let expected_fragments = [
            "runtime_enabled = false",
            r#"device_id = "vm-device-00""#,
            r#"e2e = "true""#,
        ];
        for fragment in expected_fragments {
            assert!(toml.contains(fragment));
        }
    }

    /// The Deployment's own labels must expose the device id and the
    /// managing tool.
    #[test]
    fn deployment_label_carries_device_id_and_managed_by() {
        let deployment = render_deployment("e2e-xxx", &sample_pod());
        let labels = deployment.metadata.labels.unwrap();

        let device_id = labels.get("harmony.io/device-id").map(String::as_str);
        assert_eq!(device_id, Some("vm-device-00"));

        let managed_by = labels.get("harmony.io/managed-by").map(String::as_str);
        assert_eq!(managed_by, Some("harmony-fleet-deploy"));
    }
}
|