Some checks failed
Run Check Script / check (pull_request) Failing after 2m7s
RealFleetService implements FleetService against the same sources the reconcile loop owns, read-only:
- Device/Deployment CRs (kube) for the registry, desired intent, and the aggregator-maintained .status.aggregate (target/healthy/failing/pending counts, deployment status).
- device-heartbeat KV → last ping + device status (Stale after 90s).
- device-state KV → per-device phase → Failing/Pending, primary deployment.

Status, dashboard counts, and alerts (one critical per failing deployment, one warning per stale device; acks held in-memory) are all derived from live state. Deployment version is the first service's image tag.

blacklist_device patches a label on the Device CR; run_command stays a seam (needs agent-side transport).

serve_web now connects NATS + kube and builds RealFleetService when not --mock (the bail is gone); --mock still uses the seeded MockFleetService for offline UI work.

Reads are on-demand per request — fine at staging scale, a cache can follow.

Unit tests cover status derivation, primary-deployment selection, version parsing, and alert derivation.
158 lines
5.0 KiB
Rust
158 lines
5.0 KiB
Rust
pub mod mock;
|
|
pub mod real;
|
|
|
|
use async_trait::async_trait;
|
|
use chrono::{DateTime, Utc};
|
|
use serde::Serialize;
|
|
|
|
pub use harmony_reconciler_contracts::InventorySnapshot;
|
|
|
|
#[async_trait]
|
|
pub trait FleetService: Send + Sync + 'static {
|
|
async fn dashboard_detail(&self) -> anyhow::Result<DashboardDetail>;
|
|
async fn list_devices(&self) -> anyhow::Result<Vec<DeviceDetail>>;
|
|
async fn get_device(&self, id: &str) -> anyhow::Result<Option<DeviceDetail>>;
|
|
async fn list_deployments(&self) -> anyhow::Result<Vec<DeploymentDetail>>;
|
|
async fn get_deployment(&self, name: &str) -> anyhow::Result<Option<DeploymentDetail>>;
|
|
async fn get_deployment_devices(&self, name: &str) -> anyhow::Result<Vec<DeviceDetail>>;
|
|
async fn blacklist_device(&self, id: &str) -> anyhow::Result<DeviceDetail>;
|
|
async fn list_alerts(&self) -> anyhow::Result<Vec<Alert>>;
|
|
async fn ack_alert(&self, id: &str) -> anyhow::Result<bool>;
|
|
async fn filtered_devices(
|
|
&self,
|
|
status: Option<DeviceStatus>,
|
|
deployment: Option<String>,
|
|
region: Option<String>,
|
|
search: Option<String>,
|
|
) -> anyhow::Result<Vec<DeviceDetail>>;
|
|
/// Send a one-shot shell command to a device for administrative
|
|
/// access. Returns the (eventual) output; streaming back live is a
|
|
/// later refinement. The device round-trip is not wired yet.
|
|
async fn run_command(&self, device_id: &str, command: &str) -> anyhow::Result<String>;
|
|
}
|
|
|
|
// ── Device ─────────────────────────────────────────────────────────────

#[derive(Debug, Clone, Serialize)]
|
|
pub struct DeviceDetail {
|
|
pub id: String,
|
|
pub status: DeviceStatus,
|
|
pub last_seen: DateTime<Utc>,
|
|
pub minutes_ago: i64,
|
|
pub deployment: Option<String>,
|
|
pub region: String,
|
|
pub tags: Vec<String>,
|
|
/// Hardware/OS facts from the agent. `None` until the first
|
|
/// post-enrollment publish (mirrors `DeviceInfo.inventory`).
|
|
pub inventory: Option<InventorySnapshot>,
|
|
}
|
|
|
|
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize)]
|
|
#[serde(rename_all = "kebab-case")]
|
|
pub enum DeviceStatus {
|
|
Healthy,
|
|
Pending,
|
|
Stale,
|
|
Failing,
|
|
Blacklisted,
|
|
Unknown,
|
|
}
|
|
|
|
impl DeviceStatus {
|
|
pub fn label(self) -> &'static str {
|
|
match self {
|
|
Self::Healthy => "healthy",
|
|
Self::Pending => "pending",
|
|
Self::Stale => "stale",
|
|
Self::Failing => "failing",
|
|
Self::Blacklisted => "blacklisted",
|
|
Self::Unknown => "unknown",
|
|
}
|
|
}
|
|
}
|
|
|
|
// ── Deployment ─────────────────────────────────────────────────────────

#[derive(Debug, Clone, Serialize)]
|
|
pub struct DeploymentDetail {
|
|
pub name: String,
|
|
pub version: String,
|
|
pub status: DeploymentStatus,
|
|
pub target: u32,
|
|
pub healthy: u32,
|
|
pub failing: u32,
|
|
pub pending: u32,
|
|
pub updated_at: String,
|
|
}
|
|
|
|
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize)]
|
|
#[serde(rename_all = "kebab-case")]
|
|
pub enum DeploymentStatus {
|
|
Active,
|
|
Rolling,
|
|
Failing,
|
|
Paused,
|
|
}
|
|
|
|
impl DeploymentStatus {
|
|
pub fn label(self) -> &'static str {
|
|
match self {
|
|
Self::Active => "active",
|
|
Self::Rolling => "rolling",
|
|
Self::Failing => "failing",
|
|
Self::Paused => "paused",
|
|
}
|
|
}
|
|
}
|
|
|
|
// ── Dashboard ──────────────────────────────────────────────────────────

#[derive(Debug, Clone, Serialize)]
|
|
pub struct DashboardDetail {
|
|
pub devices_total: u32,
|
|
pub devices_healthy: u32,
|
|
pub devices_pending: u32,
|
|
pub devices_failing: u32,
|
|
pub devices_stale: u32,
|
|
pub devices_blacklisted: u32,
|
|
pub devices_unknown: u32,
|
|
pub deployments_total: usize,
|
|
pub health_pct: u32,
|
|
pub attention_devices: Vec<DeviceDetail>,
|
|
pub top_deployments: Vec<DeploymentDetail>,
|
|
pub active_alerts: Vec<Alert>,
|
|
pub rolling_count: usize,
|
|
pub failing_count: usize,
|
|
}
|
|
|
|
// ── Alert ──────────────────────────────────────────────────────────────

#[derive(Debug, Clone, Serialize)]
|
|
pub struct Alert {
|
|
pub id: String,
|
|
pub severity: AlertSeverity,
|
|
pub title: String,
|
|
pub deployment: Option<String>,
|
|
pub device: Option<String>,
|
|
pub at: String,
|
|
pub acked: bool,
|
|
}
|
|
|
|
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize)]
|
|
#[serde(rename_all = "kebab-case")]
|
|
pub enum AlertSeverity {
|
|
Critical,
|
|
Warning,
|
|
Info,
|
|
}
|
|
|
|
impl AlertSeverity {
|
|
pub fn label(self) -> &'static str {
|
|
match self {
|
|
Self::Critical => "critical",
|
|
Self::Warning => "warning",
|
|
Self::Info => "info",
|
|
}
|
|
}
|
|
}
|