Some checks are pending
Run Check Script / check (pull_request) Waiting to run
The CRD previously accepted any string for `score.type`, so typos like `"pdoman"` or `"PodmnV0"` would be persisted by the apiserver and only surface on-device as agent-side deserialize warnings. That class of failure is distasteful and hard to debug. Replace the auto-derived schema for `ScorePayload` with a hand-rolled one that keeps the same visible shape but adds two apiserver-level guardrails: - `score.type` gets `minLength: 1` and an `x-kubernetes-validations` CEL rule requiring it to match `^[A-Za-z_][A-Za-z0-9_]*$` — a valid Rust identifier, since score variants *are* Rust struct names in `harmony::modules::podman::IotScore`. Message points operators at the concrete example `PodmanV0`. - `score.data` still carries only `x-kubernetes-preserve-unknown-fields: true`. The rule validates the discriminator's *shape*, not its *value*, so v0.3+ variants (OkdApplyV0, KubectlApplyV0) don't require an operator release — preserves ROADMAP §6.1's generic-router design. The `x-kubernetes-preserve-unknown-fields` extension stays scoped to `score.data` alone; every other field in the CRD has a strict schema, exactly one preserve-unknown-fields marker and exactly one validations block in the whole document. Smoke test extended: phase 2b applies a CR with `score.type: "has spaces"` and asserts the apiserver rejects it with the CEL message before the operator ever sees it. Positive phases (kubectl apply -> NATS KV put -> status observed -> delete -> KV key removed) still PASS end-to-end. Matches the `preserve_arbitrary` pattern used by ArgoCD (`Application.spec.source.helm.valuesObject`) and Flux (`HelmRelease.spec.values`), both of which similarly use narrow preserve-unknown-fields on a payload field without coupling the CRD to their variant catalog.
244 lines
9.0 KiB
Bash
Executable File
244 lines
9.0 KiB
Bash
Executable File
#!/usr/bin/env bash
# A1 smoke test — the end-to-end verification from
# ROADMAP/iot_platform/v0_walking_skeleton.md §9.A1.
#
#   Deployment CR ──apply──▶ operator ──KV put──▶ NATS JetStream
#                                                       │
#                                   nats kv get ◀───────┘
#
# Spins up a NATS server container plus a k3d cluster, runs the operator
# against them, applies a test CR, asserts the key appears in NATS KV,
# deletes the CR, and asserts the key disappears again. Everything is torn
# down by the cleanup trap.
#
# Host requirements:
#   - podman (rootless OK)
#   - cargo  (for building/running the operator)
#   - kubectl
#   - a k3d binary (defaults to Harmony's downloaded copy)

# Strict mode: abort on errors, unset variables, and mid-pipeline failures.
set -euo pipefail
# Resolve repository layout relative to this script's own location.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
REPO_ROOT="$(cd "$SCRIPT_DIR/../.." && pwd)"
OPERATOR_DIR="$REPO_ROOT/iot/iot-operator-v0"

# All knobs are overridable from the environment; defaults suit local runs.
K3D_BIN="${K3D_BIN:-$HOME/.local/share/harmony/k3d/k3d}"
CLUSTER_NAME="${CLUSTER_NAME:-iot-smoke}"

# NATS container + network settings.
NATS_CONTAINER="${NATS_CONTAINER:-iot-smoke-nats}"
NATS_NET_NAME="${NATS_NET_NAME:-iot-smoke-net}"
NATS_IMAGE="${NATS_IMAGE:-docker.io/library/nats:2.10-alpine}"
NATSBOX_IMAGE="${NATSBOX_IMAGE:-docker.io/natsio/nats-box:latest}"
NATS_PORT="${NATS_PORT:-4222}"

# Identity of the test Deployment CR.
TARGET_DEVICE="${TARGET_DEVICE:-pi-demo-01}"
DEPLOY_NAME="${DEPLOY_NAME:-hello-world}"
DEPLOY_NS="${DEPLOY_NS:-iot-demo}"

# Mutable run state, consumed by the cleanup trap.
OPERATOR_LOG="$(mktemp -t iot-operator.XXXXXX.log)"
OPERATOR_PID=""
KUBECONFIG_FILE=""
# log: print a blue-tagged progress line to stdout.
log() {
  printf '\033[1;34m[smoke]\033[0m %s\n' "$*"
}

# fail: print a red-tagged error to stderr and abort the whole script.
fail() {
  printf '\033[1;31m[smoke FAIL]\033[0m %s\n' "$*" >&2
  exit 1
}
# cleanup: EXIT/INT/TERM trap. Stops the operator, tears down the cluster and
# NATS container (unless KEEP=1), and dumps the operator log tail on failure.
# Preserves the original exit status of the script.
cleanup() {
  local rc=$?
  log "cleanup…"
  # Stop the background operator process if it is still alive.
  if [[ -n "$OPERATOR_PID" ]] && kill -0 "$OPERATOR_PID" 2>/dev/null; then
    kill "$OPERATOR_PID" 2>/dev/null || true
    wait "$OPERATOR_PID" 2>/dev/null || true
  fi
  if [[ "${KEEP:-0}" != "1" ]]; then
    "$K3D_BIN" cluster delete "$CLUSTER_NAME" >/dev/null 2>&1 || true
    podman rm -f "$NATS_CONTAINER" >/dev/null 2>&1 || true
    podman network rm "$NATS_NET_NAME" >/dev/null 2>&1 || true
    # BUGFIX: this used to be a bare `[[ -n … ]] && rm -f …` list. When
    # KUBECONFIG_FILE was still empty (a failure before the cluster was
    # created), the list returned non-zero, which under `set -e` could abort
    # the trap handler here — skipping the log-tail diagnostics below.
    if [[ -n "$KUBECONFIG_FILE" ]]; then
      rm -f "$KUBECONFIG_FILE"
    fi
  else
    log "KEEP=1 — leaving cluster '$CLUSTER_NAME' and container '$NATS_CONTAINER' running"
    log "KUBECONFIG=$KUBECONFIG_FILE"
  fi
  # On failure, keep the operator log and show its tail; otherwise discard it.
  if [[ $rc -ne 0 ]]; then
    log "operator log at $OPERATOR_LOG"
    echo "----- operator log tail -----"
    tail -n 60 "$OPERATOR_LOG" 2>/dev/null || true
  else
    rm -f "$OPERATOR_LOG"
  fi
  exit $rc
}
trap cleanup EXIT INT TERM
# require <tool>: abort via fail() unless <tool> resolves on PATH.
require() {
  command -v "$1" >/dev/null 2>&1 || fail "missing required tool: $1"
}
# Preflight: all host tools must be present before we start creating things.
for tool in podman cargo kubectl; do
  require "$tool"
done
[[ -x "$K3D_BIN" ]] || fail "k3d binary not executable at $K3D_BIN (set K3D_BIN=…)"
# natsbox <args…>: run a one-shot nats CLI container against the test server.
# BUGFIX: this previously dialed "nats://$NATS_CONTAINER:$NATS_PORT", but
# NATS_PORT is only the *host* side of the `-p "$NATS_PORT:4222"` publish
# mapping — inside the podman network the server always listens on 4222
# (the phase-1 readiness probe already hard-codes 4222). With NATS_PORT
# overridden, every natsbox call failed; always use the container port here.
natsbox() {
  podman run --rm --network "$NATS_NET_NAME" "$NATSBOX_IMAGE" \
    nats --server "nats://$NATS_CONTAINER:4222" "$@"
}
###############################################################################
# phase 1 — NATS
###############################################################################
log "phase 1: start NATS"
# Fresh network + container; any leftovers from a previous run are removed.
podman network exists "$NATS_NET_NAME" || podman network create "$NATS_NET_NAME" >/dev/null
podman rm -f "$NATS_CONTAINER" >/dev/null 2>&1 || true
podman run -d \
  --name "$NATS_CONTAINER" \
  --network "$NATS_NET_NAME" \
  -p "$NATS_PORT:4222" \
  "$NATS_IMAGE" -js >/dev/null

log "waiting for NATS"
# Poll for up to 30s; the probe dials the container-internal port 4222.
for _ in {1..30}; do
  if podman run --rm --network "$NATS_NET_NAME" "$NATSBOX_IMAGE" \
      nats --server "nats://$NATS_CONTAINER:4222" server check connection >/dev/null 2>&1; then
    break
  fi
  sleep 1
done
natsbox server check connection >/dev/null || fail "NATS never became ready"
###############################################################################
# phase 2 — k3d cluster + CRD
###############################################################################
log "phase 2: create k3d cluster '$CLUSTER_NAME'"
# Delete any stale cluster of the same name, then create a fresh one.
"$K3D_BIN" cluster delete "$CLUSTER_NAME" >/dev/null 2>&1 || true
"$K3D_BIN" cluster create "$CLUSTER_NAME" --wait --timeout 90s >/dev/null

# Export an isolated kubeconfig so we never touch the user's default context.
KUBECONFIG_FILE="$(mktemp -t iot-smoke-kubeconfig.XXXXXX)"
"$K3D_BIN" kubeconfig get "$CLUSTER_NAME" > "$KUBECONFIG_FILE"
export KUBECONFIG="$KUBECONFIG_FILE"

log "generate + apply CRD"
# The operator binary emits its own CRD; pipe it straight into kubectl.
( cd "$OPERATOR_DIR" && cargo run -q -- gen-crd ) | kubectl apply -f - >/dev/null
kubectl wait --for=condition=Established "crd/deployments.iot.nationtech.io" --timeout=30s >/dev/null

kubectl get ns "$DEPLOY_NS" >/dev/null 2>&1 || kubectl create namespace "$DEPLOY_NS" >/dev/null
###############################################################################
# phase 2b — CEL discriminator guardrail: an invalid score.type must be rejected
# by the apiserver (tests x-kubernetes-validations on spec.score)
###############################################################################
log "phase 2b: apiserver rejects invalid score.type"
# "has spaces" violates the ^[A-Za-z_][A-Za-z0-9_]*$ identifier rule.
BAD_CR=$(cat <<EOF
apiVersion: iot.nationtech.io/v1alpha1
kind: Deployment
metadata:
  name: bad-discriminator
  namespace: $DEPLOY_NS
spec:
  targetDevices: [$TARGET_DEVICE]
  score:
    type: "has spaces"
    data: {}
  rollout:
    strategy: Immediate
EOF
)
# The apply is expected to fail; capture its combined output for inspection.
BAD_OUT="$(echo "$BAD_CR" | kubectl apply -f - 2>&1 || true)"
if grep -q "must be a valid Rust identifier" <<<"$BAD_OUT"; then
  log "apiserver rejected invalid discriminator as expected"
else
  fail "expected CEL rejection for score.type='has spaces'; got: $BAD_OUT"
fi
# Belt-and-braces: make sure nothing was persisted
if kubectl -n "$DEPLOY_NS" get deployment.iot.nationtech.io bad-discriminator >/dev/null 2>&1; then
  kubectl -n "$DEPLOY_NS" delete deployment.iot.nationtech.io bad-discriminator >/dev/null 2>&1 || true
  fail "apiserver should have rejected 'bad-discriminator' but it was persisted"
fi
###############################################################################
# phase 3 — operator
###############################################################################
log "phase 3: start operator"
# Build in a subshell so the cwd change does not leak into the script.
( cd "$OPERATOR_DIR" && cargo build -q )
# Run the operator in the background, logging to a temp file the trap inspects.
NATS_URL="nats://127.0.0.1:$NATS_PORT" \
KV_BUCKET="desired-state" \
RUST_LOG="info,kube_runtime=warn" \
  "$REPO_ROOT/target/debug/iot-operator-v0" \
  >"$OPERATOR_LOG" 2>&1 &
OPERATOR_PID=$!
log "operator pid=$OPERATOR_PID (log: $OPERATOR_LOG)"

# Give the controller up to ~15s to come up; bail out early if it dies.
for _ in {1..30}; do
  if grep -q "starting Deployment controller" "$OPERATOR_LOG"; then break; fi
  if ! kill -0 "$OPERATOR_PID" 2>/dev/null; then fail "operator exited early"; fi
  sleep 0.5
done
grep -q "starting Deployment controller" "$OPERATOR_LOG" \
  || fail "operator never logged 'starting Deployment controller'"
grep -q "KV bucket ready" "$OPERATOR_LOG" \
  || fail "operator never confirmed KV bucket ready"
###############################################################################
# phase 4 — apply Deployment CR
###############################################################################
log "phase 4: apply Deployment CR"
cat <<EOF | kubectl apply -f - >/dev/null
apiVersion: iot.nationtech.io/v1alpha1
kind: Deployment
metadata:
  name: $DEPLOY_NAME
  namespace: $DEPLOY_NS
spec:
  targetDevices: [$TARGET_DEVICE]
  score:
    type: PodmanV0
    data:
      services:
        - name: hello
          image: docker.io/library/nginx:alpine
          ports: ["8080:80"]
  rollout:
    strategy: Immediate
EOF

log "wait for KV key $TARGET_DEVICE.$DEPLOY_NAME"
# The operator should mirror the CR into NATS KV within ~30s.
KV_VALUE=""
for _ in {1..30}; do
  if KV_VALUE="$(natsbox kv get desired-state "$TARGET_DEVICE.$DEPLOY_NAME" --raw 2>/dev/null)"; then
    [[ -n "$KV_VALUE" ]] && break
  fi
  sleep 1
done
[[ -n "$KV_VALUE" ]] || fail "KV key never appeared"
# Spot-check the serialized payload: discriminator and the nginx image.
echo "$KV_VALUE" | grep -q '"type":"PodmanV0"' \
  || fail "KV value missing \"type\":\"PodmanV0\" discriminator — got: $KV_VALUE"
echo "$KV_VALUE" | grep -q '"image":"docker.io/library/nginx:alpine"' \
  || fail "KV value missing nginx image — got: $KV_VALUE"

log "wait for .status.observedScoreString"
# The operator echoes what it wrote to KV back into the CR status.
OBSERVED=""
for _ in {1..30}; do
  OBSERVED="$(kubectl -n "$DEPLOY_NS" get deployment.iot.nationtech.io "$DEPLOY_NAME" \
    -o jsonpath='{.status.observedScoreString}' 2>/dev/null || true)"
  [[ -n "$OBSERVED" ]] && break
  sleep 1
done
[[ -n "$OBSERVED" ]] || fail ".status.observedScoreString never set"
[[ "$OBSERVED" == "$KV_VALUE" ]] \
  || fail "observedScoreString does not match KV value:\n status=$OBSERVED\n kv =$KV_VALUE"
###############################################################################
# phase 5 — delete CR, expect cleanup via finalizer
###############################################################################
log "phase 5: delete Deployment CR — finalizer should remove KV key"
kubectl -n "$DEPLOY_NS" delete deployment.iot.nationtech.io "$DEPLOY_NAME" --wait=true >/dev/null

log "wait for KV key removal"
# Poll until the finalizer has deleted the key (up to ~30s).
for _ in {1..30}; do
  if ! natsbox kv get desired-state "$TARGET_DEVICE.$DEPLOY_NAME" --raw >/dev/null 2>&1; then
    log "KV key gone"
    break
  fi
  sleep 1
done
# Final authoritative check outside the polling loop.
if natsbox kv get desired-state "$TARGET_DEVICE.$DEPLOY_NAME" --raw >/dev/null 2>&1; then
  fail "KV key still present after CR delete"
fi

log "PASS"