Files
speech-to-text/setup.sh

194 lines
7.5 KiB
Bash

#!/usr/bin/env bash
#
# setup.sh — one-time bootstrap for the parakeet ASR backend on Arch Linux.
#
# This installs/locates everything the parakeet Go ASR backend needs:
# - ONNX Runtime shared library (the only hard runtime dep)
# - ffmpeg (so non-WAV uploads: m4a/ogg/mp3/... get transcoded)
# - the `parakeet` Go server binary (achetronic/parakeet)
# - the Parakeet TDT 0.6B int8 ONNX model files
#
# It is idempotent: re-running skips anything already in place. The one
# exception is the generated environment file, which is rewritten on every run.
#
# Nothing here requires root except the package installs, which use sudo.
# Strict mode: stop on command failure, on use of an unset variable, and on a
# failure in any stage of a pipeline.
set -euo pipefail

# Install root; export PREFIX before running to relocate everything.
: "${PREFIX:=$HOME/.local/share/speech-to-text}"
# Which parakeet server release to fetch ("latest" or a specific value).
: "${PARAKEET_VERSION:=latest}"

MODELS_DIR="$PREFIX/models"
BIN_DIR="$PREFIX/bin"

# Directories in which Handy may keep its downloaded models. They are probed
# in this order so a model Handy already pulled can be reused instead of
# downloaded again.
HANDY_MODEL_CANDIDATES=()
for _handy_base in \
  "$HOME/.local/share/com.pais.handy" \
  "$HOME/.local/share/handy" \
  "$HOME/.config/handy" \
  "$HOME/.cache/handy"; do
  HANDY_MODEL_CANDIDATES+=("$_handy_base/models")
done
unset _handy_base
# Console helpers. Each joins its arguments into a single line printed behind
# a coloured tag.
#   log  — progress message, written to stdout.
#   warn — non-fatal problem, written to stderr so it stays visible when
#          stdout is redirected or captured.
#   die  — fatal problem, written to stderr; then exits with status 1.
log() { printf '\033[1;36m==>\033[0m %s\n' "$*"; }
warn() { printf '\033[1;33m[warn]\033[0m %s\n' "$*" >&2; }
die() {
  printf '\033[1;31m[err]\033[0m %s\n' "$*" >&2
  exit 1
}
# Create the install tree up front; later steps write into both directories.
mkdir -p "$BIN_DIR" "$MODELS_DIR"

# ---------------------------------------------------------------------------
# 1. System packages: onnxruntime + ffmpeg
# ---------------------------------------------------------------------------
log "Checking system dependencies (onnxruntime, ffmpeg)…"

# Install ffmpeg through pacman only when no ffmpeg executable is on PATH.
if command -v ffmpeg >/dev/null 2>&1; then
  log "ffmpeg present: $(command -v ffmpeg)"
else
  log "Installing ffmpeg via pacman…"
  sudo pacman -S --needed --noconfirm ffmpeg
fi
# ONNX Runtime: prefer the system lib; otherwise drop a copy into PREFIX.
#
# find_ort — print the path of an ONNX Runtime shared library, if one exists.
# Globals:  PREFIX (read)
# Outputs:  the first existing path among /usr/lib, /usr/local/lib and
#           $PREFIX/lib; failing that, the last field of the first
#           libonnxruntime.so entry listed by `ldconfig -p`. Prints nothing
#           when no library is found.
# Returns:  0 when one of the fixed paths exists; otherwise the status of the
#           ldconfig/awk pipeline (callers append `|| true`).
find_ort() {
  # Keep the loop variable local so calling this function never clobbers a
  # same-named variable in the caller's scope.
  local candidate
  for candidate in \
    /usr/lib/libonnxruntime.so \
    /usr/local/lib/libonnxruntime.so \
    "$PREFIX/lib/libonnxruntime.so"; do
    if [ -e "$candidate" ]; then
      printf '%s\n' "$candidate"
      return 0
    fi
  done
  # Last resort: look the library up in the dynamic linker cache. ldconfig's
  # own error output is discarded, e.g. when the tool is not installed.
  ldconfig -p 2>/dev/null | awk '/libonnxruntime\.so/{print $NF; exit}'
}
# Resolve the library path. When nothing is installed yet and yay is available,
# try the packaged build, then look again. A failed yay run is only a warning:
# the manual download further down acts as the fallback.
ORT_LIB="$(find_ort || true)"
if [ -z "${ORT_LIB:-}" ] && command -v yay >/dev/null 2>&1; then
  log "Installing onnxruntime from AUR via yay…"
  if ! yay -S --needed --noconfirm onnxruntime; then
    warn "yay install failed; falling back to manual download"
  fi
  ORT_LIB="$(find_ort || true)"
fi
# Fallback when no system or packaged library was found: unpack the prebuilt
# release from the microsoft/onnxruntime GitHub releases into $PREFIX/lib.
# The release defaults to 1.17.0 and can be overridden with ORT_VERSION.
ORT_VERSION="${ORT_VERSION:-1.17.0}"
if [ -z "${ORT_LIB:-}" ]; then
  log "Downloading ONNX Runtime ${ORT_VERSION} into $PREFIX/lib …"
  # Choose the release asset before creating anything on disk, so an
  # unsupported machine exits without leaving a scratch directory behind.
  arch="$(uname -m)"
  case "$arch" in
    x86_64) ort_pkg="onnxruntime-linux-x64-${ORT_VERSION}" ;;
    aarch64) ort_pkg="onnxruntime-linux-aarch64-${ORT_VERSION}" ;;
    *) die "Unsupported arch '$arch' for the prebuilt ONNX Runtime; install it via your package manager." ;;
  esac
  mkdir -p "$PREFIX/lib"
  tmp="$(mktemp -d)"
  # Under `set -e` a failed download or extraction exits the script at once;
  # the EXIT trap removes the scratch directory on that path as well.
  trap 'rm -rf -- "$tmp"' EXIT
  curl -fL -o "$tmp/ort.tgz" \
    "https://github.com/microsoft/onnxruntime/releases/download/v${ORT_VERSION}/${ort_pkg}.tgz"
  tar -xzf "$tmp/ort.tgz" -C "$tmp"
  # -a copies the whole lib/ tree: symlinks in the archive stay symlinks
  # instead of being duplicated, and subdirectories do not make cp fail.
  cp -a "$tmp/$ort_pkg"/lib/. "$PREFIX/lib/"
  rm -rf -- "$tmp"
  trap - EXIT
  ORT_LIB="$PREFIX/lib/libonnxruntime.so"
fi
log "ONNX Runtime: $ORT_LIB"
# ---------------------------------------------------------------------------
# 2. parakeet Go server binary
# ---------------------------------------------------------------------------
# Download the prebuilt server for this CPU unless an executable already
# exists at $BIN_DIR/parakeet.
if [ ! -x "$BIN_DIR/parakeet" ]; then
  log "Fetching parakeet server binary ($PARAKEET_VERSION)…"
  arch="$(uname -m)"
  case "$arch" in
    x86_64) asset="parakeet-linux-amd64" ;;
    aarch64) asset="parakeet-linux-arm64" ;;
    *) die "No prebuilt parakeet binary for '$arch'. Build from source: https://github.com/achetronic/parakeet" ;;
  esac
  # GitHub serves release assets under two different URL shapes:
  #   newest release : <repo>/releases/latest/download/<asset>
  #   specific tag   : <repo>/releases/download/<tag>/<asset>
  releases="https://github.com/achetronic/parakeet/releases"
  if [ "$PARAKEET_VERSION" = "latest" ]; then
    url="$releases/latest/download/${asset}"
  else
    url="$releases/download/${PARAKEET_VERSION}/${asset}"
  fi
  # Download next to the final path and rename only after success, so a
  # failed or interrupted transfer never leaves a partial file at
  # $BIN_DIR/parakeet.
  part="$BIN_DIR/parakeet.part"
  if ! curl -fL -o "$part" "$url"; then
    rm -f -- "$part"
    die "Could not download the parakeet binary from $url"
  fi
  chmod +x "$part"
  mv -f -- "$part" "$BIN_DIR/parakeet"
else
  log "parakeet binary already present: $BIN_DIR/parakeet"
fi
# ---------------------------------------------------------------------------
# 3. Models — reuse Handy's if compatible, else download the identical int8 set
# ---------------------------------------------------------------------------
# The Go server expects these filenames in MODELS_DIR:
REQUIRED=(config.json vocab.txt encoder-model.int8.onnx decoder_joint-model.int8.onnx)

# models_complete — check that every REQUIRED file is present.
# Globals:  REQUIRED, MODELS_DIR (read)
# Returns:  0 when each name exists in MODELS_DIR, 1 at the first missing one.
#           A dangling symlink counts as missing because -e follows links.
models_complete() {
  # Local loop variable: the check must not overwrite a caller's variable.
  local name
  for name in "${REQUIRED[@]}"; do
    [ -e "$MODELS_DIR/$name" ] || return 1
  done
  return 0
}
# Populate MODELS_DIR unless it is already complete. Order of preference:
#   1. symlink matching files out of an existing Handy model directory;
#   2. download whatever is still missing from Hugging Face.
if models_complete; then
  log "Models already in place at $MODELS_DIR"
else
  # Try to reuse Handy's downloaded model by symlinking matching files.
  reused=0
  for cand in "${HANDY_MODEL_CANDIDATES[@]}"; do
    [ -d "$cand" ] || continue
    log "Found a Handy model directory: $cand"
    log "Inspecting it for Parakeet ONNX files…"
    # Informational listing only; a failure here must not abort the setup.
    ls -la "$cand" || true
    # Heuristic match: encoder + decoder onnx + vocab. Handy's exact filenames
    # may differ; we link any clear matches and report what's missing.
    # `|| true` keeps pipefail from aborting when find errors out or head
    # closes the pipe early.
    enc="$(find "$cand" -maxdepth 2 -iname '*encoder*int8*.onnx' | head -n1 || true)"
    dec="$(find "$cand" -maxdepth 2 -iname '*decoder*joint*int8*.onnx' | head -n1 || true)"
    voc="$(find "$cand" -maxdepth 2 -iname 'vocab*.txt' | head -n1 || true)"
    cfg="$(find "$cand" -maxdepth 2 -iname 'config.json' | head -n1 || true)"
    if [ -n "$enc" ] && [ -n "$dec" ] && [ -n "$voc" ]; then
      ln -sf "$enc" "$MODELS_DIR/encoder-model.int8.onnx"
      ln -sf "$dec" "$MODELS_DIR/decoder_joint-model.int8.onnx"
      ln -sf "$voc" "$MODELS_DIR/vocab.txt"
      # config.json is linked only when Handy has one; otherwise it is
      # fetched (or stubbed) below.
      if [ -n "$cfg" ]; then
        ln -sf "$cfg" "$MODELS_DIR/config.json"
      fi
      reused=1
      log "Linked Handy's Parakeet model into $MODELS_DIR"
      break
    else
      warn "That Handy dir did not contain the expected int8 encoder/decoder/vocab trio."
    fi
  done
  if [ "$reused" -eq 0 ] || ! models_complete; then
    if [ "$reused" -eq 1 ]; then
      log "Handy's model directory lacks some files; downloading only what is missing…"
    else
      warn "Could not reuse Handy's model files (filenames differ or not found)."
      log "Downloading the identical Parakeet TDT 0.6B v3 int8 ONNX model (~670MB)…"
    fi
    # Same model, same source (istupakov ONNX conversion) the Go server's
    # Makefile uses. Pulled from Hugging Face.
    base="https://huggingface.co/istupakov/parakeet-tdt-0.6b-v3-onnx/resolve/main"
    for name in "${REQUIRED[@]}"; do
      dest="$MODELS_DIR/$name"
      # Files that were just linked from Handy are kept as they are.
      if [ "$reused" -eq 1 ] && [ -e "$dest" ]; then
        continue
      fi
      # Never let curl write to $dest directly: if $dest is a symlink, the
      # download would land in the link's target, i.e. inside Handy's own
      # directory. Download to a scratch name and rename instead; the rename
      # replaces the link itself, and a failed transfer never leaves a
      # truncated file under the final name.
      part="$dest.part"
      if curl -fL -o "$part" "$base/$name"; then
        mv -f -- "$part" "$dest"
      elif [ "$name" = "config.json" ]; then
        # config.json is the one file with a fallback: when it cannot be
        # fetched, an empty JSON object is written in its place.
        rm -f -- "$part" "$dest"
        echo '{}' > "$dest"
      else
        rm -f -- "$part"
        die "Failed to download $base/$name"
      fi
    done
  fi
fi
models_complete || die "Model files are still incomplete in $MODELS_DIR; check the warnings above."
# ---------------------------------------------------------------------------
# 4. Write the resolved environment for the systemd units to source
# ---------------------------------------------------------------------------
ENV_FILE="$PREFIX/speech-to-text.env"
# Generate into a scratch file first. The real file invites hand edits ("Edit
# to taste") and this script is meant to be re-run, so the previous version is
# compared with the fresh one before it is replaced.
new_env="$ENV_FILE.new"
cat > "$new_env" <<EOF
# Generated by setup.sh on $(date -Iseconds). Edit to taste.
ONNXRUNTIME_LIB=$ORT_LIB
STT_PARAKEET_MODELS_DIR=$MODELS_DIR
STT_PARAKEET_BIN=$BIN_DIR/parakeet
STT_PARAKEET_PORT=5092
STT_BACKEND_URL=http://127.0.0.1:5092
# speech-to-text (UI) settings:
STT_HOST=127.0.0.1
STT_PORT=8080
STT_LANG=fr
STT_MAX_MB=25
# Backend tuning. Each worker holds ~670MB RAM for the int8 model.
STT_WORKERS=2
EOF
# Line 1 carries the generation timestamp, so it is skipped in the comparison.
# Only a file that differs beyond that line is backed up; an unchanged re-run
# therefore never overwrites an earlier backup.
if [ -f "$ENV_FILE" ] \
  && ! cmp -s <(tail -n +2 "$ENV_FILE") <(tail -n +2 "$new_env"); then
  cp -p -- "$ENV_FILE" "$ENV_FILE.bak"
  warn "Existing $ENV_FILE differs from the generated one; previous version saved as $ENV_FILE.bak"
fi
mv -f -- "$new_env" "$ENV_FILE"
log "Wrote environment file: $ENV_FILE"
# Closing summary: where each artifact ended up, then the remaining manual step.
echo
log "Setup complete."
cat <<EOF
 Binary : $BIN_DIR/parakeet
 Models : $MODELS_DIR
 ORT : $ORT_LIB
 Env : $ENV_FILE

Next: install the systemd units (see README.md → 'Run it on boot').
EOF