xserv/tools/setup-llama-cpp.sh

#!/usr/bin/env bash
# Build the llama.cpp baseline (third_party/llama.cpp) with CUDA.
#
# Source is vendored as a git submodule pinned to a fixed tag (see .gitmodules
# and the recorded gitlink commit). This script does NOT fetch from the network
# by default — it expects the source to already be present, either via:
#   - `git submodule update --init` (on a host with network), or
#   - rsync/tar transfer (how it reaches dash5, which has no network).
#
# It only fetches as a convenience fallback when the source is missing AND
# network is reachable.
#
# Idempotent. Safe to re-run.
#
# Usage:
#   tools/setup-llama-cpp.sh            # build (configure if needed)
#   tools/setup-llama-cpp.sh --rebuild  # wipe build dir, reconfigure, rebuild
#   tools/setup-llama-cpp.sh --help     # print this help and exit (also -h)
#
# Env:
#   CUDA_ARCH   CUDA architectures for cmake (default 120-real = RTX 5090 SM120)
#   CUDA_HOME   CUDA toolkit root (if unset, auto-detected: /usr/local/cuda-12.9,
#               then /usr/local/cuda)

set -euo pipefail

# print_usage [FILE]
# Print the leading comment header of FILE (default: this script) as help text.
# The shebang line is skipped, one leading "# " (or bare "#") is stripped from
# each header line, and output stops at the first non-comment line after the
# header so that comments further down the script are not shown to the user.
print_usage() {
    awk '
        /^#!/     { next }
        /^#/      { sub(/^# ?/, ""); print; in_header = 1; next }
        in_header { exit }
    ' "${1:-$0}"
}

# parse_args ARG...
# Handle command-line flags:
#   --rebuild   sets the global REBUILD to 1
#   --help|-h   prints the usage header and exits 0
# Any other argument is still ignored (as before), but a warning is written to
# stderr so that a mistyped flag does not go unnoticed.
parse_args() {
    local arg
    for arg in "$@"; do
        case "$arg" in
            --rebuild) REBUILD=1 ;;
            --help|-h) print_usage; exit 0 ;;
            *) printf 'WARNING: ignoring unknown argument: %s\n' "$arg" >&2 ;;
        esac
    done
}

# Repository root is the parent of the directory containing this script.
ROOT_DIR="$(cd "$(dirname "$0")/.." && pwd)"
VENDOR_DIR="$ROOT_DIR/third_party/llama.cpp"
# Caller may override the target architectures through the environment.
CUDA_ARCH="${CUDA_ARCH:-120-real}"
REBUILD=0
parse_args "$@"

# Resolve the CUDA toolkit root. A non-empty CUDA_HOME from the caller always
# wins; otherwise take the first of the known install locations that exists.
if [ -z "${CUDA_HOME:-}" ]; then
    for cuda_candidate in /usr/local/cuda-12.9 /usr/local/cuda; do
        if [ -d "$cuda_candidate" ]; then
            CUDA_HOME="$cuda_candidate"
            break
        fi
    done
fi

# With a toolkit root known, export it and put its bin/ directory first on
# PATH so that the toolkit's own executables are found before any others.
if [ -n "${CUDA_HOME:-}" ]; then
    export CUDA_HOME
    export PATH="$CUDA_HOME/bin:$PATH"
fi

# Summarize the effective settings before doing any work.
printf '%s\n' \
    "=== llama.cpp build ===" \
    "  vendor dir : $VENDOR_DIR" \
    "  CUDA arch  : $CUDA_ARCH" \
    "  CUDA_HOME  : ${CUDA_HOME:-<not set>}"

# --- Ensure source is present ---
# The vendored tree counts as present when its top-level CMakeLists.txt exists.
# Otherwise fall back to initializing the submodule, which needs both a git
# checkout at ROOT_DIR and a reachable remote. Each precondition is checked
# separately so the error names the actual cause.
if [ ! -f "$VENDOR_DIR/CMakeLists.txt" ]; then
    echo "==> source missing at $VENDOR_DIR"

    # A tree copied without its .git directory cannot run submodule commands.
    if ! git -C "$ROOT_DIR" rev-parse --git-dir >/dev/null 2>&1; then
        echo "ERROR: llama.cpp source not present and $ROOT_DIR is not a git checkout." >&2
        echo "  On a networked host run: git submodule update --init third_party/llama.cpp" >&2
        echo "  Then transfer the source here (the bench tooling does this via rsync)." >&2
        exit 1
    fi

    # Bounded reachability probe so an offline host fails fast instead of
    # hanging inside the submodule fetch.
    if ! timeout 8 git ls-remote https://github.com/ggerganov/llama.cpp HEAD >/dev/null 2>&1; then
        echo "ERROR: llama.cpp source not present and network unavailable." >&2
        echo "  On a networked host run: git submodule update --init third_party/llama.cpp" >&2
        echo "  Then transfer the source here (the bench tooling does this via rsync)." >&2
        exit 1
    fi

    echo "==> network OK, initializing submodule"
    git -C "$ROOT_DIR" submodule update --init --recursive third_party/llama.cpp

    # Confirm the init actually produced the source before handing it to cmake.
    if [ ! -f "$VENDOR_DIR/CMakeLists.txt" ]; then
        echo "ERROR: submodule init finished but $VENDOR_DIR/CMakeLists.txt is still missing." >&2
        exit 1
    fi
fi

# Locations inside the vendored tree: the cmake build directory and the
# server binary whose path this script reports.
BUILD_DIR="$VENDOR_DIR/build"
SERVER_BIN="$BUILD_DIR/bin/llama-server"

case "$REBUILD" in
    1)
        # --rebuild: throw away any previous build output first.
        if [ -d "$BUILD_DIR" ]; then
            echo "==> --rebuild: removing $BUILD_DIR"
            rm -rf "$BUILD_DIR"
        fi
        ;;
    0)
        # Normal run: an executable server binary means nothing is left to do.
        # Its path is printed as the last line of output, as on a fresh build.
        if [ -x "$SERVER_BIN" ]; then
            echo "==> already built: $SERVER_BIN (use --rebuild to force)"
            echo "$SERVER_BIN"
            exit 0
        fi
        ;;
esac

# Configure: CUDA backend on, curl/tests/examples off, Release build, for the
# requested CUDA architectures.
echo "==> cmake configure"
cmake_configure_args=(
    -S "$VENDOR_DIR"
    -B "$BUILD_DIR"
    -DGGML_CUDA=ON
    -DLLAMA_CURL=OFF
    -DLLAMA_BUILD_TESTS=OFF
    -DLLAMA_BUILD_EXAMPLES=OFF
    -DCMAKE_BUILD_TYPE=Release
    -DCMAKE_CUDA_ARCHITECTURES="$CUDA_ARCH"
)
cmake "${cmake_configure_args[@]}"

# Build only the two targets needed, with one job per CPU reported by nproc.
echo "==> build llama-server llama-cli (jobs: $(nproc))"
cmake --build "$BUILD_DIR" --target llama-server llama-cli -j "$(nproc)"

# The build only counts as successful if the server binary exists and is
# executable; on success its path is the final line written to stdout.
if [ -x "$SERVER_BIN" ]; then
    echo "=== done ==="
    echo "$SERVER_BIN"
else
    echo "ERROR: llama-server did not build at $SERVER_BIN" >&2
    exit 1
fi