#!/usr/bin/env bash
# Build the llama.cpp baseline (third_party/llama.cpp) with CUDA.
#
# Source is vendored as a git submodule pinned to a fixed tag (see .gitmodules
# and the recorded gitlink commit). This script does NOT fetch from the network
# by default — it expects the source to already be present, either via:
#   - `git submodule update --init` (on a host with network), or
#   - rsync/tar transfer (how it reaches dash5, which has no network).
#
# It only fetches as a convenience fallback when the source is missing AND
# network is reachable.
#
# Idempotent. Safe to re-run.
#
# Usage:
#   tools/setup-llama-cpp.sh            # build (configure if needed)
#   tools/setup-llama-cpp.sh --rebuild  # wipe build dir, reconfigure, rebuild
#   tools/setup-llama-cpp.sh --help     # print this header and exit
#
# Env:
#   CUDA_ARCH  CUDA architectures for cmake (default 120-real = RTX 5090 SM120)
#   CUDA_HOME  CUDA toolkit root (auto-detected: /usr/local/cuda-12.9, then
#              /usr/local/cuda)
#
# Output:
#   Progress messages go to stdout; the final stdout line is the path of the
#   llama-server binary.
#
# Exit status:
#   0      llama-server is built (or was already built)
#   1      source missing and not fetchable, cmake not found, or the build did
#          not produce the binary
#   2      unrecognized command-line option
#   other  status of a failing git/cmake command (propagated by `set -e`)

set -euo pipefail

# Print this file's header comment block: every line after the shebang up to
# the first line that does not start with '#', with the leading "# " removed.
usage() {
  awk 'NR == 1 { next } !/^#/ { exit } { sub(/^# ?/, ""); print }' "$0"
}

ROOT_DIR="$(cd -- "$(dirname -- "$0")/.." && pwd)"
VENDOR_DIR="$ROOT_DIR/third_party/llama.cpp"
CUDA_ARCH="${CUDA_ARCH:-120-real}"

REBUILD=0
for arg in "$@"; do
  case "$arg" in
    --rebuild)
      REBUILD=1
      ;;
    --help | -h)
      usage
      exit 0
      ;;
    *)
      # Reject typos such as "--rebuld" instead of silently doing a
      # non-rebuild run and reporting a stale binary as up to date.
      echo "ERROR: unknown option: $arg (see --help)" >&2
      exit 2
      ;;
  esac
done

# A CUDA_HOME supplied by the caller always wins; otherwise take the first
# toolkit directory that exists, preferring the pinned 12.9 install.
if [[ -z "${CUDA_HOME:-}" ]]; then
  for cuda_candidate in /usr/local/cuda-12.9 /usr/local/cuda; do
    if [[ -d "$cuda_candidate" ]]; then
      CUDA_HOME="$cuda_candidate"
      break
    fi
  done
fi
if [[ -n "${CUDA_HOME:-}" ]]; then
  export CUDA_HOME
  # Put the toolkit's bin directory first so cmake finds this nvcc.
  export PATH="$CUDA_HOME/bin:$PATH"
fi

echo "=== llama.cpp build ==="
echo " vendor dir : $VENDOR_DIR"
echo " CUDA arch : $CUDA_ARCH"
echo " CUDA_HOME : ${CUDA_HOME:-}"

# --- Ensure source is present ---
if [[ ! -f "$VENDOR_DIR/CMakeLists.txt" ]]; then
  echo "==> source missing at $VENDOR_DIR"
  # Fallback fetch: only when ROOT_DIR is a git checkout and GitHub answers
  # within 8 seconds (the ls-remote is purely a connectivity probe).
  if git -C "$ROOT_DIR" rev-parse --git-dir >/dev/null 2>&1 \
    && timeout 8 git ls-remote https://github.com/ggerganov/llama.cpp HEAD >/dev/null 2>&1; then
    echo "==> network OK, initializing submodule"
    git -C "$ROOT_DIR" submodule update --init --recursive third_party/llama.cpp
  else
    echo "ERROR: llama.cpp source not present and network unavailable." >&2
    echo " On a networked host run: git submodule update --init third_party/llama.cpp" >&2
    echo " Then transfer the source here (the bench tooling does this via rsync)." >&2
    exit 1
  fi
  # The submodule command can succeed without producing the expected tree
  # (e.g. path not registered in .gitmodules); fail here with a clear message
  # rather than later inside cmake.
  if [[ ! -f "$VENDOR_DIR/CMakeLists.txt" ]]; then
    echo "ERROR: submodule init finished but $VENDOR_DIR/CMakeLists.txt is still missing." >&2
    exit 1
  fi
fi

BUILD_DIR="$VENDOR_DIR/build"
if [[ "$REBUILD" -eq 1 && -d "$BUILD_DIR" ]]; then
  echo "==> --rebuild: removing $BUILD_DIR"
  # ":?" aborts instead of expanding to an empty path if BUILD_DIR is ever unset.
  rm -rf -- "${BUILD_DIR:?}"
fi

SERVER_BIN="$BUILD_DIR/bin/llama-server"
if [[ -x "$SERVER_BIN" && "$REBUILD" -eq 0 ]]; then
  echo "==> already built: $SERVER_BIN (use --rebuild to force)"
  printf '%s\n' "$SERVER_BIN"
  exit 0
fi

if ! command -v cmake >/dev/null 2>&1; then
  echo "ERROR: cmake not found in PATH" >&2
  exit 1
fi

echo "==> cmake configure"
cmake -S "$VENDOR_DIR" -B "$BUILD_DIR" \
  -DGGML_CUDA=ON \
  -DLLAMA_CURL=OFF \
  -DLLAMA_BUILD_TESTS=OFF \
  -DLLAMA_BUILD_EXAMPLES=OFF \
  -DCMAKE_BUILD_TYPE=Release \
  -DCMAKE_CUDA_ARCHITECTURES="$CUDA_ARCH"

# Resolve the job count once, as a plain assignment, so a failing nproc stops
# the script instead of being masked inside an echo's command substitution.
BUILD_JOBS="$(nproc)"
echo "==> build llama-server llama-cli (jobs: $BUILD_JOBS)"
cmake --build "$BUILD_DIR" --target llama-server llama-cli -j "$BUILD_JOBS"

if [[ ! -x "$SERVER_BIN" ]]; then
  echo "ERROR: llama-server did not build at $SERVER_BIN" >&2
  exit 1
fi

echo "=== done ==="
printf '%s\n' "$SERVER_BIN"