Vendor llama.cpp as a submodule pinned to b9371 and add a one-click benchmark driver that compares xserv against it on identical workloads: - setup-llama-cpp.sh: network-optional CUDA build (SM120); convert-to-gguf.sh converts the same safetensors to BF16 GGUF for an apples-to-apples baseline. - tools/bench/: black-box OpenAI-API driver measuring TTFT/TPOT/throughput (single-stream + concurrent) and response quality on AIME 2025 + GSM8K. - fetch_datasets.py pulls datasets to local JSON (GPU host has no network); task loaders prefer the local JSON. - sync-and-build.sh: `bench` subcommand transfers source + datasets to the GPU host via tar-over-ssh (no rsync there), builds, and runs the suite. Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
103 lines
4.1 KiB
Bash
Executable File
103 lines
4.1 KiB
Bash
Executable File
#!/bin/bash
|
|
# Sync local project to dash5 and build/test/bench there.
#
# Usage:
#   ./tools/sync-and-build.sh [build|test|run|check|clippy]
#       Runs `cargo <action> --release` on dash5. Defaults to `build` when no
#       action is given.
#
#   ./tools/sync-and-build.sh bench [-- <extra runner args>]
#       Ensures llama.cpp is built (tools/setup-llama-cpp.sh) and a BF16 GGUF
#       exists, then runs tools/bench/runner.py against both xserv-server and
#       llama-server. Result lands in $REMOTE_DIR/bench-out/.
#
#   ./tools/sync-and-build.sh fetch-bench-out
#       Copies dash5:$REMOTE_DIR/bench-out/ back to ./bench-out/.
|
|
|
|
# Abort on the first failing command. `pipefail` additionally makes a pipeline
# fail when ANY stage fails, so a broken local `tar` feeding `ssh` (or a broken
# remote `tar` feeding the local one) is not hidden behind the last stage's
# exit status.
set -eo pipefail

# ssh host the project is synced to and built on.
REMOTE="dash5"
# Project checkout location on the remote host.
REMOTE_DIR="/opt/wjh/projects/xserv"
# Model directory on the remote host; override via the environment.
REMOTE_MODEL_DIR="${REMOTE_MODEL_DIR:-/opt/wjh/models/qwen3-8b}"
# Local project root: the parent of the directory containing this script.
LOCAL_DIR="$(cd "$(dirname "$0")/.." && pwd)"

# First positional argument selects the action (default: build); anything
# after it stays in "$@" for the action to consume.
ACTION="${1:-build}"
# `shift` fails when no argument was given; that is fine here.
shift || true

# Shell snippet executed on the remote host before every build/run step:
# prefer CUDA 12.9 when installed, otherwise fall back to /usr/local/cuda,
# and put the toolkit's bin directory on PATH.
cuda_env='if [ -d /usr/local/cuda-12.9 ]; then export CUDA_HOME=/usr/local/cuda-12.9; else export CUDA_HOME=/usr/local/cuda; fi && export PATH=$CUDA_HOME/bin:/usr/local/cuda/bin:$PATH'
|
|
|
|
#######################################
# Mirror the local working tree onto $REMOTE:$REMOTE_DIR via tar-over-ssh.
#
# `target/`, `third_party/` (large + arch-specific) and `bench-out/` are
# preserved on the remote side; every other top-level entry there is wiped
# and replaced. The same three names plus `.git` are left out of the upload.
#
# Globals:  REMOTE, REMOTE_DIR, LOCAL_DIR (read)
# Outputs:  progress banner on stdout, errors on stderr
# Returns:  non-zero if the remote cleanup, the local tar, or the remote
#           extraction fails
#######################################
sync_project() {
  # An empty destination would make the remote `find` fall back to its
  # default start directory and delete from there, so refuse to continue.
  : "${REMOTE_DIR:?REMOTE_DIR must not be empty}"

  # The remote command line is re-parsed by the remote shell; quote the path
  # so whitespace or metacharacters in it cannot change what gets deleted.
  local remote_dir
  remote_dir=$(printf '%q' "$REMOTE_DIR")

  echo "=== Syncing to $REMOTE:$REMOTE_DIR ==="
  ssh "$REMOTE" "mkdir -p $remote_dir && find $remote_dir -mindepth 1 -maxdepth 1 ! -name target ! -name third_party ! -name bench-out -exec rm -rf {} +" \
    || return

  tar --exclude target --exclude third_party --exclude bench-out --exclude .git \
    -C "$LOCAL_DIR" -czf - . \
    | ssh "$REMOTE" "tar -xzf - -C $remote_dir"
  # Inspect both stages explicitly: without `pipefail` the pipeline's status
  # is ssh's alone and a failed local tar would go unnoticed.
  local -a stage_status=("${PIPESTATUS[@]}")
  if [ "${stage_status[0]}" -ne 0 ] || [ "${stage_status[1]}" -ne 0 ]; then
    echo "ERROR: project sync failed (local tar: ${stage_status[0]}, remote extract: ${stage_status[1]})" >&2
    return 1
  fi
}
|
|
|
|
#######################################
# Upload the llama.cpp submodule working tree to the remote host.
#
# dash5 has no network (and no rsync), so the source is transferred via
# tar-over-ssh. Only source files are sent: `build`, `.git` and `*.gguf`
# entries are excluded, and the remote directory is not cleaned first, so an
# existing remote build/ dir is preserved.
#
# Globals:  REMOTE, REMOTE_DIR, LOCAL_DIR (read)
# Outputs:  progress banner on stdout, errors on stderr
# Returns:  non-zero if the remote mkdir, the local tar, or the remote
#           extraction fails; exits the script with status 1 when the
#           submodule has not been checked out locally
#######################################
sync_llama_src() {
  local src="$LOCAL_DIR/third_party/llama.cpp"
  if [ ! -f "$src/CMakeLists.txt" ]; then
    echo "ERROR: llama.cpp source not found at $src" >&2
    echo " Run: git submodule update --init third_party/llama.cpp" >&2
    exit 1
  fi

  # The remote command line is re-parsed by the remote shell; quote the path
  # so whitespace or metacharacters in it survive intact.
  local remote_dst
  remote_dst=$(printf '%q' "$REMOTE_DIR/third_party/llama.cpp")

  echo "=== Syncing llama.cpp source to $REMOTE (tar) ==="
  ssh "$REMOTE" "mkdir -p $remote_dst" || return

  tar --exclude build --exclude .git --exclude '*.gguf' \
    -C "$src" -czf - . \
    | ssh "$REMOTE" "tar -xzf - -C $remote_dst"
  # Inspect both stages explicitly: without `pipefail` the pipeline's status
  # is ssh's alone and a failed local tar would go unnoticed.
  local -a stage_status=("${PIPESTATUS[@]}")
  if [ "${stage_status[0]}" -ne 0 ] || [ "${stage_status[1]}" -ne 0 ]; then
    echo "ERROR: llama.cpp sync failed (local tar: ${stage_status[0]}, remote extract: ${stage_status[1]})" >&2
    return 1
  fi
}
|
|
|
|
# Dispatch on the requested action. The action itself has already been
# shifted off, so "$@" holds only what followed it on the command line.
case "$ACTION" in
  test|build|run|check|clippy)
    # Plain cargo actions: sync the tree, then run cargo remotely in release
    # mode with the Rust and CUDA environments set up.
    sync_project
    echo "=== Running: cargo $ACTION ==="
    ssh "$REMOTE" "source \$HOME/.cargo/env && $cuda_env && cd $REMOTE_DIR && cargo $ACTION --release 2>&1"
    ;;

  bench)
    # The documented form is `bench -- <extra runner args>`. Drop the
    # separator: forwarding it would place a bare `--` in front of the extra
    # options on the runner's command line.
    if [ "${1:-}" = "--" ]; then
      shift
    fi
    # Re-quote every extra argument for the remote shell so arguments
    # containing spaces or shell metacharacters arrive as given. (Built as a
    # string rather than an array to stay usable when there are none.)
    extra_args=""
    for extra_arg in "$@"; do
      extra_args="$extra_args $(printf '%q' "$extra_arg")"
    done

    sync_project
    sync_llama_src

    echo "=== Ensuring llama.cpp baseline is built ==="
    ssh "$REMOTE" "source \$HOME/.cargo/env && $cuda_env && cd $REMOTE_DIR && \
      ./tools/setup-llama-cpp.sh 2>&1"

    echo "=== Ensuring BF16 GGUF exists for $REMOTE_MODEL_DIR ==="
    # Returned path on stdout's last line is what we feed --llama-gguf.
    GGUF_PATH=$(ssh "$REMOTE" "$cuda_env && cd $REMOTE_DIR && \
      ./tools/convert-to-gguf.sh $REMOTE_MODEL_DIR 2>&1 | tail -1")
    echo " gguf: $GGUF_PATH"
    # The remote pipeline's status is `tail`'s, and stderr is merged into the
    # captured text, so a failed conversion yields an error line instead of a
    # path. Confirm the file really exists (relative to $REMOTE_DIR, the
    # directory the runner is started from) before benchmarking with it.
    gguf_quoted=$(printf '%q' "$GGUF_PATH")
    if [ -z "$GGUF_PATH" ] \
      || ! ssh "$REMOTE" "cd $REMOTE_DIR && test -f $gguf_quoted"; then
      echo "ERROR: convert-to-gguf.sh did not produce a GGUF file (last output line: '$GGUF_PATH')" >&2
      exit 1
    fi

    echo "=== Building xserv (release) ==="
    ssh "$REMOTE" "source \$HOME/.cargo/env && $cuda_env && cd $REMOTE_DIR && \
      cargo build --release 2>&1"

    echo "=== Running benchmark suite ==="
    ssh "$REMOTE" "$cuda_env && cd $REMOTE_DIR && \
      python3 -m tools.bench.runner \
        --xserv-bin ./target/release/xserv-server \
        --xserv-model $REMOTE_MODEL_DIR \
        --llama-bin third_party/llama.cpp/build/bin/llama-server \
        --llama-gguf $gguf_quoted \
        $extra_args 2>&1"
    ;;

  fetch-bench-out)
    # Stream the remote bench-out/ directory back as a gzip'd tar and unpack
    # it into the local project's bench-out/.
    mkdir -p "$LOCAL_DIR/bench-out"
    echo "=== Fetching bench-out from $REMOTE:$REMOTE_DIR/bench-out (tar) ==="
    ssh "$REMOTE" "tar -C $REMOTE_DIR/bench-out -czf - ." \
      | tar -xzf - -C "$LOCAL_DIR/bench-out"
    echo " -> $LOCAL_DIR/bench-out/"
    ;;

  *)
    echo "Unknown action: $ACTION" >&2
    echo "Usage: $0 {build|test|run|check|clippy|bench|fetch-bench-out} [-- extra args]" >&2
    exit 2
    ;;
esac
|