Files
xserv/tools/sync-and-build.sh
Gahow Wang 49c7653222 tools: add llama.cpp comparison baseline + standard benchmark suite
Vendor llama.cpp as a submodule pinned to b9371 and add a one-click
benchmark driver that compares xserv against it on identical workloads:

- setup-llama-cpp.sh: network-optional CUDA build (SM120); convert-to-gguf.sh
  converts the same safetensors to BF16 GGUF for an apples-to-apples baseline.
- tools/bench/: black-box OpenAI-API driver measuring TTFT/TPOT/throughput
  (single-stream + concurrent) and response quality on AIME 2025 + GSM8K.
- fetch_datasets.py pulls datasets to local JSON (GPU host has no network);
  task loaders prefer the local JSON.
- sync-and-build.sh: `bench` subcommand transfers source + datasets to the
  GPU host via tar-over-ssh (no rsync there), builds, and runs the suite.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-05-28 11:18:52 +08:00

103 lines
4.1 KiB
Bash
Executable File

#!/bin/bash
# Sync local project to dash5 and build/test/bench there.
#
# Usage:
#   ./tools/sync-and-build.sh [build|test|run|check|clippy]
#       Runs `cargo <action> --release` on dash5 (default action: build).
#
#   ./tools/sync-and-build.sh bench [-- <extra runner args>]
#       Ensures llama.cpp is built (tools/setup-llama-cpp.sh) and a BF16 GGUF
#       exists, then runs tools/bench/runner.py against both xserv-server and
#       llama-server. Result lands in $REMOTE_DIR/bench-out/.
#
#   ./tools/sync-and-build.sh fetch-bench-out
#       Copies dash5:$REMOTE_DIR/bench-out/ back to ./bench-out/.
#
# Environment:
#   REMOTE_MODEL_DIR  Model directory on the remote host
#                     (default: /opt/wjh/models/qwen3-8b).

# Abort on the first failing command. pipefail matters because every transfer
# in this script is a `tar | ssh` or `ssh | tar` pipeline: without it only the
# last stage's exit status is looked at and a failing sender goes unnoticed.
set -eo pipefail

# ssh host alias and the project root on that host.
REMOTE="dash5"
REMOTE_DIR="/opt/wjh/projects/xserv"
REMOTE_MODEL_DIR="${REMOTE_MODEL_DIR:-/opt/wjh/models/qwen3-8b}"

# Repository root: the parent of the directory that contains this script.
LOCAL_DIR="$(cd "$(dirname "$0")/.." && pwd)"

# First argument selects the action; everything after it stays in "$@".
ACTION="${1:-build}"
# `shift` fails when no argument was given at all, hence the `|| true`.
shift || true

# Shell snippet spliced into remote commands (single-quoted so that it is
# expanded on the remote side, not here). It selects /usr/local/cuda-12.9 when
# that directory exists, /usr/local/cuda otherwise, and prepends the CUDA bin
# directories to PATH.
cuda_env='if [ -d /usr/local/cuda-12.9 ]; then export CUDA_HOME=/usr/local/cuda-12.9; else export CUDA_HOME=/usr/local/cuda; fi && export PATH=$CUDA_HOME/bin:/usr/local/cuda/bin:$PATH'
#######################################
# Replace the remote project tree with the local one via tar-over-ssh.
# Globals:   REMOTE, REMOTE_DIR, LOCAL_DIR (read)
# Arguments: none
# Outputs:   a progress banner on stdout
# Returns:   non-zero if either end of the transfer fails
#######################################
sync_project() {
  echo "=== Syncing to $REMOTE:$REMOTE_DIR ==="
  # Preserve `target/`, `third_party/` (large + arch-specific) and `bench-out/`
  # on the remote side. Everything else is wiped + replaced.
  ssh "$REMOTE" "mkdir -p $REMOTE_DIR && find $REMOTE_DIR -mindepth 1 -maxdepth 1 ! -name target ! -name third_party ! -name bench-out -exec rm -rf {} +"
  # Run the transfer in a subshell with pipefail. A local tar that hits an
  # error (e.g. an unreadable file) still emits a well-formed archive, so the
  # remote extractor exits 0; without pipefail the pipeline would then report
  # success for an incomplete sync. The subshell keeps the option local.
  (
    set -o pipefail
    tar --exclude target --exclude third_party --exclude bench-out --exclude .git \
      -C "$LOCAL_DIR" -czf - . \
      | ssh "$REMOTE" "tar -xzf - -C $REMOTE_DIR"
  )
}
#######################################
# Copy the llama.cpp submodule working tree to the remote host.
# dash5 has no network (and no rsync), so the source is shipped via
# tar-over-ssh; the build dir, .git and any *.gguf files are never sent.
# Globals:   REMOTE, REMOTE_DIR, LOCAL_DIR (read)
# Arguments: none
# Outputs:   a progress banner on stdout; errors on stderr
# Returns:   exits the script with status 1 when the submodule is not checked
#            out; non-zero if either end of the transfer fails
#######################################
sync_llama_src() {
  local src="$LOCAL_DIR/third_party/llama.cpp"
  if [ ! -f "$src/CMakeLists.txt" ]; then
    echo "ERROR: llama.cpp source not found at $src" >&2
    echo " Run: git submodule update --init third_party/llama.cpp" >&2
    exit 1
  fi
  echo "=== Syncing llama.cpp source to $REMOTE (tar) ==="
  # Preserve the remote build/ dir; only refresh source files.
  ssh "$REMOTE" "mkdir -p $REMOTE_DIR/third_party/llama.cpp"
  # pipefail (scoped to this subshell) makes a failing local tar fail the
  # transfer; otherwise only the remote extractor's exit status would count
  # and a partial source tree could be built as if it were complete.
  (
    set -o pipefail
    tar --exclude build --exclude .git --exclude '*.gguf' \
      -C "$src" -czf - . \
      | ssh "$REMOTE" "tar -xzf - -C $REMOTE_DIR/third_party/llama.cpp"
  )
}
# Dispatch on the requested action. "$@" holds whatever followed the action on
# the command line (the action itself has already been shifted off).
case "$ACTION" in
  test|build|run|check|clippy)
    sync_project
    echo "=== Running: cargo $ACTION ==="
    ssh "$REMOTE" "source \$HOME/.cargo/env && $cuda_env && cd $REMOTE_DIR && cargo $ACTION --release 2>&1"
    ;;
  bench)
    # The usage text documents `bench [-- <extra runner args>]`; consume that
    # single leading separator here instead of forwarding it to the runner.
    if [ "${1:-}" = "--" ]; then
      shift
    fi
    # The remote command is one string that the remote shell parses again, so
    # each extra argument is re-quoted to survive that second parse intact
    # (spaces, globs and quotes included).
    runner_extra=""
    if [ "$#" -gt 0 ]; then
      runner_extra=$(printf '%q ' "$@")
    fi

    sync_project
    sync_llama_src
    echo "=== Ensuring llama.cpp baseline is built ==="
    ssh "$REMOTE" "source \$HOME/.cargo/env && $cuda_env && cd $REMOTE_DIR && \
      ./tools/setup-llama-cpp.sh 2>&1"

    echo "=== Ensuring BF16 GGUF exists for $REMOTE_MODEL_DIR ==="
    # Returned path on stdout's last line is what we feed --llama-gguf.
    # Only stdout is captured (stderr passes through to the terminal) and the
    # last line is picked locally, so ssh hands back the converter's own exit
    # status rather than that of a trailing `tail`.
    convert_out=$(ssh "$REMOTE" "$cuda_env && cd $REMOTE_DIR && \
      ./tools/convert-to-gguf.sh $REMOTE_MODEL_DIR") || {
      echo "ERROR: convert-to-gguf.sh failed on $REMOTE" >&2
      exit 1
    }
    GGUF_PATH=$(printf '%s\n' "$convert_out" | tail -n 1)
    if [ -z "$GGUF_PATH" ]; then
      echo "ERROR: convert-to-gguf.sh did not print a GGUF path" >&2
      exit 1
    fi
    echo " gguf: $GGUF_PATH"
    # Quote the path for the remote shell, same reason as the extra args.
    gguf_quoted=$(printf '%q' "$GGUF_PATH")

    echo "=== Building xserv (release) ==="
    ssh "$REMOTE" "source \$HOME/.cargo/env && $cuda_env && cd $REMOTE_DIR && \
      cargo build --release 2>&1"

    echo "=== Running benchmark suite ==="
    ssh "$REMOTE" "$cuda_env && cd $REMOTE_DIR && \
      python3 -m tools.bench.runner \
      --xserv-bin ./target/release/xserv-server \
      --xserv-model $REMOTE_MODEL_DIR \
      --llama-bin third_party/llama.cpp/build/bin/llama-server \
      --llama-gguf $gguf_quoted \
      $runner_extra 2>&1"
    ;;
  fetch-bench-out)
    mkdir -p "$LOCAL_DIR/bench-out"
    echo "=== Fetching bench-out from $REMOTE:$REMOTE_DIR/bench-out (tar) ==="
    # pipefail (scoped to this subshell) so that a failing remote tar/ssh is
    # reported even when the local extractor is happy with what it received.
    (
      set -o pipefail
      ssh "$REMOTE" "tar -C $REMOTE_DIR/bench-out -czf - ." \
        | tar -xzf - -C "$LOCAL_DIR/bench-out"
    )
    echo " -> $LOCAL_DIR/bench-out/"
    ;;
  *)
    echo "Unknown action: $ACTION" >&2
    echo "Usage: $0 {build|test|run|check|clippy|bench|fetch-bench-out} [-- extra args]" >&2
    exit 2
    ;;
esac