Files
xserv/tools/bench/run_tp_parallel.sh
Gahow Wang a4a171d425 bench: TP sweep harness (xserv --tp, llama row-split, concurrent groups)
runner/servers gain --tp (xserv --tp N; llama.cpp --split-mode row) and
--llama-devices so llama can run on a disjoint GPU group. run_tp_parallel.sh
runs xserv (GPU 0..N-1) and llama.cpp (GPU 4..4+N-1) concurrently per TP,
matching the box's 0-3 / 4-7 PHB groups. summarize_tp.py tabulates the sweep.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-05-29 11:10:43 +08:00

39 lines
1.6 KiB
Bash

#!/usr/bin/env bash
# Run the TP quality sweep (TP=1/2/4 by default) with xserv and llama.cpp
# CONCURRENTLY on disjoint GPU groups: xserv on GPUs 0..N-1, llama.cpp on
# GPUs 4..4+N-1.
# The 8x5090 box is grouped 0-3 / 4-7 (PHB intra-group), so each engine's TP
# comm stays intra-group and the two engines never contend for a GPU.
#
# Run from the repo root on the GPU host. Produces bench-out/tp{1,2,4}-{xserv,llama}.
#
# Environment overrides (defaults are in the assignments below):
#   MODEL    model path passed to the runner as --xserv-model
#   GGUF     GGUF file passed to the runner as --llama-gguf
#   LIMIT    value passed as --quality-limit
#   MAXSEQ   value passed as --max-seq-len
#   TPS      whitespace-separated list of TP degrees to sweep
#   LOG_DIR  directory receiving the per-engine logs tp<N>-{xserv,llama}.log
#
# A failing engine does not abort the sweep: the loop moves on to the next TP
# and each engine's exit status is only reported on the "TP=<N> done" line.
set -u

MODEL="${MODEL:-/opt/wjh/models/qwen3-8b}"
GGUF="${GGUF:-/opt/wjh/models/qwen3-8b/qwen3-8b-bf16.gguf}"
LIMIT="${LIMIT:-30}"
MAXSEQ="${MAXSEQ:-2048}"
TPS="${TPS:-1 2 4}"
LOG_DIR="${LOG_DIR:-/tmp}"

#######################################
# Print the comma-separated GPU ids used by llama.cpp for a TP degree.
# Arguments: $1 - TP degree N
# Outputs:   "4,5,...,4+N-1" on stdout (empty line when N < 1)
#######################################
llama_devices() {
  local ids="" gpu
  for ((gpu = 4; gpu < 4 + $1; gpu++)); do
    ids+="${ids:+,}${gpu}"
  done
  printf '%s\n' "$ids"
}

#######################################
# Wait for one background job and store its exit status in a variable.
# `wait PID1 PID2` only returns the status of the LAST pid, so each job is
# reaped on its own to keep the two statuses apart.
# Arguments: $1 - name of the variable to assign
#            $2 - PID of the background job
#######################################
reap_into() {
  local _status=0
  wait "$2" || _status=$?
  printf -v "$1" '%s' "$_status"
}

# TPS is intentionally unquoted: it is a whitespace-separated list.
for TP in $TPS; do
  LD=$(llama_devices "$TP") # llama GPUs: 4 / 4,5 / 4,5,6,7
  echo "##### TP=$TP (xserv GPU 0..$((TP-1)) || llama GPU $LD) #####"
  # Drop results of a previous run so stale output never mixes with new output.
  rm -rf "bench-out/tp$TP-xserv" "bench-out/tp$TP-llama"

  # Both runners are started in the background so the two engines overlap.
  python3 -u -m tools.bench.runner --systems xserv --tp "$TP" \
    --xserv-bin ./target/release/xserv-server --xserv-model "$MODEL" \
    --suite quality --quality-tasks aime2025,gsm8k --quality-limit "$LIMIT" \
    --max-batch 1 --max-seq-len "$MAXSEQ" \
    --out-dir "bench-out/tp$TP-xserv" > "$LOG_DIR/tp$TP-xserv.log" 2>&1 &
  XP=$!

  python3 -u -m tools.bench.runner --systems llama.cpp --tp "$TP" --llama-devices "$LD" \
    --llama-bin third_party/llama.cpp/build/bin/llama-server --llama-gguf "$GGUF" \
    --suite quality --quality-tasks aime2025,gsm8k --quality-limit "$LIMIT" \
    --max-batch 1 --max-seq-len "$MAXSEQ" \
    --out-dir "bench-out/tp$TP-llama" > "$LOG_DIR/tp$TP-llama.log" 2>&1 &
  LP=$!

  # Barrier: the next TP starts only after both engines have finished.
  reap_into XRC "$XP"
  reap_into LRC "$LP"
  echo "TP=$TP done (xserv exit=$XRC, llama exit=$LRC)"
done
echo ALL_DONE