bench: PP harness (xserv --pp vs llama.cpp -sm layer)
runner/servers: add --pp for both engines (xserv --pp N; llama.cpp -sm layer over N GPUs).

New drivers: pp_final.sh (sequential latency + per-GPU VRAM + byte-exact correctness), pp_diag.sh (single x2 vs pp4 x2 determinism control), pp_quality_full.sh / pp_llama_47.sh (AIME+GSM8K matrix, xserv on GPUs 0-3 in parallel with llama on GPUs 4-7), summarize_pp/summarize_fullq, and the pp_time.py latency probe.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
25
tools/pp_llama_47.sh
Normal file
25
tools/pp_llama_47.sh
Normal file
@@ -0,0 +1,25 @@
|
||||
#!/usr/bin/env bash
# llama.cpp PP=1/2/4 quality (aime2025+gsm8k, 30 each) on physical GPUs 4-7,
# parallel with the xserv matrix on 0-3. Pass --llama-devices so the runner pins
# CUDA_VISIBLE_DEVICES to 4.. (it otherwise forces 0..N-1). Distinct port + dirs.
#
# Outputs:
#   bench-out/LLAMA47_PROGRESS.md   START/END markers per run, then LLAMA47_DONE
#   bench-out/fullq-llama-pp<N>/    runner output directory (wiped before each run)
#   /tmp/fql-<N>.log                runner stdout+stderr for each run
#
# Deliberately no `set -e`: a failed run (or a pkill that matches nothing) must
# not abort the remaining PP configurations; each run's exit status is logged.
set -u

# Every path below is relative to the repo root -- including the `rm -rf` of the
# per-run output directory -- so refuse to continue from any other directory.
cd /opt/wjh/projects/xserv || {
  printf 'pp_llama_47: cannot cd to /opt/wjh/projects/xserv\n' >&2
  exit 1
}

export PATH="$HOME/.cargo/bin:/usr/local/cuda-12.9/bin:$PATH"
export CUDA_HOME=/usr/local/cuda-12.9

GGUF=/opt/wjh/models/qwen3-8b/qwen3-8b-bf16.gguf
LBIN=third_party/llama.cpp/build/bin/llama-server
PORT=18181
PROG=bench-out/LLAMA47_PROGRESS.md

# Unmatched globs expand to nothing, so the result count below is 0 (not 1)
# when a run produced no comparison-*.json files.
shopt -s nullglob

# Force-kill any llama-server whose command line mentions our port. Best effort:
# "no such process" is the normal case and is intentionally silenced.
kill_llama_server() {
  pkill -9 -f "llama-server.*${PORT}" 2>/dev/null
}

# Start a fresh progress file (create its directory on a clean checkout).
mkdir -p -- "${PROG%/*}"
echo "# llama on GPU 4-7 — $(date)" > "$PROG"

for pp in 1 2 4; do
  # Physical GPU list for this run: 4 | 4,5 | 4,5,6,7.
  dev=$(seq -s, 4 "$((3 + pp))")
  out="bench-out/fullq-llama-pp${pp}"
  rm -rf -- "${out:?}"

  echo "=== START llama pp=$pp dev=$dev $(date +%H:%M:%S) ===" >> "$PROG"

  # Clear any stale server from a previous run before the next one starts.
  kill_llama_server
  sleep 2

  python3 -u -m tools.bench.runner --systems llama.cpp --pp "$pp" --llama-devices "$dev" \
    --llama-bin "$LBIN" --llama-gguf "$GGUF" --llama-port "$PORT" \
    --suite quality --quality-tasks aime2025,gsm8k --quality-limit 30 \
    --max-batch 1 --max-seq-len 4096 --out-dir "$out" > "/tmp/fql-${pp}.log" 2>&1
  # Capture the runner's status immediately, before anything else can reset $?.
  rc=$?

  results=("$out"/comparison-*.json)
  echo "=== END llama pp=$pp rc=$rc $(date +%H:%M:%S) ${#results[@]} json ===" >> "$PROG"
done

kill_llama_server
echo "LLAMA47_DONE" >> "$PROG"
|
||||
Reference in New Issue
Block a user