Vendor llama.cpp as a submodule pinned to b9371 and add a one-click benchmark driver that compares xserv against it on identical workloads: - setup-llama-cpp.sh: network-optional CUDA build (SM120); convert-to-gguf.sh converts the same safetensors to BF16 GGUF for an apples-to-apples baseline. - tools/bench/: black-box OpenAI-API driver measuring TTFT/TPOT/throughput (single-stream + concurrent) and response quality on AIME 2025 + GSM8K. - fetch_datasets.py pulls datasets to local JSON (GPU host has no network); task loaders prefer the local JSON. - sync-and-build.sh: `bench` subcommand transfers source + datasets to the GPU host via tar-over-ssh (no rsync there), builds, and runs the suite. Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
41 lines
1.1 KiB
Python
41 lines
1.1 KiB
Python
"""Pre-fetch quality-benchmark datasets into local JSON.

Run this on a machine WITH network (e.g. your laptop). The resulting
tools/bench/data/*.json files are then shipped to the GPU host (which has no
network) by the bench sync step.

Usage:
    python3 -m tools.bench.fetch_datasets            # all tasks
    python3 -m tools.bench.fetch_datasets aime2025   # one task
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import os
|
|
import sys
|
|
|
|
# When run directly as a script there is no package context, so the absolute
# `tools.bench...` import that follows would not resolve. Prepend the directory
# reached by stripping three trailing components from this file's absolute
# path to sys.path.
if not __package__:
    sys.path.insert(
        0,
        os.path.abspath(os.path.join(__file__, os.pardir, os.pardir, os.pardir)),
    )
|
|
|
|
from tools.bench.tasks import aime, gsm8k, save_local
|
|
|
|
# Registry of fetchable tasks: each key is a task name accepted on the command
# line, each value is the zero-argument callable main() invokes to obtain that
# task's records. Insertion order is the order used when no task is named.
FETCHERS = dict(
    aime2025=aime.load_remote,
    gsm8k=gsm8k.load_remote,
)
|
|
|
|
|
|
def main() -> None:
    """Fetch the requested tasks and hand their records to ``save_local``.

    Task names are taken from ``sys.argv[1:]``. When no names are given, every
    key of ``FETCHERS`` is processed, in the registry's insertion order.

    All requested names are validated before any fetcher is called, so a typo
    in a later argument aborts immediately rather than after earlier tasks
    have already been fetched.

    Raises:
        SystemExit: If a requested name is not a key of ``FETCHERS``. The
            message names the first unknown task and lists the known ones.
    """
    wanted = sys.argv[1:] or list(FETCHERS)

    # Fail fast: reject unknown names up front, before any fetcher runs.
    for name in wanted:
        if name not in FETCHERS:
            raise SystemExit(f"unknown task: {name} (have: {', '.join(FETCHERS)})")

    for name in wanted:
        print(f"[fetch] {name} ...")
        records = FETCHERS[name]()
        path = save_local(name, records)
        print(f"[fetch] {name}: {len(records)} records -> {path}")


if __name__ == "__main__":
    main()
|