Standalone smoke tests validating KV-migration correctness paths before trace replay: full migrate-cache, partial-prefill transfer, and a NIXL-connector variant, each with a runner. Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
75 lines
2.4 KiB
Bash
75 lines
2.4 KiB
Bash
#!/usr/bin/env bash
# Single vLLM warmup, multiple smoke-test iterations under varying load.
#
# Each iteration uses a distinct --prefix-base to avoid prefix-cache pollution
# from prior iterations. We sweep noise levels 0, 8, 32, 64, 128 to see at
# which point the migration cache becomes invisible to the follow-up.
#
# Environment overrides (each falls back to the default assigned below):
#   PROJ_DIR  - project root; the venv binaries and the smoke-test script
#               are resolved relative to it
#   MODEL     - model path passed to `vllm serve`
#   LOGS_DIR  - directory that receives the server and per-iteration logs

# -u: unset variables are errors; pipefail: a pipeline reports the failure of
# any stage.
# NOTE(review): -e is not enabled, so a failing command does not abort the
# script by itself — presumably deliberate; confirm before adding it.
set -uo pipefail

PROJ_DIR="${PROJ_DIR:-/home/admin/cpfs/wjh/agentic-kv}"
MODEL="${MODEL:-/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct}"
VENV="$PROJ_DIR/.venv/bin"
# Default log directory is timestamped, so repeated runs do not overwrite
# each other.
LOGS_DIR="${LOGS_DIR:-$PROJ_DIR/outputs/smoke_sweep_$(date +%Y%m%d_%H%M%S)}"

# Every later step redirects output into LOGS_DIR, so stop right away if it
# cannot be created.
mkdir -p "$LOGS_DIR" || {
  echo "[sweep] FATAL: cannot create log directory $LOGS_DIR" >&2
  exit 1
}

#######################################
# Stop every process whose command line matches "vllm serve" or "EngineCore".
#
# Sends SIGTERM first, waits up to grace_secs seconds for the matching
# processes to disappear, then sends SIGKILL to whatever is left. Matching is
# done on the full command line (pkill -f), so it is not limited to processes
# started by this script.
#
# Arguments: none
# Outputs:   one progress line on stdout
# Returns:   status of the final sleep; pkill/pgrep failures (for example
#            "nothing matched") are deliberately ignored
#######################################
cleanup() {
  local -r grace_secs=5
  local pattern
  local waited=0

  echo "[sweep] cleaning up vLLM..."

  # Polite request first, so the processes get a chance to exit on their own.
  for pattern in "vllm serve" "EngineCore"; do
    pkill -TERM -f "$pattern" 2>/dev/null || true
  done

  # Bounded wait: stop polling as soon as nothing matches any more.
  while (( waited < grace_secs )) \
    && { pgrep -f "vllm serve" >/dev/null 2>&1 \
      || pgrep -f "EngineCore" >/dev/null 2>&1; }; do
    sleep 1
    waited=$((waited + 1))
  done

  # Force-kill anything that ignored SIGTERM; a no-op when nothing is left.
  for pattern in "vllm serve" "EngineCore"; do
    pkill -KILL -f "$pattern" 2>/dev/null || true
  done

  # Short settle delay kept from the original implementation.
  sleep 2
}

# Register the teardown hook before anything is started, then run it once up
# front so the launch below begins after a cleanup pass.
trap cleanup EXIT
cleanup

# Start two background `vllm serve` processes, one per GPU index. Instance N
# gets HTTP port 8000+N, Mooncake bootstrap port 8998+N and MASTER_PORT
# 29500+N; its output goes to its own log file under LOGS_DIR.
echo "[sweep] starting 2 vLLM kv_both on GPU 0,1"
for ((gpu = 0; gpu < 2; gpu++)); do
  serve_args=(
    --host 0.0.0.0 --port "$((8000 + gpu))"
    --tensor-parallel-size 1
    --trust-remote-code --enable-prefix-caching
    --dtype auto --gpu-memory-utilization 0.9
    --max-model-len 200000
    --kv-transfer-config '{"kv_connector":"MooncakeConnector","kv_role":"kv_both"}'
    --enable-prompt-tokens-details
  )

  # NOTE(review): PYTHONHASHSEED is pinned to the same value for both
  # instances — presumably so they hash prefixes identically; confirm.
  PYTHONHASHSEED=42 \
  VLLM_MOONCAKE_BOOTSTRAP_PORT="$((8998 + gpu))" \
  CUDA_VISIBLE_DEVICES="$gpu" \
  MASTER_PORT="$((29500 + gpu))" \
  nohup "$VENV/vllm" serve "$MODEL" "${serve_args[@]}" \
    > "$LOGS_DIR/vllm_inst_${gpu}_gpu${gpu}.log" 2>&1 &

  # Drop the job from the shell's job table, then stagger the next launch.
  disown
  sleep 2
done

# Block until both servers answer GET /health successfully. Each port is
# probed repeatedly with a 2 s pause between probes; the script exits with
# status 1 once a port has failed more than 180 probes.
echo "[sweep] waiting for health ..."
for port in 8000 8001; do
  tries=0
  # --max-time bounds every probe: without it, a server that accepts the
  # connection but never responds would block curl indefinitely and the
  # retry limit below would never be reached.
  while ! curl -sf --max-time 5 "http://127.0.0.1:$port/health" >/dev/null 2>&1; do
    tries=$((tries + 1))
    if (( tries > 180 )); then
      echo "[sweep] FATAL: $port not ready"
      exit 1
    fi
    sleep 2
  done
  echo " port=$port ready"
done

# Run the migrate-cache smoke test once per noise level against the two
# servers (source on port 8000, destination on port 8001).
#
# --prefix-base starts at 100 and advances by 100000 per iteration so that no
# two iterations share a prefix base. The full output of each iteration is
# written to its own log file; only the last 25 lines are echoed here.
#
# A failing iteration does not stop the sweep, but it is reported and makes
# the script exit with status 1 after all iterations have run.
base=100
failed_levels=""
for noise in 0 8 32 64 128; do
  echo ""
  echo "============================================"
  echo "[sweep] iteration noise=$noise prefix_base=$base"
  echo "============================================"
  "$VENV/python" "$PROJ_DIR/microbench/connector_tax/cache_sweep/smoke_test_migrate_cache.py" \
    --src-port 8000 --dst-port 8001 \
    --src-bp 8998 --dst-bp 8999 \
    --noise-reqs "$noise" \
    --prefix-base "$base" \
    2>&1 | tee "$LOGS_DIR/iter_noise${noise}.log" | tail -25
  # PIPESTATUS must be read immediately after the pipeline; index 0 is the
  # smoke test itself rather than tee/tail.
  rc=${PIPESTATUS[0]}
  if (( rc != 0 )); then
    echo "[sweep] WARNING: noise=$noise smoke test exited with status $rc" >&2
    failed_levels="$failed_levels $noise"
  fi
  base=$((base + 100000))
done

echo ""
echo "[sweep] all iterations done; logs in $LOGS_DIR"
if [[ -n "$failed_levels" ]]; then
  echo "[sweep] FAILED noise levels:$failed_levels" >&2
  exit 1
fi