Files
agentic-kvc/microbench/connector_tax/cache_sweep/run_smoke_partial.sh
Gahow Wang 41a0c1c48f Migration correctness smoke tests: direct-read, partial-transfer, NIXL
Standalone smoke tests validating KV-migration correctness paths before
trace replay: full migrate-cache, partial-prefill transfer, and a
NIXL-connector variant, each with a runner.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
2026-05-29 11:53:13 +08:00

69 lines
2.2 KiB
Bash

#!/usr/bin/env bash
# Smoke test for Mechanism B (partial KV transfer):
# Start 3 vLLM kv_both Mooncake instances on GPU 0,1,2:
# - inst_0 = src (port 8000, bp 8998)
# - inst_1 = dst_warm (port 8001, bp 8999) — will be pre-warmed
# - inst_2 = dst_cold (port 8002, bp 9000) — control, no cache
#
# Then run smoke_partial_transfer.py which migrates the same prompt
# to both warm and cold dst, comparing transfer cost.
# Shell options: referencing an unset variable is an error, and a pipeline
# reports failure if any of its stages fails. errexit (-e) is NOT enabled, so
# a failing command does not abort the script by itself.
set -o nounset -o pipefail

# Every path below can be overridden from the environment; the value after
# ":=" is only assigned when the variable is unset or empty.
: "${PROJ_DIR:=/home/admin/cpfs/wjh/agentic-kv}"
: "${MODEL:=/home/admin/cpfs/wjh/models/Qwen/Qwen3-Coder-30B-A3B-Instruct}"

# Directory holding the project's virtualenv executables (vllm, python).
VENV="${PROJ_DIR}/.venv/bin"

# Default log directory is timestamped so each run gets its own folder.
: "${LOGS_DIR:=${PROJ_DIR}/outputs/smoke_partial_$(date +%Y%m%d_%H%M%S)}"
mkdir -p "$LOGS_DIR"
#######################################
# Stop all vLLM server and engine processes.
#
# Processes are selected by command-line pattern ("vllm serve" and
# "EngineCore"), so this hits every matching process pkill can signal, not
# only the ones this script started.
#
# Shutdown is two-phase: SIGTERM first so the servers get a chance to exit
# cleanly, then SIGKILL for anything still alive after a short grace period.
#
# Outputs:  one progress line on stdout.
# Returns:  0 (failures of pkill, e.g. "no process matched", are ignored).
#######################################
cleanup() {
  local pattern
  local waited=0

  echo "[smoke-partial] cleaning up vLLM..."

  # Phase 1: polite request to terminate.
  for pattern in "vllm serve" "EngineCore"; do
    pkill -TERM -f "$pattern" 2>/dev/null || true
  done

  # Grace period: poll once per second, at most 5 times, and stop waiting as
  # soon as no matching process is left.
  while ((waited < 5)); do
    if ! pgrep -f "vllm serve" >/dev/null 2>&1 &&
      ! pgrep -f "EngineCore" >/dev/null 2>&1; then
      break
    fi
    sleep 1
    waited=$((waited + 1))
  done

  # Phase 2: force-kill whatever ignored SIGTERM. Best effort by design.
  for pattern in "vllm serve" "EngineCore"; do
    pkill -9 -f "$pattern" 2>/dev/null || true
  done

  # Short settle time after the kill before the caller continues.
  sleep 2
}
# Register the teardown to run whenever this script exits (normal end or an
# explicit `exit`), then run it once right away so that vLLM processes left
# over from an earlier run are gone before new instances are started.
trap 'cleanup' EXIT
cleanup
echo "[smoke-partial] starting 3 vLLM kv_both Mooncake on GPU 0,1,2"

# Launch one background vLLM server per GPU. Instance N uses GPU N and the
# ports 8000+N (HTTP), 8998+N (Mooncake bootstrap) and 29500+N (MASTER_PORT),
# so the three instances never share a port.
for ((inst = 0; inst <= 2; inst++)); do
  http_port=$((8000 + inst))
  bootstrap_port=$((8998 + inst))
  master_port=$((29500 + inst))
  log_file="$LOGS_DIR/vllm_inst_${inst}_gpu${inst}.log"

  # Server flags, identical for every instance except for --port.
  serve_args=(
    --host 0.0.0.0 --port "$http_port"
    --tensor-parallel-size 1
    --trust-remote-code --enable-prefix-caching
    --dtype auto --gpu-memory-utilization 0.9
    --max-model-len 200000
    --kv-transfer-config '{"kv_connector":"MooncakeConnector","kv_role":"kv_both"}'
    --enable-prompt-tokens-details
  )

  # The environment assignments apply to this one command only.
  # NOTE(review): PYTHONHASHSEED is pinned to the same value on all instances,
  # presumably so hash-derived cache keys agree across them -- confirm.
  PYTHONHASHSEED=42 \
    VLLM_MOONCAKE_BOOTSTRAP_PORT="$bootstrap_port" \
    CUDA_VISIBLE_DEVICES="$inst" \
    MASTER_PORT="$master_port" \
    nohup "$VENV/vllm" serve "$MODEL" "${serve_args[@]}" \
    >"$log_file" 2>&1 &

  # Drop the job from this shell's job table; the servers are stopped later
  # by pattern in cleanup, not via job control.
  disown

  # Stagger the launches by two seconds.
  sleep 2
done
echo "[smoke-partial] waiting for health ..."

# Poll each instance's /health endpoint until it answers with success.
# Budget per port: the initial probe plus 240 retries, 2 s apart. Exceeding it
# aborts the whole script with status 1.
for port in 8000 8001 8002; do
  tries=0
  # --max-time bounds every single probe; without it a server that accepts
  # the TCP connection but never replies would block this loop forever and
  # the retry budget would never be reached.
  until curl -sf --max-time 5 "http://127.0.0.1:$port/health" >/dev/null 2>&1; do
    tries=$((tries + 1))
    if ((tries > 240)); then
      # Diagnostics belong on stderr; point at the per-instance server logs.
      echo "FATAL: $port (no healthy /health response after $tries probes; see $LOGS_DIR)" >&2
      exit 1
    fi
    sleep 2
  done
  echo " port=$port ready"
done
echo "[smoke-partial] running smoke_partial_transfer.py"

# EXTRA_SMOKE_ARGS is an optional whitespace-separated list of extra
# arguments for the Python driver. Split it into an array with `read` rather
# than an unquoted expansion: the words are split the same way, but a word
# such as `*` is passed through literally and not expanded against the
# current directory. `read -d ''` returns non-zero at end of input even on
# success, hence the `|| true`.
extra_smoke_args=()
read -r -d '' -a extra_smoke_args <<<"${EXTRA_SMOKE_ARGS:-}" || true

# Run the driver, mirroring its combined stdout/stderr to the console and to
# a log file. The `${arr[@]+...}` form keeps an empty array from tripping
# `set -u` on older bash versions.
"$VENV/python" "$PROJ_DIR/microbench/connector_tax/cache_sweep/smoke_partial_transfer.py" \
  ${extra_smoke_args[@]+"${extra_smoke_args[@]}"} \
  2>&1 | tee "$LOGS_DIR/smoke_output.log"

# Report the Python driver's status (first pipeline stage), not tee's.
ec=${PIPESTATUS[0]}
echo "[smoke-partial] exit=$ec, logs at $LOGS_DIR"
exit "$ec"