- Regenerate uv.lock after adding fastapi/uvicorn deps so uv sync --locked no longer fails - B3 scripts: default MODEL to $HOME/models/... matching documented convention and other launch scripts (repo has no models/ directory) - launch_elastic_p2p: append || true to each trap command so set -e doesn't abort cleanup when jobs -p is empty and EngineCore orphans remain
207 lines
7.4 KiB
Bash
Executable File
207 lines
7.4 KiB
Bash
Executable File
#!/usr/bin/env bash
# B3 routing sweep: 5 policies on 8x TP1 instances with full instrumentation.
#
# Policies:
#   lmetric   — cache-aware P_tokens × BS routing (main baseline)
#   load_only — pure min-num_requests (B3 control: no cache)
#   sticky    — hard session affinity (B3 control: perfect locality)
#   unified   — hybrid affinity + LMetric fallback
#   capped    — lmetric on a per-session turn-capped trace
#
# Each policy run produces metrics.jsonl + breakdown.json + worker_state.json
# + stats.json + run_window.json (start/end unix timestamps so the analyzer
# can slice the shared engine_*.jsonl by time).

# Strict mode: abort on an unhandled command failure (-e), on use of an unset
# variable (-u), and when any stage of a pipeline fails (pipefail).
set -euo pipefail
|
||
|
||
# ---------------------------------------------------------------------------
# Configuration. Every value except VENV and N_INSTANCES can be overridden
# from the environment.
# ---------------------------------------------------------------------------

# Repository root; defaults to the parent of the directory holding this script.
ROOT="${ROOT:-$(cd "$(dirname "$0")/.." && pwd)}"
# Directory holding the virtualenv executables used below (vllm, python).
VENV="$ROOT/.venv/bin"
# Model handed to `vllm serve` and to the replayer's --model flag.
MODEL="${MODEL:-$HOME/models/Qwen/Qwen3-Coder-30B-A3B-Instruct}"
# Trace replayed for each policy; also the input of the capped trace.
TRACE="${TRACE:-$ROOT/traces/w600_r0.0015_st30.jsonl}"
# Output directory of this sweep; timestamped unless overridden.
OUTDIR="${OUTDIR:-$ROOT/outputs/b3_sweep_$(date +%Y%m%d_%H%M%S)}"
# Port the routing proxy listens on.
PROXY_PORT="${PROXY_PORT:-9300}"
# vLLM instance i listens on BASE_PORT + i.
BASE_PORT="${BASE_PORT:-8000}"
# Space-separated list of GPU indices to use, one vLLM instance per index.
# Override via GPU_INDICES="1 2 3 4 5 6 7" when GPU 0 holds ghost memory.
GPU_INDICES="${GPU_INDICES:-0 1 2 3 4 5 6 7}"
# Space-separated list of proxy policies to sweep, in order.
POLICIES="${POLICIES:-lmetric load_only sticky unified}"
# Value passed as --max-turns when building the capped trace.
MAX_TURNS_CAP="${MAX_TURNS_CAP:-8}"
# Extra flags appended (word-split) to every `vllm serve` command line.
EXTRA_VLLM_ARGS="${EXTRA_VLLM_ARGS:---enable-prompt-tokens-details}"

# Derive N_INSTANCES from GPU_INDICES.
# The list is counted with the same unquoted word splitting the launch loop
# uses to iterate it, so the count always matches the number of instances
# actually started (and no `echo | wc` pipeline with padded output is needed).
N_INSTANCES=0
# shellcheck disable=SC2034,SC2086
for _gpu_index in $GPU_INDICES; do
  N_INSTANCES=$((N_INSTANCES + 1))
done
unset _gpu_index

# A whitespace-only GPU_INDICES would otherwise launch nothing and only fail
# much later, when the proxy is started without any upstream.
if [ "$N_INSTANCES" -eq 0 ]; then
  echo "[b3_sweep] FATAL: GPU_INDICES does not contain any GPU index" >&2
  exit 1
fi
|
||
|
||
# Create the output tree up front: engine_state/ receives the per-instance
# engine_<i>.jsonl step logs, logs/ receives the vLLM server logs.
mkdir -p \
  "$OUTDIR/engine_state" \
  "$OUTDIR/logs"
printf '[b3_sweep] OUTDIR=%s\n' "$OUTDIR"
|
||
|
||
# Force-kill every process the sweep may have started, then pause briefly.
# Takes no arguments and always returns 0: each kill is best effort.
cleanup() {
  local pattern
  # "EngineCore" is listed on its own because vLLM spawns an EngineCore child
  # whose process name is "VLLM::EngineCor" — matching "vllm serve" misses it
  # and leaves the GPU memory locked by a dead-but-tracked-by-driver context.
  for pattern in "vllm serve" "EngineCore" cache_aware_proxy; do
    # pkill exits non-zero when nothing matched; that must not fail the trap.
    pkill -9 -f "$pattern" 2>/dev/null || true
  done
  sleep 3
}
|
||
# Run cleanup whenever the script exits, including the `exit 1` failure paths.
trap cleanup EXIT
|
||
|
||
# 1) Launch one vLLM per GPU index in GPU_INDICES; each emits engine_<i>.jsonl
#
# Globals (read): N_INSTANCES, GPU_INDICES, BASE_PORT, OUTDIR, VENV, MODEL,
#                 EXTRA_VLLM_ARGS
# Side effects:   starts one detached `vllm serve` per GPU index. Instance i
#                 listens on BASE_PORT + i, gets MASTER_PORT 29500 + i, writes
#                 its step log to $OUTDIR/engine_state/engine_<i>.jsonl and its
#                 output to $OUTDIR/logs/vllm_inst_<i>_gpu<gpu>.log.
# Exits the script with status 1 (diagnostics and the tail of the instance log
# on stderr) when an instance exits before answering /health, or is still not
# healthy after 90 polls spaced 2 seconds apart.
launch_vllm() {
  echo "[b3_sweep] launching $N_INSTANCES vLLM instances on GPUs $GPU_INDICES ..."
  local -a pids=() logs=()
  local gpu port master log tries launched
  local i=0
  # GPU_INDICES is a space-separated list; splitting it is intended.
  # shellcheck disable=SC2086
  for gpu in $GPU_INDICES; do
    port=$((BASE_PORT + i))
    master=$((29500 + i))
    log="$OUTDIR/logs/vllm_inst_${i}_gpu${gpu}.log"
    # EXTRA_VLLM_ARGS may hold several flags, so it stays unquoted.
    # shellcheck disable=SC2086
    AGENTIC_STEP_LOG_PATH="$OUTDIR/engine_state/engine_${i}.jsonl" \
    AGENTIC_WORKER_ID="engine_${i}" \
    CUDA_VISIBLE_DEVICES=$gpu \
    MASTER_PORT=$master \
      nohup "$VENV/vllm" serve "$MODEL" \
      --host 0.0.0.0 --port "$port" \
      --tensor-parallel-size 1 \
      --trust-remote-code --enable-prefix-caching \
      --dtype auto --gpu-memory-utilization 0.9 \
      --max-model-len 200000 \
      $EXTRA_VLLM_ARGS \
      > "$log" 2>&1 &
    # Remember PID and log so the health check can spot an early exit and
    # show the matching log.
    pids+=("$!")
    logs+=("$log")
    disown
    sleep 2
    i=$((i + 1))
  done
  launched=$i

  echo "[b3_sweep] waiting for vLLM health ..."
  i=0
  while [ "$i" -lt "$launched" ]; do
    port=$((BASE_PORT + i))
    tries=0
    while ! curl -sf "http://127.0.0.1:$port/health" >/dev/null 2>&1; do
      # A server that already exited will never become healthy: fail now
      # instead of polling for the whole timeout.
      if ! kill -0 "${pids[i]}" 2>/dev/null; then
        echo "[b3_sweep] FATAL: inst_$i (port $port) exited before becoming healthy" >&2
        tail -30 "${logs[i]}" >&2 || true
        exit 1
      fi
      tries=$((tries + 1))
      if [ "$tries" -gt 90 ]; then
        echo "[b3_sweep] FATAL: inst_$i (port $port) not healthy after 180s" >&2
        tail -30 "${logs[i]}" >&2 || true
        exit 1
      fi
      sleep 2
    done
    echo " inst_$i ready"
    i=$((i + 1))
  done
}
|
||
|
||
# Start the routing proxy in front of all vLLM instances and wait for it.
#
# Arguments:
#   $1 - value passed to the proxy's --policy flag
#   $2 - file receiving the proxy's stdout and stderr
# Globals (read): N_INSTANCES, BASE_PORT, PROXY_PORT, VENV, ROOT
# Exits the script with status 1 (diagnostics and the tail of the proxy log on
# stderr) when the proxy exits before answering /stats, or still does not
# answer after 30 polls spaced 2 seconds apart.
launch_proxy() {
  local policy="$1"
  local logfile="$2"
  # One upstream URL per vLLM instance, kept as an array so that each URL is
  # passed as exactly one argument.
  local -a upstreams=()
  local i
  for ((i = 0; i < N_INSTANCES; i++)); do
    upstreams+=("http://127.0.0.1:$((BASE_PORT + i))")
  done

  # The ${arr[@]+...} form keeps an empty list from tripping `set -u` on
  # older bash versions.
  nohup "$VENV/python" "$ROOT/scripts/cache_aware_proxy.py" \
    --port "$PROXY_PORT" \
    --combined ${upstreams[@]+"${upstreams[@]}"} \
    --policy "$policy" \
    > "$logfile" 2>&1 &
  local pid=$!
  disown

  local tries=0
  until curl -sf "http://127.0.0.1:$PROXY_PORT/stats" >/dev/null 2>&1; do
    # A proxy that already exited (e.g. rejected arguments) will never
    # answer: report it right away instead of polling for the full minute.
    if ! kill -0 "$pid" 2>/dev/null; then
      echo "[b3_sweep] FATAL: proxy exited before answering /stats" >&2
      tail -30 "$logfile" >&2 || true
      exit 1
    fi
    tries=$((tries + 1))
    if [ "$tries" -gt 30 ]; then
      echo "[b3_sweep] FATAL: proxy did not come up in 60s" >&2
      tail -30 "$logfile" >&2 || true
      exit 1
    fi
    sleep 2
  done
}
|
||
|
||
# Replay one trace through a freshly started proxy and collect its artifacts.
#
# Arguments:
#   $1 - policy name; used both as the proxy's --policy value and as the name
#        of the run directory under OUTDIR
#   $2 - path of the trace to replay
# Globals (read): OUTDIR, ROOT, VENV, MODEL, PROXY_PORT
# Outputs (in $OUTDIR/<policy>/): proxy.log, replayer.log, metrics.jsonl,
#   run_window.json, breakdown.json, worker_state.json, stats.json.
#   run_window.json.partial exists only while the replay is in flight.
run_policy() {
  local policy="$1"
  local trace="$2"
  local rundir="$OUTDIR/$policy"
  mkdir -p "$rundir"
  echo "[b3_sweep] === policy=$policy trace=$(basename "$trace") ==="

  # Kill any proxy left over from a previous run before starting a new one
  # (best effort: pkill fails when there is nothing to kill).
  pkill -9 -f cache_aware_proxy 2>/dev/null || true
  sleep 2
  launch_proxy "$policy" "$rundir/proxy.log"

  # Record the start of the run right away; the marker is replaced by the
  # final run_window.json once the end time is known.
  local t_start
  t_start=$(date +%s.%N)
  echo "{\"policy\": \"$policy\", \"trace\": \"$trace\", \"t_start_unix\": $t_start}" \
    > "$rundir/run_window.json.partial"

  # Full replayer output goes to replayer.log; only its last 3 lines are shown.
  PYTHONPATH="$ROOT" "$VENV/python" -m replayer \
    --trace "$trace" \
    --output "$rundir/metrics.jsonl" \
    --endpoint "http://127.0.0.1:$PROXY_PORT" \
    --model "$MODEL" \
    2>&1 | tee "$rundir/replayer.log" | tail -3

  local t_end
  t_end=$(date +%s.%N)
  # Use the venv interpreter, like every other Python call in this script, so
  # the sweep does not depend on a system-wide python3 being on PATH.
  "$VENV/python" - "$rundir" "$policy" "$trace" "$t_start" "$t_end" <<'PY'
import json, sys
rundir, policy, trace, t_start, t_end = sys.argv[1:]
with open(f"{rundir}/run_window.json", "w") as f:
    json.dump({
        "policy": policy, "trace": trace,
        "t_start_unix": float(t_start),
        "t_end_unix": float(t_end),
    }, f, indent=2)
PY
  rm -f "$rundir/run_window.json.partial"

  # Snapshot the proxy's view of the run. -S keeps curl quiet on success but
  # prints the reason when a fetch fails and `set -e` aborts the sweep.
  local endpoint
  for endpoint in breakdown worker_state stats; do
    curl -sS "http://127.0.0.1:$PROXY_PORT/$endpoint" > "$rundir/$endpoint.json"
  done
  echo "[b3_sweep] $policy done: $(wc -l < "$rundir/metrics.jsonl") metric rows"
}
|
||
|
||
# 2) Run each policy
# The vLLM instances are started once; every policy is then replayed against
# them through its own proxy.
launch_vllm

# POLICIES is a space-separated list, so the expansion stays unquoted.
# shellcheck disable=SC2086
for policy in $POLICIES
do
  run_policy "$policy" "$TRACE"
done
|
||
|
||
# 3) Capped variant: lmetric picker on a per-session turn-capped trace.
# The directory label is "capped" but the proxy must launch with
# --policy lmetric (the proxy's argparse has no "capped" choice).
echo "[b3_sweep] building capped trace (max_turns=$MAX_TURNS_CAP) ..."
CAPPED_TRACE="$OUTDIR/capped/trace.jsonl"
mkdir -p "$OUTDIR/capped"
"$VENV/python" "$ROOT/scripts/build_capped_trace.py" \
  --input "$TRACE" \
  --output "$CAPPED_TRACE" \
  --max-turns "$MAX_TURNS_CAP" | tee "$OUTDIR/capped/build.log"

# Inline equivalent of run_policy "capped" but using --policy lmetric.
echo "[b3_sweep] === policy=capped (picker=lmetric) trace=$(basename "$CAPPED_TRACE") ==="
# Best effort: pkill fails when no proxy is left from the previous run.
pkill -9 -f cache_aware_proxy 2>/dev/null || true
sleep 2
launch_proxy lmetric "$OUTDIR/capped/proxy.log"

t_cap_start=$(date +%s.%N)
# Full replayer output goes to replayer.log; only its last 3 lines are shown.
PYTHONPATH="$ROOT" "$VENV/python" -m replayer \
  --trace "$CAPPED_TRACE" \
  --output "$OUTDIR/capped/metrics.jsonl" \
  --endpoint "http://127.0.0.1:$PROXY_PORT" \
  --model "$MODEL" \
  2>&1 | tee "$OUTDIR/capped/replayer.log" | tail -3
t_cap_end=$(date +%s.%N)

# Use the venv interpreter, like every other Python call in this script, so
# the sweep does not depend on a system-wide python3 being on PATH.
"$VENV/python" - "$OUTDIR/capped" capped "$CAPPED_TRACE" "$t_cap_start" "$t_cap_end" <<'PY'
import json, sys
rundir, policy, trace, t_start, t_end = sys.argv[1:]
with open(f"{rundir}/run_window.json", "w") as f:
    json.dump({
        "policy": policy, "trace": trace,
        "t_start_unix": float(t_start),
        "t_end_unix": float(t_end),
    }, f, indent=2)
PY

# Snapshot the proxy's view of the run. -S keeps curl quiet on success but
# prints the reason when a fetch fails and `set -e` aborts the sweep.
for capped_endpoint in breakdown worker_state stats; do
  curl -sS "http://127.0.0.1:$PROXY_PORT/$capped_endpoint" \
    > "$OUTDIR/capped/$capped_endpoint.json"
done
unset capped_endpoint
echo "[b3_sweep] capped done: $(wc -l < "$OUTDIR/capped/metrics.jsonl") metric rows"
|
||
|
||
# 4) Snapshot final engine state file sizes for the analyzer
# The long listing of engine_state/ is stored next to the per-policy results.
ls -l "$OUTDIR/engine_state/" > "$OUTDIR/engine_state_files.txt"

printf '[b3_sweep] sweep complete. OUTDIR=%s\n' "$OUTDIR"
|