cache_aware_proxy: add lmetric_decode_weight (decode-load penalty in the LMetric fallback score) and a v3 anti-hotspot recent-migration penalty (effective_load = num_req + recent-migration count over a sliding window), preventing back-to-back migration clustering. UNIFIED_ABLATION.md documents the A (overload_factor=1.3) + B' (decode-weight, max(num_req,1)) + RaceFix sweep: A+B'+RaceFix reaches TTFT p90 7770ms, beating v3 PD-sep migration by ~20%. Runners/analyzer for the b3 trace replay included. Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
95 lines
3.2 KiB
Bash
Executable File
95 lines
3.2 KiB
Bash
Executable File
#!/usr/bin/env bash
# B3 routing-policy reproducibility re-test.
#
# Re-runs the 5 routing policies from fig_b3_latency_bars.png on the same
# trace, in a single same-day session, to check whether the ordering
# (unified < load_only < sticky etc.) still holds today.
#
# Policies (in run order):
#   lmetric     plain — cache-aware P_tokens × BS
#   load_only   plain — pure min-num_requests
#   sticky      plain — hard session affinity
#   unified     plain — hybrid affinity + LMetric fallback
#   unified_v2  Mooncake kv_both + selective PD-sep (with DR-fix applied)
#
# unified_v2 is run with VLLM_MOONCAKE_DISABLE_DIRECT_READ_SYNC=1 so we
# get the "best Mooncake state" we have today (DR-fix on top of the
# already-fixed mainline after e3a1d70 etc.). The other 4 policies don't
# load any connector so the patch is irrelevant.
#
# Environment overrides (each falls back to the default assigned below):
#   PROJ_DIR, TRACE, OUTROOT, VLLM_ROOT

# `-e` is intentionally not enabled: combined with pipefail it would abort the
# whole session at the first failing policy pipeline, before that pipeline's
# exit code could be read from PIPESTATUS and reported.
set -uo pipefail

PROJ_DIR="${PROJ_DIR:-/home/admin/cpfs/wjh/agentic-kv}"
TRACE="${TRACE:-$PROJ_DIR/traces/w600_r0.0015_st30.jsonl}"
# Minute-resolution timestamp; only used for the default OUTROOT name.
DATE="$(date +%Y%m%d_%H%M)"
OUTROOT="${OUTROOT:-$PROJ_DIR/outputs/b3_replay_${DATE}}"
PYTHON="$PROJ_DIR/.venv/bin/python"
DR_FIX_SCRIPT="$PROJ_DIR/microbench/connector_tax/cache_sweep/apply_direct_read_fix.py"
VLLM_ROOT="${VLLM_ROOT:-$PROJ_DIR/.venv/lib/python3.12/site-packages/vllm}"

mkdir -p "$OUTROOT"
echo "=== B3 5-policy re-test ==="
echo "Trace : $TRACE"
echo "Out : $OUTROOT"
echo "Order : lmetric → load_only → sticky → unified → unified_v2 (DR-fix on)"
echo ""

#######################################
# Kill every process a policy run may have left behind, then revert the
# DR-fix patch from the vLLM tree. Every step is best-effort: failures are
# silenced so the function can run both before anything was started and from
# the EXIT trap.
# Globals:
#   PYTHON, DR_FIX_SCRIPT, VLLM_ROOT (read)
# Arguments:
#   None
# Returns:
#   Always 0.
#######################################
cleanup_all() {
  local pattern
  # pkill exits non-zero when nothing matched; that is not an error here.
  for pattern in cache_aware_proxy "vllm serve" "EngineCore"; do
    pkill -9 -f "$pattern" 2>/dev/null || true
  done
  # NOTE(review): presumably gives the killed processes time to go away
  # before the patch is reverted — confirm the required settle time.
  sleep 5
  "$PYTHON" "$DR_FIX_SCRIPT" --revert --vllm-root "$VLLM_ROOT" 2>/dev/null || true
}

# Run cleanup_all whenever the script exits, and once up front so the session
# starts without leftover processes or a previously applied patch.
trap cleanup_all EXIT
cleanup_all

# Apply DR-fix once — it's env-gated so only unified_v2 (with env=1) sees it
echo "[stage 0] applying CT_DR_FIX (env-gated, only activates when VLLM_MOONCAKE_DISABLE_DIRECT_READ_SYNC=1)"
# Stop here if the patch did not go in: otherwise unified_v2 would be measured
# against an unpatched vLLM while being reported as "DR-fix on".
if ! "$PYTHON" "$DR_FIX_SCRIPT" --apply --vllm-root "$VLLM_ROOT"; then
  echo "[FATAL] CT_DR_FIX could not be applied to $VLLM_ROOT; aborting" >&2
  exit 1
fi

#######################################
# Run one routing policy against the trace, then kill leftover processes.
# Globals:
#   OUTROOT, PROJ_DIR, TRACE (read)
#   VLLM_MOONCAKE_DISABLE_DIRECT_READ_SYNC (exported or unset; the chosen
#     state persists after the function returns)
# Arguments:
#   $1 - policy name; also the per-policy directory name under OUTROOT
#   $2 - "1" exports VLLM_MOONCAKE_DISABLE_DIRECT_READ_SYNC=1 for the run,
#        any other value unsets it (defaults to "0" when omitted)
# Outputs:
#   The complete run output is written to $OUTROOT/<policy>/orchestrator.log;
#   only its last 30 lines are echoed to stdout. A "[FAIL]" line is printed
#   when the policy script exits non-zero.
# Returns:
#   Always 0; a failed run is reported only through the "[FAIL]" line.
#######################################
run_policy() {
  local policy="$1"
  local skip_dr="${2:-0}"
  local rundir="$OUTROOT/$policy"
  local rc pattern

  mkdir -p "$rundir"

  echo ""
  echo "====== $policy ; DR_SYNC_DISABLED=$skip_dr ======"

  if [[ "$skip_dr" == "1" ]]; then
    export VLLM_MOONCAKE_DISABLE_DIRECT_READ_SYNC=1
  else
    unset VLLM_MOONCAKE_DISABLE_DIRECT_READ_SYNC
  fi

  bash "$PROJ_DIR/scripts/b3_isolated_policy.sh" "$policy" "$TRACE" "$rundir" 2>&1 \
    | tee "$rundir/orchestrator.log" \
    | tail -n 30
  # Read immediately: any other command would overwrite PIPESTATUS. Index 0 is
  # the policy script itself, not tee or tail.
  rc="${PIPESTATUS[0]}"
  if [[ "$rc" != "0" ]]; then
    echo "[FAIL] policy $policy rc=$rc"
  fi

  # Belt-and-braces cleanup between policies
  for pattern in cache_aware_proxy "vllm serve" "EngineCore"; do
    pkill -9 -f "$pattern" 2>/dev/null || true
  done
  sleep 10
  return 0
}

# Run order matches the banner printed at start-up. The second argument is the
# DR-sync flag handed to run_policy; only unified_v2 turns it on.
run_policy "lmetric" "0"
run_policy "load_only" "0"
run_policy "sticky" "0"
run_policy "unified" "0"
run_policy "unified_v2" "1" # uses Mooncake kv_both; activate DR-fix

echo ""
echo "[stage Z] reverting CT_DR_FIX"
"$PYTHON" "$DR_FIX_SCRIPT" --revert --vllm-root "$VLLM_ROOT"

echo ""
echo "Done. Artifacts: $OUTROOT"
for p in lmetric load_only sticky unified unified_v2; do
  # run_policy never fails the script, so flag policies whose metrics file is
  # absent instead of listing a path that does not exist.
  if [[ -f "$OUTROOT/$p/metrics.jsonl" ]]; then
    echo " $p: $OUTROOT/$p/metrics.jsonl"
  else
    echo " $p: $OUTROOT/$p/metrics.jsonl (missing)"
  fi
done