Files
Gahow Wang 67fcec7933 Unified-routing A+B ablation: decode-aware LMetric + v3 anti-hotspot
cache_aware_proxy: add lmetric_decode_weight (decode-load penalty in the
LMetric fallback score) and a v3 anti-hotspot recent-migration penalty
(effective_load = num_req + recent-migration count over a sliding window),
preventing back-to-back migration clustering. UNIFIED_ABLATION.md documents
the A (overload_factor=1.3) + B' (decode-weight, max(num_req,1)) + RaceFix
sweep: A+B'+RaceFix reaches TTFT p90 7770ms, beating v3 PD-sep migration by
~20%. Runners/analyzer for the b3 trace replay included.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
2026-05-29 11:52:44 +08:00

95 lines
3.2 KiB
Bash
Executable File
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/env bash
# B3 routing-policy reproducibility re-test.
#
# Re-runs the 5 routing policies from fig_b3_latency_bars.png on the same
# trace, in a single same-day session, to check whether the ordering
# (unified < load_only < sticky etc.) still holds today.
#
# Policies (in run order):
#   lmetric     plain — cache-aware P_tokens × BS
#   load_only   plain — pure min-num_requests
#   sticky      plain — hard session affinity
#   unified     plain — hybrid affinity + LMetric fallback
#   unified_v2  Mooncake kv_both + selective PD-sep (with DR-fix applied)
#
# unified_v2 is run with VLLM_MOONCAKE_DISABLE_DIRECT_READ_SYNC=1 so we
# get the "best Mooncake state" we have today (DR-fix on top of the
# already-fixed mainline after e3a1d70 etc.). The other 4 policies don't
# load any connector so the patch is irrelevant.
# Unset variables are errors and a pipeline fails if any stage fails.
# errexit is not enabled, so a failing command does not by itself stop
# the script.
set -uo pipefail

# Locations. PROJ_DIR, TRACE, OUTROOT and VLLM_ROOT honour a non-empty value
# from the environment; the remaining paths are always derived from PROJ_DIR.
: "${PROJ_DIR:=/home/admin/cpfs/wjh/agentic-kv}"
: "${TRACE:=$PROJ_DIR/traces/w600_r0.0015_st30.jsonl}"
# Minute-resolution timestamp used in the default output directory name.
DATE="$(date +%Y%m%d_%H%M)"
: "${OUTROOT:=$PROJ_DIR/outputs/b3_replay_${DATE}}"
: "${VLLM_ROOT:=$PROJ_DIR/.venv/lib/python3.12/site-packages/vllm}"
PYTHON="$PROJ_DIR/.venv/bin/python"
DR_FIX_SCRIPT="$PROJ_DIR/microbench/connector_tax/cache_sweep/apply_direct_read_fix.py"

mkdir -p "$OUTROOT"

# Banner: what is replayed, where results go, and the policy order.
printf '%s\n' \
  "=== B3 5-policy re-test ===" \
  "Trace : $TRACE" \
  "Out : $OUTROOT" \
  "Order : lmetric → load_only → sticky → unified → unified_v2 (DR-fix on)" \
  ""
#######################################
# Best-effort teardown: SIGKILL every process whose full command line
# matches one of the serving-stack patterns, pause, then ask the DR-fix
# script to revert its patch. Every step ignores failure.
# Globals:   PYTHON, DR_FIX_SCRIPT, VLLM_ROOT (read)
# Arguments: none
# Returns:   always 0
#######################################
cleanup_all() {
  local pattern
  for pattern in cache_aware_proxy "vllm serve" "EngineCore"; do
    # pkill exits non-zero when nothing matched; that is not an error here.
    pkill -9 -f "$pattern" 2>/dev/null || true
  done
  sleep 5
  # stderr is discarded and the exit status is ignored for the revert.
  "$PYTHON" "$DR_FIX_SCRIPT" --revert --vllm-root "$VLLM_ROOT" 2>/dev/null || true
}
# Tear everything down (and revert the patch) whenever the script exits.
trap cleanup_all EXIT
# Clear leftovers from any earlier session before starting.
cleanup_all
# Apply DR-fix once — it's env-gated so only unified_v2 (with env=1) sees it
echo "[stage 0] applying CT_DR_FIX (env-gated, only activates when VLLM_MOONCAKE_DISABLE_DIRECT_READ_SYNC=1)"
# Fail fast if the patch cannot be applied: otherwise unified_v2 would be
# measured without the fix while still being reported as "DR-fix on".
# The EXIT trap still runs cleanup_all on this path.
if ! "$PYTHON" "$DR_FIX_SCRIPT" --apply --vllm-root "$VLLM_ROOT"; then
  echo "[FATAL] failed to apply CT_DR_FIX via $DR_FIX_SCRIPT; aborting before any policy run" >&2
  exit 1
fi
#######################################
# Run one routing policy through scripts/b3_isolated_policy.sh and then
# kill any serving processes it may have left behind.
# Globals:
#   OUTROOT, PROJ_DIR, TRACE (read)
#   VLLM_MOONCAKE_DISABLE_DIRECT_READ_SYNC (exported as 1, or unset)
# Arguments:
#   $1 - policy name; also the name of the per-policy output directory
#   $2 - "1" to export VLLM_MOONCAKE_DISABLE_DIRECT_READ_SYNC=1 for the run,
#        anything else to unset it
# Outputs:
#   Full runner output goes to $OUTROOT/<policy>/orchestrator.log; only its
#   last 30 lines are echoed to stdout, plus a [FAIL] line on non-zero exit.
# Returns:
#   Always 0, so one failing policy does not stop the remaining ones.
#######################################
run_policy() {
  local policy="$1"
  local skip_dr="$2"
  local rundir="$OUTROOT/$policy"
  # Kept local so the status of one run never leaks into the caller's scope.
  local rc
  local pattern

  mkdir -p "$rundir"
  echo ""
  echo "====== $policy ; DR_SYNC_DISABLED=$skip_dr ======"

  # The variable is inherited by the child runner, so it is set or cleared
  # explicitly for every policy.
  if [[ "$skip_dr" == "1" ]]; then
    export VLLM_MOONCAKE_DISABLE_DIRECT_READ_SYNC=1
  else
    unset VLLM_MOONCAKE_DISABLE_DIRECT_READ_SYNC
  fi

  bash "$PROJ_DIR/scripts/b3_isolated_policy.sh" "$policy" "$TRACE" "$rundir" \
    2>&1 | tee "$rundir/orchestrator.log" | tail -30
  # PIPESTATUS[0] is the runner's own status, not tee's or tail's; it must be
  # read immediately after the pipeline.
  rc="${PIPESTATUS[0]}"
  if [[ "$rc" != "0" ]]; then
    echo "[FAIL] policy $policy rc=$rc"
  fi

  # Belt-and-braces cleanup between policies
  for pattern in cache_aware_proxy "vllm serve" "EngineCore"; do
    pkill -9 -f "$pattern" 2>/dev/null || true
  done
  sleep 10
  return 0
}
# Policies in run order, written as "<policy>:<DR flag>". Only unified_v2
# (Mooncake kv_both) is run with the DR-fix activated.
policy_plan=(
  "lmetric:0"
  "load_only:0"
  "sticky:0"
  "unified:0"
  "unified_v2:1"
)

for entry in "${policy_plan[@]}"; do
  # Split at the colon: policy name before it, DR flag after it.
  run_policy "${entry%%:*}" "${entry##*:}"
done

printf '\n'
printf '%s\n' "[stage Z] reverting CT_DR_FIX"
"$PYTHON" "$DR_FIX_SCRIPT" --revert --vllm-root "$VLLM_ROOT"

printf '\n'
printf '%s\n' "Done. Artifacts: $OUTROOT"
# List where each policy's metrics file is expected to be.
for entry in "${policy_plan[@]}"; do
  printf ' %s: %s/%s/metrics.jsonl\n' "${entry%%:*}" "$OUTROOT" "${entry%%:*}"
done