Files
Gahow Wang 67fcec7933 Unified-routing A+B ablation: decode-aware LMetric + v3 anti-hotspot
cache_aware_proxy: add lmetric_decode_weight (decode-load penalty in the
LMetric fallback score) and a v3 anti-hotspot recent-migration penalty
(effective_load = num_req + recent-migration count over a sliding window),
preventing back-to-back migration clustering. UNIFIED_ABLATION.md documents
the A (overload_factor=1.3) + B' (decode-weight, max(num_req,1)) + RaceFix
sweep: A+B'+RaceFix reaches TTFT p90 7770ms, beating v3 PD-sep migration by
~20%. Runners/analyzer for the b3 trace replay included.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
2026-05-29 11:52:44 +08:00

67 lines
2.2 KiB
Bash
Executable File

#!/usr/bin/env bash
# Trace replay for the new unified_v3 (offload-decode) policy.
#
# Runs the same trace as run_b3_replay.sh on a single policy:
#   unified_v3 — prefill on session-affinity host (uses prefix cache),
#                decode migrated to a low-load target via Mooncake
#                KV transfer (kv_role=kv_both). Session affinity rotates
#                to decode_target after migration so next turn lands
#                where the KV now lives.
#
# Applies CT_DR_FIX so the run uses the "best Mooncake state" we have
# today (post-e3a1d70 + DR sync skipped).
#
# Usage: bash run_v3_replay.sh
#
# Environment overrides (each falls back to the default assigned below):
#   PROJ_DIR       project checkout root; the other defaults derive from it
#   TRACE          trace file handed to the policy runner
#   OUTROOT        output root; the default is timestamped to the minute
#   PYTHON         interpreter used to run the DR-fix script
#   DR_FIX_SCRIPT  script invoked with --apply / --revert
#   VLLM_ROOT      vllm package directory passed as --vllm-root

# -u: expanding an unset variable is an error.
# pipefail: a pipeline's status reflects a failure in any stage.
# -e is not enabled, so a failing command does not abort the script by itself.
set -uo pipefail

PROJ_DIR="${PROJ_DIR:-/home/admin/cpfs/wjh/agentic-kv}"
TRACE="${TRACE:-$PROJ_DIR/traces/w600_r0.0015_st30.jsonl}"
DATE="$(date +%Y%m%d_%H%M)"
OUTROOT="${OUTROOT:-$PROJ_DIR/outputs/b3_v3_${DATE}}"
# Overridable like the other paths, so a different venv or a relocated fix
# script does not require editing this file.
PYTHON="${PYTHON:-$PROJ_DIR/.venv/bin/python}"
DR_FIX_SCRIPT="${DR_FIX_SCRIPT:-$PROJ_DIR/microbench/connector_tax/cache_sweep/apply_direct_read_fix.py}"
VLLM_ROOT="${VLLM_ROOT:-$PROJ_DIR/.venv/lib/python3.12/site-packages/vllm}"

mkdir -p "$OUTROOT"

echo "=== unified_v3 (offload-decode) trace replay ==="
echo "Trace : $TRACE"
echo "Out : $OUTROOT"
echo ""
#######################################
# Best-effort teardown of everything a replay may leave behind.
# SIGKILLs processes whose command line matches the proxy, "vllm serve",
# or EngineCore, pauses 5 seconds, then runs the DR-fix script with
# --revert. Every step ignores failure and discards stderr.
# Globals:   PYTHON, DR_FIX_SCRIPT, VLLM_ROOT (read)
# Arguments: none
# Returns:   always 0
#######################################
cleanup_all() {
  local proc_pattern
  for proc_pattern in cache_aware_proxy "vllm serve" "EngineCore"; do
    # pkill exits non-zero when nothing matched; that is not an error here.
    pkill -9 -f "$proc_pattern" 2>/dev/null || true
  done
  sleep 5
  "$PYTHON" "$DR_FIX_SCRIPT" --revert --vllm-root "$VLLM_ROOT" 2>/dev/null || true
}
# Run cleanup_all whenever the shell exits, including the early exits below.
trap cleanup_all EXIT
# Clear processes and patch state left behind by a previous run before starting.
cleanup_all

echo "[stage 0] applying CT_DR_FIX (env-gated)"
# The script runs without `set -e`, so check the apply step explicitly: a
# replay without the patch would still be labelled DR_SYNC_DISABLED=1.
if ! "$PYTHON" "$DR_FIX_SCRIPT" --apply --vllm-root "$VLLM_ROOT"; then
  echo "ERROR: applying CT_DR_FIX failed; aborting before the replay" >&2
  exit 1
fi

cfg_dir="$OUTROOT/unified_v3"
# tee below needs this directory for orchestrator.log.
if ! mkdir -p "$cfg_dir"; then
  echo "ERROR: cannot create output directory: $cfg_dir" >&2
  exit 1
fi

# Activate the DR-fix env-gate (unified_v3 uses Mooncake kv_both)
export VLLM_MOONCAKE_DISABLE_DIRECT_READ_SYNC=1

echo ""
echo "====== unified_v3 ; DR_SYNC_DISABLED=1 ======"
# Full output goes to orchestrator.log; only the last 30 lines reach the terminal.
bash "$PROJ_DIR/scripts/b3_isolated_policy.sh" "unified_v3" "$TRACE" "$cfg_dir" \
  2>&1 | tee "$cfg_dir/orchestrator.log" | tail -30
# Read immediately: PIPESTATUS is overwritten by the next command. Index 0 is
# the policy runner itself, not tee or tail.
replay_rc=${PIPESTATUS[0]}

# Tear down the serving stack before touching the patched files.
for proc_pattern in cache_aware_proxy "vllm serve" "EngineCore"; do
  pkill -9 -f "$proc_pattern" 2>/dev/null || true
done
sleep 5

echo ""
echo "[stage Z] reverting CT_DR_FIX"
# Unlike the revert in cleanup_all, this one keeps stderr visible and its
# status is reported.
revert_rc=0
"$PYTHON" "$DR_FIX_SCRIPT" --revert --vllm-root "$VLLM_ROOT" || revert_rc=$?

echo ""
if ((replay_rc != 0)); then
  echo "FAILED: unified_v3 replay exited with status $replay_rc; see $cfg_dir/orchestrator.log" >&2
  exit "$replay_rc"
fi
if ((revert_rc != 0)); then
  echo "WARNING: reverting CT_DR_FIX exited with status $revert_rc" >&2
  exit "$revert_rc"
fi
echo "Done. Artifacts: $OUTROOT/unified_v3/metrics.jsonl"