A/B x migration matrix runner (parameterized run_v3_trace.sh + wrapper)

This commit is contained in:
2026-05-28 19:23:16 +08:00
parent 63387f614d
commit 19191940e6
2 changed files with 39 additions and 3 deletions

View File

@@ -0,0 +1,33 @@
#!/usr/bin/env bash
# A/B x migration matrix on the 1200-req trace (sequential, ~47 min each).
#   1. unified                        (no A/B, no migration)   anchor
#   2. unified + A+B                  (documented champion, no mig)
#   3. unified_v3 + A+B + layer-wise  (champion + cheap mig)
# We already have: unified_v3 + layer-wise (no A/B) from the prior run.
#
#   Q1 (migration benefit w/ layer-wise): #1 vs prior v3+layerwise(noAB)
#   Q2 (does migration add to champion):  #2 vs #3
#
# Environment:
#   PROJ_DIR  project root (default: /home/admin/cpfs/wjh/agentic-kv)
#   LOGD      directory receiving the per-cell logs (default: /tmp/dst_break_logs)
#
# No `-e` on purpose: a failed cell must not prevent the remaining cells from
# running. Failures are reported on stderr by run_cell instead.
set -uo pipefail
PROJ_DIR="${PROJ_DIR:-/home/admin/cpfs/wjh/agentic-kv}"
R="$PROJ_DIR/microbench/connector_tax/layerwise/run_v3_trace.sh"
AB="--overload-factor 1.3 --lmetric-decode-weight 0.01"
LOGD="${LOGD:-/tmp/dst_break_logs}"
mkdir -p -- "$LOGD" || {
  printf 'cannot create log directory: %s\n' "$LOGD" >&2
  exit 1
}

#######################################
# Run one matrix cell through run_v3_trace.sh.
# Globals:   R (read), LOGD (read)
# Arguments: $1 - cell index (1..3), used in the banner and the log name
#            $2 - TAG passed to the runner; also the log-name suffix
#            $3 - POLICY passed to the runner
#            $4 - MODE passed to the runner
#            $5 - AB_FLAGS passed to the runner (may be empty)
#            $6 - human-readable banner title
# Outputs:   banner and the last 6 lines of the run on stdout; the full
#            combined output is written to $LOGD/abmatrix_<idx>_<tag>.log;
#            a warning on stderr when the runner exits non-zero
# Returns:   0 always, so the matrix keeps going after a failed cell
#######################################
run_cell() {
  local idx=$1 tag=$2 policy=$3 mode=$4 ab_flags=$5 title=$6
  local log="$LOGD/abmatrix_${idx}_${tag}.log"
  local rc
  echo "########## ${idx}/3 ${title} ##########"
  TAG="$tag" POLICY="$policy" MODE="$mode" AB_FLAGS="$ab_flags" \
    bash "$R" 2>&1 | tee "$log" | tail -6
  # PIPESTATUS must be read immediately after the pipeline; index 0 is the
  # runner itself rather than tee/tail.
  rc=${PIPESTATUS[0]}
  if ((rc != 0)); then
    printf '[warn] %s exited with status %s (see %s)\n' "$tag" "$rc" "$log" >&2
  fi
  return 0
}

#######################################
# Print the most recently modified directory among the arguments.
# Arguments: candidate paths (typically an expanded glob); entries that are
#            not directories, including an unmatched literal pattern, are
#            ignored
# Outputs:   the newest directory on stdout, or an empty line if none exist
#######################################
newest_dir() {
  local newest="" cand
  for cand; do
    [[ -d "$cand" ]] || continue
    if [[ -z "$newest" || "$cand" -nt "$newest" ]]; then
      newest=$cand
    fi
  done
  printf '%s\n' "$newest"
}

run_cell 1 unified_plain unified baseline "" "unified plain"
run_cell 2 unified_AB unified baseline "$AB" "unified + A+B"
run_cell 3 v3_AB_lw unified_v3 layerwise "$AB" "unified_v3 + A+B + layer-wise"

echo "########## MATRIX DONE ##########"
# NOTE(review): the output directory is looked up under .../unified_v3 for
# every cell, including the two POLICY=unified runs — confirm this matches how
# run_v3_trace.sh names its per-run config directory.
for t in unified_plain unified_AB v3_AB_lw; do
  D=$(newest_dir "$PROJ_DIR"/outputs/v3trace_"${t}"_*/unified_v3)
  echo "=== $t ($D) ==="
  # Only the [stats]..[done] section of the log is summarised; a missing log
  # or an empty match is not an error.
  sed -n '/\[stats\]/,/\[done\]/p' "$LOGD"/abmatrix_*_"${t}".log 2>/dev/null \
    | grep -E "requests:|TTFT|migrations:" || true
done

View File

@@ -11,12 +11,15 @@
set -uo pipefail set -uo pipefail
MODE="${MODE:-baseline}" MODE="${MODE:-baseline}"
POLICY="${POLICY:-unified_v3}"
AB_FLAGS="${AB_FLAGS:-}" # e.g. "--overload-factor 1.3 --lmetric-decode-weight 0.01"
TAG="${TAG:-$MODE}"
PROJ_DIR="${PROJ_DIR:-/home/admin/cpfs/wjh/agentic-kv}" PROJ_DIR="${PROJ_DIR:-/home/admin/cpfs/wjh/agentic-kv}"
VENV="$PROJ_DIR/.venv" VENV="$PROJ_DIR/.venv"
VLLM_ROOT="$VENV/lib/python3.12/site-packages/vllm" VLLM_ROOT="$VENV/lib/python3.12/site-packages/vllm"
TRACE="${TRACE:-$PROJ_DIR/traces/w600_r0.0015_st30.jsonl}" TRACE="${TRACE:-$PROJ_DIR/traces/w600_r0.0015_st30.jsonl}"
DATE="$(date +%Y%m%d_%H%M)" DATE="$(date +%Y%m%d_%H%M)"
OUTROOT="${OUTROOT:-$PROJ_DIR/outputs/v3trace_${MODE}_${DATE}}" OUTROOT="${OUTROOT:-$PROJ_DIR/outputs/v3trace_${TAG}_${DATE}}"
PYTHON="$VENV/bin/python" PYTHON="$VENV/bin/python"
DR_FIX="$PROJ_DIR/microbench/connector_tax/cache_sweep/apply_direct_read_fix.py" DR_FIX="$PROJ_DIR/microbench/connector_tax/cache_sweep/apply_direct_read_fix.py"
MC_FILE="$VLLM_ROOT/distributed/kv_transfer/kv_connector/v1/mooncake/mooncake_connector.py" MC_FILE="$VLLM_ROOT/distributed/kv_transfer/kv_connector/v1/mooncake/mooncake_connector.py"
@@ -61,8 +64,8 @@ echo "[DR-fix] apply"
"$PYTHON" "$DR_FIX" --apply --vllm-root "$VLLM_ROOT" "$PYTHON" "$DR_FIX" --apply --vllm-root "$VLLM_ROOT"
export VLLM_MOONCAKE_DISABLE_DIRECT_READ_SYNC=1 export VLLM_MOONCAKE_DISABLE_DIRECT_READ_SYNC=1
echo "[run] unified_v3 (MOONCAKE_LAYERWISE=${MOONCAKE_LAYERWISE:-0} EAR_WRITE_MODE=${EAR_WRITE_MODE:-0})" echo "[run] $POLICY AB=[$AB_FLAGS] (MOONCAKE_LAYERWISE=${MOONCAKE_LAYERWISE:-0} EAR_WRITE_MODE=${EAR_WRITE_MODE:-0})"
bash "$PROJ_DIR/scripts/b3_isolated_policy.sh" "unified_v3" "$TRACE" "$cfg_dir" \ EXTRA_PROXY_ARGS="$AB_FLAGS" bash "$PROJ_DIR/scripts/b3_isolated_policy.sh" "$POLICY" "$TRACE" "$cfg_dir" \
2>&1 | tee "$cfg_dir/orchestrator.log" | tail -20 2>&1 | tee "$cfg_dir/orchestrator.log" | tail -20
pkill -9 -f cache_aware_proxy 2>/dev/null || true pkill -9 -f cache_aware_proxy 2>/dev/null || true