#!/usr/bin/env bash
# Full 1200-req v3 trace, two modes (selected via the MODE env var), for the
# layer-wise re-profile.
#   MODE=baseline  : stock connector + stock proxy            (post-hoc transfer)
#   MODE=layerwise : LAYERWISE connector + write-mode proxy   (overlapped)
# Both modes: unified_v3 routing + DR-fix. Connector & proxy are restored from
# backup on exit. Output-equivalence/correctness gate = success rate +
# migrated-req TTFT distribution (byte-level KV correctness already validated
# on mb7).
#
# Usage (on dash0):  MODE=baseline  bash run_v3_trace.sh
#                    MODE=layerwise bash run_v3_trace.sh
|
|
# ---------------------------------------------------------------------------
# Run configuration. Every "${VAR:-default}" below may be overridden from the
# environment; the remaining paths are derived from PROJ_DIR.
# Note: -e is not enabled, so a failing command does not abort the script on
# its own; critical steps have to check their own status.
# ---------------------------------------------------------------------------
set -uo pipefail

MODE="${MODE:-baseline}"
# Only two modes exist. Reject anything else up front: an unknown value (for
# example a typo) would otherwise run the baseline path while tagging the
# output directory with the unknown name.
case "$MODE" in
  baseline | layerwise) ;;
  *)
    printf '[config] unknown MODE=%q (expected "baseline" or "layerwise")\n' "$MODE" >&2
    exit 2
    ;;
esac

POLICY="${POLICY:-unified_v3}"
AB_FLAGS="${AB_FLAGS:-}"  # e.g. "--overload-factor 1.3 --lmetric-decode-weight 0.01"
TAG="${TAG:-$MODE}"       # label embedded in the default output directory name

PROJ_DIR="${PROJ_DIR:-/home/admin/cpfs/wjh/agentic-kv}"
VENV="$PROJ_DIR/.venv"
VLLM_ROOT="$VENV/lib/python3.12/site-packages/vllm"
TRACE="${TRACE:-$PROJ_DIR/traces/w600_r0.0015_st30.jsonl}"
DATE="$(date +%Y%m%d_%H%M)"
OUTROOT="${OUTROOT:-$PROJ_DIR/outputs/v3trace_${TAG}_${DATE}}"
PYTHON="$VENV/bin/python"

# Script that applies/reverts the DR-fix inside the installed vllm tree.
DR_FIX="$PROJ_DIR/microbench/connector_tax/cache_sweep/apply_direct_read_fix.py"
# Files that are swapped in place for MODE=layerwise.
MC_FILE="$VLLM_ROOT/distributed/kv_transfer/kv_connector/v1/mooncake/mooncake_connector.py"
PROXY_FILE="$PROJ_DIR/scripts/cache_aware_proxy.py"
# Replacement sources copied over MC_FILE / PROXY_FILE for MODE=layerwise.
LW_CONN="${LW_CONN:-/tmp/mooncake_connector.LAYERWISE.py}"
WM_PROXY="${WM_PROXY:-/tmp/cache_aware_proxy.WRITEMODE.py}"
|
|
# Output layout: $OUTROOT/unified_v3 is the per-run directory handed to the
# orchestrator and read back by the stats step.
# NOTE(review): the sub-directory name is fixed to "unified_v3" even when
# POLICY is overridden — confirm that this is intended.
cfg_dir="$OUTROOT/unified_v3"
if ! mkdir -p "$OUTROOT" "$cfg_dir"; then
  echo "[setup] cannot create output directory $cfg_dir" >&2
  exit 1
fi

# Pristine copies of the two files this script overwrites in place. An
# existing *.ORIG_BACKUP is never overwritten, so it keeps the earliest saved
# state. Without a backup there is nothing to restore from later, so a failed
# copy aborts the run here, before anything has been modified.
for target in "$MC_FILE" "$PROXY_FILE"; do
  backup="$target.ORIG_BACKUP"
  [ -f "$backup" ] && continue
  if ! cp -- "$target" "$backup"; then
    echo "[setup] cannot create backup $backup" >&2
    exit 1
  fi
done
|
|
#######################################
# Put the original connector and proxy back in place and revert the DR-fix.
# Globals:   MC_FILE, PROXY_FILE, PYTHON, DR_FIX, VLLM_ROOT (read)
# Arguments: none
# Outputs:   confirmation line on stdout when both files were restored;
#            one warning per failed copy on stderr otherwise
# Returns:   0 when both files were restored from their .ORIG_BACKUP,
#            1 when at least one copy failed
#######################################
restore() {
  local status=0 target
  for target in "$MC_FILE" "$PROXY_FILE"; do
    if ! cp -f -- "$target.ORIG_BACKUP" "$target"; then
      echo "[restore] WARNING: could not restore $target from $target.ORIG_BACKUP" >&2
      status=1
    fi
  done
  # Best effort by design: the revert is also invoked when the fix was never
  # applied (e.g. the reset at start-up), so its output and status are ignored.
  "$PYTHON" "$DR_FIX" --revert --vllm-root "$VLLM_ROOT" 2>/dev/null || true
  # Only claim success when both copies really happened.
  if [ "$status" -eq 0 ]; then
    echo "[restore] connector+proxy reset to ORIG, DR-fix reverted"
  fi
  return "$status"
}
#######################################
# Exit handler: force-kill serving processes, then restore the original files.
# Sends SIGKILL to every process whose command line matches one of the
# patterns below; "no such process" is not an error.
# Globals:   none directly (restore reads its own globals)
# Arguments: none
# Returns:   status of restore
#######################################
cleanup() {
  local pattern
  for pattern in cache_aware_proxy "vllm serve" "EngineCore"; do
    pkill -9 -f "$pattern" 2>/dev/null || true
  done
  # Presumably gives the killed processes time to disappear before the files
  # they were started from are swapped back.
  sleep 5
  restore
}
# From here on every exit path runs cleanup (kill servers, restore files).
trap cleanup EXIT

# Kill any leftover server, then reset connector/proxy/DR-fix so the run
# starts from the original files.
pkill -9 -f "vllm serve" 2>/dev/null || true
sleep 3
restore  # start from clean
|
|
echo "=== v3 trace ($MODE) -> $OUTROOT ==="

# MODE=layerwise: overwrite the connector and the proxy with their replacement
# versions, check that both still parse as Python, and export the two
# MOONCAKE_LAYERWISE / EAR_WRITE_MODE switches for the processes started later.
if [ "$MODE" = "layerwise" ]; then
  # A failed copy would leave the stock file in place; the syntax check below
  # would still pass and the run would be labelled layerwise while using stock
  # code. Abort instead (the EXIT trap restores the originals).
  if ! cp -f -- "$LW_CONN" "$MC_FILE"; then
    echo "[deploy] cannot install LAYERWISE connector from $LW_CONN" >&2
    exit 1
  fi
  if ! cp -f -- "$WM_PROXY" "$PROXY_FILE"; then
    echo "[deploy] cannot install WRITEMODE proxy from $WM_PROXY" >&2
    exit 1
  fi
  # Paths are passed as arguments rather than pasted into the Python source,
  # so quotes or backslashes in a path cannot break the check.
  "$PYTHON" -c '
import ast
import sys

for path in sys.argv[1:]:
    with open(path) as fh:
        ast.parse(fh.read(), filename=path)
print("[deploy] LAYERWISE conn + WRITEMODE proxy AST OK")
' "$MC_FILE" "$PROXY_FILE" || exit 1
  export MOONCAKE_LAYERWISE=1
  export EAR_WRITE_MODE=1
fi
|
|
# Apply the DR-fix to the installed vllm tree (used by both modes).
echo "[DR-fix] apply"
if ! "$PYTHON" "$DR_FIX" --apply --vllm-root "$VLLM_ROOT"; then
  # NOTE(review): the exit-code contract of the apply script is not visible
  # here, so this only warns instead of aborting — confirm and tighten.
  echo "[DR-fix] WARNING: apply step exited non-zero; continuing" >&2
fi
export VLLM_MOONCAKE_DISABLE_DIRECT_READ_SYNC=1

echo "[run] $POLICY AB=[$AB_FLAGS] (MOONCAKE_LAYERWISE=${MOONCAKE_LAYERWISE:-0} EAR_WRITE_MODE=${EAR_WRITE_MODE:-0})"
# Full orchestrator output goes to orchestrator.log; only the last 20 lines
# are echoed to the terminal.
EXTRA_PROXY_ARGS="$AB_FLAGS" bash "$PROJ_DIR/scripts/b3_isolated_policy.sh" \
  "$POLICY" "$TRACE" "$cfg_dir" 2>&1 \
  | tee "$cfg_dir/orchestrator.log" \
  | tail -20
# Must be read immediately: PIPESTATUS is overwritten by the next command.
run_status="${PIPESTATUS[0]}"
if [ "$run_status" -ne 0 ]; then
  echo "[run] WARNING: b3_isolated_policy.sh exited with status $run_status (see $cfg_dir/orchestrator.log)" >&2
fi
|
|
# Force-kill the proxy and the servers before the stats step; "no matching
# process" is not an error.
for pattern in cache_aware_proxy "vllm serve"; do
  pkill -9 -f "$pattern" 2>/dev/null || true
done
sleep 5
|
|
#######################################
# Print the success rate and TTFT percentiles of one run directory and, when
# breakdown.json is readable, the TTFT of requests routed as PD_SEP_V2.
# Globals:   PYTHON (read) - interpreter used to run the report
# Arguments: $1 - directory containing metrics.jsonl (and breakdown.json)
# Outputs:   report on stdout; metrics read/parse problems on stderr
# Returns:   0 on success, non-zero when metrics.jsonl cannot be read/parsed
#######################################
report_stats() {
  "$PYTHON" - "$1" << 'PYEOF'
import json
import sys

d = sys.argv[1]


def pct(values, q):
    """Value at index int(q * n), clamped to the last element, of a sorted list (0 if empty)."""
    return values[min(len(values) - 1, int(q * len(values)))] if values else 0


try:
    with open(f"{d}/metrics.jsonl") as fh:
        # Blank lines (e.g. a trailing newline) are not records.
        ms = [json.loads(line) for line in fh if line.strip()]
except (OSError, ValueError) as e:
    print(f" (metrics parse: {e})", file=sys.stderr)
    sys.exit(1)

ok = [m for m in ms if not m.get("error")]
ttft = sorted(m["ttft_s"] for m in ok if m.get("ttft_s") is not None)
print(f" requests: {len(ms)} success: {len(ok)} ({len(ok)/max(1,len(ms))*100:.1f}%)")
print(f" TTFT s : p50={pct(ttft, .5):.2f} p90={pct(ttft, .9):.2f} p99={pct(ttft, .99):.2f}")

# migrated reqs from proxy breakdown
try:
    with open(f"{d}/breakdown.json") as fh:
        bd = json.load(fh)
    mig = [x for x in bd if x.get("route_class") == "PD_SEP_V2"]
    mids = {x["request_id"] for x in mig}
    mt = sorted(
        m["ttft_s"]
        for m in ok
        if m.get("request_id") in mids and m.get("ttft_s") is not None
    )
    if mt:
        print(f" migrations: {len(mig)} migrated-req TTFT: "
              f"p50={mt[len(mt)//2]:.2f} p90={mt[int(len(mt)*.9)]:.2f} max={mt[-1]:.2f}")
    else:
        print(f" migrations: {len(mig)}")
except Exception as e:
    # The breakdown is optional extra detail; report the problem and move on.
    print(f" (breakdown parse: {e})")
PYEOF
}

echo "[stats] ${MODE:-}"
report_stats "${cfg_dir:-}"
echo "[done] ${cfg_dir:-} (metrics.jsonl, breakdown.json)"