Reuse and concurrency axes redone with proper controlled variables, plus
the orchestration used to run them on dash0:
- run_reuse_fixed.sh: hold REAL prefill work (delta) constant, vary only
cached prefix -> reuse = C/(C+U). Supersedes old fig1 (which held
input=8192 and sliced prefix out, confounding "more reuse" with "less
prefill").
- run_conc.sh: agentic-corner config (in=32768, delta=512, reuse=0.984,
out=128) that exposes PD's structural KV-transfer tax. Supersedes old fig3.
- run_campaign{,2,3}.sh, backfill_d2048o128.sh: serial campaign drivers
(strictly one driver at a time), out=128 sweeps, PD wall-cap for
collapse-draining high-reuse arms, and flaked-arm backfill.
- mb5_run_gpu.sh: per-config bring-up / replay / teardown orchestrator.
- plot_pd_crossover.py: render the reuse_compare figures from fig_agg dumps.
- fig_agg.py: tolerate null stats from fully-collapsed arms (an arm with 0
successes still writes its stat keys, with value null; because the key is
present, `dict.get(k, {})` returns null rather than the `{}` default).
Data: fig1_reuse_fixed.json, fig1_reuse_d{1024,2048}_o128.json
Figs: reuse_compare_AB.png, reuse_compare_ABC.png
Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
71 lines
3.9 KiB
Bash
71 lines
3.9 KiB
Bash
#!/usr/bin/env bash
# Concurrency axis, agentic-corner config. Supersedes old fig3 (in~8192/out256).
# RETUNED 2026-05-31 for realism (C2): hold total context in=32768 but shrink the
# real per-turn new-prefill to delta=512 and push reuse to 0.984 (real agentic
# reuse ->99.6%). prefix 32256 + delta 512. out=128. This is the corner that
# exposes PD's structural tax: colo keeps the 32k resident KV local, but PD must
# KV-transfer the whole 32k context every turn even though only 512 tokens are new
# (C2 PD-tax ~250-450x). Sweep closed-loop N by step 8 up to mean-E2E<=SLO ceiling.
# Wiring per memory project-mb5-pd-ablation-wiring: .venv_dash0, traces_synth/,
# CONFIG 8C-proxy + PD, MB5_P_ROUTING=session + MB5_COLO_ROUTING=session,
# N=REPLAY_MAX_INFLIGHT closed loop + REPLAY_INTER_TURN_THINK_S,
# REPLAY_NO_REALIZED_PREFIX=1. RUN ONLY ONE DRIVER AT A TIME (shared GPUs/ports).
set -eo pipefail

# All relative paths below (scripts/, traces_synth/, mb5_runs/, analysis/) are
# resolved against the repo checkout.
repo_root="/home/admin/cpfs/wjh/agentic-kv-fresh"
cd "$repo_root"

export MB5_VENV="${MB5_VENV:-$repo_root/.venv_dash0}"
VPY="$MB5_VENV/bin/python"
# Fail fast on a broken venv: run_N is always invoked inside $(...), where bash
# clears errexit, so a missing interpreter would otherwise not stop the sweep.
if [[ ! -x "$VPY" ]]; then
  echo "[conc32k] python interpreter not found or not executable: $VPY" >&2
  exit 1
fi

# Workload shape (every knob is overridable from the environment).
PFX="${PFX:-32256}"       # cached prefix tokens
DELTA="${DELTA:-512}"     # new prefill tokens per turn
OL="${OL:-128}"           # output tokens; reuse=0.984, in=32768
THINK="${THINK:-0.5}"     # passed as REPLAY_INTER_TURN_THINK_S
TURNS="${TURNS:-8}"       # turns per session in the synthetic trace

# Concurrency grid.
NSTART="${NSTART:-8}"
NSTEP="${NSTEP:-8}"
NMAX="${NMAX:-128}"
NLIST="${NLIST:-}"        # explicit N grid (overrides NSTART/STEP/MAX), e.g. "8 16 32 48 64 96 128"

# wall-deadline (s) for PD arms only; bounds collapsed-arm drain (un-run turns =
# failures). colo (8C-proxy) runs UNCAPPED so the headline reference is always
# fully measured.
CONC_PD_MAXDUR="${CONC_PD_MAXDUR:-600}"

SLO="${SLO:-10.0}"                # colo mean-E2E ceiling (s) that ends the sweep
SESS_PER_N="${SESS_PER_N:-4}"     # sessions generated per unit of N
CFGS="${CFGS:-8C-proxy 2P+6D 4P+4D 6P+2D}"  # space-separated configs run at each N
ONLY_N="${ONLY_N:-}"              # non-empty: smoke-run this single N and exit
#######################################
# Run one closed-loop concurrency point across every config in CFGS.
# Generates the synthetic trace for N, replays it once per config through
# scripts/mb5_run_gpu.sh, then reports the 8C-proxy (colo) mean E2E.
# Globals (read): VPY SESS_PER_N TURNS PFX DELTA OL THINK CFGS CONC_PD_MAXDUR
# Arguments: $1 - N, exported to the replay as REPLAY_MAX_INFLIGHT
# Outputs:   stdout - ONLY the metric: e2e_mean of the 8C-proxy arm, or "nan"
#                     when that arm left no replay_metrics.summary.json
#            stderr - progress and warning messages
# Returns:   1 if trace generation fails; otherwise the status of the last
#            command (the metric-extraction pipeline or `echo nan`)
#######################################
run_N() {
  local N="$1"
  local sess=$(( SESS_PER_N * N ))
  local tag="conc32k_N${N}"
  local trace="traces_synth/${tag}.jsonl"
  local cfg dur colo_dir

  # Callers invoke this inside $(...), where bash clears errexit, so a failed
  # generation must be reported explicitly instead of replaying a stale trace.
  if ! "$VPY" scripts/gen_synthetic_trace.py --out "$trace" --mode regular \
      --qps "$sess" --duration-s 1 --turns "$TURNS" \
      --prefix-len "$PFX" --delta-len "$DELTA" --output-len "$OL" --seed 42 \
      >/dev/null; then
    echo "[conc32k] trace generation failed for N=$N ($trace)" >&2
    return 1
  fi

  # Progress goes to stderr: stdout is captured by callers as the metric, and
  # any extra line there makes the value unparseable for the SLO comparison.
  echo "[conc32k] N=$N sess=$sess in=$((PFX+DELTA)) out=$OL -> $trace" >&2

  # CFGS is a space-separated list; word splitting is intentional.
  # shellcheck disable=SC2086
  for cfg in $CFGS; do
    echo " -> $cfg" >&2
    # Only PD arms get a wall deadline; colo (8C-proxy) runs uncapped.
    dur=""
    if [ "$cfg" != "8C-proxy" ]; then
      dur="$CONC_PD_MAXDUR"
    fi
    # A failed arm is best-effort: warn and continue with the next config.
    MB5_P_ROUTING=session MB5_COLO_ROUTING=session \
      REPLAY_MAX_INFLIGHT="$N" REPLAY_INTER_TURN_THINK_S="$THINK" \
      REPLAY_NO_REALIZED_PREFIX=1 REPLAY_MAX_DURATION="$dur" \
      CONFIGS="$cfg" REPS=1 TRACE="$trace" RUN_TAG="$tag" \
      bash scripts/mb5_run_gpu.sh >/dev/null 2>&1 \
      || echo " [warn] ${tag}_${cfg} failed" >&2
  done

  colo_dir="mb5_runs/${tag}_8C-proxy_rep1"
  if [ -f "$colo_dir/replay_metrics.summary.json" ]; then
    "$VPY" scripts/fig_agg.py --json "$colo_dir" 2>/dev/null \
      | "$VPY" -c "import sys,json;r=json.load(sys.stdin);print(r[0].get('e2e_mean') if r else 'nan')"
  else
    echo nan
  fi
}
# Smoke mode: when ONLY_N is set, run that single concurrency point for the
# configs in CFGS, print the per-config comparison table, and exit.
if [ -n "$ONLY_N" ]; then
  echo "[conc32k] SMOKE N=$ONLY_N cfgs='$CFGS'"
  t0=$(date +%s)
  run_out=$(run_N "$ONLY_N")
  t1=$(date +%s)
  # The metric is the final stdout line of run_N; forward any lines printed
  # before it to stderr so they cannot garble the summary line below.
  if [[ "$run_out" == *$'\n'* ]]; then
    printf '%s\n' "${run_out%$'\n'*}" >&2
  fi
  m="${run_out##*$'\n'}"
  echo "[conc32k] SMOKE N=$ONLY_N colo mean-E2E=${m}s wall=$(( t1 - t0 ))s; compare:"
  # The glob is intentionally unquoted: it selects every config's rep1 dir for N.
  "$VPY" scripts/fig_agg.py mb5_runs/conc32k_N"${ONLY_N}"_*_rep1 2>&1
  exit 0
fi
# Full sweep: walk the N grid (explicit NLIST, else NSTART..NMAX by NSTEP) and
# stop at the first N whose colo mean E2E exceeds SLO; then aggregate every
# conc32k run directory into the figure JSON.
if [ -n "$NLIST" ]; then
  NSEQ="$NLIST"
else
  NSEQ=$(seq "$NSTART" "$NSTEP" "$NMAX")
fi

# NSEQ is a whitespace-separated list; word splitting is intentional.
# shellcheck disable=SC2086
for N in $NSEQ; do
  echo "[conc32k] === N=$N ==="
  run_out=$(run_N "$N")
  # The metric is the final stdout line of run_N. Forward anything printed
  # before it to stderr; a multi-line value would make float() raise below,
  # so the SLO stop could never trigger.
  if [[ "$run_out" == *$'\n'* ]]; then
    printf '%s\n' "${run_out%$'\n'*}" >&2
  fi
  m="${run_out##*$'\n'}"
  echo "[conc32k] N=$N colo mean-E2E=${m}s"

  # Values are passed as argv rather than spliced into Python source. A
  # non-numeric metric ("nan", "None", empty) counts as "not over".
  over=$("$VPY" -c 'import sys; print(1 if float(sys.argv[1]) > float(sys.argv[2]) else 0)' \
    "$m" "$SLO" 2>/dev/null || echo 0)
  if [ "$over" = "1" ]; then
    echo "[conc32k] colo crossed SLO ${SLO}s at N=$N -> stop"
    break
  fi
done

# Collect run directories; the -d test drops the literal pattern when the
# glob matches nothing.
dirs=()
for d in mb5_runs/conc32k_N*_rep1; do
  if [ -d "$d" ]; then
    dirs+=("$d")
  fi
done

out_json="analysis/mb5_pd_ablation/fig3_conc32k.json"
# Create the output directory first so the redirect cannot fail after the
# whole sweep has already run.
mkdir -p "$(dirname "$out_json")"
"$VPY" scripts/fig_agg.py --json "${dirs[@]}" > "$out_json"
echo "[conc32k] done -> $out_json (${#dirs[@]} dirs)"