Reuse and concurrency axes redone with proper controlled variables, plus
the orchestration used to run them on dash0:
- run_reuse_fixed.sh: hold REAL prefill work (delta) constant, vary only
cached prefix -> reuse = C/(C+U). Supersedes old fig1 (which held
input=8192 and sliced prefix out, confounding "more reuse" with "less
prefill").
- run_conc.sh: agentic-corner config (in=32768, delta=512, reuse=0.984,
out=128) that exposes PD's structural KV-transfer tax. Supersedes old fig3.
- run_campaign{,2,3}.sh, backfill_d2048o128.sh: serial campaign drivers
(strictly one driver at a time), out=128 sweeps, PD wall-cap for
collapse-draining high-reuse arms, and flaked-arm backfill.
- mb5_run_gpu.sh: per-config bring-up / replay / teardown orchestrator.
- plot_pd_crossover.py: render the reuse_compare figures from fig_agg dumps.
- fig_agg.py: tolerate null stats from fully-collapsed arms (0 successes
write the stat keys as null; `dict.get(k, {})` returns null, not {}).
Data: fig1_reuse_fixed.json, fig1_reuse_d{1024,2048}_o128.json
Figs: reuse_compare_AB.png, reuse_compare_ABC.png
Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
73 lines
4.0 KiB
Bash
73 lines
4.0 KiB
Bash
#!/usr/bin/env bash
|
|
# Reuse axis, DONE RIGHT (controlled variable). Supersedes old fig1.
# Hold REAL (uncached) prefill work constant: --delta-len = U fixed.
# Vary only --prefix-len = C -> reuse = C/(C+U). Context grows with reuse, but
# the number of tokens that must actually be prefilled each turn stays at U.
# Old fig1 held input=8192 and sliced prefix out of it, so delta shrank 15x as
# reuse rose -> confounded "more reuse" with "less prefill". This fixes that.
#
# Wiring matches the corrected MB5 stack (see memory project-mb5-pd-ablation-wiring):
# .venv_dash0, traces_synth/, CONFIG 8C-proxy + PD, MB5_P_ROUTING=session,
# N injected via REPLAY_MAX_INFLIGHT (closed loop) + REPLAY_INTER_TURN_THINK_S,
# REPLAY_NO_REALIZED_PREFIX=1 (reuse governed by hash_ids, required for this sweep).
|
|
# Stop at the first failing command, and treat a pipeline as failed when any
# of its stages fails.
set -e -o pipefail

# Every relative path used below (scripts/, traces_synth/, mb5_runs/,
# analysis/) is resolved against this checkout.
cd /home/admin/cpfs/wjh/agentic-kv-fresh

# Virtualenv root: the caller's MB5_VENV wins when it is set and non-empty;
# otherwise fall back to the dash0 venv. Exported so child processes inherit it.
: "${MB5_VENV:=/home/admin/cpfs/wjh/agentic-kv-fresh/.venv_dash0}"
export MB5_VENV

# Interpreter used for the project's Python scripts.
VPY="${MB5_VENV}/bin/python"
|
|
|
|
# ---- Sweep tunables (each one can be overridden from the environment) -------

# Fixed real (uncached) prefill per turn, in tokens (USER-CHOSEN).
DELTA="${DELTA:-2048}"
# Output length per turn, passed to the trace generator as --output-len.
OL="${OL:-256}"
# Closed-loop concurrency, injected as REPLAY_MAX_INFLIGHT.
N="${N:-8}"
# Think time between turns, injected as REPLAY_INTER_TURN_THINK_S.
THINK="${THINK:-0.5}"
# Turns per session, passed to the trace generator as --turns.
TURNS="${TURNS:-8}"
# Number of sessions (closed-loop: arrival rate is irrelevant, only the count
# matters; ~6 waves at N=8).
NSESS="${NSESS:-48}"
# Cached prefix lengths to sweep -> reuse .20 .50 .67 .80 .90 .95 at DELTA=2048.
PFXS="${PFXS:-512 2048 4096 8192 18432 38912}"
# Serving configs replayed for every prefix point.
CFGS="${CFGS:-8C-proxy 2P+6D 4P+4D 6P+2D}"
# Wall-deadline (s) for PD arms only (colo uncapped): bounds the collapse-drain
# that stalls high-reuse PD arms (un-run turns = failures, honest completion%).
# 0/empty = off. Deliberately uses `-` rather than `:-` so that an explicitly
# empty REUSE_PD_MAXDUR really disables the cap instead of silently falling
# back to 500; only a fully unset variable gets the default.
REUSE_PD_MAXDUR="${REUSE_PD_MAXDUR-500}"
# When non-empty: smoke a single prefix, then exit.
ONLY_PFX="${ONLY_PFX:-}"
|
|
|
|
#######################################
# Run one point of the reuse sweep: generate the synthetic trace for one
# cached-prefix length, then replay it once against every config in CFGS.
# Globals (read): VPY, DELTA, OL, NSESS, TURNS, N, THINK, CFGS, REUSE_PD_MAXDUR
# Arguments:
#   $1 - cached prefix length C in tokens (non-negative integer)
# Outputs:
#   Progress lines on stdout; one warning per failed config on stderr.
# Returns:
#   0 once the trace was generated (a failing config only warns),
#   2 when the prefix or DELTA is not usable,
#   the generator's status when trace generation fails.
#######################################
run_point() { # <pfx>
  local pfx="${1:-}"

  # Both lengths end up in arithmetic and in the reuse ratio, so insist on
  # plain decimal integers instead of evaluating arbitrary text.
  if [[ ! "$pfx" =~ ^[0-9]+$ || ! "$DELTA" =~ ^[0-9]+$ ]]; then
    echo "[reuse] prefix and DELTA must be non-negative integers (pfx='$pfx' DELTA='$DELTA')" >&2
    return 2
  fi
  # 10# forces base 10 so a zero-padded value is not parsed as octal.
  local total=$(( 10#$pfx + 10#$DELTA ))
  if (( total == 0 )); then
    echo "[reuse] pfx + DELTA must be > 0" >&2
    return 2
  fi

  # reuse = C / (C + U). The operands are handed to awk as variables rather
  # than being spliced into interpreter source text.
  local reuse
  reuse=$(awk -v c="$pfx" -v u="$DELTA" 'BEGIN { printf "%.3f", c / (c + u) }') || return

  local tag="reuse_p${pfx}_d${DELTA}_o${OL}" # _o${OL} so different output lens don't collide
  local trace="traces_synth/${tag}.jsonl"

  # Closed-loop: pass NSESS as qps with duration 1 so n_sessions = NSESS
  # exactly (gen_regular: n_sessions = int(duration_s * session_qps)).
  "$VPY" scripts/gen_synthetic_trace.py --out "$trace" --mode regular \
    --qps "$NSESS" --duration-s 1 --turns "$TURNS" \
    --prefix-len "$pfx" --delta-len "$DELTA" --output-len "$OL" --seed 42 \
    >/dev/null || return
  echo "[reuse] pfx=$pfx delta=$DELTA reuse=$reuse in=$total -> $trace"

  local cfg dur
  # CFGS is a whitespace-separated list; word splitting is intentional here.
  # shellcheck disable=SC2086
  for cfg in $CFGS; do
    echo " -> $cfg"
    # Only the PD arms get the wall-deadline; the colocated arm runs uncapped.
    dur=""
    if [[ "$cfg" != "8C-proxy" ]]; then
      dur="$REUSE_PD_MAXDUR"
    fi
    # Both routings set to session so BOTH colo (kv_both) and PD producers
    # pin a session's turns to one instance and reuse its prefix cache — the
    # fair cache-aware comparison. P_ROUTING is ignored by colo, COLO_ROUTING
    # by PD, so setting both is harmless and symmetric.
    # A failing arm is reported and the sweep carries on with the next one.
    if ! MB5_P_ROUTING=session MB5_COLO_ROUTING=session \
      REPLAY_MAX_INFLIGHT="$N" REPLAY_INTER_TURN_THINK_S="$THINK" \
      REPLAY_NO_REALIZED_PREFIX=1 REPLAY_MAX_DURATION="$dur" \
      CONFIGS="$cfg" REPS=1 TRACE="$trace" RUN_TAG="$tag" \
      bash scripts/mb5_run_gpu.sh >/dev/null 2>&1; then
      echo " [warn] $cfg failed" >&2
    fi
  done
}
|
|
|
|
# Smoke mode: when ONLY_PFX is set, run just that one prefix point, print the
# comparison for its run directories, and exit without running the full sweep.
if [[ -n "$ONLY_PFX" ]]; then
  echo "[reuse] SMOKE pfx=$ONLY_PFX cfgs='$CFGS'"
  t0=$(date +%s)
  run_point "$ONLY_PFX"
  t1=$(date +%s)
  echo "[reuse] SMOKE done wall=$(( t1 - t0 ))s; compare:"

  # Collect only directories that really exist. An unmatched glob would
  # otherwise be handed to fig_agg.py as a literal pattern. The variable parts
  # are quoted so that only the two `*` act as wildcards.
  smoke_dirs=()
  for d in mb5_runs/reuse_p"${ONLY_PFX}"_d"${DELTA}"_o"${OL}"_*_rep1; do
    if [[ -d "$d" ]]; then
      smoke_dirs+=("$d")
    fi
  done
  if (( ${#smoke_dirs[@]} == 0 )); then
    echo "[reuse] SMOKE: no run dirs for pfx=$ONLY_PFX delta=$DELTA out=$OL (did every config fail?)" >&2
    exit 1
  fi

  "$VPY" scripts/fig_agg.py "${smoke_dirs[@]}"
  exit 0
fi
|
|
|
|
# Full sweep: one run_point per cached-prefix length.
# PFXS is a whitespace-separated list; word splitting is intentional here.
# shellcheck disable=SC2086
for pfx in $PFXS; do
  run_point "$pfx"
done

# Aggregate ONLY this sweep's dirs (matched by delta+output) so the three
# reuse figures (d2048/o256, d1024/o128, d2048/o128) never cross-contaminate.
dirs=()
for d in mb5_runs/reuse_*_d"${DELTA}"_o"${OL}"_*_rep1; do
  if [[ -d "$d" ]]; then
    dirs+=("$d")
  fi
done
OUTJSON="analysis/mb5_pd_ablation/fig1_reuse_d${DELTA}_o${OL}.json"

# Nothing to aggregate means every arm failed; keep any earlier result intact
# rather than overwriting it with an empty aggregate.
if (( ${#dirs[@]} == 0 )); then
  echo "[reuse] no run dirs for delta=$DELTA out=$OL; leaving $OUTJSON untouched" >&2
  exit 1
fi

# Write to a sibling temp file and rename it on success, so a failing
# fig_agg.py never truncates a previously good JSON.
mkdir -p -- "$(dirname -- "$OUTJSON")"
tmp_json="${OUTJSON}.tmp.$$"
if ! "$VPY" scripts/fig_agg.py --json "${dirs[@]}" > "$tmp_json"; then
  rm -f -- "$tmp_json"
  echo "[reuse] fig_agg.py failed; leaving $OUTJSON untouched" >&2
  exit 1
fi
mv -f -- "$tmp_json" "$OUTJSON"
echo "[reuse] done -> $OUTJSON (${#dirs[@]} dirs)"
|