#!/usr/bin/env bash
# Reuse axis, DONE RIGHT (controlled variable). Supersedes old fig1.
#   Hold REAL (uncached) prefill work constant: --delta-len = U fixed.
#   Vary only --prefix-len = C  ->  reuse = C/(C+U). Context grows with reuse
#   but the tokens that must actually be prefilled each turn stay = U.
# Old fig1 held input=8192 and sliced the prefix out of it, so delta shrank 15x
# as reuse rose -> confounded "more reuse" with "less prefill". This fixes that.
#
# Wiring matches the corrected MB5 stack (see memory project-mb5-pd-ablation-wiring):
#   .venv_dash0, traces_synth/, CONFIG 8C-proxy + PD, MB5_P_ROUTING=session,
#   N injected via REPLAY_MAX_INFLIGHT (closed loop) + REPLAY_INTER_TURN_THINK_S,
#   REPLAY_NO_REALIZED_PREFIX=1 (reuse governed by hash_ids, required for this sweep).
#
# Every knob below can be overridden from the environment.
set -eo pipefail

cd /home/admin/cpfs/wjh/agentic-kv-fresh
export MB5_VENV="${MB5_VENV:-/home/admin/cpfs/wjh/agentic-kv-fresh/.venv_dash0}"
VPY="$MB5_VENV/bin/python"

DELTA="${DELTA:-2048}"   # fixed real prefill per turn (USER-CHOSEN); passed as --delta-len
OL="${OL:-256}"          # passed as --output-len
N="${N:-8}"              # passed as REPLAY_MAX_INFLIGHT
THINK="${THINK:-0.5}"    # passed as REPLAY_INTER_TURN_THINK_S
TURNS="${TURNS:-8}"      # passed as --turns
NSESS="${NSESS:-48}"     # number of sessions (closed-loop: arrival rate is
                         # irrelevant, only the count matters; ~6 waves at N=8)
PFXS="${PFXS:-512 2048 4096 8192 18432 38912}"   # reuse .20 .50 .67 .80 .90 .95
CFGS="${CFGS:-8C-proxy 2P+6D 4P+4D 6P+2D}"
# Wall-deadline (s) for PD arms only (colo uncapped): bounds the collapse-drain
# that stalls high-reuse PD arms (un-run turns = failures, honest completion%).
# 0/empty = off. The expansion uses '-' (not ':-') on purpose so that an
# explicitly empty REUSE_PD_MAXDUR really disables the cap instead of silently
# falling back to 500.
REUSE_PD_MAXDUR="${REUSE_PD_MAXDUR-500}"
ONLY_PFX="${ONLY_PFX:-}"   # smoke a single prefix then exit

#######################################
# Collect existing run directories matching a glob into the global array `dirs`.
# Globals:   dirs (written)
# Arguments: $1 - glob pattern, relative to the current directory
# Returns:   0 always; `dirs` is left empty when nothing matches
#######################################
collect_run_dirs() {
  local pattern="$1" d
  dirs=()
  # shellcheck disable=SC2086 -- unquoted on purpose: the pattern must glob.
  for d in $pattern; do
    # An unmatched glob stays literal; the -d test filters that case out.
    if [ -d "$d" ]; then
      dirs+=("$d")
    fi
  done
}

#######################################
# Generate the synthetic trace for one prefix length and replay it under every
# configuration in CFGS.
# Globals:   VPY DELTA OL N THINK TURNS NSESS CFGS REUSE_PD_MAXDUR (read)
# Arguments: $1 - prefix length (passed as --prefix-len)
# Outputs:   progress lines on stdout, per-config failure warnings on stderr
# Returns:   0 unless trace generation fails; a failing config only warns
#######################################
run_point() {
  local pfx="$1"
  local reuse
  # Values go in as argv instead of being spliced into the Python source, and
  # the venv interpreter is used for consistency with the rest of the script.
  reuse=$("$VPY" -c \
    'import sys; c, u = float(sys.argv[1]), float(sys.argv[2]); print(f"{c / (c + u):.3f}")' \
    "$pfx" "$DELTA")
  local tag="reuse_p${pfx}_d${DELTA}_o${OL}"   # _o${OL} so different output lens don't collide
  local trace="traces_synth/${tag}.jsonl"

  # Closed-loop: pass NSESS as qps with duration 1 so n_sessions = NSESS
  # exactly (gen_regular: n_sessions = int(duration_s * session_qps)).
  "$VPY" scripts/gen_synthetic_trace.py --out "$trace" --mode regular \
    --qps "$NSESS" --duration-s 1 --turns "$TURNS" \
    --prefix-len "$pfx" --delta-len "$DELTA" --output-len "$OL" --seed 42 >/dev/null
  echo "[reuse] pfx=$pfx delta=$DELTA reuse=$reuse in=$((pfx+DELTA)) -> $trace"

  local cfg dur
  # shellcheck disable=SC2086 -- CFGS is a whitespace-separated list by design.
  for cfg in $CFGS; do
    echo " -> $cfg"
    # Only PD arms get the wall deadline; colo (8C-proxy) stays uncapped.
    # "0" is normalised to empty, the same value the uncapped colo arm passes.
    dur=""
    if [ "$cfg" != "8C-proxy" ] && [ "${REUSE_PD_MAXDUR:-0}" != "0" ]; then
      dur="$REUSE_PD_MAXDUR"
    fi
    # Both routings set to session so BOTH colo (kv_both) and PD producers
    # pin a session's turns to one instance and reuse its prefix cache — the
    # fair cache-aware comparison. P_ROUTING is ignored by colo, COLO_ROUTING
    # by PD, so setting both is harmless and symmetric.
    # A failing arm is deliberately best-effort: warn and continue the sweep.
    MB5_P_ROUTING=session MB5_COLO_ROUTING=session \
      REPLAY_MAX_INFLIGHT="$N" REPLAY_INTER_TURN_THINK_S="$THINK" \
      REPLAY_NO_REALIZED_PREFIX=1 REPLAY_MAX_DURATION="$dur" \
      CONFIGS="$cfg" REPS=1 TRACE="$trace" RUN_TAG="$tag" \
      bash scripts/mb5_run_gpu.sh >/dev/null 2>&1 || echo " [warn] $cfg failed" >&2
  done
}

# Smoke mode: run a single prefix, print its aggregate, and stop.
if [ -n "$ONLY_PFX" ]; then
  echo "[reuse] SMOKE pfx=$ONLY_PFX cfgs='$CFGS'"
  t0=$(date +%s); run_point "$ONLY_PFX"; t1=$(date +%s)
  echo "[reuse] SMOKE done wall=$(( t1 - t0 ))s; compare:"
  smoke_glob="mb5_runs/reuse_p${ONLY_PFX}_d${DELTA}_o${OL}_*_rep1"
  collect_run_dirs "$smoke_glob"
  if [ "${#dirs[@]}" -eq 0 ]; then
    echo "[reuse] no run dirs match $smoke_glob; nothing to compare" >&2
    exit 1
  fi
  "$VPY" scripts/fig_agg.py "${dirs[@]}"
  exit 0
fi

# Full sweep over every prefix length.
# shellcheck disable=SC2086 -- PFXS is a whitespace-separated list by design.
for pfx in $PFXS; do
  run_point "$pfx"
done

# Aggregate ONLY this sweep's dirs (matched by delta+output) so the three
# reuse figures (d2048/o256, d1024/o128, d2048/o128) never cross-contaminate.
sweep_glob="mb5_runs/reuse_*_d${DELTA}_o${OL}_*_rep1"
collect_run_dirs "$sweep_glob"
if [ "${#dirs[@]}" -eq 0 ]; then
  echo "[reuse] no run dirs match $sweep_glob; nothing to aggregate" >&2
  exit 1
fi

OUTJSON="analysis/mb5_pd_ablation/fig1_reuse_d${DELTA}_o${OL}.json"
mkdir -p -- "$(dirname -- "$OUTJSON")"
# Write to a temp file and rename on success: a plain `> "$OUTJSON"` truncates
# the previous result before the aggregator has produced anything.
tmp_json="${OUTJSON}.tmp.$$"
trap 'rm -f -- "$tmp_json"' EXIT
"$VPY" scripts/fig_agg.py --json "${dirs[@]}" > "$tmp_json"
mv -f -- "$tmp_json" "$OUTJSON"
echo "[reuse] done -> $OUTJSON (${#dirs[@]} dirs)"