Files
agentic-kvc/microbench/fresh_setup/plot_pd_crossover.py
Gahow Wang 9c105cf05a MB5 PD ablation: controlled-variable reuse/conc redo + campaign tooling
Reuse and concurrency axes redone with proper controlled variables, plus
the orchestration used to run them on dash0:

- run_reuse_fixed.sh: hold REAL prefill work (delta) constant, vary only
  cached prefix -> reuse = C/(C+U). Supersedes old fig1 (which held
  input=8192 and sliced prefix out, confounding "more reuse" with "less
  prefill").
- run_conc.sh: agentic-corner config (in=32768, delta=512, reuse=0.984,
  out=128) that exposes PD's structural KV-transfer tax. Supersedes old fig3.
- run_campaign{,2,3}.sh, backfill_d2048o128.sh: serial campaign drivers
  (strictly one driver at a time), out=128 sweeps, PD wall-cap for
  collapse-draining high-reuse arms, and flaked-arm backfill.
- mb5_run_gpu.sh: per-config bring-up / replay / teardown orchestrator.
- plot_pd_crossover.py: render the reuse_compare figures from fig_agg dumps.
- fig_agg.py: tolerate null stats from fully-collapsed arms (0 successes
  write the stat keys as null; `dict.get(k, {})` returns null, not {}).

Data: fig1_reuse_fixed.json, fig1_reuse_d{1024,2048}_o128.json
Figs: reuse_compare_AB.png, reuse_compare_ABC.png

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
2026-06-01 01:03:27 +08:00

185 lines
8.4 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

"""Render the three PD-vs-colo crossover figures from fig_agg JSON dumps.
Inputs (produced by `fig_agg.py --json`):
analysis/mb5_pd_ablation/fig1_reuse_fixed.json reuse axis (N=8, FIXED real
prefill delta=2048; vary cached prefix -> reuse = pfx/(pfx+delta).
Controlled-variable: real new-prefill work is constant across the sweep,
only the cached fraction (and total context) grows. Supersedes the old
fig1.json, which held input=8192 and sliced prefix out of it so delta
shrank 15x as reuse rose — a confound, not a pure reuse axis.)
analysis/mb5_pd_ablation/fig2.json shape axis (N=8, reuse~70%)
analysis/mb5_pd_ablation/fig3_conc32k.json concurrency (in32768/out128,
reuse~0.984 = 32256 resident + 512 real new-prefill per turn; retuned
2026-05-31 to the agentic corner so PD pays the full-context per-turn
KV-transfer tax while colo keeps it resident; vary N by step 8 up to the
mean-E2E<=10s SLO ceiling)
Each figure overlays colo + the three PD ratios and marks the PD-best advantage.
All three share the corrected (uncontaminated, e13391e-gated-off) stack.
"""
from __future__ import annotations
import json
import re
from pathlib import Path
import matplotlib
matplotlib.use("Agg")
import matplotlib.pyplot as plt
# ROOT is the third ancestor of this file (two levels above its directory);
# input dumps and rendered figures both live in per-campaign folders under it.
ROOT = Path(__file__).resolve().parents[2]
DATA = ROOT.joinpath("analysis", "mb5_pd_ablation")
OUT = ROOT.joinpath("figs", "mb5_pd_ablation")
# Created at import time so every fig_* function can save without checking.
OUT.mkdir(parents=True, exist_ok=True)
# Names of the PD arms that are overlaid against the "colo" arm.
PD_ARMS = ["2P+6D", "4P+4D", "6P+2D"]

# Per-arm keyword arguments, splatted into ``Axes.plot`` by the fig_* functions.
# colo is drawn as a heavy solid black line; the PD arms share a thinner dashed
# style and differ only in colour, marker and label.
STYLE = {
    "colo": {"color": "k", "marker": "o", "lw": 2.4, "ls": "-", "label": "colo (8×kv_both)"},
    **{
        arm: {"color": color, "marker": marker, "lw": 1.6, "ls": "--", "label": f"PD {arm}"}
        for arm, color, marker in zip(
            PD_ARMS,
            ("#1f77b4", "#2ca02c", "#ff7f0e"),
            ("s", "^", "v"),
        )
    },
}
def load(name):
    """Parse the JSON dump ``DATA / name`` and return the decoded object.

    The file is opened in a ``with`` block so the handle is closed as soon as
    parsing finishes (the bare ``json.load(open(...))`` form left that to the
    garbage collector), and it is decoded as UTF-8 rather than with the
    platform's locale default.

    Raises whatever ``open`` / ``json.load`` raise (e.g. ``FileNotFoundError``,
    ``json.JSONDecodeError``).
    """
    with open(DATA / name, encoding="utf-8") as fh:
        return json.load(fh)
def by_axis(rows, keyfn):
    """Index rows two levels deep: ``{axis_val: {arm: row}}``.

    ``keyfn`` is called with each row's ``"name"``; a ``None`` result drops
    the row.  When two rows share the same axis value and ``"arm"``, the later
    one wins.
    """
    grouped = {}
    for row in rows:
        axis_val = keyfn(row["name"])
        if axis_val is not None:
            if axis_val not in grouped:
                grouped[axis_val] = {}
            grouped[axis_val][row["arm"]] = row
    return grouped
def pd_best(armmap, metric="e2e_p90"):
    """Return ``(arm, value)`` for the PD arm with the smallest ``metric``.

    Only arms named in ``PD_ARMS`` that appear in ``armmap`` with a
    non-``None`` value for ``metric`` compete.  Ties go to whichever arm comes
    first in ``PD_ARMS``.  Returns ``(None, None)`` when no arm qualifies.
    """
    best_arm, best_val = None, None
    for arm in PD_ARMS:
        if arm not in armmap:
            continue
        val = armmap[arm].get(metric)
        if val is None:
            continue
        # Strict "<" keeps the earliest arm on ties, like min() does.
        if best_arm is None or val < best_val:
            best_arm, best_val = arm, val
    return (best_arm, best_val)
def series(grp, xs, arm, metric):
    """Collect ``metric`` for ``arm`` at every axis value in ``xs``.

    The result is aligned with ``xs``.  A slot is ``None`` when the arm has no
    row at that axis value, or when its row has no (or a null) ``metric``.
    """
    values = []
    for x in xs:
        arms_at_x = grp[x]
        if arm in arms_at_x:
            values.append(arms_at_x[arm].get(metric))
        else:
            values.append(None)
    return values
# ---------- Fig 1: reuse axis ----------
def _reuse_pct(name):
"""Reuse % from a `reuse_p{pfx}_d{delta}_{arm}` run name: pfx/(pfx+delta)."""
m = re.search(r"_p(\d+)_d(\d+)", name)
if not m:
return None
pfx, delta = int(m.group(1)), int(m.group(2))
return round(pfx / (pfx + delta) * 100)
def fig_reuse():
    """Render the reuse-axis figure (Fig 1) to ``OUT / "fig1_reuse_axis.png"``.

    Rows from ``fig1_reuse_fixed.json`` are grouped by the integer reuse
    percentage parsed from each run name.

    * Panel (a): ``e2e_p90`` of colo and every PD arm against reuse %.
    * Panel (b): colo ``e2e_p90`` divided by the best (lowest) PD ``e2e_p90``,
      with the ``pu`` field of that best PD arm on a twin y-axis.

    A point in panel (b) is left as a gap (``None``) when it cannot be
    computed: the colo row is absent, its ``e2e_p90`` is null, or no PD arm
    has a usable value.  Previously a missing or null colo stat raised
    ``KeyError`` / ``TypeError`` and aborted the whole figure.
    """
    g = by_axis(load("fig1_reuse_fixed.json"), _reuse_pct)
    xs = sorted(g)

    fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(11, 4.2))

    # ---- panel (a): raw latency per arm ----
    for arm in ["colo", *PD_ARMS]:
        ax1.plot(xs, series(g, xs, arm, "e2e_p90"), **STYLE[arm])
    ax1.set_xlabel("intra-session KV reuse (%) [fixed real prefill, delta=2048]")
    ax1.set_ylabel("E2E latency p90 (s)")
    ax1.set_title("(a) E2E-p90 vs reuse (N=8, delta=2048/out256)")
    ax1.legend(fontsize=8)
    ax1.grid(alpha=.3)

    # ---- panel (b): colo / PD-best ratio, plus the PD-best arm's "pu" ----
    adv, putil = [], []
    for x in xs:
        arms_at_x = g[x]
        co = arms_at_x.get("colo", {}).get("e2e_p90")
        # One lookup yields both the winning arm and its latency.
        best_arm, best = pd_best(arms_at_x)
        # ``best`` of None or 0 cannot be a divisor; a null colo has no ratio.
        adv.append(co / best if co is not None and best else None)
        putil.append(arms_at_x[best_arm].get("pu") if best_arm is not None else None)

    ax2.plot(xs, adv, color="purple", marker="D", lw=2, label="PD-best advantage (colo/PD)")
    ax2.axhline(1.0, color="grey", ls=":", lw=1)
    ax2.set_xlabel("intra-session KV reuse (%)")
    ax2.set_ylabel("advantage (>1 = PD wins)")
    ax2b = ax2.twinx()
    ax2b.plot(xs, putil, color="brown", marker="x", lw=1.4, ls="-.", label="PD-best prefill-GPU util")
    ax2b.set_ylabel("prefill-GPU util (%)", color="brown")
    ax2b.tick_params(axis="y", colors="brown")
    ax2.set_title("(b) advantage erodes; prefill GPUs go idle")
    # Merge both y-axes' handles so a single legend covers the panel.
    l1, la1 = ax2.get_legend_handles_labels()
    l2, la2 = ax2b.get_legend_handles_labels()
    ax2.legend(l1 + l2, la1 + la2, fontsize=8, loc="center right")
    ax2.grid(alpha=.3)

    fig.suptitle("Fig 1 — Reuse axis (fixed real prefill delta=2048): PD's edge vs rising cache reuse",
                 fontsize=11, y=1.02)
    fig.tight_layout()
    p = OUT / "fig1_reuse_axis.png"
    fig.savefig(p, dpi=130, bbox_inches="tight")
    # pyplot keeps every figure registered until it is closed explicitly.
    plt.close(fig)
    print("wrote", p)
# ---------- Fig 2: shape axis ----------
def fig_shape():
    """Render the shape-axis figure (Fig 2) to ``OUT / "fig2_shape_axis.png"``.

    Rows from ``fig2.json`` are grouped by the ``(in, out)`` integer pair
    parsed from ``_in{I}_out{O}_`` in each run name and ordered by ascending
    ``in``.

    * Panel (a): ``e2e_p90`` of colo and every PD arm per shape.
    * Panel (b): colo ``e2e_p90`` divided by the best (lowest) PD ``e2e_p90``,
      with the lowest ``n / req`` ratio among the PD arms (as a percentage)
      on a twin y-axis.

    Points that cannot be computed are left as gaps (``None``) instead of
    aborting the figure: a missing or null colo stat, no usable PD latency,
    or no PD arm with a non-null ``n`` and a non-zero ``req``.
    """
    def shape_of(run_name):
        # None drops rows whose name carries no shape tag.
        m = re.search(r"_in(\d+)_out(\d+)_", run_name)
        return (int(m.group(1)), int(m.group(2))) if m else None

    g = by_axis(load("fig2.json"), shape_of)
    xs = sorted(g, key=lambda t: t[0])  # ascending input
    labels = [f"in{i}\nout{o}" for i, o in xs]
    # Shapes are categorical, so they are plotted at evenly spaced positions.
    xi = list(range(len(xs)))

    fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(11, 4.2))

    # ---- panel (a): raw latency per arm ----
    for arm in ["colo", *PD_ARMS]:
        ax1.plot(xi, series(g, xs, arm, "e2e_p90"), **STYLE[arm])
    ax1.set_xticks(xi)
    ax1.set_xticklabels(labels, fontsize=7)
    ax1.set_xlabel("shape (decode-heavy → prefill-heavy)")
    ax1.set_ylabel("E2E latency p90 (s)")
    ax1.set_title("(a) E2E-p90 vs shape (N=8, reuse~70%)")
    ax1.legend(fontsize=8)
    ax1.grid(alpha=.3)

    # ---- panel (b): colo / PD-best ratio, plus worst PD completion ----
    adv, comp = [], []
    for x in xs:
        arms_at_x = g[x]
        co = arms_at_x.get("colo", {}).get("e2e_p90")
        _, best = pd_best(arms_at_x)
        # ``best`` of None or 0 cannot be a divisor; a null colo has no ratio.
        adv.append(co / best if co is not None and best else None)
        # completion of the worst PD arm (exposes catastrophic ratio);
        # arms with a null ``n`` or a null/zero ``req`` have no defined rate.
        rates = [
            row["n"] / row["req"]
            for row in (arms_at_x[arm] for arm in PD_ARMS if arm in arms_at_x)
            if row.get("n") is not None and row.get("req")
        ]
        comp.append(min(rates) * 100 if rates else None)

    ax2.plot(xi, adv, color="purple", marker="D", lw=2, label="PD-best advantage (colo/PD)")
    ax2.axhline(1.0, color="grey", ls=":", lw=1)
    ax2.set_xticks(xi)
    ax2.set_xticklabels(labels, fontsize=7)
    ax2.set_xlabel("shape")
    ax2.set_ylabel("advantage (>1 = PD wins)")
    ax2b = ax2.twinx()
    ax2b.plot(xi, comp, color="red", marker="x", lw=1.4, ls="-.", label="worst-PD-arm completion %")
    ax2b.set_ylabel("worst PD completion (%)", color="red")
    ax2b.tick_params(axis="y", colors="red")
    # NOTE(review): completion values below 80% fall outside this fixed range
    # and will not be visible — confirm that is intended.
    ax2b.set_ylim(80, 101)
    ax2.set_title("(b) advantage peaks mid-sweep; wrong ratio catastrophic at prefill extreme")
    # Merge both y-axes' handles so a single legend covers the panel.
    l1, la1 = ax2.get_legend_handles_labels()
    l2, la2 = ax2b.get_legend_handles_labels()
    ax2.legend(l1 + l2, la1 + la2, fontsize=8, loc="lower left")
    ax2.grid(alpha=.3)

    fig.suptitle("Fig 2 — Shape axis: PD wins decode-heavy, ties prefill-heavy; optimal ratio rotates",
                 fontsize=11, y=1.02)
    fig.tight_layout()
    p = OUT / "fig2_shape_axis.png"
    fig.savefig(p, dpi=130, bbox_inches="tight")
    # pyplot keeps every figure registered until it is closed explicitly.
    plt.close(fig)
    print("wrote", p)
# ---------- Fig 3: concurrency axis ----------
def fig_conc():
    """Render the concurrency-axis figure (Fig 3) to ``OUT / "fig3_concurrency_axis.png"``.

    Rows from ``fig3_conc32k.json`` are grouped by the integer ``N`` parsed
    from ``_N{N}_`` in each run name.  Three panels share that x-axis and
    overlay colo with every PD arm:

    * (a) ``e2e_mean`` on a log y-axis, with a dotted reference line at 10;
    * (b) ``tps``;
    * (c) ``apc`` multiplied by 100 (null values stay as gaps).

    The figure is closed after saving so that it does not stay registered
    with pyplot.
    """
    def conc_of(run_name):
        # None drops rows whose name carries no concurrency tag.
        m = re.search(r"_N(\d+)_", run_name)
        return int(m.group(1)) if m else None

    g = by_axis(load("fig3_conc32k.json"), conc_of)
    xs = sorted(g)
    arms = ["colo", *PD_ARMS]

    fig, (ax1, ax2, ax3) = plt.subplots(1, 3, figsize=(15, 4.2))

    # Each panel plots one metric; ``scale`` converts apc to a percentage.
    for ax, metric, scale in ((ax1, "e2e_mean", None), (ax2, "tps", None), (ax3, "apc", 100)):
        for arm in arms:
            ys = series(g, xs, arm, metric)
            if scale is not None:
                ys = [v * scale if v is not None else None for v in ys]
            ax.plot(xs, ys, **STYLE[arm])
        ax.set_xticks(xs)
        ax.set_xticklabels(xs, fontsize=7)
        ax.set_xlabel("concurrent sessions N")

    # ---- panel (a) extras: SLO line and log scale ----
    ax1.axhline(10.0, color="red", ls=":", lw=1, label="SLO (mean E2E 10s)")
    ax1.set_yscale("log")
    ax1.set_ylabel("E2E latency mean (s, log)")
    ax1.set_title("(a) mean-E2E vs concurrency")
    ax1.legend(fontsize=8)
    ax1.grid(alpha=.3, which="both")

    # ---- panel (b) ----
    ax2.set_ylabel("throughput (tok/s)")
    ax2.set_title("(b) TPS: colo scales, PD plateaus/drops")
    ax2.legend(fontsize=8)
    ax2.grid(alpha=.3)

    # ---- panel (c) ----
    ax3.set_ylabel("producer prefix-cache hit-rate (%)")
    ax3.set_title("(c) APC vs concurrency")
    ax3.legend(fontsize=8)
    ax3.grid(alpha=.3)

    fig.suptitle("Fig 3 — Concurrency axis (in32768/out128, reuse~0.984): sweep N by 8 to the 10s-SLO ceiling",
                 fontsize=11, y=1.02)
    fig.tight_layout()
    p = OUT / "fig3_concurrency_axis.png"
    fig.savefig(p, dpi=130, bbox_inches="tight")
    # pyplot keeps every figure registered until it is closed explicitly.
    plt.close(fig)
    print("wrote", p)
if __name__ == "__main__":
    # Render the figures in order: reuse axis, shape axis, concurrency axis.
    for render in (fig_reuse, fig_shape, fig_conc):
        render()