#!/usr/bin/env python3 """Plot MB1 phase-interference data. Single output: figs/mb1_interference.png — effective per-stream TPOT during a prefill burst, vs prefill size, one line per concurrent decode batch size D. Earlier versions of this script also produced figs/pd_cost_vs_benefit.png which composed a "max PD-disagg benefit = decode duration (50–200 ms) band" against the MB2 transfer-cost curve. That accounting was wrong (see RESULTS_SUMMARY.md §4 correction): phase-isolation benefit is per-prefill-event, equal to D × T_prefill across stalled streams, not capped by a single request's decode duration. That figure has been removed; the math it implied was structurally backwards. The dominant reason static PD-disagg fails in agentic is **D-side KV capacity** (see figs/f4b_pdsep_kv_wall.png), not cost-vs-benefit on phase isolation. """ from __future__ import annotations import argparse import json from pathlib import Path import matplotlib matplotlib.use("Agg") import matplotlib.pyplot as plt def main() -> None: p = argparse.ArgumentParser() p.add_argument("--mb1", type=Path, required=True) p.add_argument("--out", type=Path, default=Path("figs/mb1_interference.png")) args = p.parse_args() mb1 = json.loads(args.mb1.read_text())["summary"] fig, ax = plt.subplots(figsize=(9, 5.5)) Ds = sorted({s["decode_batch_size"] for s in mb1}) colors = {1: "#1f77b4", 4: "#ff7f0e", 8: "#d62728"} for D in Ds: rows = [s for s in mb1 if s["decode_batch_size"] == D] rows.sort(key=lambda s: s["new_prefill_tokens"]) xs = [s["new_prefill_tokens"] for s in rows] ys = [s["effective_tpot_during_ms"] for s in rows] ax.plot(xs, ys, "o-", lw=2, markersize=7, color=colors.get(D, "gray"), label=f"D={D} (baseline TPOT {rows[0]['baseline_tpot_ms']:.1f} ms)") ax.set_xscale("log"); ax.set_yscale("log") ax.set_xlabel("Prefill burst size (tokens, log)") ax.set_ylabel("Per-stream effective TPOT during prefill burst (ms, log)") ax.set_title("MB1: each ongoing decode is essentially halted while prefill runs\n" "(chunked-prefill ON, vLLM 0.18.1 default, single H20). " "Per-prefill aggregate stall = D × T_prefill.") ax.grid(True, which="both", alpha=0.3) ax.legend(loc="upper left", fontsize=9) args.out.parent.mkdir(parents=True, exist_ok=True) fig.tight_layout(); fig.savefig(args.out, dpi=150); plt.close(fig) print(f"wrote {args.out}") if __name__ == "__main__": main()