"""Render the PD-disagg crossover figure from analyze_goodput.py JSONs. Two sweeps bracket the prefill<->decode bottleneck axis: D2: fixed input, grow OUTPUT -> decode-bound -> PD_advantage rises above 1 D1: fixed output, grow INPUT -> prefill-bound -> PD_advantage falls below 1 Top row: PD_advantage (4P+4D / colo SLO-goodput) vs swept dim, y=1 = crossover. Bottom row: completion rate, colo vs 4P+4D. Agentic operating region (input p50~33k, output p50~92) annotated. """ from __future__ import annotations import json import re from pathlib import Path import matplotlib matplotlib.use("Agg") import matplotlib.pyplot as plt CROSS = Path("analysis/crossover") OUT = Path("figs/crossover_pd_advantage.png") def load_series(prefix: str, key_re: str): pts = [] for f in sorted(CROSS.glob(f"{prefix}*_goodput.json")): m = re.search(key_re, f.name) if not m: continue d = json.loads(f.read_text()) g = d["slo_grid"][0]["arms"] pts.append({ "x": int(m.group(1)), "adv": g["4P+4D"]["pd_advantage"], "colo_att": g["8C-proxy"]["attainment"], "pd_att": g["4P+4D"]["attainment"], "colo_compl": d["arms"]["8C-proxy"]["completion_rate"], "pd_compl": d["arms"]["4P+4D"]["completion_rate"], "colo_ampl": d["arms"]["8C-proxy"]["amplification"], "pd_ampl": d["arms"]["4P+4D"]["amplification"], }) pts.sort(key=lambda p: p["x"]) return pts def main(): d2 = load_series("d2_o", r"d2_o(\d+)_") # x = output length d1 = load_series("d1_i", r"d1_i(\d+)_") # x = input length fig, ax = plt.subplots(2, 2, figsize=(12, 8)) def adv_plot(a, pts, xlabel, title, agentic_x, agentic_label): CAP = 50.0 # display cap for "colo=0, PD>0" (PD wins outright) line_x, line_y = [], [] for p in pts: adv, colo_att, pd_att = p["adv"], p["colo_att"], p["pd_att"] finite = isinstance(adv, (int, float)) and adv == adv if finite: line_x.append(p["x"]); line_y.append(max(adv, 1e-3)) elif colo_att == 0 and pd_att > 0: # PD wins outright line_x.append(p["x"]); line_y.append(CAP) a.annotate("PD wins\n(colo=0)", (p["x"], CAP), fontsize=7, ha="center", va="top") else: # both fail SLO -> not a PD win a.scatter([p["x"]], [1.0], marker="x", color="gray", zorder=5) a.annotate("both\nfail SLO", (p["x"], 1.0), fontsize=7, ha="center", va="bottom", color="gray") a.plot(line_x, line_y, "o-", color="tab:blue", lw=2) a.axhline(1.0, color="k", ls="--", lw=1, label="crossover (PD=colo)") a.set_xscale("log"); a.set_yscale("log") a.set_xlabel(xlabel) a.set_ylabel("PD_advantage = goodput(4P+4D)/goodput(colo)") a.set_title(title) a.axvline(agentic_x, color="tab:red", ls=":", lw=1.5) a.annotate(agentic_label, (agentic_x, 1.2), color="tab:red", fontsize=8, rotation=90, va="bottom", ha="right") a.legend(fontsize=8, loc="best") a.grid(True, which="both", alpha=0.3) def compl_plot(a, pts, xlabel, title): xs = [p["x"] for p in pts] a.plot(xs, [100*p["colo_compl"] for p in pts], "s-", color="tab:orange", label="colo (8C-proxy)") a.plot(xs, [100*p["pd_compl"] for p in pts], "o-", color="tab:blue", label="PD (4P+4D)") a.set_xscale("log") a.set_xlabel(xlabel) a.set_ylabel("completion rate (%)") a.set_title(title) a.set_ylim(0, 105) a.legend(fontsize=8, loc="best") a.grid(True, alpha=0.3) adv_plot(ax[0][0], d2, "output length (tokens), fixed input=2048, q12", "D2 decode-bound sweep — PD wins as output grows", 92, "agentic out~92") adv_plot(ax[0][1], d1, "input length (tokens), fixed output=64, q4", "D1 prefill-bound sweep — PD collapses as input grows", 33533, "agentic in~33k") compl_plot(ax[1][0], d2, "output length (tokens)", "D2 completion") compl_plot(ax[1][1], d1, "input length (tokens)", "D1 completion") fig.suptitle("PD-disaggregation vs colocation: the prefill<->decode bottleneck crossover\n" "(single node 8xH20, vLLM 0.18.1 chunked-prefill; zero-reuse synthetic)", fontsize=11) fig.tight_layout(rect=[0, 0, 1, 0.96]) OUT.parent.mkdir(parents=True, exist_ok=True) fig.savefig(OUT, dpi=130) print(f"wrote {OUT}") # also dump the numeric series for the record print("D2 (output -> adv):", [(p["x"], round(p["adv"],2) if p["adv"]==p["adv"] else "inf") for p in d2]) print("D1 (input -> adv):", [(p["x"], round(p["adv"],2) if p["adv"]==p["adv"] else "inf") for p in d1]) if __name__ == "__main__": main()