# agentic-kvc/microbench/fresh_setup/plot_crossover.py

"""Render the PD-disagg crossover figure from analyze_goodput.py JSONs.

Two sweeps bracket the prefill<->decode bottleneck axis:
  D2: fixed input, grow OUTPUT  -> decode-bound  -> PD_advantage rises above 1
  D1: fixed output, grow INPUT  -> prefill-bound -> PD_advantage falls below 1

Top row: PD_advantage (4P+4D / colo SLO-goodput) vs swept dim, y=1 = crossover.
Bottom row: completion rate, colo vs 4P+4D.
Agentic operating region (input p50~33k, output p50~92) annotated.
"""

from __future__ import annotations

import json
import math
import re
from pathlib import Path

import matplotlib
matplotlib.use("Agg")
import matplotlib.pyplot as plt

# Directory (relative path) that load_series() globs for "<prefix>*_goodput.json" inputs.
CROSS = Path("analysis/crossover")
# Destination of the rendered figure; main() creates its parent directory before saving.
OUT = Path("figs/crossover_pd_advantage.png")


def load_series(prefix: str, key_re: str):
    """Collect one sweep's data points from the goodput JSON files under CROSS.

    Args:
        prefix: Filename prefix; files matching ``{prefix}*_goodput.json`` are read.
        key_re: Regular expression searched in each filename; its first capture
            group is converted with ``int`` and becomes the point's ``"x"``.
            Files whose name does not match are skipped.

    Returns:
        A list of dicts ordered by ascending ``"x"``. Each dict carries ``"x"``
        plus ``"adv"``, ``"colo_att"``, ``"pd_att"`` (taken from the first
        ``slo_grid`` entry) and ``"colo_compl"``, ``"pd_compl"``,
        ``"colo_ampl"``, ``"pd_ampl"`` (taken from the top-level ``arms``).
    """
    series = []
    for path in sorted(CROSS.glob(f"{prefix}*_goodput.json")):
        match = re.search(key_re, path.name)
        if match is None:
            continue

        report = json.loads(path.read_text())
        slo_arms = report["slo_grid"][0]["arms"]  # only the first SLO grid entry is used

        point = {"x": int(match.group(1))}
        point["adv"] = slo_arms["4P+4D"]["pd_advantage"]
        point["colo_att"] = slo_arms["8C-proxy"]["attainment"]
        point["pd_att"] = slo_arms["4P+4D"]["attainment"]
        point["colo_compl"] = report["arms"]["8C-proxy"]["completion_rate"]
        point["pd_compl"] = report["arms"]["4P+4D"]["completion_rate"]
        point["colo_ampl"] = report["arms"]["8C-proxy"]["amplification"]
        point["pd_ampl"] = report["arms"]["4P+4D"]["amplification"]
        series.append(point)

    # Stable sort: points sharing an x keep their filename order.
    return sorted(series, key=lambda point: point["x"])


_ADV_CAP = 50.0    # display cap for "colo=0, PD>0" (PD wins outright)
_ADV_FLOOR = 1e-3  # lower clamp so zero/negative ratios stay drawable on the log axis


def _is_finite_number(value) -> bool:
    """Return True only for an int/float that is neither NaN nor +/-inf.

    ``None`` (JSON ``null``) and any non-numeric value yield False.
    """
    return isinstance(value, (int, float)) and math.isfinite(value)


def _plot_advantage(a, pts, xlabel, title, agentic_x, agentic_label):
    """Draw PD_advantage against the swept dimension on log-log axes.

    Args:
        a: Matplotlib axes to draw on.
        pts: Series points; each needs ``"x"`` and ``"adv"``, and, when
            ``"adv"`` is not a finite number, ``"colo_att"`` and ``"pd_att"``.
        xlabel: X-axis label.
        title: Panel title.
        agentic_x: X position of the dotted red vertical reference line.
        agentic_label: Text annotated next to that reference line.
    """
    line_x, line_y = [], []
    for p in pts:
        x, adv = p["x"], p["adv"]
        if _is_finite_number(adv):
            line_x.append(x)
            line_y.append(max(adv, _ADV_FLOOR))
        elif p["colo_att"] == 0 and p["pd_att"] > 0:
            # PD wins outright: the ratio has no finite value (NaN, inf or
            # null), so pin the point at the display cap and label it.
            line_x.append(x)
            line_y.append(_ADV_CAP)
            a.annotate("PD wins\n(colo=0)", (x, _ADV_CAP), fontsize=7, ha="center", va="top")
        else:
            # both fail SLO -> not a PD win; mark on the crossover line, off the curve
            a.scatter([x], [1.0], marker="x", color="gray", zorder=5)
            a.annotate("both\nfail SLO", (x, 1.0), fontsize=7, ha="center",
                       va="bottom", color="gray")
    a.plot(line_x, line_y, "o-", color="tab:blue", lw=2)
    a.axhline(1.0, color="k", ls="--", lw=1, label="crossover (PD=colo)")
    a.set_xscale("log")
    a.set_yscale("log")
    a.set_xlabel(xlabel)
    a.set_ylabel("PD_advantage = goodput(4P+4D)/goodput(colo)")
    a.set_title(title)
    a.axvline(agentic_x, color="tab:red", ls=":", lw=1.5)
    a.annotate(agentic_label, (agentic_x, 1.2), color="tab:red", fontsize=8,
               rotation=90, va="bottom", ha="right")
    a.legend(fontsize=8, loc="best")
    a.grid(True, which="both", alpha=0.3)


def _plot_completion(a, pts, xlabel, title):
    """Draw both arms' completion rate (as a percentage) on a log-x axis.

    Args:
        a: Matplotlib axes to draw on.
        pts: Series points; each needs ``"x"``, ``"colo_compl"`` and ``"pd_compl"``.
        xlabel: X-axis label.
        title: Panel title.
    """
    xs = [p["x"] for p in pts]
    a.plot(xs, [100 * p["colo_compl"] for p in pts], "s-", color="tab:orange",
           label="colo (8C-proxy)")
    a.plot(xs, [100 * p["pd_compl"] for p in pts], "o-", color="tab:blue",
           label="PD (4P+4D)")
    a.set_xscale("log")
    a.set_xlabel(xlabel)
    a.set_ylabel("completion rate (%)")
    a.set_title(title)
    a.set_ylim(0, 105)
    a.legend(fontsize=8, loc="best")
    a.grid(True, alpha=0.3)


def _adv_summary(pts):
    """Return ``(x, adv)`` pairs for printing.

    ``adv`` is rounded to 2 places when it is a finite number; any other value
    (NaN, +/-inf, ``None``) is shown as the string ``"inf"`` instead of being
    passed to ``round``.
    """
    return [
        (p["x"], round(p["adv"], 2) if _is_finite_number(p["adv"]) else "inf")
        for p in pts
    ]


def main():
    """Load the D2 and D1 sweeps, save the 2x2 figure to OUT, and print the series."""
    d2 = load_series("d2_o", r"d2_o(\d+)_")   # x = output length
    d1 = load_series("d1_i", r"d1_i(\d+)_")   # x = input length

    fig, ax = plt.subplots(2, 2, figsize=(12, 8))

    # Top row: PD_advantage panels; bottom row: completion-rate panels.
    _plot_advantage(ax[0][0], d2, "output length (tokens), fixed input=2048, q12",
                    "D2 decode-bound sweep — PD wins as output grows",
                    92, "agentic out~92")
    _plot_advantage(ax[0][1], d1, "input length (tokens), fixed output=64, q4",
                    "D1 prefill-bound sweep — PD collapses as input grows",
                    33533, "agentic in~33k")
    _plot_completion(ax[1][0], d2, "output length (tokens)", "D2 completion")
    _plot_completion(ax[1][1], d1, "input length (tokens)", "D1 completion")

    fig.suptitle("PD-disaggregation vs colocation: the prefill<->decode bottleneck crossover\n"
                 "(single node 8xH20, vLLM 0.18.1 chunked-prefill; zero-reuse synthetic)",
                 fontsize=11)
    fig.tight_layout(rect=[0, 0, 1, 0.96])  # leave the top 4% for the suptitle
    OUT.parent.mkdir(parents=True, exist_ok=True)
    fig.savefig(OUT, dpi=130)
    print(f"wrote {OUT}")
    # also dump the numeric series for the record
    print("D2 (output -> adv):", _adv_summary(d2))
    print("D1 (input  -> adv):", _adv_summary(d1))

# Run main() only when this file is executed as a script, not when it is imported.
if __name__ == "__main__":
    main()