# agentic-kvc/analysis/pd_sep_paper_section/scripts/plot_routing_lever.py

"""C7: routing lever vs PD-separation lever.

Side-by-side comparison of the magnitude of two design changes on the same
agentic workload:
  (A) Round-robin -> cache-aware routing, both Combined-mode
  (B) Combined -> PD-separated, both cache-aware

For each lever, plot the delta in TTFT p50 and TPOT p90 (as relative %
improvement) and in APC (as absolute percentage points). Green = improvement,
red = regression. Numbers come from REPORT.md §3.1
(analysis/pd_separation_analysis.md §3.1).

CAVEAT shown on the figure: these numbers are from the legacy
trace methodology (random sampling, 1 req/GPU). They are not yet reproduced
on the trace-driven 850-req sampling at production concurrency, and the
PD-sep runs were captured with --enforce-eager. The current plot is meant
to show the qualitative gap between the two levers; a re-run is required
for paper-grade quantitative claims.
"""
import argparse
from pathlib import Path

import matplotlib
matplotlib.use("Agg")
import matplotlib.pyplot as plt
import numpy as np

# Hard-coded metric rows; plot() draws one subplot per row. Each tuple is:
# (label, RR baseline, cache-aware baseline, PD-sep w/ cache-aware,
#  unit, format, "improve_when_smaller")
#   label  - subplot title.
#   unit   - suffix appended to the raw values in the x-axis caption
#            ("pp" is suppressed there and no suffix is shown).
#   format - str.format template used to print the raw values.
#   improve_when_smaller - True: delta is reported as a relative %
#            improvement; False: delta is reported in absolute percentage
#            points (see pct_delta).
ROWS = [
    ("TTFT p50 (s)",  1.836, 0.731, 1.261, "s",  "{:.2f}", True),
    ("TPOT p90 (s)",  0.086, 0.073, 0.074, "s",  "{:.3f}", True),
    ("APC (%)",       20.8,  44.7,  40.2,  "pp", "{:.1f}", False),
]


def pct_delta(before, after, improve_when_smaller):
    """Compute the change from ``before`` to ``after`` and a display label.

    Returns a ``(value, text)`` pair where a positive ``value`` means the
    metric improved.

    * ``improve_when_smaller`` true: ``value`` is the relative reduction in
      percent (``before`` is the denominator) and ``text`` looks like
      ``"+60%"``.
    * ``improve_when_smaller`` false: ``value`` is the plain difference
      ``after - before`` and ``text`` looks like ``"+23.9pp"``. This branch
      is used for APC, where a relative % would be misleading.
    """
    change = after - before

    # Larger-is-better metrics are reported as an absolute point difference.
    if not improve_when_smaller:
        return change, f"{change:+.1f}pp"

    # Smaller-is-better: flip the sign so a reduction reads as a gain.
    gain = -(change / before) * 100
    return gain, f"{gain:+.0f}%"


def plot(out_path):
    """Draw the three-panel lever comparison and save it to ``out_path``.

    One subplot is drawn per (axis, ``ROWS`` entry) pair. Each subplot has
    two bars whose heights come from ``pct_delta``:

    * lever A: round-robin -> cache-aware routing (both Combined-mode)
    * lever B: Combined -> PD-separated (both cache-aware)

    Non-negative bars are green, negative bars are red, and every bar is
    annotated with its formatted delta. A footer with the methodology caveat
    is placed under the panels. After saving, a one-line-per-metric summary
    of both levers is printed to stdout.
    """
    improvement_color = "#2ca02c"
    regression_color = "#d62728"
    lever_names = [
        "RR → cache-aware\n(within Combined)",
        "Combined → PD-Sep\n(both cache-aware)",
    ]

    fig, axes = plt.subplots(1, 3, figsize=(10, 3.5))

    for ax, row in zip(axes, ROWS):
        metric, rr, ca, pdsep, unit, fmt, smaller_better = row

        # Lever A: RR -> cache-aware (both combined).
        lever_a, text_a = pct_delta(rr, ca, smaller_better)
        # Lever B: combined -> PD-sep (both cache-aware).
        lever_b, text_b = pct_delta(ca, pdsep, smaller_better)
        heights = [lever_a, lever_b]
        captions = [text_a, text_b]

        rects = ax.bar(
            lever_names,
            heights,
            color=[
                improvement_color if h >= 0 else regression_color
                for h in heights
            ],
            edgecolor="black", linewidth=0.6, width=0.55,
        )

        # Symmetric y-range around zero, with headroom for the bar labels.
        peak = max(abs(h) for h in heights)
        limit = peak * 1.35
        ax.set_ylim(-limit, limit)
        ax.axhline(0, color="black", lw=0.6)

        # Put each label just beyond the tip of its bar: above positive
        # bars, below negative ones.
        pad = peak * 0.06
        for rect, height, caption in zip(rects, heights, captions):
            if height >= 0:
                label_y, anchor = height + pad, "bottom"
            else:
                label_y, anchor = height - pad, "top"
            ax.text(
                rect.get_x() + rect.get_width() / 2,
                label_y,
                caption,
                ha="center", va=anchor,
                fontsize=10, fontweight="bold",
            )

        ax.set_title(metric, fontsize=10)
        ax.set_ylabel(
            "Δ (positive = improvement)" if smaller_better
            else "Δ percentage points"
        )
        ax.grid(True, axis="y", alpha=0.25)
        ax.tick_params(axis="x", labelsize=8.5)

        # The x-axis caption lists the raw values; the "pp" unit is not
        # appended to them.
        u = unit if unit != "pp" else ""
        ax.set_xlabel(
            f"RR={fmt.format(rr)}{u}  ·  CA={fmt.format(ca)}{u}  ·  PD-Sep={fmt.format(pdsep)}{u}",
            fontsize=8, color="#555", labelpad=8,
        )

    fig.suptitle(
        "Cache-aware routing is a larger lever than PD separation on agentic workload",
        fontsize=11, y=1.02,
    )
    # Reserve a strip at the bottom of the figure for the footer text.
    fig.tight_layout(rect=(0, 0.10, 1, 0.96))
    footer = (
        "Source: REPORT.md §3.1 / analysis/pd_separation_analysis.md §3.1. "
        "Legacy random-sampling methodology + --enforce-eager. "
        "Re-run on trace-driven w600_r0.0015_st30 with cuda-graph required before paper-grade citation."
    )
    fig.text(
        0.5, 0.01, footer,
        ha="center", fontsize=7.5, color="#666", style="italic", wrap=True,
    )
    fig.savefig(out_path, bbox_inches="tight")
    plt.close(fig)

    # Console summary: one line per metric covering both levers.
    print(f"[C7] wrote {out_path}")
    for metric, rr, ca, pdsep, _unit, _fmt, smaller in ROWS:
        _, a_txt = pct_delta(rr, ca, smaller)
        _, b_txt = pct_delta(ca, pdsep, smaller)
        print(f"     {metric:14s}  RR→CA: {a_txt:>7s}   Combined→PD-Sep: {b_txt:>7s}")


def main():
    """Command-line entry point.

    Reads the ``--outdir`` option, creates that directory (and any missing
    parents) if needed, and writes ``fig_c7_routing_lever.pdf`` into it.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--outdir", default="analysis/pd_sep_paper_section/figures")
    figure_dir = Path(parser.parse_args().outdir)
    figure_dir.mkdir(parents=True, exist_ok=True)
    plot(figure_dir / "fig_c7_routing_lever.pdf")


# Run only when executed as a script, not when imported.
if __name__ == "__main__":
    main()