"""C7: routing lever vs PD-separation lever.

Side-by-side comparison of the magnitude of two design changes on the same
agentic workload:

  (A) Round-robin -> cache-aware routing, both Combined-mode
  (B) Combined    -> PD-separated,        both cache-aware

For each, plot delta TTFT p50 / TPOT p90 / APC. Green = improvement,
red = regression.

Numbers come from REPORT.md §3.1 (PD-separation_analysis.md §3.1).

CAVEAT shown on the figure: these numbers are from the legacy trace
methodology (random sampling, 1 req/GPU). They are not yet reproduced on the
trace-driven 850-req sampling at production concurrency, and the PD-sep runs
were captured with --enforce-eager. The current plot is meant to show the
qualitative gap between the two levers; a re-run is required for paper-grade
quantitative claims.
"""
import argparse
from pathlib import Path

import matplotlib

# Select the non-interactive Agg backend before pyplot is imported so the
# script can render to a file without a display.
matplotlib.use("Agg")
import matplotlib.pyplot as plt
import numpy as np

# One tuple per panel:
# (label, RR baseline, cache-aware baseline, PD-sep w/ cache-aware,
#  unit, format, "improve_when_smaller")
ROWS = [
    ("TTFT p50 (s)", 1.836, 0.731, 1.261, "s", "{:.2f}", True),
    ("TPOT p90 (s)", 0.086, 0.073, 0.074, "s", "{:.3f}", True),
    ("APC (%)", 20.8, 44.7, 40.2, "pp", "{:.1f}", False),
]


def pct_delta(before, after, improve_when_smaller):
    """Return ``(value, label)`` for the change from *before* to *after*.

    The value is framed so that positive means improvement.

    Args:
        before: Baseline measurement.
        after: Measurement after the design change.
        improve_when_smaller: When true, the metric is better when smaller
            and the result is the relative reduction in percent. When false,
            the result is the absolute difference ``after - before``,
            labelled in percentage points (used for APC, where a relative %
            would be misleading).

    Returns:
        A ``(float, str)`` pair: the signed delta and its display label
        (``"+60%"`` style for relative deltas, ``"+23.9pp"`` style for
        absolute ones).

    Raises:
        ZeroDivisionError: If a relative delta is requested against a zero
            baseline, for which the relative change is undefined.
    """
    if not improve_when_smaller:
        pp = after - before
        return pp, f"{pp:+.1f}pp"

    if before == 0:
        raise ZeroDivisionError(
            "pct_delta: relative change is undefined for a zero baseline"
        )
    # Written as (before - after) rather than -(after - before) so that an
    # unchanged metric yields +0.0 and is labelled "+0%" instead of "-0%".
    improvement = (before - after) / before * 100
    return improvement, f"{improvement:+.0f}%"


def _bar_color(val):
    """Return green for a non-negative delta and red for a negative one."""
    return "#2ca02c" if val >= 0 else "#d62728"


def plot(out_path):
    """Render one bar panel per entry of ROWS and save the figure.

    Each panel shows two bars: lever A (RR -> cache-aware) and lever B
    (Combined -> PD-Sep). After saving, a one-line confirmation and a
    per-metric text summary of both deltas are printed to stdout.

    Args:
        out_path: Destination path handed to ``Figure.savefig``.
    """
    # Size the grid from ROWS so an added metric gets its own panel instead
    # of being silently dropped by zip(); squeeze=False keeps a 2-D array of
    # axes even when there is a single panel.
    fig, axes = plt.subplots(1, len(ROWS), figsize=(10, 3.5), squeeze=False)
    summary = []

    for ax, row in zip(axes[0], ROWS):
        metric, rr, ca, pdsep, unit, fmt, smaller_better = row

        # lever A: RR -> cache-aware (both combined)
        a_val, a_txt = pct_delta(rr, ca, smaller_better)
        # lever B: combined -> PD-sep (both cache-aware)
        b_val, b_txt = pct_delta(ca, pdsep, smaller_better)
        summary.append((metric, a_txt, b_txt))

        values = [a_val, b_val]
        bars = ax.bar(
            [
                "RR → cache-aware\n(within Combined)",
                "Combined → PD-Sep\n(both cache-aware)",
            ],
            values,
            color=[_bar_color(v) for v in values],
            edgecolor="black",
            linewidth=0.6,
            width=0.55,
        )

        # Symmetric limits keep the zero line centred. Fall back to a unit
        # range when both deltas are zero, otherwise the limits collapse.
        ymax = max(abs(a_val), abs(b_val)) or 1.0
        ax.set_ylim(-ymax * 1.35, ymax * 1.35)
        ax.axhline(0, color="black", lw=0.6)

        # Put each label just beyond the tip of its bar: above positive
        # bars, below negative ones.
        for bar, val, txt in zip(bars, values, [a_txt, b_txt]):
            positive = val >= 0
            yoff = ymax * 0.06 if positive else -ymax * 0.06
            ax.text(
                bar.get_x() + bar.get_width() / 2,
                val + yoff,
                txt,
                ha="center",
                va="bottom" if positive else "top",
                fontsize=10,
                fontweight="bold",
            )

        ax.set_title(metric, fontsize=10)
        if smaller_better:
            ax.set_ylabel("Δ (positive = improvement)")
        else:
            ax.set_ylabel("Δ percentage points")
        ax.grid(True, axis="y", alpha=0.25)
        ax.tick_params(axis="x", labelsize=8.5)

        # Raw values under each panel; rows whose unit is "pp" get no suffix.
        u = "" if unit == "pp" else unit
        ax.set_xlabel(
            f"RR={fmt.format(rr)}{u} · CA={fmt.format(ca)}{u} · PD-Sep={fmt.format(pdsep)}{u}",
            fontsize=8,
            color="#555",
            labelpad=8,
        )

    fig.suptitle(
        "Cache-aware routing is a larger lever than PD separation on agentic workload",
        fontsize=11,
        y=1.02,
    )
    # Reserve the bottom 10% of the figure for the methodology footer.
    fig.tight_layout(rect=(0, 0.10, 1, 0.96))
    footer = (
        "Source: REPORT.md §3.1 / analysis/pd_separation_analysis.md §3.1. "
        "Legacy random-sampling methodology + --enforce-eager. "
        "Re-run on trace-driven w600_r0.0015_st30 with cuda-graph required before paper-grade citation."
    )
    fig.text(0.5, 0.01, footer, ha="center", fontsize=7.5, color="#666",
             style="italic", wrap=True)
    fig.savefig(out_path, bbox_inches="tight")
    plt.close(fig)

    print(f"[C7] wrote {out_path}")
    for metric, a_txt, b_txt in summary:
        print(f" {metric:14s} RR→CA: {a_txt:>7s} Combined→PD-Sep: {b_txt:>7s}")


def main():
    """Parse ``--outdir``, create it if needed, and write the C7 figure."""
    ap = argparse.ArgumentParser()
    ap.add_argument(
        "--outdir",
        default="analysis/pd_sep_paper_section/figures",
        help="directory that receives fig_c7_routing_lever.pdf",
    )
    args = ap.parse_args()
    out = Path(args.outdir)
    out.mkdir(parents=True, exist_ok=True)
    plot(out / "fig_c7_routing_lever.pdf")


if __name__ == "__main__":
    main()