Files
agentic-kvc/analysis/pd_sep_paper_section/scripts/plot_routing_lever.py
Gahow Wang d71a111099 Paper section: PD-sep scaffold + drop --enforce-eager from launch scripts
Adds analysis/pd_sep_paper_section/ as the home for the "PD separation is
net negative under agentic workloads" paper section: plot scripts for C1
(workload chars), C6 (roofline), C7 (routing-vs-PD-sep lever), the C6/C7
PDFs already rendered, and a README mapping candidate claims to required
figures plus open re-run items.

Removes --enforce-eager from bench.sh and all active launch scripts so
cuda graphs are captured -- the prior methodology suppressed one of
PD-sep's structural advantages (D-node fixed-shape decode). Legacy
scripts under scripts/legacy/ are intentionally untouched as historical
records.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-05-25 11:24:16 +08:00

124 lines
4.5 KiB
Python

"""C7: routing lever vs PD-separation lever.
Side-by-side comparison of the magnitude of two design changes on the same
agentic workload:
(A) Round-robin -> cache-aware routing, both Combined-mode
(B) Combined -> PD-separated, both cache-aware
For each, plot delta TTFT p50 / TPOT p90 / APC. Green = improvement, red =
regression. Numbers come from REPORT.md §3.1 (analysis/pd_separation_analysis.md §3.1).
CAVEAT shown on the figure: these numbers are from the legacy
trace methodology (random sampling, 1 req/GPU). They are not yet reproduced
on the trace-driven 850-req sampling at production concurrency, and the
PD-sep runs were captured with --enforce-eager. The current plot is meant
to show the qualitative gap between the two levers; a re-run is required
for paper-grade quantitative claims.
"""
import argparse
from pathlib import Path
import matplotlib
matplotlib.use("Agg")
import matplotlib.pyplot as plt
import numpy as np
# One tuple per metric panel. Column order:
#   label, round-robin (RR) baseline, cache-aware (CA) baseline,
#   PD-sep with cache-aware routing, unit, value format string,
#   improve_when_smaller (True when a lower value is the better one).
ROWS = [
    # label           RR     CA     PD-sep  unit  format    smaller-is-better
    ("TTFT p50 (s)", 1.836, 0.731, 1.261, "s",  "{:.2f}", True),
    ("TPOT p90 (s)", 0.086, 0.073, 0.074, "s",  "{:.3f}", True),
    ("APC (%)",      20.8,  44.7,  40.2,  "pp", "{:.1f}", False),
]
def pct_delta(before, after, improve_when_smaller):
    """Measure the change from ``before`` to ``after`` so that positive = better.

    Args:
        before: Metric value before the design change.
        after: Metric value after the design change.
        improve_when_smaller: True when a lower metric value is the better
            one. The result is then the relative reduction in percent.
            False selects the absolute difference ``after - before``,
            labelled in percentage points ("pp"), because a relative % of a
            percentage metric is misleading.

    Returns:
        A ``(value, label)`` tuple: the signed number to plot and its
        formatted text (``"+60%"`` style or ``"+23.9pp"`` style).
    """
    change = after - before
    if not improve_when_smaller:
        # Larger-is-better metric: report the raw difference in pp.
        return change, f"{change:+.1f}pp"
    # Smaller-is-better metric: flip the sign so a reduction reads as a gain.
    gain = -(change / before) * 100
    return gain, f"{gain:+.0f}%"
def _delta_color(val):
    """Return the bar colour for a delta: green when >= 0, red otherwise."""
    return "#2ca02c" if val >= 0 else "#d62728"


def _draw_lever_panel(ax, row, lever_a, lever_b):
    """Draw the two-bar lever comparison for one ``ROWS`` entry onto ``ax``.

    Args:
        ax: Matplotlib axes to draw on.
        row: One ``ROWS`` tuple
            ``(metric, rr, ca, pdsep, unit, fmt, smaller_better)``.
        lever_a: ``(value, label)`` from ``pct_delta`` for RR -> cache-aware.
        lever_b: ``(value, label)`` from ``pct_delta`` for Combined -> PD-sep.
    """
    metric, rr, ca, pdsep, unit, fmt, smaller_better = row
    a_val, a_txt = lever_a
    b_val, b_txt = lever_b
    values = [a_val, b_val]

    bars = ax.bar(
        ["RR → cache-aware\n(within Combined)",
         "Combined → PD-Sep\n(both cache-aware)"],
        values,
        color=[_delta_color(v) for v in values],
        edgecolor="black", linewidth=0.6, width=0.55,
    )

    # Symmetric y-range around zero, with headroom for the value labels.
    # Fall back to 1.0 when both deltas are exactly zero so the axis limits
    # and the label offsets do not collapse to a zero-height range.
    ymax = max(abs(a_val), abs(b_val)) or 1.0
    ax.set_ylim(-ymax * 1.35, ymax * 1.35)
    ax.axhline(0, color="black", lw=0.6)

    # Place each label just past the tip of its bar, on the side away
    # from the zero line.
    for bar, val, txt in zip(bars, values, [a_txt, b_txt]):
        yoff = ymax * 0.06 if val >= 0 else -ymax * 0.06
        ax.text(bar.get_x() + bar.get_width() / 2,
                val + yoff,
                txt,
                ha="center", va="bottom" if val >= 0 else "top",
                fontsize=10, fontweight="bold")

    ax.set_title(metric, fontsize=10)
    if smaller_better:
        ax.set_ylabel("Δ (positive = improvement)")
    else:
        ax.set_ylabel("Δ percentage points")
    ax.grid(True, axis="y", alpha=0.25)
    ax.tick_params(axis="x", labelsize=8.5)

    # The x-label carries the raw values; rows whose unit is "pp" get no
    # unit suffix there.
    u = "" if unit == "pp" else unit
    ax.set_xlabel(
        f"RR={fmt.format(rr)}{u} · CA={fmt.format(ca)}{u} · PD-Sep={fmt.format(pdsep)}{u}",
        fontsize=8, color="#555", labelpad=8,
    )


def plot(out_path):
    """Render the C7 routing-lever vs PD-separation figure and save it.

    Draws one panel per ``ROWS`` entry. Each panel has two bars: the delta
    for RR -> cache-aware routing and the delta for Combined -> PD-sep,
    both computed by ``pct_delta``. After saving, a one-line-per-metric
    text summary of the same deltas is printed to stdout.

    Args:
        out_path: Destination passed to ``Figure.savefig``.
    """
    # Compute both levers once per metric; the panels and the printed
    # summary reuse the same numbers.
    deltas = [
        (pct_delta(rr, ca, smaller), pct_delta(ca, pdsep, smaller))
        for _metric, rr, ca, pdsep, _unit, _fmt, smaller in ROWS
    ]

    # One panel per row so no metric is silently dropped if ROWS changes
    # length; squeeze=False keeps ``axes`` two-dimensional for any count.
    fig, axes = plt.subplots(1, len(ROWS), figsize=(10, 3.5), squeeze=False)
    for ax, row, (lever_a, lever_b) in zip(axes[0], ROWS, deltas):
        _draw_lever_panel(ax, row, lever_a, lever_b)

    fig.suptitle(
        "Cache-aware routing is a larger lever than PD separation on agentic workload",
        fontsize=11, y=1.02,
    )
    # Reserve the bottom strip of the figure for the methodology footer.
    fig.tight_layout(rect=(0, 0.10, 1, 0.96))
    footer = (
        "Source: REPORT.md §3.1 / analysis/pd_separation_analysis.md §3.1. "
        "Legacy random-sampling methodology + --enforce-eager. "
        "Re-run on trace-driven w600_r0.0015_st30 with cuda-graph required before paper-grade citation."
    )
    fig.text(0.5, 0.01, footer, ha="center", fontsize=7.5, color="#666",
             style="italic", wrap=True)
    fig.savefig(out_path, bbox_inches="tight")
    plt.close(fig)

    print(f"[C7] wrote {out_path}")
    for row, ((_a_val, a_txt), (_b_val, b_txt)) in zip(ROWS, deltas):
        metric = row[0]
        print(f" {metric:14s} RR→CA: {a_txt:>7s} Combined→PD-Sep: {b_txt:>7s}")
def main():
    """Parse ``--outdir``, create that directory, and write the C7 figure.

    The figure is saved as ``fig_c7_routing_lever.pdf`` inside the output
    directory; missing parent directories are created.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--outdir", default="analysis/pd_sep_paper_section/figures")
    outdir = Path(parser.parse_args().outdir)
    outdir.mkdir(parents=True, exist_ok=True)
    plot(outdir / "fig_c7_routing_lever.pdf")


if __name__ == "__main__":
    main()