Adds analysis/pd_sep_paper_section/ as the home for the "PD separation is net negative under agentic workloads" paper section: plot scripts for C1 (workload characteristics), C6 (roofline), C7 (routing-vs-PD-sep lever), the C6/C7 PDFs already rendered, and a README mapping candidate claims to required figures plus open re-run items. Removes --enforce-eager from bench.sh and all active launch scripts so CUDA graphs are captured -- the prior methodology suppressed one of PD-sep's structural advantages (D-node fixed-shape decode). Legacy scripts under scripts/legacy/ are intentionally untouched as historical records. Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
124 lines
4.5 KiB
Python
124 lines
4.5 KiB
Python
"""C7: routing lever vs PD-separation lever.
|
|
|
|
Side-by-side comparison of the magnitude of two design changes on the same
|
|
agentic workload:
|
|
(A) Round-robin -> cache-aware routing, both Combined-mode
|
|
(B) Combined -> PD-separated, both cache-aware
|
|
|
|
For each, plot delta TTFT p50 / TPOT p90 / APC. Green = improvement, red =
|
|
regression. Numbers come from REPORT.md §3.1 (PD-separation_analysis.md §3.1).
|
|
|
|
CAVEAT shown on the figure: these numbers are from the legacy
|
|
trace methodology (random sampling, 1 req/GPU). They are not yet reproduced
|
|
on the trace-driven 850-req sampling at production concurrency, and the
|
|
PD-sep runs were captured with --enforce-eager. The current plot is meant
|
|
to show the qualitative gap between the two levers; a re-run is required
|
|
for paper-grade quantitative claims.
|
|
"""
|
|
import argparse
|
|
from pathlib import Path
|
|
|
|
import matplotlib
|
|
matplotlib.use("Agg")
|
|
import matplotlib.pyplot as plt
|
|
import numpy as np
|
|
|
|
# One tuple per metric panel of the figure, in left-to-right order:
#   (label, RR baseline, cache-aware baseline, PD-sep w/ cache-aware,
#    unit, format, improve_when_smaller)
#
# - label:  panel title.
# - the three numbers: measured value under round-robin routing (Combined),
#   cache-aware routing (Combined), and PD-separated with cache-aware routing.
# - unit:   suffix appended to the raw values in the x-axis caption; the
#   special value "pp" suppresses the suffix.
# - format: str.format template used to print the raw values.
# - improve_when_smaller: True  -> deltas are relative % (lower is better);
#                         False -> deltas are absolute percentage points.
ROWS = [
    ("TTFT p50 (s)", 1.836, 0.731, 1.261, "s", "{:.2f}", True),
    ("TPOT p90 (s)", 0.086, 0.073, 0.074, "s", "{:.3f}", True),
    ("APC (%)", 20.8, 44.7, 40.2, "pp", "{:.1f}", False),
]
|
|
|
|
|
|
def pct_delta(before, after, improve_when_smaller):
    """Return ``(value, label)`` for a change, framed so positive = improvement.

    Args:
        before: Baseline measurement.
        after: Measurement after the design change.
        improve_when_smaller: If true, the metric is "lower is better" and the
            result is the relative reduction in percent. If false, the result
            is the absolute difference ``after - before`` (used for APC, where
            the inputs are already percentages and a relative % would be
            misleading).

    Returns:
        A 2-tuple of the signed numeric delta and its display string
        (``"+60%"`` style for relative deltas, ``"+23.9pp"`` style for
        percentage-point deltas).

    Raises:
        ZeroDivisionError: If a relative delta is requested with a zero
            baseline.
    """
    if improve_when_smaller:
        if before == 0:
            raise ZeroDivisionError(
                "pct_delta: relative change is undefined for a zero baseline"
            )
        # (before - after) rather than -(after - before): same magnitude, but
        # an unchanged metric yields +0.0 instead of -0.0, so the label reads
        # "+0%" rather than "-0%".
        improvement = (before - after) / before * 100
        return improvement, f"{improvement:+.0f}%"

    pp = after - before
    return pp, f"{pp:+.1f}pp"
|
|
|
|
|
|
def plot(out_path):
    """Render the C7 figure to ``out_path`` and print a text summary.

    Draws one panel per entry of ``ROWS`` (three panels). Each panel has two
    bars: lever A (RR -> cache-aware, both Combined) and lever B (Combined ->
    PD-Sep, both cache-aware). Bars are green when the delta is >= 0
    (improvement) and red otherwise. A methodology caveat is written as a
    footer on the figure.

    Args:
        out_path: Destination passed to ``Figure.savefig``.
    """
    fig, axes = plt.subplots(1, 3, figsize=(10, 3.5))

    def bar_color(val):
        # Green = improvement (non-negative delta), red = regression.
        return "#2ca02c" if val >= 0 else "#d62728"

    for ax, (metric, rr, ca, pdsep, unit, fmt, smaller_better) in zip(axes, ROWS):
        # lever A: RR -> cache-aware (both combined)
        a_val, a_txt = pct_delta(rr, ca, smaller_better)
        # lever B: combined -> PD-sep (both cache-aware)
        b_val, b_txt = pct_delta(ca, pdsep, smaller_better)

        bars = ax.bar(
            ["RR → cache-aware\n(within Combined)",
             "Combined → PD-Sep\n(both cache-aware)"],
            [a_val, b_val],
            color=[bar_color(a_val), bar_color(b_val)],
            edgecolor="black", linewidth=0.6, width=0.55,
        )

        # Symmetric y-range around zero with headroom for the value labels.
        # Fall back to 1.0 when both deltas are zero so the limits are not
        # degenerate (bottom == top).
        ymax = max(abs(a_val), abs(b_val)) or 1.0
        ax.set_ylim(-ymax * 1.35, ymax * 1.35)
        ax.axhline(0, color="black", lw=0.6)

        # Value labels: just above positive bars, just below negative ones.
        for bar, val, txt in zip(bars, [a_val, b_val], [a_txt, b_txt]):
            yoff = ymax * 0.06 if val >= 0 else -ymax * 0.06
            ax.text(bar.get_x() + bar.get_width() / 2,
                    val + yoff,
                    txt,
                    ha="center", va="bottom" if val >= 0 else "top",
                    fontsize=10, fontweight="bold")

        ax.set_title(metric, fontsize=10)
        if smaller_better:
            ax.set_ylabel("Δ (positive = improvement)")
        else:
            ax.set_ylabel("Δ percentage points")
        ax.grid(True, axis="y", alpha=0.25)
        ax.tick_params(axis="x", labelsize=8.5)
        # Raw values under each panel; "pp" rows carry no unit suffix.
        u = "" if unit == "pp" else unit
        ax.set_xlabel(
            f"RR={fmt.format(rr)}{u} · CA={fmt.format(ca)}{u} · PD-Sep={fmt.format(pdsep)}{u}",
            fontsize=8, color="#555", labelpad=8,
        )

    fig.suptitle(
        "Cache-aware routing is a larger lever than PD separation on agentic workload",
        fontsize=11, y=1.02,
    )
    # Reserve the bottom 10% of the figure for the caveat footer.
    fig.tight_layout(rect=(0, 0.10, 1, 0.96))
    footer = (
        "Source: REPORT.md §3.1 / analysis/pd_separation_analysis.md §3.1. "
        "Legacy random-sampling methodology + --enforce-eager. "
        "Re-run on trace-driven w600_r0.0015_st30 with cuda-graph required before paper-grade citation."
    )
    fig.text(0.5, 0.01, footer, ha="center", fontsize=7.5, color="#666",
             style="italic", wrap=True)
    fig.savefig(out_path, bbox_inches="tight")
    plt.close(fig)
    print(f"[C7] wrote {out_path}")

    # Console summary of both levers for every row in ROWS.
    for metric, rr, ca, pdsep, unit, fmt, smaller in ROWS:
        a, a_txt = pct_delta(rr, ca, smaller)
        b, b_txt = pct_delta(ca, pdsep, smaller)
        print(f" {metric:14s} RR→CA: {a_txt:>7s} Combined→PD-Sep: {b_txt:>7s}")
|
|
|
|
|
|
def main():
    """Parse the command line, ensure the output directory exists, and plot.

    The figure is written as ``fig_c7_routing_lever.pdf`` inside ``--outdir``.
    """
    ap = argparse.ArgumentParser()
    ap.add_argument(
        "--outdir",
        default="analysis/pd_sep_paper_section/figures",
        help="directory to write the figure into (created if missing)",
    )
    args = ap.parse_args()
    out = Path(args.outdir)
    out.mkdir(parents=True, exist_ok=True)
    plot(out / "fig_c7_routing_lever.pdf")
|
|
|
|
|
|
# Run the plot only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
|