Files
agentic-kvc/v2/exp_d_policy_dispatch/plot.py
Gahow Wang 0b180c191e v2 exp(d): expand figure to 6 panels (TTFT/E2E mean+p90, TPS, per-worker GPU util)
Per request: TTFT mean+p90, E2E mean+p90, decode TPS (output goodput; total/
prefill TPS omitted as cache-miss-inflated), and per-worker GPU-util boxplots
(8 workers/arm, tracets vs thinktime) showing utilization level + balance.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
2026-05-30 21:10:27 +08:00

92 lines
4.1 KiB
Python

"""exp (d): 5-policy routing under tracets vs thinktime dispatch.
Six panels: TTFT mean/p90, E2E mean/p90, decode-TPS (output goodput), and the
per-worker GPU-util distribution. Shows the ranking FLIP — under faithful
`thinktime` the parameter-free LPWL (leastwork) is the clear winner; under
`tracets` (think-collapse bursts) its advantage disappears.
Reads the two bench_report summaries; writes v2/figs/exp_d_policy_dispatch.png.
Usage: python v2/exp_d_policy_dispatch/plot.py
"""
import json
import os
import matplotlib
matplotlib.use("Agg")
import matplotlib.pyplot as plt
from matplotlib.patches import Patch
# Directory containing this script; inputs and outputs are resolved against it.
HERE = os.path.dirname(__file__)


def _load_json(rel_path):
    """Parse and return the JSON document at ``rel_path`` relative to HERE.

    The file is opened inside a ``with`` block so the handle is closed as soon
    as parsing finishes, and it is decoded as UTF-8 explicitly rather than with
    the locale's default encoding.
    """
    with open(os.path.join(HERE, rel_path), encoding="utf-8") as fh:
        return json.load(fh)


# Result summaries for the two dispatch modes; indexed below by policy name.
TC = _load_json("results/tracets.json")
TT = _load_json("results/thinktime.json")

# Policies in the left-to-right order used on every panel's x axis.
ARMS = ["leastwork", "unified_ab", "unified_def", "lmetric", "sticky"]

# Tick label shown for each policy.
LABEL = {
    "leastwork": "LPWL\n(leastwork)",
    "unified_ab": "unified\n+A+B",
    "unified_def": "unified\ndefault",
    "lmetric": "LMetric",
    "sticky": "sticky",
}

C_TC, C_TT = "#d62728", "#2ca02c"  # tracets red / thinktime green (match exp_c)

# Width of a single bar; the two bars of a pair sit W/2 either side of the tick.
W = 0.38
def bar_panel(ax, tc, tt, title, ylab, fmt="{:.1f}", higher_better=False):
    """Draw one grouped-bar panel with a pair of bars per policy in ARMS.

    Args:
        ax: Axes object to draw on.
        tc: Heights for the C_TC-coloured bars, one per entry of ARMS. Drawn
            half a bar width to the left of each tick.
        tt: Heights for the C_TT-coloured bars, one per entry of ARMS. Drawn
            half a bar width to the right of each tick.
        title: Panel title; a "(lower = better)" or "(higher = better)"
            suffix is appended to it.
        ylab: Y-axis label.
        fmt: ``str.format`` template used for the value printed above each bar.
        higher_better: Selects which of the two suffixes goes into the title.

    ``tc`` and ``tt`` are concatenated with ``+`` to find the tallest bar, so
    they are expected to be lists.
    """
    ticks = range(len(ARMS))
    half = W / 2
    left_bars = ax.bar([t - half for t in ticks], tc, W, color=C_TC)
    right_bars = ax.bar([t + half for t in ticks], tt, W, color=C_TT)

    # Print each bar's value centred just above its top edge.
    for rect in [*left_bars, *right_bars]:
        centre = rect.get_x() + rect.get_width() / 2
        ax.text(centre, rect.get_height(), fmt.format(rect.get_height()),
                ha="center", va="bottom", fontsize=7.5)

    ax.set_xticks(list(ticks))
    ax.set_xticklabels([LABEL[arm] for arm in ARMS], fontsize=8.5)

    if higher_better:
        arrow = "higher = better"
    else:
        arrow = "lower = better"
    ax.set_ylabel(ylab)
    ax.set_title(f"{title} ({arrow})", fontsize=10.5)

    ax.grid(axis="y", alpha=.3)
    # 18% headroom above the tallest bar leaves space for the value labels.
    tallest = max(tc + tt)
    ax.set_ylim(0, tallest * 1.18)
def gpu_panel(ax):
    """Draw per-policy box plots of per-worker ``gpu_util_mean`` for TC and TT.

    For every policy in ARMS two boxes are drawn side by side: one built from
    the TC summary (C_TC colour, left of the tick) and one from the TT summary
    (C_TT colour, right of the tick). The title text mentions 8 workers; the
    code itself plots however many workers report a non-None reading.

    Args:
        ax: Axes object to draw on.
    """
    def worker_utils(summary, arm):
        # Collect gpu_util_mean in numeric worker-id order, skipping workers
        # whose reading is missing or None.
        per_worker = summary[arm]["per_worker"]
        readings = []
        for wid in sorted(per_worker, key=int):
            if per_worker[wid].get("gpu_util_mean") is not None:
                readings.append(per_worker[wid]["gpu_util_mean"])
        return readings

    for tick, arm in enumerate(ARMS):
        for summary, shift, colour in ((TC, -W / 2, C_TC), (TT, +W / 2, C_TT)):
            drawn = ax.boxplot(
                [worker_utils(summary, arm)],
                positions=[tick + shift],
                widths=0.30,
                patch_artist=True,
                showfliers=False,
                medianprops={"color": "black"},
            )
            # Exactly one box is produced per call; tint it with the series colour.
            drawn["boxes"][0].set(facecolor=colour, alpha=.65)

    ax.set_xticks(range(len(ARMS)))
    ax.set_xticklabels([LABEL[arm] for arm in ARMS], fontsize=8.5)
    ax.set_ylabel("per-worker GPU util %")
    ax.set_ylim(0, 100)
    ax.set_title("Per-worker GPU util (box = 8 workers; tighter = balanced)", fontsize=10.5)
    ax.grid(axis="y", alpha=.3)
def col(D, key, sub, scale=1.0):
    """Return ``D[arm][key][sub] * scale`` for every arm in ARMS, as a list.

    Args:
        D: Mapping from policy name to its summary mapping.
        key: First-level key inside each policy's summary.
        sub: Second-level key selecting the value to extract.
        scale: Multiplier applied to each extracted value.
    """
    column = []
    for arm in ARMS:
        column.append(D[arm][key][sub] * scale)
    return column
# Build the 2x3 figure. Values stored under the *_ms keys are scaled by 1e-3
# so the latency panels are labelled in seconds.
fig, ax = plt.subplots(2, 3, figsize=(15.5, 8.6))

# Top row: TTFT mean, TTFT p90, decode TPS.
bar_panel(ax[0, 0], col(TC, "ttft_ms", "mean", 1e-3), col(TT, "ttft_ms", "mean", 1e-3),
          "TTFT mean", "s")
bar_panel(ax[0, 1], col(TC, "ttft_ms", "p90", 1e-3), col(TT, "ttft_ms", "p90", 1e-3),
          "TTFT p90", "s")
bar_panel(ax[0, 2],
          [TC[a]["throughput"]["decode_tps"] for a in ARMS],
          [TT[a]["throughput"]["decode_tps"] for a in ARMS],
          "Decode TPS (output goodput)", "tok/s", fmt="{:.0f}", higher_better=True)

# Bottom row: E2E mean, E2E p90, per-worker GPU utilisation.
bar_panel(ax[1, 0], col(TC, "e2e_ms", "mean", 1e-3), col(TT, "e2e_ms", "mean", 1e-3),
          "E2E mean", "s")
bar_panel(ax[1, 1], col(TC, "e2e_ms", "p90", 1e-3), col(TT, "e2e_ms", "p90", 1e-3),
          "E2E p90", "s")
gpu_panel(ax[1, 2])

# One shared legend for the whole figure, centred along the bottom edge.
fig.legend(handles=[Patch(facecolor=C_TC, label="tracets (burst artifact)"),
                    Patch(facecolor=C_TT, label="thinktime (faithful load)")],
           loc="lower center", ncol=2, fontsize=10.5, bbox_to_anchor=(0.5, 0.0))
fig.suptitle("5-policy routing: tracets vs thinktime (807 reqs, dash0 8xH20) — "
             "LPWL wins across the board under faithful thinktime",
             fontsize=12.5)
# Reserve a strip at the bottom for the legend and at the top for the suptitle.
fig.tight_layout(rect=(0, 0.035, 1, 0.96))

out = os.path.join(HERE, "..", "figs", "exp_d_policy_dispatch.png")
# savefig does not create missing directories, so make sure the target exists.
# The ".." component is resolved first because os.makedirs can be confused by
# parent-directory elements in the path it is asked to create.
os.makedirs(os.path.realpath(os.path.dirname(out)), exist_ok=True)
fig.savefig(out, dpi=140)
print("wrote", os.path.normpath(out))