Validates the elastic_migration_v2 finding that kv_role=kv_both adds +45% to
TTFT p90 even when PD-sep never fires. Replicates the measurement under a
single-instance, synthetic, open-loop workload to separate the mechanism's
own cost from 8-instance feedback amplification.
Configurations (8):
plain, noop_connector, mooncake_{producer,consumer,both},
nixl_both, lmcache_only, multi_mooncake_lmcache.
Pre-flight verification gates risky configs (kv_consumer needs dummy
bootstrap, multi-connector composition, NoOp custom class loading).
Workload: two-phase sweep
Phase A: rate {0.5..32} req/s × shape (4096, 256), saturation criteria
Phase B: ref_safe rate × cartesian (input ∈ {512,4k,32k}, output ∈ {64,256,1024})
Step-timing patch enriches vLLM's existing AGENTIC_STEP_LOG_PATH emit
with step_duration_us and build_meta_us — directly measures per-step
substrate cost, not just user-visible TTFT/TPOT.
run_all.sh runs as a 5-stage barrier:
0 pre-flight + apply patch
1 Phase A all configs
2 pick ref_safe / ref_load
3 Phase B all configs
4 revert patch + analyze + plot
Outputs aggregate.{json,csv}, MANIFEST.tsv, and 5 figures.
Estimated runtime: 4-5.5 hours on idle dash0 H20.
264 lines
8.7 KiB
Python
264 lines
8.7 KiB
Python
#!/usr/bin/env python3
"""Plot Figures 1-5 from connector_tax aggregate.

Requires aggregate.json + aggregate.csv from analyze.py.

Figure 1: TTFT p90 vs send rate, line per config (Phase A)
Figure 2: TPOT p90 vs send rate
Figure 3: Achieved throughput vs requested
Figure 4: Substrate tax bar at ref_safe and ref_load
Figure 5: Shape-dependent tax heatmap (Phase B)
"""
|
||
|
||
import argparse
|
||
import json
|
||
from collections import defaultdict
|
||
from pathlib import Path
|
||
|
||
import numpy as np
|
||
import matplotlib
|
||
matplotlib.use("Agg")
|
||
import matplotlib.pyplot as plt
|
||
|
||
|
||
# Hex colour assigned to each benchmark configuration name. Looked up with
# ``CONFIG_COLORS.get(cfg, ...)`` when drawing per-config lines, so a name
# that is not listed here falls back to whatever default the caller supplies.
CONFIG_COLORS = {
    "plain": "#000000",
    "noop_connector": "#7f7f7f",
    "mooncake_producer": "#1f77b4",
    "mooncake_consumer": "#17becf",
    "mooncake_both": "#d62728",
    "nixl_both": "#ff7f0e",
    "lmcache_only": "#2ca02c",
    "multi_mooncake_lmcache": "#9467bd",
}
|
||
|
||
|
||
def load(root: Path):
    """Read and parse ``aggregate.json`` located directly under *root*.

    Args:
        root: Directory that contains ``aggregate.json``.

    Returns:
        The decoded JSON document (the plotting functions in this file index
        it like a dict with a ``"configs"`` key).

    Raises:
        FileNotFoundError: If ``aggregate.json`` does not exist under *root*.
        json.JSONDecodeError: If the file is not valid JSON.
    """
    # Decode explicitly as UTF-8 so the result does not depend on the
    # platform's locale encoding.
    text = (root / "aggregate.json").read_text(encoding="utf-8")
    return json.loads(text)
|
||
|
||
|
||
def _plot_phase_a_metric(ax, configs, metric, mark_saturated=False):
    """Draw one ``o-`` line per config for *metric* across its Phase A cells.

    Cells are ordered by ``rate_target``. When *mark_saturated* is true, an
    ``x`` marker is overlaid on each cell whose ``saturated`` flag is truthy,
    provided that cell actually carries a value for *metric*.
    """
    for cfg, d in configs.items():
        cells = sorted(d.get("phase_a", []), key=lambda c: c["rate_target"])
        ax.plot(
            [c["rate_target"] for c in cells],
            [c.get(metric) for c in cells],
            "o-",
            label=cfg,
            color=CONFIG_COLORS.get(cfg, None),
            linewidth=2,
        )
        if not mark_saturated:
            continue
        for c in cells:
            value = c.get(metric)
            # A saturated cell may lack the metric; skip the marker then
            # instead of failing on a missing key.
            if c.get("saturated") and value is not None:
                ax.plot([c["rate_target"]], [value], "x",
                        markersize=12, mew=2,
                        color=CONFIG_COLORS.get(cfg, "red"))


def _use_log2_rate_axis(ax, rates):
    """Put the x axis on a log2 scale with one labelled tick per swept rate."""
    # The scale must be set before the ticks: changing the scale resets them.
    ax.set_xscale("log", base=2)
    ax.set_xticks(rates)
    ax.set_xticklabels([str(r) for r in rates])


def _finish_and_save(fig, ax, path):
    """Add the legend, tighten the layout, write *fig* to *path*, close it."""
    ax.legend(fontsize=8, loc="upper left")
    fig.tight_layout()
    fig.savefig(path, dpi=160)
    plt.close(fig)


def fig1_2_3(agg, out_dir):
    """Render Figures 1-3 (Phase A: TTFT p90, TPOT p90, throughput vs rate).

    Args:
        agg: Parsed aggregate; ``agg["configs"]`` maps a config name to a dict
            whose ``"phase_a"`` list holds one cell per swept rate, and
            ``agg["rates_swept"]`` lists the rates used for the x ticks.
        out_dir: Directory (supporting the ``/`` operator) that receives
            ``fig1_ttft_vs_rate.png``, ``fig2_tpot_vs_rate.png`` and
            ``fig3_throughput_vs_rate.png``.
    """
    configs = agg["configs"]
    rates = agg["rates_swept"]

    # ----- fig 1: TTFT p90 (log-log, saturated cells marked) -----
    fig, ax = plt.subplots(figsize=(10, 5.5))
    _plot_phase_a_metric(ax, configs, "ttft_ms_p90", mark_saturated=True)
    _use_log2_rate_axis(ax, rates)
    ax.set_yscale("log")
    ax.set_xlabel("Send rate (req/s)")
    ax.set_ylabel("TTFT p90 (ms, log)")
    ax.set_title("Figure 1 — TTFT p90 vs send rate (Phase A)\n"
                 "× = saturation criterion fired")
    ax.grid(True, which="both", linestyle="--", alpha=0.4)
    _finish_and_save(fig, ax, out_dir / "fig1_ttft_vs_rate.png")

    # ----- fig 2: TPOT p90 (log2 x, linear y) -----
    fig, ax = plt.subplots(figsize=(10, 5.5))
    _plot_phase_a_metric(ax, configs, "tpot_ms_p90")
    _use_log2_rate_axis(ax, rates)
    ax.set_xlabel("Send rate (req/s)")
    ax.set_ylabel("TPOT p90 (ms)")
    ax.set_title("Figure 2 — TPOT p90 vs send rate (Phase A)")
    ax.grid(True, linestyle="--", alpha=0.4)
    _finish_and_save(fig, ax, out_dir / "fig2_tpot_vs_rate.png")

    # ----- fig 3: throughput (linear axes, y = x reference) -----
    fig, ax = plt.subplots(figsize=(10, 5.5))
    max_x = max(rates) if rates else 1
    # Drawn first so the reference line leads the legend.
    ax.plot([0, max_x], [0, max_x], "k--", alpha=0.4, label="ideal y=x")
    _plot_phase_a_metric(ax, configs, "throughput_effective_rps")
    ax.set_xlabel("Send rate (req/s)")
    ax.set_ylabel("Effective throughput (req/s)")
    ax.set_title("Figure 3 — Throughput tracking (Phase A)")
    ax.grid(True, linestyle="--", alpha=0.4)
    _finish_and_save(fig, ax, out_dir / "fig3_throughput_vs_rate.png")
|
||
|
||
|
||
def fig4(agg, out_dir):
    """Render Figure 4: TTFT/TPOT p90 tax versus ``plain`` as grouped bars.

    One panel is drawn per distinct reference rate present in *agg*:
    ``ref_safe`` and, when it is set and differs, ``ref_load``. For every
    non-plain config the tax is ``metric / plain_metric - 1`` taken from the
    Phase A cell at that rate, shown in percent. A config with no usable
    value at that rate gets no bar and an ``n/a`` label, so missing data is
    not mistaken for a measured 0 % tax.

    Args:
        agg: Parsed aggregate with ``"configs"`` and optional ``"ref_safe"`` /
            ``"ref_load"`` entries.
        out_dir: Directory (supporting the ``/`` operator) that receives
            ``fig4_substrate_tax.png``.

    Returns:
        None. Nothing is written when there is no ``plain`` config or no
        reference rate.
    """
    configs = agg["configs"]
    plain = configs.get("plain")
    if plain is None:
        return

    ref_safe = agg.get("ref_safe")
    ref_load = agg.get("ref_load")
    rates_used = []
    if ref_safe is not None:
        rates_used.append(("ref_safe", ref_safe))
    if ref_load is not None and ref_load != ref_safe:
        rates_used.append(("ref_load", ref_load))
    if not rates_used:
        return

    def cell_at(d, r):
        """Return the Phase A cell of *d* whose rate_target equals *r*."""
        for c in d.get("phase_a", []):
            if abs(c["rate_target"] - r) < 1e-6:
                return c
        return None

    def tax_pct(c_cfg, c_plain, key):
        """Return the percent tax for *key*, or NaN if either side is missing."""
        if c_cfg is None or c_plain is None:
            return float("nan")
        a, b = c_cfg.get(key), c_plain.get(key)
        if not a or not b:
            return float("nan")
        return (a / b - 1) * 100

    cfg_names = [c for c in configs if c != "plain"]
    x = np.arange(len(cfg_names))
    w = 0.4
    # (metric key, legend label, bar colour, x offset of the bar in its group)
    series = (
        ("ttft_ms_p90", "TTFT p90 tax %", "#d62728", -w / 2),
        ("tpot_ms_p90", "TPOT p90 tax %", "#1f77b4", w / 2),
    )

    # Create exactly as many panels as there are reference rates so a single
    # rate does not leave an empty second axis in the saved figure.
    n_panels = len(rates_used)
    fig, axes = plt.subplots(1, n_panels, figsize=(6.5 * n_panels, 5),
                             squeeze=False)
    for ax, (label, r) in zip(axes[0], rates_used):
        plain_cell = cell_at(plain, r)
        cells = [cell_at(configs[cfg], r) for cfg in cfg_names]
        for key, legend_label, color, offset in series:
            taxes = [tax_pct(c, plain_cell, key) for c in cells]
            ax.bar(x + offset, taxes, width=w,
                   label=legend_label, color=color, alpha=0.85)
            for xi, v in zip(x + offset, taxes):
                if np.isnan(v):
                    ax.text(xi, 0, "n/a", ha="center", va="bottom",
                            fontsize=7, color=color)
        ax.axhline(0, color="black", linewidth=0.5)
        ax.set_xticks(x)
        ax.set_xticklabels(cfg_names, rotation=30, ha="right", fontsize=8)
        ax.set_ylabel("Tax vs plain (%)")
        ax.set_title(f"{label} (rate={r} req/s)")
        ax.grid(True, axis="y", linestyle="--", alpha=0.4)
        ax.legend(fontsize=8)

    fig.suptitle("Figure 4 — Substrate tax (TTFT p90 + TPOT p90) "
                 "at reference rates", fontweight="bold")
    fig.tight_layout()
    fig.savefig(out_dir / "fig4_substrate_tax.png", dpi=160)
    plt.close(fig)
|
||
|
||
|
||
def fig5(agg, out_dir):
    """Render Figure 5: per-config heatmaps of TTFT p90 tax by request shape.

    For every non-plain config one panel shows, for each
    ``(input_tokens, output_tokens)`` shape found in the ``plain`` Phase B
    cells, the tax ``ttft / plain_ttft - 1`` in percent. Shapes with no
    usable value on either side are left blank (NaN). Panels are laid out in
    a grid at most three columns wide.

    Args:
        agg: Parsed aggregate; ``agg["configs"]`` maps a config name to a dict
            with an optional ``"phase_b"`` list of cells.
        out_dir: Directory (supporting the ``/`` operator) that receives
            ``fig5_shape_tax_heatmap.png``.

    Returns:
        None. Nothing is written when ``plain`` is absent, has no Phase B
        cells, or is the only config.
    """
    configs = agg["configs"]
    if "plain" not in configs:
        return

    def shape_map(d):
        """Map (input_tokens, output_tokens) -> ttft_ms_p90 for Phase B."""
        return {
            (c["input_tokens"], c["output_tokens"]): c.get("ttft_ms_p90")
            for c in d.get("phase_b", [])
        }

    def token_label(count):
        """Format a token count, using the ``k`` form for multiples of 1024."""
        if count >= 1024 and count % 1024 == 0:
            return f"{count // 1024}k"
        return str(count)

    plain_map = shape_map(configs["plain"])
    if not plain_map:
        return

    cfg_names = [c for c in configs if c != "plain"]
    if not cfg_names:
        # Nothing to compare against plain; also avoids dividing by a zero
        # column count in the grid computation below.
        return

    inputs = sorted({k[0] for k in plain_map})
    outputs = sorted({k[1] for k in plain_map})

    n = len(cfg_names)
    cols = min(3, n)
    rows = (n + cols - 1) // cols
    # squeeze=False always yields a 2-D array of axes, whatever rows/cols are.
    fig, axes = plt.subplots(rows, cols, figsize=(5 * cols, 4 * rows),
                             squeeze=False)

    for idx, cfg in enumerate(cfg_names):
        ax = axes[idx // cols][idx % cols]
        cfg_map = shape_map(configs[cfg])

        # Rows are output sizes, columns are input sizes; values in percent.
        mat = np.full((len(outputs), len(inputs)), np.nan)
        for i, ip in enumerate(inputs):
            for j, op in enumerate(outputs):
                a = cfg_map.get((ip, op))
                b = plain_map.get((ip, op))
                if a and b:
                    mat[j, i] = (a / b - 1) * 100

        im = ax.imshow(mat, cmap="YlOrRd", aspect="auto")
        ax.set_xticks(range(len(inputs)))
        ax.set_xticklabels([token_label(v) for v in inputs])
        ax.set_yticks(range(len(outputs)))
        ax.set_yticklabels([str(v) for v in outputs])
        ax.set_xlabel("input")
        ax.set_ylabel("output")
        ax.set_title(cfg, fontsize=10)

        for (j, i), pct in np.ndenumerate(mat):
            if np.isnan(pct):
                continue
            ax.text(i, j, f"{pct:.0f}%", ha="center", va="center",
                    fontsize=9,
                    color="white" if pct > 30 else "black")
        plt.colorbar(im, ax=ax, fraction=0.04, pad=0.02)

    # Hide leftover axes in the last grid row.
    for idx in range(n, rows * cols):
        axes[idx // cols][idx % cols].axis("off")

    fig.suptitle("Figure 5 — TTFT p90 substrate tax (%) by shape (Phase B)",
                 fontweight="bold")
    fig.tight_layout()
    fig.savefig(out_dir / "fig5_shape_tax_heatmap.png", dpi=160)
    plt.close(fig)
|
||
|
||
|
||
def main():
    """Command-line entry point: load the aggregate and write every figure.

    Parses the required ``--root`` option, reads ``aggregate.json`` from that
    directory via ``load``, and saves Figures 1-5 back into the same
    directory, then prints where they were written.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--root",
        type=Path,
        required=True,
        help="directory containing aggregate.json; figures are written here",
    )
    args = parser.parse_args()

    agg = load(args.root)
    out = args.root
    out.mkdir(parents=True, exist_ok=True)

    fig1_2_3(agg, out)
    fig4(agg, out)
    fig5(agg, out)
    print(f"Saved figures into {out}")
|
||
|
||
|
||
# Run the CLI only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
|