Microbench 1 plots: prefill-decode interference heatmap + lines
plot_interference.py reads the interference sweep summary (4 D × 4 P × 3 reps,
cold prefill prompts) and produces:
fig_interference_heatmap.png
TPOT p90 interference index over (D, P): 14x at D=8 P=2k → 214x at D=1 P=32k.
fig_interference_lines.png
(a) TPOT p90 during prefill vs P, log-y, one line per D + baseline dashed
(b) Cold prefill TTFT vs P (interference window length)
Confirms B2 finding: cold prefill on the same worker stalls overlapping
decodes, raising their TPOT p90 to 14-214x the baseline. The interference
window grows faster than linearly with P (from ~140ms at 2k to ~4.6s at 32k,
i.e. ~33x for 16x the tokens) and is essentially independent of decode batch
size — prefill compute time dominates.
This commit is contained in:
BIN
microbench/interference/results/fig_interference_heatmap.png
Normal file
BIN
microbench/interference/results/fig_interference_heatmap.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 79 KiB |
BIN
microbench/interference/results/fig_interference_lines.png
Normal file
BIN
microbench/interference/results/fig_interference_lines.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 184 KiB |
49
microbench/interference/results/summary.csv
Normal file
49
microbench/interference/results/summary.csv
Normal file
@@ -0,0 +1,49 @@
|
|||||||
|
chunk_size,decode_batch_size,new_prefill_tokens,repetition,tpot_baseline_p50_ms,tpot_baseline_p90_ms,tpot_during_prefill_p50_ms,tpot_during_prefill_p90_ms,tpot_after_prefill_p50_ms,prefill_ttft_ms,num_tokens_during_prefill,tpot_penalty_p50_ms,tpot_penalty_ratio
|
||||||
|
8192,1,16384,0,6.0938840033486485,6.209587014745921,6.038327002897859,721.8174959998579,5.902736505959183,1551.6944919945672,7,-0.05555700045078993,0.9908831542542883
|
||||||
|
8192,1,16384,1,5.923410004470497,6.097983592189848,6.318648986052722,716.1335832090118,5.944762990111485,1538.5930379852653,7,0.39523898158222437,1.0667249069849851
|
||||||
|
8192,1,16384,2,5.858020973391831,6.100462487665936,6.006712967064232,717.0338945928962,5.880023498320952,1537.489719048608,7,0.1486919936724007,1.0253826325217656
|
||||||
|
8192,1,2048,0,5.895276990486309,6.092500360682607,6.601358996704221,101.92482299171388,5.88772798073478,138.6167829623446,3,0.7060820062179118,1.119770794036883
|
||||||
|
8192,1,2048,1,5.84050000179559,6.031724822241813,6.715002004057169,98.402955615893,5.963494011666626,135.26123203337193,3,0.874502002261579,1.149730674084877
|
||||||
|
8192,1,2048,2,5.846929037943482,6.023762607946992,6.49581803008914,98.78565319813788,5.865664483280852,139.52087302459404,3,0.6488889921456575,1.1109794539893867
|
||||||
|
8192,1,32768,0,5.815482989419252,5.98231521435082,7.905923994258046,1327.417733031325,5.914871522691101,4565.813232969958,11,2.0904410048387945,1.35946128784869
|
||||||
|
8192,1,32768,1,5.884206970222294,6.093550205696374,6.913999503012747,1289.1355098807255,5.918594019021839,4564.696346002165,12,1.0297925327904522,1.1750095701938827
|
||||||
|
8192,1,32768,2,5.844508996233344,6.036466604564339,5.949418497039005,1289.868750877213,5.996516003506258,4559.26972796442,12,0.10490950080566108,1.0179500965561474
|
||||||
|
8192,1,8192,0,5.8953959960490465,6.077604810707271,6.700786994770169,400.01660271082085,5.933274980634451,592.7710899850354,4,0.8053909987211227,1.1366135539089954
|
||||||
|
8192,1,8192,1,5.859997007064521,6.044362985994667,6.52295647887513,399.0960885945243,5.9106440166942775,587.9120359895751,4,0.6629594718106091,1.1131330734489076
|
||||||
|
8192,1,8192,2,5.852615984622389,6.077339604962617,6.839454494183883,397.37221606774267,5.842902464792132,585.4344139806926,4,0.986838509561494,1.1686149428143566
|
||||||
|
8192,2,16384,0,6.665366003289819,6.996188021730632,6.953017000341788,753.4122839861084,6.278356013353914,1542.8870850009844,16,0.28765099705196917,1.0431560692856165
|
||||||
|
8192,2,16384,1,6.215651519596577,6.432108790613711,6.437333504436538,825.8602088084448,6.321536988252774,1544.4676239858381,14,0.22168198483996093,1.0356651244267872
|
||||||
|
8192,2,16384,2,6.198290007887408,6.414996081730351,6.23783003538847,791.287462809123,6.229062011698261,1547.6729370420799,15,0.03954002750106156,1.0063791832022617
|
||||||
|
8192,2,2048,0,6.308203533990309,6.5372537123039365,7.2736715083010495,124.34246338671073,6.42395400791429,140.2357509941794,8,0.9654679743107408,1.1530495915530528
|
||||||
|
8192,2,2048,1,6.295669532846659,6.565696361940354,6.80624000960961,122.76347780134529,6.219618488103151,134.00237500900403,5,0.5105704767629504,1.081098678083265
|
||||||
|
8192,2,2048,2,6.221923045814037,6.456784618785605,6.77992551936768,121.53079549898393,6.372162490151823,133.65825096843764,6,0.5580024735536426,1.089683281108572
|
||||||
|
8192,2,32768,0,6.217204965651035,6.411483424017206,6.444861995987594,1327.605677419342,6.279057502979413,4562.765512033366,24,0.22765703033655882,1.0366172631583364
|
||||||
|
8192,2,32768,1,6.249245023354888,6.46523671457544,6.638102000579238,1327.3725654871669,6.282801507040858,4588.010042964015,26,0.38885697722435,1.0622246328590255
|
||||||
|
8192,2,32768,2,6.3047730072867125,6.511582498205826,7.032171968603507,1328.6563935107552,6.678625009953976,4575.238945020828,26,0.7273989613167942,1.1153727438681942
|
||||||
|
8192,2,8192,0,6.216273992322385,6.418060971191153,7.465646980563179,566.2100009096321,6.486822996521369,591.6664049727842,10,1.2493729882407933,1.2009842213814694
|
||||||
|
8192,2,8192,1,6.219989008968696,6.426932819886133,6.889350013807416,563.8807277253363,6.498989008832723,589.0314219868742,10,0.66936100483872,1.1076144996194621
|
||||||
|
8192,2,8192,2,6.202562013641,6.426409410778433,6.2741125002503395,506.35911329300166,6.247896992135793,589.9350599502213,12,0.07155048660933971,1.0115356342188893
|
||||||
|
8192,4,16384,0,6.581096502486616,6.813783489633352,7.222604966955259,896.0794355894913,7.27951800217852,1548.318826011382,32,0.6415084644686431,1.097477443800779
|
||||||
|
8192,4,16384,1,6.632126489421353,6.888536998303607,7.169383025029674,901.4634491875776,6.6380144853610545,1556.7272949847393,32,0.5372565356083214,1.0810081859061764
|
||||||
|
8192,4,16384,2,7.3001839919015765,7.627598161343485,7.801887986715883,895.8070753375076,7.381025032373145,1548.7530149985105,32,0.5017039948143065,1.0687248424657336
|
||||||
|
8192,4,2048,0,6.574946513865143,6.7760384699795395,8.21773149073124,122.99676946713589,7.327278988668695,141.15837798453867,16,1.6427849768660963,1.2498552609366202
|
||||||
|
8192,4,2048,1,6.539304507896304,6.827986013377085,7.7418964938260615,123.06783598614857,7.355214998824522,144.0463720355183,16,1.2025919859297574,1.183902123609263
|
||||||
|
8192,4,2048,2,6.603387999348342,6.889259471790865,8.407105488004163,123.28159245080315,7.304991508135572,139.0016739605926,12,1.8037174886558205,1.2731503114513067
|
||||||
|
8192,4,32768,0,6.574522005394101,6.81457101018168,8.637404011096805,1329.5613984868396,7.352561020525172,4582.872495986521,48,2.062882005702704,1.3137691232929485
|
||||||
|
8192,4,32768,1,6.611509481444955,6.832443462917581,6.810082995798439,1329.5692544896156,6.594637496164069,4584.32119601639,52,0.19857351435348392,1.0300345200911798
|
||||||
|
8192,4,32768,2,7.247850007843226,7.512926589697599,7.698488509049639,1330.0709595321678,7.366928504779935,4593.307823990472,52,0.4506385012064129,1.062175472825563
|
||||||
|
8192,4,8192,0,6.58076599938795,6.8358480057213455,7.33237000531517,563.1351483694743,7.409162994008511,588.1257400033064,20,0.75160400592722,1.114212236994466
|
||||||
|
8192,4,8192,1,7.294313982129097,7.611855585128069,6.714510993333533,567.65656011994,6.66655000532046,593.7537999707274,20,-0.5798029887955636,0.9205130201118203
|
||||||
|
8192,4,8192,2,7.346310012508184,7.632423576433212,7.1000695170369,562.1623511018697,6.561954505741596,589.4501690054312,20,-0.2462404954712838,0.9664810639556427
|
||||||
|
8192,8,16384,0,8.545519027393311,8.815958129707724,10.58233599178493,935.599625587929,8.964366978034377,1564.8929480230436,49,2.036816964391619,1.2383491228399874
|
||||||
|
8192,8,16384,1,8.990490983705968,9.360035922145471,9.607686952222139,929.1803135536611,9.045524493558332,1548.527927021496,49,0.617195968516171,1.0686498623528742
|
||||||
|
8192,8,16384,2,8.913785975892097,9.27077301312238,10.43433096492663,928.7156283855438,8.9906370267272,1554.4496020302176,49,1.5205449890345335,1.1705835200830426
|
||||||
|
8192,8,2048,0,8.98516149027273,9.293531806906685,9.877634001895785,123.17990500014275,8.940142986830324,138.16959800897166,21,0.8924725116230547,1.0993273757616087
|
||||||
|
8192,8,2048,1,8.52142449002713,8.787232195027173,9.08006398822181,122.78757293242961,8.527457976015285,139.2026540124789,22,0.5586394981946796,1.0655570554956477
|
||||||
|
8192,8,2048,2,8.513206499628723,8.81320737535134,9.523249987978488,123.09092239593156,8.918915002141148,138.62996903480962,22,1.0100434883497655,1.1186443073351757
|
||||||
|
8192,8,32768,0,8.519548020558432,8.800235588569194,10.859103000257164,1329.670040984638,8.971089031547308,4627.19459598884,81,2.3395549796987325,1.2746102227551481
|
||||||
|
8192,8,32768,1,8.951486961450428,9.332324599381536,9.67549899360165,1329.5295700081624,8.963910018792376,4593.522923998535,81,0.7240120321512222,1.0808817613508437
|
||||||
|
8192,8,32768,2,8.94658948527649,9.30843852693215,9.116152999922633,1329.5171548146755,8.570115023758262,4582.791887049098,85,0.16956351464614272,1.018952866332494
|
||||||
|
8192,8,8192,0,9.004380000988021,9.330628422321752,10.226367478026077,566.9469262822531,8.936587983043864,595.0289380270988,32,1.2219874770380557,1.13571034062356
|
||||||
|
8192,8,8192,1,8.510532497894019,8.806324569741264,11.104884033557028,573.6782600288279,8.955864497693256,599.6217179927044,31,2.5943515356630087,1.3048400950592687
|
||||||
|
8192,8,8192,2,8.511811000062153,8.759546309011057,9.393914020620286,564.9327670224011,8.936407044529915,594.1418160218745,33,0.8821030205581337,1.1036328250887728
|
||||||
|
158
microbench/plot_interference.py
Normal file
158
microbench/plot_interference.py
Normal file
@@ -0,0 +1,158 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
"""
|
||||||
|
Plot prefill-decode interference results (Microbench 1).
|
||||||
|
|
||||||
|
Reads interference/results/summary.csv and produces two figures:
|
||||||
|
|
||||||
|
1. fig_interference_heatmap.png
|
||||||
|
Heatmap of TPOT p90 interference index (during/baseline) over (D, P).
|
||||||
|
|
||||||
|
2. fig_interference_lines.png
|
||||||
|
Two-panel: TPOT p90 during prefill (absolute, log) and prefill TTFT,
|
||||||
|
one line per decode batch size D, x-axis = prefill tokens P.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import csv
|
||||||
|
from collections import defaultdict
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
import numpy as np
|
||||||
|
import matplotlib
|
||||||
|
matplotlib.use("Agg")
|
||||||
|
import matplotlib.pyplot as plt
|
||||||
|
|
||||||
|
HERE = Path(__file__).parent
CSV = HERE / "interference/results/summary.csv"
OUT_DIR = HERE / "interference/results"
OUT_DIR.mkdir(parents=True, exist_ok=True)

# ── load + aggregate (median across reps) ────────────────────────────────────
# Read all rows inside a context manager so the file handle is closed as soon
# as parsing is done; newline="" is the mode the csv module asks for.
with open(CSV, newline="") as csv_file:
    rows = list(csv.DictReader(csv_file))

# agg maps (decode_batch_size, new_prefill_tokens) -> list of per-row
# measurement dicts, one entry per repetition found in the CSV.
agg = defaultdict(list)
for r in rows:
    D = int(r["decode_batch_size"])
    P = int(r["new_prefill_tokens"])
    if D == 0:
        # Rows with a zero decode batch are excluded from every plot
        # (presumably prefill-only runs with no decode TPOT — confirm).
        continue
    bl_p90 = float(r["tpot_baseline_p90_ms"])
    bl_p50 = float(r["tpot_baseline_p50_ms"])
    dur_p90 = float(r["tpot_during_prefill_p90_ms"])
    dur_p50 = float(r["tpot_during_prefill_p50_ms"])
    ttft = float(r["prefill_ttft_ms"])
    if bl_p90 <= 0 or dur_p90 <= 0:
        # A non-positive p90 would make the ratio below undefined or
        # meaningless, so the whole row is dropped.
        continue
    agg[(D, P)].append({
        "bl_p50": bl_p50,
        "bl_p90": bl_p90,
        "dur_p50": dur_p50,
        "dur_p90": dur_p90,
        "ttft": ttft,
        # Interference index: TPOT during prefill relative to baseline.
        "idx_p90": dur_p90 / bl_p90,
        # The p50 index falls back to 0 when the baseline p50 is not positive.
        "idx_p50": dur_p50 / bl_p50 if bl_p50 > 0 else 0,
    })

# stat maps (D, P) -> {field: median of that field across repetitions}.
# Each field is reduced independently, so idx_p90 is the median of the
# per-repetition ratios, not the ratio of the medians.
stat = {
    key: {field: float(np.median([rep[field] for rep in reps])) for field in reps[0]}
    for key, reps in agg.items()
}

# Sorted axis values actually present in the data.
D_VALUES = sorted({key[0] for key in stat})
P_VALUES = sorted({key[1] for key in stat})
|
||||||
|
|
||||||
|
# ── Figure 1: heatmap of interference index (TPOT p90 during / baseline) ─────
# Rows follow D_VALUES and columns follow P_VALUES; any (D, P) cell without
# measurements stays NaN.
row_of = {D: i for i, D in enumerate(D_VALUES)}
col_of = {P: j for j, P in enumerate(P_VALUES)}
mat = np.full((len(D_VALUES), len(P_VALUES)), np.nan)
for (D, P), cell in stat.items():
    mat[row_of[D], col_of[P]] = cell["idx_p90"]

fig, ax = plt.subplots(figsize=(9.5, 5))

# Log colour scale from 1x up to the largest measured index.
measured = mat[~np.isnan(mat)]
color_norm = matplotlib.colors.LogNorm(vmin=1, vmax=measured.max())
im = ax.imshow(mat, cmap="YlOrRd", aspect="auto", norm=color_norm)

ax.set_xticks(range(len(P_VALUES)))
ax.set_xticklabels([f"{P//1024}k" for P in P_VALUES], fontsize=11)
ax.set_yticks(range(len(D_VALUES)))
ax.set_yticklabels([f"D={D}" for D in D_VALUES], fontsize=11)
ax.set_xlabel("Cold prefill size (P tokens)", fontsize=12)
ax.set_ylabel("Decode batch size", fontsize=12)
ax.set_title("Prefill-Decode Interference Index\n"
             "TPOT p90 during prefill / TPOT p90 baseline (log color)",
             fontsize=13, fontweight="bold")

# annotate each cell
for (i, j), v in np.ndenumerate(mat):
    if np.isnan(v):
        continue
    # White text above 50x, black otherwise (presumably for contrast against
    # the darker end of the colormap).
    ax.text(j, i, f"{v:.0f}x",
            ha="center", va="center",
            fontsize=10, color="white" if v > 50 else "black",
            fontweight="bold")

cbar = plt.colorbar(im, ax=ax, fraction=0.04, pad=0.02)
cbar.set_label("Interference index (×)", fontsize=10)

plt.tight_layout()
out_heatmap = OUT_DIR / "fig_interference_heatmap.png"
plt.savefig(out_heatmap, dpi=160, bbox_inches="tight")
print(f"Saved: {out_heatmap}")
plt.close(fig)
|
||||||
|
|
||||||
|
# ── Figure 2: lines, two panels ──────────────────────────────────────────────
fig, (ax_tpot, ax_ttft) = plt.subplots(1, 2, figsize=(13, 5))

D_COLORS = {1: "#1f77b4", 2: "#2ca02c", 4: "#ff7f0e", 8: "#d62728"}
p_tick_labels = [f"{P//1024}k" for P in P_VALUES]


def _series(D, field):
    """Return stat[(D, P)][field] for every P in P_VALUES, NaN where missing."""
    return [stat.get((D, P), {}).get(field, np.nan) for P in P_VALUES]


# Panel A: TPOT p90 during prefill (absolute, log y)
# Each decode batch size gets a solid "during prefill" line and a faint dashed
# baseline line in the same colour; sizes not in D_COLORS fall back to gray.
for D in D_VALUES:
    line_color = D_COLORS.get(D, "gray")
    ax_tpot.plot(P_VALUES, _series(D, "dur_p90"), "o-", color=line_color,
                 label=f"D={D} (during prefill)", linewidth=2, markersize=7)
    ax_tpot.plot(P_VALUES, _series(D, "bl_p90"), "s--", color=line_color, alpha=0.4,
                 label=f"D={D} (baseline)", linewidth=1, markersize=5)

ax_tpot.set_xscale("log", base=2)
ax_tpot.set_yscale("log")
ax_tpot.set_xticks(P_VALUES)
ax_tpot.set_xticklabels(p_tick_labels)
ax_tpot.set_xlabel("Cold prefill size (P tokens)", fontsize=12)
ax_tpot.set_ylabel("TPOT p90 (ms, log)", fontsize=12)
ax_tpot.set_title("Decode TPOT during prefill chunk", fontsize=12, fontweight="bold")
ax_tpot.grid(True, which="both", linestyle="--", alpha=0.4)
ax_tpot.legend(fontsize=8, loc="upper left", ncol=2)

# Panel B: prefill TTFT vs P
for D in D_VALUES:
    ax_ttft.plot(P_VALUES, _series(D, "ttft"), "o-", color=D_COLORS.get(D, "gray"),
                 label=f"D={D}", linewidth=2, markersize=7)

ax_ttft.set_xscale("log", base=2)
ax_ttft.set_xticks(P_VALUES)
ax_ttft.set_xticklabels(p_tick_labels)
ax_ttft.set_xlabel("Cold prefill size (P tokens)", fontsize=12)
ax_ttft.set_ylabel("Prefill TTFT (ms)", fontsize=12)
ax_ttft.set_title("Cold prefill duration (interference window length)",
                  fontsize=12, fontweight="bold")
ax_ttft.grid(True, linestyle="--", alpha=0.4)
ax_ttft.legend(fontsize=9, loc="upper left", title="Decode batch")

fig.suptitle(
    "Prefill-Decode Interference · Qwen3-Coder-30B-A3B · H20 · chunk_size=8192",
    fontsize=13, fontweight="bold", y=1.02)

plt.tight_layout()
out_lines = OUT_DIR / "fig_interference_lines.png"
plt.savefig(out_lines, dpi=160, bbox_inches="tight")
print(f"Saved: {out_lines}")
plt.close(fig)
|
||||||
|
|
||||||
|
# ── print summary table ──────────────────────────────────────────────────────
# One row per (D, P) cell, ordered by D then P, showing the aggregated values
# held in stat.
header = f"\n{'D':>3} {'P':>6} | {'bl_p90':>7} {'dur_p90':>8} {'idx_p90':>7} | {'ttft':>7}"
print(header)
print("-" * 55)
for (D, P), s in sorted(stat.items(), key=lambda item: item[0]):
    print(f"{D:>3} {P:>6} | {s['bl_p90']:>6.2f} {s['dur_p90']:>7.1f} {s['idx_p90']:>5.1f}x | {s['ttft']:>6.0f}")
|
||||||
Reference in New Issue
Block a user