Working-set figure: right panel = W(t) time series
Replace the (redundant) nodes-vs-T cost curve in the right panel with the working-set time series W(t) over wall-clock time, drawn for T = 2, 30, and 300 s. The plot shows that the footprint is steady (peak ~ median) after a short warm-up, so peak-based sizing is sound; the 300 s curve hugs the 14-node ceiling throughout.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
@@ -105,8 +105,10 @@ def _series(starts, ends, grid):
|
||||
return np.searchsorted(s, grid, side="right") - np.searchsorted(e, grid, side="right")
|
||||
|
||||
|
||||
def compute_working_set(ids, ts, taus):
|
||||
"""Return dict with appearance stats + per-tau Denning peaks + oracle/all."""
|
||||
def compute_working_set(ids, ts, taus, series_taus=()):
|
||||
"""Return dict with appearance stats + per-tau Denning peaks + oracle/all.
|
||||
|
||||
For each T in series_taus, also return the full W(t) time series on `grid`."""
|
||||
A = len(ids)
|
||||
order = np.lexsort((ts, ids))
|
||||
ids_s, ts_s = ids[order], ts[order]
|
||||
@@ -127,6 +129,7 @@ def compute_working_set(ids, ts, taus):
|
||||
oracle_peak = _sweep_peak(first[seen], last[seen])
|
||||
|
||||
rows = []
|
||||
series = {}
|
||||
for T in taus:
|
||||
enter = ts_s[prev_gap > T]
|
||||
exit_ = ts_s[next_gap > T] + T
|
||||
@@ -138,11 +141,15 @@ def compute_working_set(ids, ts, taus):
|
||||
"p50_blocks": float(np.percentile(ser, 50)),
|
||||
"apc": float((prev_gap <= T).sum() / A),
|
||||
})
|
||||
if T in series_taus:
|
||||
series[T] = ser
|
||||
return {
|
||||
"A": A, "n_unique": n_unique, "n_reuse": A - n_unique,
|
||||
"apc_ceiling": (A - n_unique) / A,
|
||||
"oracle_peak_blocks": oracle_peak,
|
||||
"span": float(ts.max() - ts.min()),
|
||||
"grid_s": grid - grid.min(),
|
||||
"series": series,
|
||||
"taus": rows,
|
||||
}
|
||||
|
||||
@@ -200,28 +207,26 @@ def plot(ws, hw, block_bytes, label, out_path):
|
||||
ax1.set_title("Benefit vs cost: APC per cluster size", fontweight="bold")
|
||||
ax1.grid(alpha=.3); ax1.grid(alpha=.15, which="minor"); ax1.legend(loc="center right")
|
||||
|
||||
# ===== panel 2: cost -- nodes needed to retain T seconds of reuse =====
|
||||
ax2.plot(tau, nodes, "s-", color="#1f77b4", lw=2, ms=7, zorder=4)
|
||||
ax2.axhline(1, ls="--", color="#2ca02c", lw=1.8, label="1 node (your budget)")
|
||||
ax2.axhline(oracle_nodes, ls=":", color="#d62728", lw=1.8,
|
||||
label=f"full ceiling = {oracle_nodes:.0f} nodes")
|
||||
# where the curve crosses the 1-node budget
|
||||
tau_at_1 = float(np.interp(1.0, nodes, tau))
|
||||
ax2.scatter([tau_at_1], [1], s=90, facecolors="none", edgecolors="#ff7f0e", lw=2, zorder=6)
|
||||
ax2.annotate(f"1 node only retains\n~{tau_at_1:.0f}s of reuse", (tau_at_1, 1),
|
||||
textcoords="offset points", xytext=(8, 14), fontsize=9, color="#ff7f0e")
|
||||
for r, x, y in zip(rows, tau, nodes):
|
||||
if y >= 1.5:
|
||||
ax2.annotate(f"{r['tau']:g}s", (x, y), textcoords="offset points",
|
||||
xytext=(4, 5), fontsize=8.5)
|
||||
ax2.set_xscale("log")
|
||||
ax2.set_xticks([1, 2, 5, 10, 30, 60, 300])
|
||||
ax2.set_xticklabels(["1s", "2s", "5s", "10s", "30s", "60s", "300s"])
|
||||
# ===== panel 2: working set W(t) over time (steady -> peak ~ median) =====
|
||||
apc_of = {r["tau"]: r["apc"] * 100 for r in ws["taus"]}
|
||||
t_min = ws["grid_s"] / 60.0 # minutes
|
||||
colors = {2: "#2ca02c", 30: "#ff7f0e", 300: "#1f77b4"}
|
||||
for T, ser in sorted(ws["series"].items()):
|
||||
y = ser * bgb / pool
|
||||
c = colors.get(T, "#777")
|
||||
ax2.plot(t_min, y, lw=1.8, color=c, label=f"keep {T:g}s reuse (APC {apc_of[T]:.0f}%)")
|
||||
ax2.axhline(float(np.median(y)), ls=":", color=c, alpha=.6, lw=1)
|
||||
ax2.axhline(1, ls="--", color="#2ca02c", lw=1.6, alpha=.8)
|
||||
ax2.text(t_min.max(), 1, " 1-node budget", color="#2ca02c", fontsize=8.5, va="center")
|
||||
ax2.axhline(oracle_nodes, ls="--", color="#d62728", lw=1.6, alpha=.8)
|
||||
ax2.text(t_min.max(), oracle_nodes, " ceiling: 14 nodes", color="#d62728",
|
||||
fontsize=8.5, va="center")
|
||||
ax2.set_ylim(0, XMAX); ax2.set_yticks(range(0, XMAX + 1, 2))
|
||||
ax2.set_xlabel("retention window T (how long-idle a session's KV we keep)")
|
||||
ax2.set_ylabel("# nodes of GPU HBM needed")
|
||||
ax2.set_title("Cost: nodes needed to retain T-seconds of reuse", fontweight="bold")
|
||||
ax2.grid(alpha=.3, which="both"); ax2.legend(loc="upper left", fontsize=9)
|
||||
ax2.set_xlim(0, t_min.max())
|
||||
ax2.set_xlabel("wall-clock time into the trace (min)")
|
||||
ax2.set_ylabel("# nodes of GPU HBM resident (W(t))")
|
||||
ax2.set_title("Working set over time (flat -> peak ~ median)", fontweight="bold")
|
||||
ax2.grid(alpha=.3); ax2.legend(loc="center right", fontsize=9)
|
||||
|
||||
fig.suptitle(label, fontsize=13, fontweight="bold")
|
||||
fig.tight_layout(rect=[0, 0, 1, 0.97])
|
||||
@@ -258,8 +263,9 @@ def main():
|
||||
hw = {"gpus_per_replica": gpus_per_replica, "kv_pool_gb": kv_pool_gb, "gpu": a.gpu}
|
||||
|
||||
taus = [1, 2, 5, 10, 30, 60, 300, 600, 1800]
|
||||
series_taus = [2, 30, 300] # W(t) lines drawn in panel 2
|
||||
n, ids, ts = load_trace(a.trace, a.min_ts, a.max_ts)
|
||||
ws = compute_working_set(ids, ts, taus)
|
||||
ws = compute_working_set(ids, ts, taus, series_taus)
|
||||
|
||||
label = a.label or f"{model['name']} {a.gpu} TP{a.tp}" + (f" EP{a.ep}" if a.ep else "")
|
||||
print("=" * 84)
|
||||
|
||||
Reference in New Issue
Block a user