Working-set figure: extend left panel to ~50 nodes
Include the T=600s/1800s points so the diminishing-returns tail is visible: going from 14 -> 52 nodes buys only about +6pp APC (74% -> 79.8%), which is still under the 80.4% ceiling that oracle/LRU reaches at 14 nodes.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
Binary file not shown.
|
Before Width: | Height: | Size: 190 KiB After Width: | Height: | Size: 193 KiB |
@@ -166,12 +166,12 @@ def plot(ws, hw, block_bytes, label, out_path):
|
|||||||
ceil = ws["apc_ceiling"] * 100
|
ceil = ws["apc_ceiling"] * 100
|
||||||
oracle_nodes = ws["oracle_peak_blocks"] * bgb / pool
|
oracle_nodes = ws["oracle_peak_blocks"] * bgb / pool
|
||||||
|
|
||||||
# operating points up to the ceiling: beyond oracle, TTL is strictly worse, so drop.
|
# all operating points, out to the largest retention window (~50 nodes)
|
||||||
rows = [r for r in ws["taus"] if r["tau"] <= 300]
|
rows = list(ws["taus"])
|
||||||
nodes = np.array([r["peak_blocks"] * bgb / pool for r in rows])
|
nodes = np.array([r["peak_blocks"] * bgb / pool for r in rows])
|
||||||
apc = np.array([r["apc"] * 100 for r in rows])
|
apc = np.array([r["apc"] * 100 for r in rows])
|
||||||
tau = np.array([r["tau"] for r in rows])
|
XMAX_L = 53 # left panel x-axis (nodes), shows up to T=1800s (~52 nodes)
|
||||||
XMAX = 16
|
XMAX_R = 16 # right panel y-axis (nodes)
|
||||||
|
|
||||||
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(15, 6))
|
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(15, 6))
|
||||||
|
|
||||||
@@ -182,30 +182,33 @@ def plot(ws, hw, block_bytes, label, out_path):
|
|||||||
ax1.scatter([1], [apc_at_1], s=90, facecolors="none", edgecolors="#ff7f0e",
|
ax1.scatter([1], [apc_at_1], s=90, facecolors="none", edgecolors="#ff7f0e",
|
||||||
lw=2, zorder=6)
|
lw=2, zorder=6)
|
||||||
ax1.annotate(f"1 node -> ~{apc_at_1:.0f}% APC\n(TTL model; real LRU higher)",
|
ax1.annotate(f"1 node -> ~{apc_at_1:.0f}% APC\n(TTL model; real LRU higher)",
|
||||||
(1, apc_at_1), textcoords="offset points", xytext=(12, -2),
|
(1, apc_at_1), textcoords="offset points", xytext=(14, 8),
|
||||||
fontsize=9, color="#ff7f0e", va="top")
|
fontsize=9, color="#ff7f0e", va="bottom")
|
||||||
# label the well-separated decision-zone points
|
# label the well-separated decision-zone points
|
||||||
for r, x, y in zip(rows, nodes, apc):
|
for r, x, y in zip(rows, nodes, apc):
|
||||||
if x >= 1.5:
|
if x >= 1.5:
|
||||||
ax1.annotate(f"keep {r['tau']:g}s reuse", (x, y),
|
ax1.annotate(f"{r['tau']:g}s", (x, y),
|
||||||
textcoords="offset points", xytext=(6, 6), fontsize=8.5)
|
textcoords="offset points", xytext=(5, 6), fontsize=9)
|
||||||
ax1.annotate("T<=10s reuse:\nall < 1.4 nodes", (0.5, 22), fontsize=8.5,
|
ax1.annotate("T<=10s reuse:\nall < 1.4 nodes", (1.5, 18), fontsize=8.5,
|
||||||
color="#1f77b4", ha="left")
|
color="#1f77b4", ha="left")
|
||||||
|
# diminishing returns past the oracle point
|
||||||
|
ax1.annotate("diminishing returns:\n14 -> 52 nodes buys only +6pp",
|
||||||
|
(30, 64), fontsize=9, color="#555", ha="center")
|
||||||
# budget + ceiling
|
# budget + ceiling
|
||||||
ax1.axvspan(0, 1, color="#2ca02c", alpha=.08)
|
ax1.axvspan(0, 1, color="#2ca02c", alpha=.08)
|
||||||
ax1.axvline(1, ls="--", color="#2ca02c", lw=1.8)
|
ax1.axvline(1, ls="--", color="#2ca02c", lw=1.8)
|
||||||
ax1.text(1.05, 96, "1 B300 node (your budget)", color="#2ca02c", fontsize=9, va="top")
|
ax1.text(1.6, 96, "1 B300 node (your budget)", color="#2ca02c", fontsize=9, va="top")
|
||||||
ax1.scatter([oracle_nodes], [ceil], marker="*", s=340, color="#d62728", zorder=7)
|
ax1.scatter([oracle_nodes], [ceil], marker="*", s=340, color="#d62728", zorder=7)
|
||||||
ax1.annotate(f"ceiling {ceil:.1f}%\noracle: {oracle_nodes:.0f} nodes",
|
ax1.annotate(f"ceiling {ceil:.1f}% — oracle/LRU\nreaches it at {oracle_nodes:.0f} nodes",
|
||||||
(oracle_nodes, ceil), textcoords="offset points", xytext=(-10, -8),
|
(oracle_nodes, ceil), textcoords="offset points", xytext=(12, -4),
|
||||||
fontsize=9, color="#d62728", ha="right", va="top")
|
fontsize=9, color="#d62728", ha="left", va="top")
|
||||||
ax1.axhline(ceil, ls=":", color="#d62728", alpha=.5)
|
ax1.axhline(ceil, ls=":", color="#d62728", alpha=.5)
|
||||||
ax1.set_xlim(0, XMAX); ax1.set_ylim(0, 100)
|
ax1.set_xlim(0, XMAX_L); ax1.set_ylim(0, 100)
|
||||||
ax1.set_xticks(range(0, XMAX + 1, 2)); ax1.set_xticks(range(0, XMAX + 1), minor=True)
|
ax1.set_xticks(range(0, 51, 10)); ax1.set_xticks(range(0, XMAX_L, 5), minor=True)
|
||||||
ax1.set_xlabel(f"# nodes of GPU HBM needed (1 node = {gpr}x {hw['gpu']} = {pool:.0f} GB KV)")
|
ax1.set_xlabel(f"# nodes of GPU HBM needed (1 node = {gpr}x {hw['gpu']} = {pool:.0f} GB KV)")
|
||||||
ax1.set_ylabel("Prefix-cache hit rate (APC %)")
|
ax1.set_ylabel("Prefix-cache hit rate (APC %)")
|
||||||
ax1.set_title("Benefit vs cost: APC per cluster size", fontweight="bold")
|
ax1.set_title("Benefit vs cost: APC per cluster size", fontweight="bold")
|
||||||
ax1.grid(alpha=.3); ax1.grid(alpha=.15, which="minor"); ax1.legend(loc="center right")
|
ax1.grid(alpha=.3); ax1.grid(alpha=.15, which="minor"); ax1.legend(loc="lower right")
|
||||||
|
|
||||||
# ===== panel 2: working set W(t) over time (steady -> peak ~ median) =====
|
# ===== panel 2: working set W(t) over time (steady -> peak ~ median) =====
|
||||||
apc_of = {r["tau"]: r["apc"] * 100 for r in ws["taus"]}
|
apc_of = {r["tau"]: r["apc"] * 100 for r in ws["taus"]}
|
||||||
@@ -221,7 +224,7 @@ def plot(ws, hw, block_bytes, label, out_path):
|
|||||||
ax2.axhline(oracle_nodes, ls="--", color="#d62728", lw=1.6, alpha=.8)
|
ax2.axhline(oracle_nodes, ls="--", color="#d62728", lw=1.6, alpha=.8)
|
||||||
ax2.text(t_min.max(), oracle_nodes, " ceiling: 14 nodes", color="#d62728",
|
ax2.text(t_min.max(), oracle_nodes, " ceiling: 14 nodes", color="#d62728",
|
||||||
fontsize=8.5, va="center")
|
fontsize=8.5, va="center")
|
||||||
ax2.set_ylim(0, XMAX); ax2.set_yticks(range(0, XMAX + 1, 2))
|
ax2.set_ylim(0, XMAX_R); ax2.set_yticks(range(0, XMAX_R + 1, 2))
|
||||||
ax2.set_xlim(0, t_min.max())
|
ax2.set_xlim(0, t_min.max())
|
||||||
ax2.set_xlabel("wall-clock time into the trace (min)")
|
ax2.set_xlabel("wall-clock time into the trace (min)")
|
||||||
ax2.set_ylabel("# nodes of GPU HBM resident (W(t))")
|
ax2.set_ylabel("# nodes of GPU HBM resident (W(t))")
|
||||||
|
|||||||
Reference in New Issue
Block a user