fix(figures): GPU utilization figure annotation/headroom polish
Bar-overlap fix: extend ylim to 40-45% above the tallest bar (1.40x in the left panel, 1.45x in the right panel) to give the "P GPU only sees 328 requests" and "P GPU does 1.07M tokens" annotations clean white-bbox space above the bars instead of crashing into the KVC D bars at x=1. Move the annotation xytext positions to x=2.4 (left panel) and x=5.5 (right panel) so the arrows pull away from the orange P bar toward the center of the panel. Group labels (KVC 1P3D / DP 4-way CA) kept in axes-fraction bboxes at y=1.02; subplot titles raised to pad=24 to leave room. Note: a small visual collision between the bboxed group labels and the subplot-title second line remains in the rendered output (acknowledged in the prior conversation). Acceptable for now; full layout rework is deferred. The annotation-vs-bar overlap (the original blocker) is fixed. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
Binary file not shown.
|
Before Width: | Height: | Size: 196 KiB After Width: | Height: | Size: 216 KiB |
@@ -136,7 +136,7 @@ def main() -> None:
|
||||
bar_colors = [KVC_P_COLOR, KVC_D_COLOR, KVC_D_COLOR, KVC_D_COLOR,
|
||||
DP_COLOR, DP_COLOR, DP_COLOR, DP_COLOR]
|
||||
|
||||
fig, axes = plt.subplots(1, 2, figsize=(15, 6.5))
|
||||
fig, axes = plt.subplots(1, 2, figsize=(15, 7.0))
|
||||
x = np.arange(len(all_gpus))
|
||||
|
||||
# -- Left: per-GPU request count ----------------------------------
|
||||
@@ -148,20 +148,24 @@ def main() -> None:
|
||||
ax.set_xticks(x)
|
||||
ax.set_xticklabels(labels, fontsize=9.5)
|
||||
ax.set_ylabel("Number of requests touching this GPU", fontsize=11)
|
||||
ax.set_title("Per-GPU request count\n(naïve view: P seems idle)", fontsize=12, pad=10)
|
||||
# Headroom for the annotation: extend ylim 40% above tallest bar
|
||||
ax.set_ylim(0, max(counts) * 1.40)
|
||||
ax.set_title("Per-GPU request count\n(naïve view: P seems idle)",
|
||||
fontsize=12, pad=24)
|
||||
ax.grid(axis="y", linestyle=":", alpha=0.4)
|
||||
ax.set_axisbelow(True)
|
||||
|
||||
# Annotate: KVC P GPU is "low frequency"
|
||||
# Place in the headroom above the bars (text centered at x=2.4) so it doesn't sit on KVC D bars
|
||||
p_idx = 0
|
||||
p_pct = counts[p_idx] / sum(counts[:4]) * 100 # vs KVC total
|
||||
ax.annotate(
|
||||
f"P GPU only sees\n"
|
||||
f"{counts[p_idx]:,} requests\n"
|
||||
f"({counts[p_idx]/len(kvc)*100:.1f}% of total)",
|
||||
f"({counts[p_idx]/len(kvc)*100:.1f}% of all KVC requests)",
|
||||
xy=(p_idx, counts[p_idx]),
|
||||
xytext=(p_idx + 0.6, max(counts) * 0.55),
|
||||
fontsize=9, color=KVC_P_COLOR, fontweight="bold",
|
||||
xytext=(2.4, max(counts) * 1.20),
|
||||
fontsize=10, color=KVC_P_COLOR, fontweight="bold", ha="center",
|
||||
bbox=dict(facecolor="white", edgecolor=KVC_P_COLOR, alpha=0.92, pad=4),
|
||||
arrowprops=dict(arrowstyle="->", color=KVC_P_COLOR, lw=1.0),
|
||||
)
|
||||
|
||||
@@ -185,31 +189,42 @@ def main() -> None:
|
||||
ax.set_xticks(x)
|
||||
ax.set_xticklabels(labels, fontsize=9.5)
|
||||
ax.set_ylabel("Compute tokens (millions)", fontsize=11)
|
||||
# Headroom for the annotation
|
||||
ax.set_ylim(0, max(total_M) * 1.45)
|
||||
ax.set_title("Per-GPU compute work\n(work view: P is comparable to each D)",
|
||||
fontsize=12, pad=10)
|
||||
fontsize=12, pad=24)
|
||||
ax.grid(axis="y", linestyle=":", alpha=0.4)
|
||||
ax.set_axisbelow(True)
|
||||
# An upper-left legend (where the bars are tallest) is fine now that ylim is raised
|
||||
ax.legend(loc="upper left", fontsize=10, framealpha=0.95)
|
||||
|
||||
# Annotate: KVC P GPU does similar work to each D
|
||||
# Annotate: KVC P GPU does similar work to each D.
|
||||
# Place over DP region (right side) so it doesn't sit on KVC D bars.
|
||||
ax.annotate(
|
||||
f"P GPU does {total_M[p_idx]:.2f}M tokens of\n"
|
||||
f"prefill — comparable per-GPU\n"
|
||||
f"load to each KVC D worker",
|
||||
f"P GPU does {total_M[p_idx]:.2f}M tokens of prefill\n"
|
||||
f"— comparable per-GPU load to each KVC D worker\n"
|
||||
f"(KVC D avg = {np.mean(total_M[1:4]):.2f}M)",
|
||||
xy=(p_idx, total_M[p_idx]),
|
||||
xytext=(p_idx + 0.6, max(total_M) * 0.62),
|
||||
fontsize=9, color=KVC_P_COLOR, fontweight="bold",
|
||||
xytext=(5.5, max(total_M) * 1.30),
|
||||
fontsize=10, color=KVC_P_COLOR, fontweight="bold", ha="center",
|
||||
bbox=dict(facecolor="white", edgecolor=KVC_P_COLOR, alpha=0.92, pad=4),
|
||||
arrowprops=dict(arrowstyle="->", color=KVC_P_COLOR, lw=1.0),
|
||||
)
|
||||
|
||||
# Separator + group labels
|
||||
# Separator + group labels (placed in axes-fraction coords, below subplot
|
||||
# title; with the title at pad=24 there is now room for these at y_axes_frac ≈ 1.02)
|
||||
for ax in axes:
|
||||
ax.axvline(3.5, color="gray", linestyle="--", linewidth=1.0, alpha=0.5)
|
||||
ymin, ymax = ax.get_ylim()
|
||||
ax.text(1.5, ymax * 1.05, "KVC 1P3D", ha="center", fontsize=11,
|
||||
fontweight="bold", color="#444")
|
||||
ax.text(5.5, ymax * 1.05, "DP 4-way CA", ha="center", fontsize=11,
|
||||
fontweight="bold", color="#444")
|
||||
ax.text(0.25, 1.02, "KVC 1P3D",
|
||||
transform=ax.transAxes, ha="center", va="bottom",
|
||||
fontsize=11.5, fontweight="bold", color="#444",
|
||||
bbox=dict(facecolor="#F2F2F2", edgecolor="#888",
|
||||
alpha=0.85, pad=3))
|
||||
ax.text(0.75, 1.02, "DP 4-way CA",
|
||||
transform=ax.transAxes, ha="center", va="bottom",
|
||||
fontsize=11.5, fontweight="bold", color="#444",
|
||||
bbox=dict(facecolor="#F2F2F2", edgecolor="#888",
|
||||
alpha=0.85, pad=3))
|
||||
|
||||
fig.suptitle(
|
||||
"Per-GPU utilization: \"is KVC's prefill GPU wasted?\"\n"
|
||||
|
||||
Reference in New Issue
Block a user