Files
agentic-kvc/analysis/characterization/render_window1_figures.py
Gahow Wang b7902061d1 Window 1 analysis: APC upper bound, B2 window-overlap, figure renderer
Three CPU-only analysis pieces that turn raw Window 1 artifacts into
publishable numbers and figures.

scripts/compute_apc_upper_bound.py
  Block-level trie walk over hash_ids to compute the theoretical APC
  ceiling on a trace, decomposed into intra-session / any-session /
  shared-prefix-only. Gives a fixed reference for what each routing
  policy could *possibly* achieve. w600 result: 79.6% intra-session,
  80.3% any-session, 0.1% shared-prefix.

analysis/characterization/b2_sweep_analysis.py (rewrite)
  Previous version used joined_analysis.interference_index() which
  labeled overlap = "any prefill in any other request during this
  decode". With short-prompt decode load this is always true
  (everyone's prefill overlaps everyone else's decode); n_overlap
  was 239/240 even in the different-worker control.

  New version labels overlap iff the decode's [t_first_token, t_finish]
  intersects an actual large *injection* window, computed from the
  cell's "prefill"-tagged metric rows. Different-worker control now
  cleanly sits at idx ≈ 1.0, same-worker scales monotonically.

analysis/characterization/render_window1_figures.py
  Renders 9 PNGs from the result JSONs: B3 latency / APC vs ceiling
  / APC vs hotspot scatter / per-worker TTFT / failure breakdown,
  B2 TPOT and TTFT curves (overlap vs clean and idx), reuse
  decomposition, KV footprint.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-05-25 23:24:54 +08:00

304 lines
12 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

"""Render PNG figures for Window 1 results (B1', B2, B3).
Inputs (all expected under <results-dir>):
- b3_policy_comparison.json (per-policy table)
- b2_sweep_summary.json (per-cell B2 sweep)
- apc_upper_w600.json (theoretical bounds)
- lmetric_reuse.json (intra/cross/shared decomp)
- kv_footprint_summary.json (full trace KV stats)
- per_worker_<policy>.json (optional, one per policy; per-worker TTFT p90)
Outputs (under <out-dir>):
- fig_b3_apc_vs_hotspot.png
- fig_b3_latency_bars.png
- fig_b3_apc_vs_upper.png
- fig_b3_failure_breakdown.png
- fig_b3_per_worker_ttft_p90.png
- fig_b2_tpot_vs_prefill.png
- fig_b2_ttft_vs_prefill.png
- fig_reuse_decomposition.png
- fig_kv_footprint_cdf.png
"""
from __future__ import annotations
import argparse
import json
from pathlib import Path
import matplotlib
matplotlib.use("Agg")
import matplotlib.pyplot as plt
# Left-to-right order in which policies appear on the per-policy bar charts;
# policies absent from the comparison rows are filtered out at each call site.
POLICY_ORDER = ["lmetric", "load_only", "sticky", "unified", "capped"]
# Fixed colour per policy so a policy looks the same in every figure.
# Call sites use .get(policy, "gray"), so unlisted policies render gray.
POLICY_COLOR = {
    "lmetric": "#1f77b4",
    "load_only": "#ff7f0e",
    "sticky": "#d62728",
    "unified": "#2ca02c",
    "capped": "#9467bd",
}
def _load(results_dir: Path, name: str) -> dict:
return json.loads((results_dir / name).read_text())
def fig_b3_apc_vs_hotspot(comp: dict, upper: dict, out: Path) -> None:
    """Scatter each policy's achieved APC against its hot-spot index.

    Args:
        comp: policy comparison; every entry of ``comp["rows"]`` must carry
            ``policy``, ``apc_ratio`` and ``hotspot_index_ttft_p90``.
        upper: APC bounds; ``apc_upper_intra_session`` is drawn as a
            vertical dashed reference line.
        out: path the PNG is written to.

    ``apc_ratio`` and the upper bound are multiplied by 100 so the x axis
    reads in percent.
    """
    upper_intra = upper["apc_upper_intra_session"]
    fig, ax = plt.subplots(figsize=(6, 4.5))
    for r in comp["rows"]:
        pol = r["policy"]
        point = (r["apc_ratio"] * 100, r["hotspot_index_ttft_p90"])
        ax.scatter(point[0], point[1],
                   s=180, color=POLICY_COLOR.get(pol, "gray"), label=pol,
                   edgecolors="black", linewidths=0.5)
        ax.annotate(pol, point,
                    xytext=(7, 7), textcoords="offset points",
                    fontsize=9)
    ax.axvline(upper_intra * 100, linestyle="--", color="gray", alpha=0.6,
               label=f"intra-session APC upper {upper_intra * 100:.1f}%")
    ax.set_xlabel("APC achieved (%)")
    ax.set_ylabel("hotspot_index = max(worker TTFT p90) / median")
    ax.set_title("B3: APC vs hot-spot tradeoff across policies")
    # The artists above all carry label=..., but nothing rendered them before:
    # without a legend the dashed ceiling line is unidentified in the PNG.
    ax.legend(fontsize=8, loc="best")
    ax.grid(alpha=0.3)
    fig.tight_layout()
    fig.savefig(out, dpi=120)
    plt.close(fig)
def fig_b3_latency_bars(comp: dict, out: Path) -> None:
    """Draw three side-by-side bar panels (TTFT, TPOT, E2E p90) per policy.

    Args:
        comp: policy comparison; rows are looked up by ``policy`` and must
            provide ``ttft_p90_s``, ``tpot_p90_s`` and ``e2e_p90_s``.
        out: path the PNG is written to.

    Only policies listed in ``POLICY_ORDER`` are drawn, in that order.
    The TPOT panel is multiplied by 1000 to match its "(ms)" title.
    """
    rows_by_policy = {row["policy"]: row for row in comp["rows"]}
    policies = [name for name in POLICY_ORDER if name in rows_by_policy]
    # (panel title, row key, display multiplier)
    panels = [
        ("TTFT p90 (s)", "ttft_p90_s", 1),
        ("TPOT p90 (ms)", "tpot_p90_s", 1000),
        ("E2E p90 (s)", "e2e_p90_s", 1),
    ]
    fig, axes = plt.subplots(1, 3, figsize=(12, 4))
    for ax, (title, field, factor) in zip(axes, panels):
        heights = [rows_by_policy[name][field] * factor for name in policies]
        bar_colors = [POLICY_COLOR.get(name, "gray") for name in policies]
        ax.bar(policies, heights, color=bar_colors,
               edgecolor="black", linewidth=0.5)
        ax.set_title(title)
        ax.tick_params(axis="x", rotation=20)
        # Print each value just above its bar.
        for x_pos, height in enumerate(heights):
            ax.text(x_pos, height, f"{height:.1f}",
                    ha="center", va="bottom", fontsize=9)
        ax.grid(alpha=0.3, axis="y")
    fig.suptitle("B3 headline latencies per policy")
    fig.tight_layout()
    fig.savefig(out, dpi=120)
    plt.close(fig)
def fig_b3_apc_vs_upper(comp: dict, upper: dict, out: Path) -> None:
    """Bar chart of achieved APC per policy against the two APC ceilings.

    Args:
        comp: policy comparison; rows must provide ``policy`` and
            ``apc_ratio``.
        upper: APC bounds; ``apc_upper_intra_session`` and
            ``apc_upper_any_session`` are drawn as horizontal lines.
        out: path the PNG is written to.

    All ratios are multiplied by 100 and the y axis is fixed to 0-100.
    """
    rows_by_policy = {row["policy"]: row for row in comp["rows"]}
    policies = [name for name in POLICY_ORDER if name in rows_by_policy]
    achieved_pct = [rows_by_policy[name]["apc_ratio"] * 100 for name in policies]

    fig, ax = plt.subplots(figsize=(6.5, 4))
    bars = ax.bar(
        policies,
        achieved_pct,
        color=[POLICY_COLOR.get(name, "gray") for name in policies],
        edgecolor="black",
        linewidth=0.5,
    )

    # (bound key, legend prefix, line style, colour)
    ceilings = [
        ("apc_upper_intra_session", "intra-session", "--", "black"),
        ("apc_upper_any_session", "any-session", ":", "darkgray"),
    ]
    for bound_key, prefix, line_style, line_color in ceilings:
        ax.axhline(
            upper[bound_key] * 100,
            linestyle=line_style,
            color=line_color,
            alpha=0.7,
            label=f"{prefix} ceiling {upper[bound_key] * 100:.1f}%",
        )

    # Value labels sit one percentage point above the top of each bar.
    for bar, pct in zip(bars, achieved_pct):
        x_center = bar.get_x() + bar.get_width() / 2
        ax.text(x_center, pct + 1, f"{pct:.1f}%", ha="center", fontsize=9)

    ax.set_ylim(0, 100)
    ax.set_ylabel("APC ratio (%)")
    ax.set_title("B3: APC achieved vs theoretical ceiling")
    ax.legend(loc="upper right", fontsize=9)
    ax.grid(alpha=0.3, axis="y")
    fig.tight_layout()
    fig.savefig(out, dpi=120)
    plt.close(fig)
def fig_b3_failure_breakdown(comp: dict, out: Path) -> None:
    """Stacked bars of slow-request counts per policy, split by cause.

    Args:
        comp: policy comparison; each row may carry a ``failure_counts``
            mapping from cause name to count. A missing or null mapping,
            or a missing cause, counts as 0.
        out: path the PNG is written to.

    Only the five causes listed below are stacked; any other key in
    ``failure_counts`` is not drawn.
    """
    rows_by_policy = {row["policy"]: row for row in comp["rows"]}
    policies = [name for name in POLICY_ORDER if name in rows_by_policy]
    # Stacking order (bottom to top) paired with each segment's colour.
    cause_palette = [
        ("same_worker_prefill_overlap", "#d62728"),
        ("hot_worker_queue", "#ff7f0e"),
        ("cache_miss_large_append", "#1f77b4"),
        ("high_kv_occupancy", "#8c564b"),
        ("unknown", "#7f7f7f"),
    ]

    fig, ax = plt.subplots(figsize=(7, 4.5))
    stacked = [0.0] * len(policies)
    for cause, segment_color in cause_palette:
        counts = []
        for name in policies:
            per_cause = rows_by_policy[name].get("failure_counts") or {}
            counts.append(per_cause.get(cause, 0))
        ax.bar(policies, counts, bottom=stacked, label=cause.replace("_", " "),
               color=segment_color, edgecolor="black", linewidth=0.3)
        stacked = [base + extra for base, extra in zip(stacked, counts)]

    # Annotate every stack with its total just above the top segment.
    for x_pos, total in enumerate(stacked):
        ax.text(x_pos, total + 3, f"n={int(total)}", ha="center", fontsize=9)

    ax.set_ylabel("slow request count (TTFT > 2× p90 threshold)")
    ax.set_title("B3: slow-request cause breakdown per policy")
    ax.legend(fontsize=8, loc="upper right")
    ax.grid(alpha=0.3, axis="y")
    fig.tight_layout()
    fig.savefig(out, dpi=120)
    plt.close(fig)
def fig_b3_per_worker_ttft(results_dir: Path, comp: dict, out: Path) -> None:
    """Per-worker TTFT p90 bars, one panel per policy.

    For every policy present in ``comp["rows"]`` (in ``POLICY_ORDER``) this
    reads ``results_dir / "per_worker_<policy>.json"`` and plots its
    ``per_worker_ttft_p90_s`` mapping. A policy whose file is missing gets a
    "no data" placeholder panel instead.

    Args:
        results_dir: directory holding the ``per_worker_<policy>.json`` files.
        comp: policy comparison; rows must provide ``policy`` and
            ``hotspot_index_ttft_p90`` (shown in each panel title).
        out: path the PNG is written to.

    Worker keys must end in ``:<integer>``; bars are sorted by that integer
    and labelled ``e<integer - 8000>``.
    NOTE(review): the key presumably is ``host:port`` with ports starting at
    8000 -- confirm against the producer of these files.
    """
    by = {r["policy"]: r for r in comp["rows"]}
    pols = [p for p in POLICY_ORDER if p in by]
    # A zero-column grid is not a valid subplot layout, so keep at least one
    # panel and still emit the file when no known policy is present.
    n_panels = max(len(pols), 1)
    fig, axes = plt.subplots(1, n_panels, figsize=(3 * n_panels, 4),
                             sharey=True, squeeze=False)
    # squeeze=False always yields a 2-D array; take its single row.
    axes = axes[0]
    if not pols:
        axes[0].text(0.5, 0.5, "no policies to plot", ha="center", va="center",
                     transform=axes[0].transAxes)
    for ax, pol in zip(axes, pols):
        path = results_dir / f"per_worker_{pol}.json"
        if not path.exists():
            ax.text(0.5, 0.5, f"{pol}: no data", ha="center", va="center",
                    transform=ax.transAxes)
            continue
        data = json.loads(path.read_text(encoding="utf-8"))
        per = data.get("per_worker_ttft_p90_s") or {}
        # Pair each value with the integer suffix of its key, then sort by it.
        items = sorted(
            ((int(k.rsplit(":", 1)[1]), v) for k, v in per.items()),
            key=lambda item: item[0],
        )
        labels = [f"e{suffix - 8000}" for suffix, _ in items]
        vals = [v for _, v in items]
        ax.bar(labels, vals, color=POLICY_COLOR.get(pol, "gray"),
               edgecolor="black", linewidth=0.5)
        for i, v in enumerate(vals):
            ax.text(i, v, f"{v:.1f}", ha="center", va="bottom", fontsize=8)
        ax.set_title(f"{pol}\nhotspot={by[pol]['hotspot_index_ttft_p90']:.2f}",
                     fontsize=10)
        ax.tick_params(axis="x", labelsize=8)
        ax.grid(alpha=0.3, axis="y")
    axes[0].set_ylabel("worker TTFT p90 (s)")
    fig.suptitle("B3 per-worker TTFT p90 distribution")
    fig.tight_layout()
    fig.savefig(out, dpi=120)
    plt.close(fig)
def fig_b2_curves(b2: dict, out_tpot: Path, out_ttft: Path) -> None:
    """Render the B2 TPOT and TTFT interference figures.

    Each figure has two panels plotted against prefill injection size:
    absolute p90 for overlapping vs clean decodes (left) and the
    interference index ``overlap / clean`` (right), with one curve per
    variant ("different" and "same").

    Args:
        b2: sweep summary; every entry of ``b2["rows"]`` must carry
            ``variant`` ("same" or "different") and ``prefill_size``, and
            may carry ``tpot_p90_overlap_s``, ``tpot_p90_clean_s``,
            ``ttft_p90_overlap_s`` and ``ttft_p90_clean_s``.
        out_tpot: path of the TPOT PNG.
        out_ttft: path of the TTFT PNG.

    Raises:
        KeyError: if a row has a ``variant`` other than "same"/"different".

    Cells with a missing overlap or clean value are skipped. A clean value
    of 0 yields a NaN index (a gap in the curve) instead of raising
    ``ZeroDivisionError``.
    """
    sizes = sorted({r["prefill_size"] for r in b2["rows"]})
    by_var = {"same": {}, "different": {}}
    for r in b2["rows"]:
        by_var[r["variant"]][r["prefill_size"]] = r
    for name, key, ylabel, ymax_log, out in [
        ("TPOT", "tpot_p90", "TPOT p90 (ms)", True, out_tpot),
        ("TTFT", "ttft_p90", "TTFT p90 (s)", True, out_ttft),
    ]:
        # Source values are in seconds; TPOT is displayed in milliseconds.
        scale = 1000 if name == "TPOT" else 1.0
        fig, (ax_abs, ax_idx) = plt.subplots(1, 2, figsize=(11, 4))
        for variant in ("different", "same"):
            xs, ys_o, ys_c, idxs = [], [], [], []
            for sz in sizes:
                r = by_var[variant].get(sz)
                if not r:
                    continue
                ov = r.get(f"{key}_overlap_s")
                cl = r.get(f"{key}_clean_s")
                if ov is None or cl is None:
                    continue
                xs.append(sz)
                ys_o.append(ov * scale)
                ys_c.append(cl * scale)
                # The index is undefined for a zero clean baseline.
                idxs.append(ov / cl if cl else float("nan"))
            color = "#d62728" if variant == "same" else "#1f77b4"
            ax_abs.plot(xs, ys_o, "o-", color=color,
                        label=f"{variant} (overlap)")
            ax_abs.plot(xs, ys_c, "s--", color=color, alpha=0.5,
                        label=f"{variant} (clean)")
            ax_idx.plot(xs, idxs, "o-", color=color, label=variant,
                        linewidth=2)
        ax_abs.set_xscale("log", base=2)
        ax_abs.set_yscale("log")
        ax_abs.set_xlabel("prefill injection size (tokens)")
        ax_abs.set_ylabel(ylabel + " (log)")
        ax_abs.set_title(f"B2 {name} absolute (overlap vs clean)")
        ax_abs.legend(fontsize=8)
        ax_abs.grid(alpha=0.3, which="both")
        ax_idx.set_xscale("log", base=2)
        if ymax_log:
            ax_idx.set_yscale("log")
        # idx == 1.0 means overlap and clean latencies are equal.
        ax_idx.axhline(1.0, color="black", linestyle=":", alpha=0.5)
        ax_idx.set_xlabel("prefill injection size (tokens)")
        ax_idx.set_ylabel(f"{name} idx = overlap / clean")
        ax_idx.set_title(f"B2 {name} interference index (same vs different worker)")
        ax_idx.legend()
        ax_idx.grid(alpha=0.3, which="both")
        fig.tight_layout()
        fig.savefig(out, dpi=120)
        plt.close(fig)
def fig_reuse_decomposition(reuse: dict, out: Path) -> None:
    """Draw one horizontal stacked bar splitting cached tokens by reuse kind.

    Args:
        reuse: reuse summary; ``reuse["fractions"]`` may provide ``intra``,
            ``cross``, ``shared`` and ``unclassified``. A missing or null
            mapping, or a missing key, is treated as 0.
        out: path the PNG is written to.

    Each segment's legend entry shows its fraction as a percentage; the x
    axis is fixed to 0-1.
    """
    fractions = reuse.get("fractions") or {}
    # (legend name, key in ``fractions``, colour), in left-to-right order.
    segments = [
        ("intra-session", "intra", "#2ca02c"),
        ("cross-session", "cross", "#ff7f0e"),
        ("shared-prefix", "shared", "#9467bd"),
        ("unclassified", "unclassified", "#7f7f7f"),
    ]

    fig, ax = plt.subplots(figsize=(6, 3))
    left_edge = 0.0
    for title, field, segment_color in segments:
        share = fractions.get(field, 0)
        ax.barh(
            ["lmetric run"],
            [share],
            left=[left_edge],
            color=segment_color,
            edgecolor="black",
            linewidth=0.5,
            label=f"{title} ({share * 100:.1f}%)",
        )
        # The next segment starts where this one ends.
        left_edge += share

    ax.set_xlabel("fraction of cached_tokens")
    ax.set_xlim(0, 1)
    ax.set_title("Real reuse decomposition (w600 lmetric run)")
    ax.legend(fontsize=9, loc="lower right")
    ax.grid(alpha=0.3, axis="x")
    fig.tight_layout()
    fig.savefig(out, dpi=120)
    plt.close(fig)
def fig_kv_footprint_cdf(kv: dict, out: Path) -> None:
    """Bar chart of per-request KV footprint percentiles (p50/p90/p95/p99).

    Args:
        kv: footprint summary; ``kv["kv_mib_per_request"]`` may provide
            ``p50``, ``p90``, ``p95`` and ``p99``.
        out: path the PNG is written to.

    Percentiles that are missing or null are left out of the chart. Before,
    the ``or {}`` fallback let a missing summary through only for the
    value labels to fail on ``None``. A dashed reference line is drawn at
    95 * 1024 on the y axis, labelled "H20 ~95 GiB usable".
    """
    s = kv.get("kv_mib_per_request") or {}
    present = [(k, s[k]) for k in ("p50", "p90", "p95", "p99")
               if s.get(k) is not None]
    labels = [k for k, _ in present]
    vals = [v for _, v in present]
    fig, ax = plt.subplots(figsize=(6, 3.5))
    ax.bar(labels, vals, color="#1f77b4", edgecolor="black", linewidth=0.5)
    for i, v in enumerate(vals):
        ax.text(i, v, f"{v:.0f} MiB", ha="center", va="bottom", fontsize=9)
    ax.axhline(95 * 1024, color="red", linestyle="--", alpha=0.5,
               label="H20 ~95 GiB usable")
    ax.set_ylabel("KV bytes per request (MiB)")
    ax.set_title("B1' Per-request KV footprint (Qwen3-Coder-30B-A3B, 98304 B/token)")
    ax.legend()
    ax.grid(alpha=0.3, axis="y")
    fig.tight_layout()
    fig.savefig(out, dpi=120)
    plt.close(fig)
def main() -> None:
    """Command-line entry point: load the result JSONs and write every figure.

    Takes ``--results-dir`` (where the input JSON files live) and
    ``--out-dir`` (created if needed, receives the PNGs). All five summary
    files are loaded up front, so a missing one fails before any figure is
    rendered.
    """
    p = argparse.ArgumentParser()
    p.add_argument("--results-dir", type=Path, required=True)
    p.add_argument("--out-dir", type=Path, required=True)
    args = p.parse_args()
    args.out_dir.mkdir(parents=True, exist_ok=True)

    written = []

    def _out(filename: str) -> Path:
        # Record each output path so the final count cannot drift from the
        # set of figures actually rendered (it used to be a stale "8").
        path = args.out_dir / filename
        written.append(path)
        return path

    comp = _load(args.results_dir, "b3_policy_comparison.json")
    upper = _load(args.results_dir, "apc_upper_w600.json")
    b2 = _load(args.results_dir, "b2_sweep_summary.json")
    reuse = _load(args.results_dir, "lmetric_reuse.json")
    kv = _load(args.results_dir, "kv_footprint_summary.json")
    fig_b3_apc_vs_hotspot(comp, upper, _out("fig_b3_apc_vs_hotspot.png"))
    fig_b3_latency_bars(comp, _out("fig_b3_latency_bars.png"))
    fig_b3_apc_vs_upper(comp, upper, _out("fig_b3_apc_vs_upper.png"))
    fig_b3_failure_breakdown(comp, _out("fig_b3_failure_breakdown.png"))
    fig_b3_per_worker_ttft(args.results_dir, comp,
                           _out("fig_b3_per_worker_ttft_p90.png"))
    fig_b2_curves(b2,
                  _out("fig_b2_tpot_vs_prefill.png"),
                  _out("fig_b2_ttft_vs_prefill.png"))
    fig_reuse_decomposition(reuse, _out("fig_reuse_decomposition.png"))
    fig_kv_footprint_cdf(kv, _out("fig_kv_footprint_cdf.png"))
    print(f"wrote {len(written)} figures to {args.out_dir}")


if __name__ == "__main__":
    main()