Files
agentic-pd-hybrid/scripts/analysis/plot_v2_path_breakdown.py
kzlin b5af19583b docs(kvc): replace v2 path breakdown tables with generated figures
V2_DEEP_ANALYSIS §3.1 (execution_mode distribution) and §3.2 (path-level
latency vs DP) had hand-typed tables with approximate latencies (e.g.
"~1.0s") and required readers to mentally compare 5+ rows × 5 columns.
Both sections now reference generated PNG figures derived directly from
the v2 + DP metrics.jsonl files.

§3.1 figure (v2_execution_mode_distribution.png):
  Horizontal bar chart, log x-axis. 4076 direct-to-D fast-path requests
  (green) dwarf the rest by ~30x; the long tail of slow / fallback /
  failure modes is visible at a glance. Counts and percentages are
  annotated on each bar.

§3.2 figure (v2_path_level_latency.png):
  Grouped bar chart, log y-axis. Per-path TTFT p50 / TTFT p99 / Lat p50
  with exact numeric labels (no more "~1.0s" approximations). Sample
  counts annotated below each path. Quick visual reads:
   - KVC fast path TTFT p50 41ms vs DP 92ms (2.2x faster)
   - KVC reseed TTFT p99 5.12s vs DP 0.43s (12x slower) -- the cost
   - KVC no-d-capacity TTFT p99 7.65s (worst case)

Bundled:
- scripts/analysis/plot_v2_path_breakdown.py -- the script that
  generates both figures; re-runnable when the v2 data changes.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-11 17:38:43 +08:00

224 lines
8.2 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/env python3
"""Generate the two figures referenced by docs/V2_DEEP_ANALYSIS_ZH.md §3.1 and §3.2.
Inputs:
outputs/qwen3-30b-tp1-ts1-migration-v2/kvc_1p3d_migration_v2_run1_metrics.jsonl
outputs/qwen3-30b-tp1-ts1-validation/dp4_metrics.jsonl
Outputs:
docs/figures/v2_execution_mode_distribution.png (for §3.1)
docs/figures/v2_path_level_latency.png (for §3.2)
"""
from __future__ import annotations
import json
import statistics
from collections import Counter, defaultdict
from pathlib import Path
import matplotlib.pyplot as plt
import numpy as np
# Directory three levels up from this file's resolved path. With the script
# living at scripts/analysis/<name>.py this is the project root.
ROOT = Path(__file__).resolve().parents[2]
# Input: per-request metrics (JSON Lines) of the KVC v2 run.
KVC = ROOT / "outputs/qwen3-30b-tp1-ts1-migration-v2/kvc_1p3d_migration_v2_run1_metrics.jsonl"
# Input: per-request metrics (JSON Lines) of the DP baseline run.
DP = ROOT / "outputs/qwen3-30b-tp1-ts1-validation/dp4_metrics.jsonl"
# Output directory for the generated PNG figures.
OUT = ROOT / "docs/figures"
# NOTE: executed at import time, so merely importing this module creates the
# directory (and any missing parents) if it does not exist yet.
OUT.mkdir(parents=True, exist_ok=True)
def load(p: Path) -> list[dict]:
    """Read a JSON Lines file and return one decoded value per non-blank line.

    Args:
        p: Path of the ``.jsonl`` file to read.

    Returns:
        The decoded JSON values, in file order.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    # The context manager closes the handle deterministically instead of
    # leaving it to the garbage collector; UTF-8 is pinned so decoding does
    # not depend on the machine's locale.
    with p.open(encoding="utf-8") as fh:
        # Blank / whitespace-only lines (e.g. a trailing newline at EOF) are
        # not JSON documents and would make json.loads raise, so skip them.
        return [json.loads(line) for line in fh if line.strip()]
def is_failed(r: dict) -> bool:
    """Tell whether a metrics row represents a failed request.

    A row counts as failed when its ``"error"`` value is truthy, or when the
    string form of a truthy ``"finish_reason"`` contains ``"abort"`` or
    ``"badrequest"`` (compared case-insensitively).

    Args:
        r: One decoded metrics row.

    Returns:
        True for a failed row, False otherwise.
    """
    if r.get("error"):
        return True
    reason = r.get("finish_reason")
    if not reason:
        # Missing / empty finish reason: nothing marks the row as failed.
        return False
    lowered = str(reason).lower()
    return any(marker in lowered for marker in ("abort", "badrequest"))
def pct(vals: list[float], q: float) -> float:
    """Return the element of *vals* at quantile *q* without interpolation.

    The values are sorted ascending and the element at index
    ``int(len(vals) * q)`` is returned, with the index clamped into the valid
    range.

    Args:
        vals: Sample values; may be empty.
        q: Quantile as a fraction (e.g. ``0.5`` or ``0.99``).

    Returns:
        The selected sample, or NaN when *vals* is empty.
    """
    ordered = sorted(vals)
    if len(ordered) == 0:
        return float("nan")
    last = len(ordered) - 1
    idx = int(len(ordered) * q)
    # Clamp so q <= 0 selects the minimum and q >= 1 selects the maximum.
    if idx > last:
        idx = last
    if idx < 0:
        idx = 0
    return ordered[idx]
def main() -> None:
    """Render both analysis figures and print the numbers behind them.

    Loads the KVC v2 and DP metrics files, writes
    ``v2_execution_mode_distribution.png`` and ``v2_path_level_latency.png``
    into ``OUT``, then prints the plotted values to stdout.

    Raises:
        ValueError: If the KVC metrics file contains no rows.
    """
    kvc = load(KVC)
    dp = load(DP)
    if not kvc:
        # Every percentage below divides by len(kvc); fail with a clear
        # message instead of an opaque ZeroDivisionError.
        raise ValueError(f"no rows found in {KVC}")
    kvc_ok = [r for r in kvc if not is_failed(r)]
    dp_ok = [r for r in dp if not is_failed(r)]

    labels, counts, pcts = _plot_execution_modes(
        kvc, OUT / "v2_execution_mode_distribution.png"
    )
    ordered_paths, path_stats = _plot_path_latency(
        kvc, kvc_ok, dp_ok, OUT / "v2_path_level_latency.png"
    )

    # ------------------------------------------------------------------
    # Print numeric values used (for doc reference)
    # ------------------------------------------------------------------
    print("\n=== Numeric values plotted ===")
    print("\nExecution mode counts (KVC v2):")
    for label, c, p in zip(labels, counts, pcts):
        print(f" {c:>5} ({p:>5.2f}%) {label}")
    print("\nPath-level latency:")
    for p in ordered_paths:
        s = path_stats[p]
        nl = " | ".join([
            f"n={s['n']}",
            f"TTFT p50={s['ttft_p50']*1000:.1f}ms",
            f"TTFT p99={s['ttft_p99']*1000:.1f}ms",
            f"Lat p50={s['lat_p50']:.3f}s",
        ])
        # chr(10) is "\n": flatten the two-line tick label onto one line.
        print(f" {p.replace(chr(10), ' '):<55} {nl}")


# execution_mode value of the fast path (drawn in green in Figure 1).
_FAST_MODE = "kvcache-direct-to-d-session"

# Raw execution_mode value -> human-readable bar label for Figure 1.
# Modes missing from this table are shown under their raw name.
_SHORT_LABEL = {
    "kvcache-direct-to-d-session": "direct-to-D-session (fast path)",
    "pd-router-d-session-reseed": "d-session-reseed (mooncake reseed)",
    "pd-router-fallback-session-not-resident-session-cap":
        "fallback: session-not-resident + session-cap",
    "pd-router-fallback-session-not-resident-seed-filter-early-turn":
        "fallback: session-not-resident + seed-filter",
    "pd-router-turn1-seed": "turn1-seed (first turn of each session)",
    "pd-router-fallback-no-d-capacity": "fallback: no-d-capacity",
    "pd-router-fallback-real-large-append-session-cap":
        "fallback: real-large-append",
    "pd-router-fallback-policy-no-bypass-session-cap":
        "fallback: policy-no-bypass",
    "pd-router-d-session-reseed-after-eviction":
        "d-session-reseed-after-eviction",
    "kvcache-centric": "kvcache-centric (admit-but-then-error)",
}

# Figure 2 KVC path groups in display order: (group key, title, kind).
_KVC_PATHS = (
    ("direct", "KVC direct-to-D", "fast path"),
    ("session-not-resident", "KVC session-not-resident", "misc"),
    ("reseed", "KVC reseed", "slow path"),
    ("no-d-capacity", "KVC no-d-capacity", "fallback"),
)

# Tick label of the DP baseline column in Figure 2.
_DP_LABEL = "DP dp-colo-router\n(100%)"


def _kvc_group(mode: str) -> str:
    """Map a raw execution_mode to its Figure 2 group key (first match wins)."""
    if mode == _FAST_MODE:
        return "direct"
    if "reseed" in mode:
        return "reseed"
    if "no-d-capacity" in mode:
        return "no-d-capacity"
    if "session-not-resident" in mode:
        return "session-not-resident"
    return "other"


def _path_stats(rows: list[dict]) -> dict[str, float]:
    """Return the row count plus TTFT p50/p99 and latency p50 for *rows*.

    Rows whose ``ttft_s`` / ``latency_s`` is missing or None are left out of
    the respective percentile but still counted in ``n``.
    """
    ttfts = [r["ttft_s"] for r in rows if r.get("ttft_s") is not None]
    lats = [r["latency_s"] for r in rows if r.get("latency_s") is not None]
    return {
        "n": len(rows),
        "ttft_p50": pct(ttfts, 0.50),
        "ttft_p99": pct(ttfts, 0.99),
        "lat_p50": pct(lats, 0.50),
    }


def _plot_execution_modes(
    kvc: list[dict], out_path: Path
) -> tuple[list[str], list[int], list[float]]:
    """Draw Figure 1 (§3.1): execution_mode distribution as horizontal bars.

    Uses ALL rows (including failures) so the percentages are shares of the
    total request count. *kvc* must be non-empty.

    Returns:
        ``(labels, counts, pcts)`` in descending count order, as plotted.
    """
    total_kvc = len(kvc)
    sorted_modes = Counter(r["execution_mode"] for r in kvc).most_common()
    labels = [_SHORT_LABEL.get(m, m) for m, _ in sorted_modes]
    counts = [c for _, c in sorted_modes]
    pcts = [c / total_kvc * 100 for c in counts]
    colors = ["#2C8C2C" if m == _FAST_MODE else "#D62728" for m, _ in sorted_modes]

    fig, ax = plt.subplots(figsize=(11, 5.5))
    # Reverse the positions so the most frequent mode ends up at the top.
    y = np.arange(len(labels))[::-1]
    ax.barh(y, counts, color=colors, edgecolor="black", linewidth=0.5)
    ax.set_yticks(y)
    ax.set_yticklabels(labels, fontsize=10)
    ax.set_xscale("log")
    ax.set_xlabel("Request count (log scale)", fontsize=11)
    # Start below 1: with a left limit of exactly 1, a mode that occurred
    # once would span [1, 1] and its bar would have zero visible width.
    ax.set_xlim(left=0.5)
    # Annotate count + percentage at end of each bar
    for yi, c, p in zip(y, counts, pcts):
        ax.text(c * 1.05, yi, f"{c} ({p:.1f}%)",
                va="center", fontsize=9.5)
    ax.set_title(
        f"KVC v2 execution_mode distribution (n = {total_kvc} total requests)\n"
        "green = fast path (direct-to-D), red = slow / fallback / failure paths",
        fontsize=12, pad=12,
    )
    ax.grid(axis="x", linestyle=":", alpha=0.4)
    ax.set_axisbelow(True)
    fig.tight_layout()
    fig.savefig(out_path, dpi=150)
    print(f"wrote {out_path}")
    plt.close(fig)
    return labels, counts, pcts


def _plot_path_latency(
    kvc: list[dict], kvc_ok: list[dict], dp_ok: list[dict], out_path: Path
) -> tuple[list[str], dict[str, dict[str, float]]]:
    """Draw Figure 2 (§3.2): per-path latency as grouped bars on a log y-axis.

    Latency statistics come from the non-failed rows (*kvc_ok*, *dp_ok*). The
    percentage in each KVC tick label is that group's share of ALL KVC rows
    (*kvc*, failures included) and is computed from the data, so the labels
    stay correct when the script is re-run on new metrics. *kvc* must be
    non-empty.

    Returns:
        ``(ordered_paths, path_stats)``: the tick labels in plotted order and
        the statistics dict for each label.
    """
    ok_rows = defaultdict(list)
    for r in kvc_ok:
        ok_rows[_kvc_group(r["execution_mode"])].append(r)
    group_counts = Counter(_kvc_group(r["execution_mode"]) for r in kvc)
    total_kvc = len(kvc)

    # Order paths by intuitive progression (fast -> slow); groups without any
    # non-failed row are left out.
    ordered_paths: list[str] = []
    path_stats: dict[str, dict[str, float]] = {}
    for key, title, kind in _KVC_PATHS:
        if key not in ok_rows:
            continue
        share = group_counts[key] / total_kvc * 100
        label = f"{title}\n({kind}, {share:.1f}%)"
        ordered_paths.append(label)
        path_stats[label] = _path_stats(ok_rows[key])
    ordered_paths.append(_DP_LABEL)
    path_stats[_DP_LABEL] = _path_stats(dp_ok)

    metrics = [("TTFT p50", "ttft_p50"), ("TTFT p99", "ttft_p99"), ("Latency p50", "lat_p50")]
    bar_w = 0.25
    fig, ax = plt.subplots(figsize=(12, 6))
    x = np.arange(len(ordered_paths))
    colors_metric = ["#1F77B4", "#FF7F0E", "#9467BD"]
    for i, (label, key) in enumerate(metrics):
        # Three bars per path, centred on the tick: offsets -1, 0, +1 widths.
        xs = x + (i - 1) * bar_w
        vals = [path_stats[p][key] for p in ordered_paths]
        ax.bar(xs, vals, bar_w, label=label,
               color=colors_metric[i], edgecolor="black", linewidth=0.4)
        for xi, v in zip(xs, vals):
            # "v > 0" is also False for NaN, so empty groups get no label.
            if v > 0:
                fmt = f"{v*1000:.0f}ms" if v < 1 else f"{v:.2f}s"
                ax.text(xi, v * 1.10, fmt,
                        ha="center", va="bottom", fontsize=8.5, rotation=0)
    ax.set_yscale("log")
    ax.set_xticks(x)
    ax.set_xticklabels(ordered_paths, fontsize=9.5)
    ax.set_ylabel("Latency (seconds, log scale)", fontsize=11)
    ax.set_title(
        "Path-level latency: KVC v2 paths vs DP single-path baseline\n"
        "log y-axis · same SWE-Bench 50sess trace · ts=1 · 4× H100 80GB",
        fontsize=12, pad=12,
    )
    ax.legend(loc="upper left", fontsize=10, framealpha=0.95)
    ax.grid(axis="y", linestyle=":", alpha=0.4, which="both")
    ax.set_axisbelow(True)
    # Annotate sample counts under each path label
    ymin = ax.get_ylim()[0]
    for xi, p in zip(x, ordered_paths):
        n = path_stats[p]["n"]
        ax.text(xi, ymin * 0.5, f"n={n}", ha="center", va="top",
                fontsize=8.5, color="#555")
    fig.tight_layout()
    fig.savefig(out_path, dpi=150)
    print(f"wrote {out_path}")
    plt.close(fig)
    return ordered_paths, path_stats
# Generate the figures only when this file is run as a script, not on import.
if __name__ == "__main__":
    main()