#!/usr/bin/env python3
"""Generate the two figures referenced by docs/V2_DEEP_ANALYSIS_ZH.md §3.1 and §3.2.

Inputs:
  outputs/qwen3-30b-tp1-ts1-migration-v2/kvc_1p3d_migration_v2_run1_metrics.jsonl
  outputs/qwen3-30b-tp1-ts1-validation/dp4_metrics.jsonl

Outputs:
  docs/figures/v2_execution_mode_distribution.png   (for §3.1)
  docs/figures/v2_path_level_latency.png            (for §3.2)
"""
from __future__ import annotations

import json
import statistics
from collections import Counter, defaultdict
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np

ROOT = Path(__file__).resolve().parents[2]
KVC = ROOT / "outputs/qwen3-30b-tp1-ts1-migration-v2/kvc_1p3d_migration_v2_run1_metrics.jsonl"
DP = ROOT / "outputs/qwen3-30b-tp1-ts1-validation/dp4_metrics.jsonl"
OUT = ROOT / "docs/figures"
OUT.mkdir(parents=True, exist_ok=True)

# Raw execution_mode id of the fast path; drives bar colour and path grouping.
_FAST_PATH_MODE = "kvcache-direct-to-d-session"

# Raw execution_mode id -> human-readable y-axis label for figure 1.
# Modes missing from this table are shown under their raw id.
_SHORT_LABELS = {
    "kvcache-direct-to-d-session": "direct-to-D-session (fast path)",
    "pd-router-d-session-reseed": "d-session-reseed (mooncake reseed)",
    "pd-router-fallback-session-not-resident-session-cap": "fallback: session-not-resident + session-cap",
    "pd-router-fallback-session-not-resident-seed-filter-early-turn": "fallback: session-not-resident + seed-filter",
    "pd-router-turn1-seed": "turn1-seed (first turn of each session)",
    "pd-router-fallback-no-d-capacity": "fallback: no-d-capacity",
    "pd-router-fallback-real-large-append-session-cap": "fallback: real-large-append",
    "pd-router-fallback-policy-no-bypass-session-cap": "fallback: policy-no-bypass",
    "pd-router-d-session-reseed-after-eviction": "d-session-reseed-after-eviction",
    "kvcache-centric": "kvcache-centric (admit-but-then-error)",
}

# X-axis group labels for figure 2. The percentages are hard-coded text, not
# computed from the data.
_PATH_DIRECT = "KVC direct-to-D\n(fast path, 91.6%)"
_PATH_NOT_RESIDENT = "KVC session-not-resident\n(misc, 2.3%)"
_PATH_RESEED = "KVC reseed\n(slow path, 3.4%)"
_PATH_NO_CAPACITY = "KVC no-d-capacity\n(fallback, 0.7%)"
_PATH_OTHER = "KVC other\n(<2%)"
_PATH_DP = "DP dp-colo-router\n(100%)"

# Display order for the KVC groups (fast -> slow). _PATH_OTHER is deliberately
# absent, so rows in that group are not plotted.
_KVC_PATH_ORDER = [_PATH_DIRECT, _PATH_NOT_RESIDENT, _PATH_RESEED, _PATH_NO_CAPACITY]


def load(p: Path) -> list[dict]:
    """Read a JSON Lines file and return one parsed object per non-blank line.

    The file is opened as UTF-8 and closed before returning. Blank or
    whitespace-only lines are skipped rather than handed to the JSON parser.
    """
    with p.open(encoding="utf-8") as fh:
        return [json.loads(line) for line in fh if line.strip()]


def is_failed(r: dict) -> bool:
    """Return True when a metrics row represents a failed request.

    A row is failed when its "error" field is truthy, or when the
    lower-cased "finish_reason" contains "abort" or "badrequest".
    """
    if r.get("error"):
        return True
    fr = r.get("finish_reason")
    if not fr:
        return False
    reason = str(fr).lower()
    return "abort" in reason or "badrequest" in reason


def pct(vals: list[float], q: float) -> float:
    """Return the element at fractional rank *q* of *vals* (no interpolation).

    The values are sorted and the element at index ``int(len * q)`` is
    returned, with the index clamped into range. An empty input yields NaN.
    """
    s = sorted(vals)
    if not s:
        return float("nan")
    return s[max(0, min(len(s) - 1, int(len(s) * q)))]


def _kvc_group(mode: str) -> str:
    """Map a raw KVC execution_mode id to its figure-2 group label.

    The substring checks are order-sensitive: a mode containing "reseed"
    is classed as reseed before the other checks run.
    """
    if mode == _FAST_PATH_MODE:
        return _PATH_DIRECT
    if "reseed" in mode:
        return _PATH_RESEED
    if "no-d-capacity" in mode:
        return _PATH_NO_CAPACITY
    if "session-not-resident" in mode:
        return _PATH_NOT_RESIDENT
    return _PATH_OTHER


def _stats(rows: list[dict]) -> dict[str, float]:
    """Summarise rows: the row count plus TTFT p50/p99 and latency p50.

    Rows whose "ttft_s" / "latency_s" is missing or None are excluded from
    the respective percentile but still counted in "n".
    """
    ttfts = [r["ttft_s"] for r in rows if r.get("ttft_s") is not None]
    lats = [r["latency_s"] for r in rows if r.get("latency_s") is not None]
    return {
        "n": len(rows),
        "ttft_p50": pct(ttfts, 0.50),
        "ttft_p99": pct(ttfts, 0.99),
        "lat_p50": pct(lats, 0.50),
    }


def _plot_execution_modes(kvc: list[dict]) -> tuple[list[str], list[int], list[float]]:
    """Draw figure 1 (§3.1): horizontal log-scale bars of execution_mode counts.

    Uses ALL rows (incl. failures) so percentages match the doc's 91.6%.

    Returns the (labels, counts, percentages) that were plotted, ordered from
    most to least common, so the caller can print them.
    """
    total_kvc = len(kvc)
    sorted_modes = Counter(r["execution_mode"] for r in kvc).most_common()
    labels = [_SHORT_LABELS.get(m, m) for m, _ in sorted_modes]
    counts = [c for _, c in sorted_modes]
    pcts = [c / total_kvc * 100 for c in counts]
    # Colour by the raw mode id, not the display label, so rewording a label
    # cannot change which bar is shown as the fast path.
    colors = ["#2C8C2C" if m == _FAST_PATH_MODE else "#D62728" for m, _ in sorted_modes]

    fig, ax = plt.subplots(figsize=(11, 5.5))
    # Reversed positions put the most common mode at the top of the chart.
    y = np.arange(len(labels))[::-1]
    ax.barh(y, counts, color=colors, edgecolor="black", linewidth=0.5)
    ax.set_yticks(y)
    ax.set_yticklabels(labels, fontsize=10)
    ax.set_xscale("log")
    ax.set_xlabel("Request count (log scale)", fontsize=11)
    ax.set_xlim(left=1)

    # Annotate count + percentage at the end of each bar.
    for yi, c, p in zip(y, counts, pcts):
        ax.text(c * 1.05, yi, f"{c} ({p:.1f}%)", va="center", fontsize=9.5)

    ax.set_title(
        f"KVC v2 execution_mode distribution (n = {total_kvc} total requests)\n"
        "green = fast path (direct-to-D), red = slow / fallback / failure paths",
        fontsize=12,
        pad=12,
    )
    ax.grid(axis="x", linestyle=":", alpha=0.4)
    ax.set_axisbelow(True)

    fig.tight_layout()
    out1 = OUT / "v2_execution_mode_distribution.png"
    fig.savefig(out1, dpi=150)
    print(f"wrote {out1}")
    plt.close(fig)
    return labels, counts, pcts


def _plot_path_latency(
    kvc_ok: list[dict], dp_ok: list[dict]
) -> tuple[list[str], dict[str, dict[str, float]]]:
    """Draw figure 2 (§3.2): grouped log-scale latency bars per path.

    Successful KVC rows are grouped semantically via ``_kvc_group``; the DP
    rows form a single baseline group appended last.

    Returns (ordered path labels, per-path stats) for the caller to print.
    """
    groups = defaultdict(list)
    for r in kvc_ok:
        groups[_kvc_group(r["execution_mode"])].append(r)

    # Keep only KVC groups that actually occur, in fast -> slow order.
    ordered_paths = [p for p in _KVC_PATH_ORDER if p in groups]
    path_stats = {p: _stats(groups[p]) for p in ordered_paths}
    ordered_paths.append(_PATH_DP)
    path_stats[_PATH_DP] = _stats(dp_ok)

    metrics = [("TTFT p50", "ttft_p50"), ("TTFT p99", "ttft_p99"), ("Latency p50", "lat_p50")]
    colors_metric = ["#1F77B4", "#FF7F0E", "#9467BD"]
    bar_w = 0.25

    fig, ax = plt.subplots(figsize=(12, 6))
    x = np.arange(len(ordered_paths))
    for i, (label, key) in enumerate(metrics):
        # Offsets of -1, 0, +1 bar widths centre the three bars on each tick.
        xs = x + (i - 1) * bar_w
        vals = [path_stats[p][key] for p in ordered_paths]
        ax.bar(xs, vals, bar_w, label=label, color=colors_metric[i],
               edgecolor="black", linewidth=0.4)
        for xi, v in zip(xs, vals):
            # `v > 0` is also False for NaN, so empty groups get no value label.
            if v > 0:
                fmt = f"{v*1000:.0f}ms" if v < 1 else f"{v:.2f}s"
                ax.text(xi, v * 1.10, fmt, ha="center", va="bottom",
                        fontsize=8.5, rotation=0)

    ax.set_yscale("log")
    ax.set_xticks(x)
    ax.set_xticklabels(ordered_paths, fontsize=9.5)
    ax.set_ylabel("Latency (seconds, log scale)", fontsize=11)
    ax.set_title(
        "Path-level latency: KVC v2 paths vs DP single-path baseline\n"
        "log y-axis · same SWE-Bench 50sess trace · ts=1 · 4× H100 80GB",
        fontsize=12,
        pad=12,
    )
    ax.legend(loc="upper left", fontsize=10, framealpha=0.95)
    ax.grid(axis="y", linestyle=":", alpha=0.4, which="both")
    ax.set_axisbelow(True)

    # Annotate sample counts under each path label.
    ymin = ax.get_ylim()[0]
    for xi, p in zip(x, ordered_paths):
        n = path_stats[p]["n"]
        ax.text(xi, ymin * 0.5, f"n={n}", ha="center", va="top",
                fontsize=8.5, color="#555")

    fig.tight_layout()
    out2 = OUT / "v2_path_level_latency.png"
    fig.savefig(out2, dpi=150)
    print(f"wrote {out2}")
    plt.close(fig)
    return ordered_paths, path_stats


def _print_summary(
    labels: list[str],
    counts: list[int],
    pcts: list[float],
    ordered_paths: list[str],
    path_stats: dict[str, dict[str, float]],
) -> None:
    """Print the numeric values behind both figures (for doc reference)."""
    print("\n=== Numeric values plotted ===")
    print("\nExecution mode counts (KVC v2):")
    for label, c, p in zip(labels, counts, pcts):
        print(f" {c:>5} ({p:>5.2f}%) {label}")
    print("\nPath-level latency:")
    for p in ordered_paths:
        s = path_stats[p]
        nl = " | ".join([
            f"n={s['n']}",
            f"TTFT p50={s['ttft_p50']*1000:.1f}ms",
            f"TTFT p99={s['ttft_p99']*1000:.1f}ms",
            f"Lat p50={s['lat_p50']:.3f}s",
        ])
        # Path labels contain a newline for the plot; flatten it for the console.
        print(f" {p.replace(chr(10), ' '):<55} {nl}")


def main() -> None:
    """Load both metrics files, render the two figures, and print their numbers."""
    kvc = load(KVC)
    dp = load(DP)
    kvc_ok = [r for r in kvc if not is_failed(r)]
    dp_ok = [r for r in dp if not is_failed(r)]

    # Figure 1 uses every KVC row; figure 2 uses successful rows only.
    labels, counts, pcts = _plot_execution_modes(kvc)
    ordered_paths, path_stats = _plot_path_latency(kvc_ok, dp_ok)
    _print_summary(labels, counts, pcts, ordered_paths, path_stats)


if __name__ == "__main__":
    main()