agentic-pd-hybrid/scripts/analysis/plot_v2_path_breakdown.py

#!/usr/bin/env python3
"""Generate the two figures referenced by docs/V2_DEEP_ANALYSIS_ZH.md §3.1 and §3.2.

Inputs:
  outputs/qwen3-30b-tp1-ts1-migration-v2/kvc_1p3d_migration_v2_run1_metrics.jsonl
  outputs/qwen3-30b-tp1-ts1-validation/dp4_metrics.jsonl

Outputs:
  docs/figures/v2_execution_mode_distribution.png   (for §3.1)
  docs/figures/v2_path_level_latency.png            (for §3.2)
"""

from __future__ import annotations

import json
import statistics
from collections import Counter, defaultdict
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np

# Directory two levels above the folder that contains this script; every
# input and output path below is resolved relative to it.
ROOT = Path(__file__).resolve().parents[2]
# JSONL metrics file read as the "KVC" data set by main().
KVC = ROOT / "outputs/qwen3-30b-tp1-ts1-migration-v2/kvc_1p3d_migration_v2_run1_metrics.jsonl"
# JSONL metrics file read as the "DP" data set by main().
DP = ROOT / "outputs/qwen3-30b-tp1-ts1-validation/dp4_metrics.jsonl"
# Directory the generated PNG figures are written to.
OUT = ROOT / "docs/figures"
# NOTE: runs at import time, so merely importing this module creates the
# figure directory (and any missing parents) if it does not exist yet.
OUT.mkdir(parents=True, exist_ok=True)


def load(p: Path) -> list[dict]:
    """Read a JSON Lines file into a list of decoded records.

    Args:
        p: Path of the file to read; one JSON document per line.

    Returns:
        The decoded value of every non-blank line, in file order.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    # The context manager closes the handle deterministically, and UTF-8 is
    # requested explicitly so decoding does not depend on the locale default.
    with p.open(encoding="utf-8") as fh:
        # Whitespace-only lines (e.g. an extra newline at EOF) carry no record
        # and would otherwise make json.loads raise.
        return [json.loads(line) for line in fh if line.strip()]


def is_failed(r: dict) -> bool:
    """Tell whether a metrics record describes a failed request.

    A record counts as failed when its ``"error"`` value is truthy, or when
    the lower-cased string form of its ``"finish_reason"`` value contains
    ``"abort"`` or ``"badrequest"``.
    """
    if r.get("error"):
        return True

    reason = r.get("finish_reason")
    if not reason:
        # Missing or empty finish_reason: nothing indicates a failure.
        return False

    lowered = str(reason).lower()
    return any(marker in lowered for marker in ("abort", "badrequest"))


def pct(vals: list[float], q: float) -> float:
    """Pick the ``q``-quantile of ``vals`` by direct index lookup.

    The values are sorted ascending and the element at position
    ``int(len(vals) * q)`` is returned, with that position clamped to the
    valid index range. No interpolation is done. An empty input yields NaN.
    """
    ordered = sorted(vals)
    count = len(ordered)
    if count == 0:
        return float("nan")

    idx = int(count * q)
    if idx > count - 1:
        idx = count - 1
    if idx < 0:
        idx = 0
    return ordered[idx]


# Raw ``execution_mode`` value that figure 1 highlights as the fast path.
_FAST_PATH_MODE = "kvcache-direct-to-d-session"

# Display labels for the raw ``execution_mode`` values shown in figure 1.
# Modes missing from this table are plotted under their raw name.
_MODE_LABELS = {
    "kvcache-direct-to-d-session": "direct-to-D-session  (fast path)",
    "pd-router-d-session-reseed": "d-session-reseed  (mooncake reseed)",
    "pd-router-fallback-session-not-resident-session-cap":
        "fallback: session-not-resident + session-cap",
    "pd-router-fallback-session-not-resident-seed-filter-early-turn":
        "fallback: session-not-resident + seed-filter",
    "pd-router-turn1-seed": "turn1-seed  (first turn of each session)",
    "pd-router-fallback-no-d-capacity": "fallback: no-d-capacity",
    "pd-router-fallback-real-large-append-session-cap":
        "fallback: real-large-append",
    "pd-router-fallback-policy-no-bypass-session-cap":
        "fallback: policy-no-bypass",
    "pd-router-d-session-reseed-after-eviction":
        "d-session-reseed-after-eviction",
    "kvcache-centric": "kvcache-centric (admit-but-then-error)",
}

# X-axis group labels for figure 2. The percentages are fixed text inside the
# labels; they are NOT recomputed from the loaded data.
_GROUP_DIRECT = "KVC direct-to-D\n(fast path, 91.6%)"
_GROUP_NOT_RESIDENT = "KVC session-not-resident\n(misc, 2.3%)"
_GROUP_RESEED = "KVC reseed\n(slow path, 3.4%)"
_GROUP_NO_CAPACITY = "KVC no-d-capacity\n(fallback, 0.7%)"
_GROUP_OTHER = "KVC other\n(<2%)"
_GROUP_DP = "DP dp-colo-router\n(100%)"

# Left-to-right order of the KVC groups in figure 2 (fast -> slow).
# _GROUP_OTHER is deliberately absent: rows classified as "other" are not plotted.
_KVC_GROUP_ORDER = (
    _GROUP_DIRECT,
    _GROUP_NOT_RESIDENT,
    _GROUP_RESEED,
    _GROUP_NO_CAPACITY,
)


def _kvc_path_group(mode: str) -> str:
    """Map a raw KVC ``execution_mode`` onto its figure-2 group label."""
    if mode == _FAST_PATH_MODE:
        return _GROUP_DIRECT
    # Order matters: the first matching substring wins.
    if "reseed" in mode:
        return _GROUP_RESEED
    if "no-d-capacity" in mode:
        return _GROUP_NO_CAPACITY
    if "session-not-resident" in mode:
        return _GROUP_NOT_RESIDENT
    return _GROUP_OTHER


def _latency_stats(rows: list[dict]) -> dict[str, float]:
    """Summarise ``ttft_s`` / ``latency_s`` of ``rows``; None or missing values are skipped."""
    ttfts = [r["ttft_s"] for r in rows if r.get("ttft_s") is not None]
    lats = [r["latency_s"] for r in rows if r.get("latency_s") is not None]
    return {
        # n counts every row, including rows without a usable timing value.
        "n": len(rows),
        "ttft_p50": pct(ttfts, 0.50),
        "ttft_p99": pct(ttfts, 0.99),
        "lat_p50": pct(lats, 0.50),
    }


def _plot_mode_distribution(
    kvc: list[dict],
) -> tuple[list[str], list[int], list[float]]:
    """Draw figure 1 (execution_mode distribution) and return what was plotted.

    Args:
        kvc: Every KVC record, failures included, so that the percentages are
            relative to the total number of requests.

    Returns:
        ``(labels, counts, pcts)`` in plotted order (most common mode first).
    """
    total = len(kvc)
    ranked = Counter(r["execution_mode"] for r in kvc).most_common()
    labels = [_MODE_LABELS.get(mode, mode) for mode, _ in ranked]
    counts = [c for _, c in ranked]
    pcts = [c / total * 100 for c in counts]

    # Colour is decided from the raw mode rather than by substring-matching
    # the display label, so relabelling cannot silently flip a colour.
    colors = [
        "#2C8C2C" if mode == _FAST_PATH_MODE else "#D62728"
        for mode, _ in ranked
    ]

    fig, ax = plt.subplots(figsize=(11, 5.5))
    # Reversed positions put the most common mode at the top of the chart.
    y = np.arange(len(labels))[::-1]
    ax.barh(y, counts, color=colors, edgecolor="black", linewidth=0.5)
    ax.set_yticks(y)
    ax.set_yticklabels(labels, fontsize=10)
    ax.set_xscale("log")
    ax.set_xlabel("Request count (log scale)", fontsize=11)
    # Start just below 1: with a lower limit of exactly 1 a bar whose count is
    # 1 would have zero visible width on the log axis.
    ax.set_xlim(left=0.5)

    # Annotate count + percentage at the end of each bar.
    for yi, c, p in zip(y, counts, pcts):
        ax.text(c * 1.05, yi, f"{c}  ({p:.1f}%)",
                va="center", fontsize=9.5)

    ax.set_title(
        f"KVC v2 execution_mode distribution  (n = {total} total requests)\n"
        "green = fast path (direct-to-D), red = slow / fallback / failure paths",
        fontsize=12, pad=12,
    )
    ax.grid(axis="x", linestyle=":", alpha=0.4)
    ax.set_axisbelow(True)
    fig.tight_layout()
    out_path = OUT / "v2_execution_mode_distribution.png"
    fig.savefig(out_path, dpi=150)
    print(f"wrote {out_path}")
    plt.close(fig)
    return labels, counts, pcts


def _plot_path_latency(
    kvc_ok: list[dict],
    dp_ok: list[dict],
) -> tuple[list[str], dict[str, dict[str, float]]]:
    """Draw figure 2 (path-level latency) and return what was plotted.

    Args:
        kvc_ok: KVC records that are not failures.
        dp_ok: DP records that are not failures.

    Returns:
        ``(ordered_paths, path_stats)``: the x-axis group labels in plotted
        order and the statistics dictionary of each group.
    """
    groups = defaultdict(list)
    for r in kvc_ok:
        groups[_kvc_path_group(r["execution_mode"])].append(r)

    # Only KVC groups that actually occur are plotted; DP is always last.
    kvc_paths = [p for p in _KVC_GROUP_ORDER if p in groups]
    path_stats = {p: _latency_stats(groups[p]) for p in kvc_paths}
    path_stats[_GROUP_DP] = _latency_stats(dp_ok)
    ordered_paths = [*kvc_paths, _GROUP_DP]

    metrics = [("TTFT p50", "ttft_p50"), ("TTFT p99", "ttft_p99"), ("Latency p50", "lat_p50")]
    metric_colors = ["#1F77B4", "#FF7F0E", "#9467BD"]
    bar_w = 0.25
    fig, ax = plt.subplots(figsize=(12, 6))
    x = np.arange(len(ordered_paths))

    for i, (label, key) in enumerate(metrics):
        vals = [path_stats[p][key] for p in ordered_paths]
        # Three bars per group: centred on the tick at offsets -w, 0, +w.
        xs = x + (i - 1) * bar_w
        ax.bar(xs, vals, bar_w, label=label,
               color=metric_colors[i], edgecolor="black", linewidth=0.4)
        for xi, v in zip(xs, vals):
            # ``v > 0`` is False for NaN, so empty groups get no value label.
            if v > 0:
                text = f"{v*1000:.0f}ms" if v < 1 else f"{v:.2f}s"
                ax.text(xi, v * 1.10, text,
                        ha="center", va="bottom", fontsize=8.5, rotation=0)

    ax.set_yscale("log")
    ax.set_xticks(x)
    ax.set_xticklabels(ordered_paths, fontsize=9.5)
    ax.set_ylabel("Latency (seconds, log scale)", fontsize=11)
    ax.set_title(
        "Path-level latency: KVC v2 paths vs DP single-path baseline\n"
        "log y-axis · same SWE-Bench 50sess trace · ts=1 · 4× H100 80GB",
        fontsize=12, pad=12,
    )
    ax.legend(loc="upper left", fontsize=10, framealpha=0.95)
    ax.grid(axis="y", linestyle=":", alpha=0.4, which="both")
    ax.set_axisbelow(True)

    # Annotate sample counts under each path label. The lower y-limit must be
    # read after the bars and the log scale are in place.
    ymin = ax.get_ylim()[0]
    for xi, p in zip(x, ordered_paths):
        n = path_stats[p]["n"]
        ax.text(xi, ymin * 0.5, f"n={n}", ha="center", va="top",
                fontsize=8.5, color="#555")

    fig.tight_layout()
    out_path = OUT / "v2_path_level_latency.png"
    fig.savefig(out_path, dpi=150)
    print(f"wrote {out_path}")
    plt.close(fig)
    return ordered_paths, path_stats


def _print_summary(
    labels: list[str],
    counts: list[int],
    pcts: list[float],
    ordered_paths: list[str],
    path_stats: dict[str, dict[str, float]],
) -> None:
    """Print the numeric values behind both figures (for doc reference)."""
    print("\n=== Numeric values plotted ===")
    print("\nExecution mode counts (KVC v2):")
    for label, c, p in zip(labels, counts, pcts):
        print(f"  {c:>5}  ({p:>5.2f}%)  {label}")

    print("\nPath-level latency:")
    for p in ordered_paths:
        s = path_stats[p]
        nl = " | ".join([
            f"n={s['n']}",
            f"TTFT p50={s['ttft_p50']*1000:.1f}ms",
            f"TTFT p99={s['ttft_p99']*1000:.1f}ms",
            f"Lat p50={s['lat_p50']:.3f}s",
        ])
        # chr(10) is "\n": group labels are two-line strings, flattened here.
        print(f"  {p.replace(chr(10), ' '):<55}  {nl}")


def main() -> None:
    """Load both metrics files, render the two figures and print their numbers.

    Reads the files at ``KVC`` and ``DP``, writes
    ``v2_execution_mode_distribution.png`` and ``v2_path_level_latency.png``
    into ``OUT``, and prints the plotted values to stdout.
    """
    kvc = load(KVC)
    dp = load(DP)

    kvc_ok = [r for r in kvc if not is_failed(r)]
    dp_ok = [r for r in dp if not is_failed(r)]

    # Figure 1 (§3.1): uses ALL rows (incl. failures) so percentages match
    # the doc's 91.6%.
    labels, counts, pcts = _plot_mode_distribution(kvc)

    # Figure 2 (§3.2): latency statistics use successful requests only.
    ordered_paths, path_stats = _plot_path_latency(kvc_ok, dp_ok)

    _print_summary(labels, counts, pcts, ordered_paths, path_stats)


if __name__ == "__main__":
    main()