Files
agentic-pd-hybrid/scripts/analysis/plot_ttft_pdf.py
kzlin c5519066de docs(kvc): add TTFT probability density figure (KVC v2 vs 4DP)
Adds a two-panel TTFT PDF comparison plot inserted as a new V2_DEEP_ANALYSIS
§3.4 ("TTFT 概率密度对比: bimodal vs unimodal"). Single-percentile numbers
(p50 / p99) hide the qualitative difference between the two distributions;
the figure makes it visible at a glance.

Left panel (linear x in [0, 0.6]s, body):
  KVC has a sharp peak at ~40ms (the direct-to-D fast path).
  DP has a broad peak around 50-200ms (full prefill per request).
  Annotated with p50 and p90 markers for each side.

Right panel (log x in [10ms, 10s], full range):
  KVC is visibly bimodal: a tall fast-path peak plus a small reseed tail
  around 1-5s.
  DP is unimodal: a single broad peak with shorter tail.
  Annotated with p99 callouts pointing to each tail.

KDE: scipy.stats.gaussian_kde, bandwidth=0.15 for the body (Scott's rule
oversmooths the sharp fast-path peak), log10-transformed for the full-range
panel so the bimodal structure is visible.

Bundled:
- scripts/analysis/plot_ttft_pdf.py -- re-runnable whenever the v2 / DP data change.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-11 17:46:27 +08:00

200 lines
8.6 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/env python3
"""Generate TTFT probability density curves: KVC 1P3D v2 vs 4-way DP CA.
Inputs:
outputs/qwen3-30b-tp1-ts1-migration-v2/kvc_1p3d_migration_v2_run1_metrics.jsonl
outputs/qwen3-30b-tp1-ts1-validation/dp4_metrics.jsonl
Outputs:
docs/figures/ttft_pdf_comparison.png -- two-panel figure:
left panel: linear x in [0, 0.6]s zoomed on the body
right panel: log x covering full range (0.01 -- 10 s)
Both panels use scipy.stats.gaussian_kde: the left panel with a fixed bandwidth
factor (bw_method=0.15), the right panel on log10(TTFT) with Scott's rule.
Aborted requests are excluded (same filter as metrics.py:_is_failed_request).
"""
from __future__ import annotations
import json
from pathlib import Path
import matplotlib.pyplot as plt
import numpy as np
from scipy.stats import gaussian_kde
# Directory two levels above the one that contains this script (parents[0] is
# the script's own directory). All paths below are resolved relative to it.
ROOT = Path(__file__).resolve().parents[2]
# Input: per-request metrics (JSON Lines) for the KVC 1P3D migration-v2 run.
KVC = ROOT / "outputs/qwen3-30b-tp1-ts1-migration-v2/kvc_1p3d_migration_v2_run1_metrics.jsonl"
# Input: per-request metrics (JSON Lines) for the 4-way DP run.
DP = ROOT / "outputs/qwen3-30b-tp1-ts1-validation/dp4_metrics.jsonl"
# Output: where the rendered two-panel figure is written.
OUT = ROOT / "docs/figures/ttft_pdf_comparison.png"
def load(p: Path) -> list[dict]:
    """Read a JSON Lines file and return one parsed value per non-blank line.

    Args:
        p: Path of the ``.jsonl`` file to read.

    Returns:
        The parsed records, in file order. An empty file yields ``[]``.

    Raises:
        OSError: if the file cannot be opened.
        json.JSONDecodeError: if a non-blank line is not valid JSON.
    """
    # The context manager closes the handle deterministically; the encoding is
    # pinned so parsing does not depend on the platform's locale default.
    with p.open(encoding="utf-8") as fh:
        # Whitespace-only lines (e.g. a stray blank line at EOF) carry no
        # record and would otherwise make json.loads raise.
        return [json.loads(line) for line in fh if line.strip()]
def is_failed(r: dict) -> bool:
    """Tell whether a metrics record describes a failed request.

    A record counts as failed when its ``error`` field is truthy, or when the
    string form of its ``finish_reason`` contains ``abort`` or ``badrequest``
    (case-insensitive).
    """
    if r.get("error"):
        return True

    reason = r.get("finish_reason")
    if not reason:
        return False

    # str() lets non-string finish reasons be searched as well.
    lowered = str(reason).lower()
    return any(marker in lowered for marker in ("abort", "badrequest"))
def pct(vals: np.ndarray, q: float) -> float:
    """Return the ``q``-quantile of ``vals`` (``q`` in [0, 1]) as a Python float."""
    quantile = np.quantile(vals, q)
    return float(quantile)
def main() -> None:
    """Render the two-panel TTFT density figure and print summary percentiles.

    Loads the KVC and DP metrics files, drops failed requests and records
    without a ``ttft_s`` value, then draws:

    * left panel  -- KDE on linear TTFT, shown for x in [0, 0.6] s, with
      p50 / p90 markers;
    * right panel -- KDE on log10(TTFT), shown for x in [10 ms, 10 s], with
      p50 / p90 / p99 markers and annotations.

    The figure is written to ``OUT`` (its parent directory is created if
    needed) and min / p10 / p50 / p90 / p99 / max are printed for both series.

    Raises:
        ValueError: if either series has fewer than two usable TTFT samples,
            in which case no KDE can be fitted.
    """
    kvc = [r for r in load(KVC) if not is_failed(r)]
    dp = [r for r in load(DP) if not is_failed(r)]
    kvc_ttft = np.array([r["ttft_s"] for r in kvc if r.get("ttft_s") is not None])
    dp_ttft = np.array([r["ttft_s"] for r in dp if r.get("ttft_s") is not None])
    # Trim absurdly small zeros (rare measurement artifacts) so log KDE behaves.
    kvc_ttft = kvc_ttft[kvc_ttft > 1e-4]
    dp_ttft = dp_ttft[dp_ttft > 1e-4]

    # Fail early with a readable message instead of an opaque error from
    # gaussian_kde / np.quantile further down.
    for label, arr in (("KVC", kvc_ttft), ("DP", dp_ttft)):
        if arr.size < 2:
            raise ValueError(
                f"{label}: need at least 2 TTFT samples > 1e-4 s to fit a KDE, "
                f"got {arr.size}"
            )

    KVC_COLOR = "#1F77B4"  # blue
    DP_COLOR = "#D62728"  # red
    fig, axes = plt.subplots(1, 2, figsize=(16, 6.5))

    # ------------------------------------------------------------------
    # Left panel: linear x ∈ [0, 0.6]s -- body of the distribution
    # ------------------------------------------------------------------
    ax = axes[0]
    x_body = np.linspace(0.0, 0.6, 600)
    # The KDE is fitted on ALL samples (tail included); only the evaluation
    # grid is restricted to the body. A fixed bandwidth factor is used here
    # rather than Scott's rule.
    kde_kvc_lin = gaussian_kde(kvc_ttft, bw_method=0.15)
    kde_dp_lin = gaussian_kde(dp_ttft, bw_method=0.15)
    # Evaluate each curve once and reuse it for both the line and the fill.
    y_kvc_body = kde_kvc_lin(x_body)
    y_dp_body = kde_dp_lin(x_body)
    ax.plot(x_body, y_kvc_body,
            color=KVC_COLOR, lw=2.5, label=f"KVC 1P3D v2 (n={len(kvc_ttft)})")
    ax.fill_between(x_body, y_kvc_body, alpha=0.20, color=KVC_COLOR)
    ax.plot(x_body, y_dp_body,
            color=DP_COLOR, lw=2.5, label=f"4-way DP CA (n={len(dp_ttft)})")
    ax.fill_between(x_body, y_dp_body, alpha=0.20, color=DP_COLOR)
    # Vertical lines for p50, p90
    for q, ls in [(0.50, "-"), (0.90, "--")]:
        ax.axvline(pct(kvc_ttft, q), color=KVC_COLOR, ls=ls, alpha=0.55, lw=1.1)
        ax.axvline(pct(dp_ttft, q), color=DP_COLOR, ls=ls, alpha=0.55, lw=1.1)
    # Read the y-limit only after the curves are drawn so labels scale with them.
    ymax = ax.get_ylim()[1]
    # (series, samples, quantile, height as a fraction of ymax, color); the
    # heights are staggered so the four boxes do not overlap.
    body_labels = [
        ("KVC", kvc_ttft, 0.50, 0.97, KVC_COLOR),
        ("DP", dp_ttft, 0.50, 0.50, DP_COLOR),
        ("KVC", kvc_ttft, 0.90, 0.30, KVC_COLOR),
        ("DP", dp_ttft, 0.90, 0.18, DP_COLOR),
    ]
    for series, arr, q, y_frac, color in body_labels:
        x_q = pct(arr, q)
        ax.text(x_q, ymax * y_frac,
                f"{series} p{q * 100:.0f}\n{x_q * 1000:.0f}ms",
                color=color, fontsize=9, va="top", ha="left",
                bbox=dict(facecolor="white", edgecolor="none", alpha=0.7, pad=2))
    ax.set_xlim(0, 0.6)
    ax.set_xlabel("TTFT (seconds, linear)", fontsize=11)
    ax.set_ylabel("Probability density", fontsize=11)
    ax.set_title("Body of distribution (TTFT ≤ 0.6 s)", fontsize=12, pad=10)
    ax.legend(loc="upper right", fontsize=10, framealpha=0.95)
    ax.grid(True, linestyle=":", alpha=0.4)
    ax.set_axisbelow(True)

    # ------------------------------------------------------------------
    # Right panel: log x ∈ [0.01, 10]s -- full range incl. tail
    # PDF on log-x: we plot density vs log10(t) so the curve integrates
    # to 1 over log space (standard "log-density" presentation).
    # ------------------------------------------------------------------
    ax = axes[1]
    # KDE on log10(ttft) so the resulting curve integrates to 1 over log10 t
    kde_kvc_log = gaussian_kde(np.log10(kvc_ttft), bw_method="scott")
    kde_dp_log = gaussian_kde(np.log10(dp_ttft), bw_method="scott")
    log_x = np.linspace(np.log10(0.01), np.log10(10.0), 600)
    x_full = 10 ** log_x
    y_kvc = kde_kvc_log(log_x)
    y_dp = kde_dp_log(log_x)
    ax.plot(x_full, y_kvc, color=KVC_COLOR, lw=2.5, label=f"KVC 1P3D v2 (n={len(kvc_ttft)})")
    ax.fill_between(x_full, y_kvc, alpha=0.20, color=KVC_COLOR)
    ax.plot(x_full, y_dp, color=DP_COLOR, lw=2.5, label=f"4-way DP CA (n={len(dp_ttft)})")
    ax.fill_between(x_full, y_dp, alpha=0.20, color=DP_COLOR)
    ax.set_xscale("log")
    ax.set_xlim(0.01, 10.0)
    # Percentile markers
    for q, ls in [(0.50, "-"), (0.90, "--"), (0.99, ":")]:
        ax.axvline(pct(kvc_ttft, q), color=KVC_COLOR, ls=ls, alpha=0.55, lw=1.1)
        ax.axvline(pct(dp_ttft, q), color=DP_COLOR, ls=ls, alpha=0.55, lw=1.1)
    # Annotate p99 specifically since this is the key reviewer-targeted callout
    ymax = max(y_kvc.max(), y_dp.max())
    kvc_p99 = pct(kvc_ttft, 0.99)
    dp_p99 = pct(dp_ttft, 0.99)
    ax.annotate(f"KVC p99 = {kvc_p99:.2f}s\n(slow-path reseed tail)",
                xy=(kvc_p99, kde_kvc_log(np.log10(kvc_p99))[0]),
                xytext=(2.0, ymax * 0.65),
                fontsize=10, color=KVC_COLOR, fontweight="bold",
                arrowprops=dict(arrowstyle="->", color=KVC_COLOR, lw=1.0))
    ax.annotate(f"DP p99 = {dp_p99*1000:.0f}ms",
                xy=(dp_p99, kde_dp_log(np.log10(dp_p99))[0]),
                xytext=(0.025, ymax * 0.80),
                fontsize=10, color=DP_COLOR, fontweight="bold",
                arrowprops=dict(arrowstyle="->", color=DP_COLOR, lw=1.0))
    # Highlight the KVC bimodal structure.
    # NOTE(review): the 91.6% / ~3.4% shares and the 0.05 s / 2.5 s arrow
    # targets are hard-coded, not derived from the loaded data -- update them
    # by hand when the input files change.
    ax.annotate("KVC fast path\n(direct-to-D, 91.6%)",
                xy=(0.05, y_kvc[np.argmin(np.abs(x_full - 0.05))]),
                xytext=(0.012, ymax * 0.45),
                fontsize=9, color=KVC_COLOR, style="italic",
                arrowprops=dict(arrowstyle="->", color=KVC_COLOR, lw=0.7, alpha=0.6))
    ax.annotate("KVC slow path\n(reseed, ~3.4%)",
                xy=(2.5, y_kvc[np.argmin(np.abs(x_full - 2.5))]),
                xytext=(3.0, ymax * 0.30),
                fontsize=9, color=KVC_COLOR, style="italic",
                arrowprops=dict(arrowstyle="->", color=KVC_COLOR, lw=0.7, alpha=0.6))
    # Custom tick labels in seconds (instead of 10^-2, 10^-1, 10^0, 10^1)
    ax.set_xticks([0.01, 0.05, 0.1, 0.5, 1.0, 5.0, 10.0])
    ax.set_xticklabels(["10ms", "50ms", "100ms", "500ms", "1s", "5s", "10s"])
    ax.set_xlabel("TTFT (log scale)", fontsize=11)
    ax.set_ylabel("Density (per log₁₀ s)", fontsize=11)
    # The range separator was missing from this title ("10 ms 10 s").
    ax.set_title("Full range (TTFT 10 ms – 10 s, log x)", fontsize=12, pad=10)
    ax.legend(loc="upper left", fontsize=10, framealpha=0.95)
    ax.grid(True, which="both", linestyle=":", alpha=0.4)
    ax.set_axisbelow(True)

    fig.suptitle(
        "TTFT probability density: KVC 1P3D v2 vs 4-way DP CA\n"
        "SWE-Bench 50sess trace · ts=1 · 4× H100 80GB · aborted/error requests excluded",
        fontsize=13, y=1.02,
    )
    plt.tight_layout()
    # savefig does not create missing directories; make sure the target exists.
    OUT.parent.mkdir(parents=True, exist_ok=True)
    plt.savefig(OUT, dpi=150, bbox_inches="tight")
    print(f"wrote {OUT}")
    plt.close(fig)

    # ------------------------------------------------------------------
    # Print summary stats for doc cross-reference
    # ------------------------------------------------------------------
    print("\n=== TTFT distribution summary ===")
    for name, arr in [("KVC v2", kvc_ttft), ("DP 4w", dp_ttft)]:
        print(f" {name} (n={len(arr)})")
        print(f" min={arr.min()*1000:.1f}ms p10={pct(arr,0.10)*1000:.1f}ms "
              f"p50={pct(arr,0.50)*1000:.1f}ms p90={pct(arr,0.90)*1000:.1f}ms "
              f"p99={pct(arr,0.99)*1000:.1f}ms max={arr.max()*1000:.1f}ms")
# Script entry point: build the figure only when this file is executed
# directly, not when it is imported.
if __name__ == "__main__":
    main()