Adds a two-panel TTFT PDF comparison plot inserted as a new V2_DEEP_ANALYSIS
§3.4 ("TTFT 概率密度对比: bimodal vs unimodal"). Single-percentile numbers
(p50 / p99) hide the qualitative difference between the two distributions;
the figure makes it visible at a glance.
Left panel (linear x in [0, 0.6]s, body):
KVC has a sharp peak at ~40ms (the direct-to-D fast path).
DP has a broad peak around 50-200ms (full prefill per request).
Annotated with p50 and p90 markers for each side.
Right panel (log x in [10ms, 10s], full range):
KVC is visibly bimodal: a tall fast-path peak plus a small reseed tail
around 1-5s.
DP is unimodal: a single broad peak with a shorter tail.
Annotated with p99 callouts pointing to each tail.
KDE: scipy.stats.gaussian_kde. The body panel uses bw_method=0.15 (Scott's
rule oversmooths the sharp fast-path peak); the full-range panel fits the KDE
on log10(TTFT) with Scott's rule so the bimodal structure is visible.
Bundled:
- scripts/analysis/plot_ttft_pdf.py -- re-runnable when the v2 / DP data change.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
200 lines
8.6 KiB
Python
200 lines
8.6 KiB
Python
#!/usr/bin/env python3
|
||
"""Generate TTFT probability density curves: KVC 1P3D v2 vs 4-way DP CA.
|
||
|
||
Inputs:
|
||
outputs/qwen3-30b-tp1-ts1-migration-v2/kvc_1p3d_migration_v2_run1_metrics.jsonl
|
||
outputs/qwen3-30b-tp1-ts1-validation/dp4_metrics.jsonl
|
||
|
||
Outputs:
|
||
docs/figures/ttft_pdf_comparison.png -- two-panel figure:
|
||
left panel: linear x in [0, 0.6]s zoomed on the body
|
||
right panel: log x covering full range (0.01 -- 10 s)
|
||
Each KDE curve uses scipy.stats.gaussian_kde: bw_method=0.15 on the linear body panel, Scott's rule on log10(TTFT) for the full-range panel.
|
||
Aborted requests are excluded (same filter as metrics.py:_is_failed_request).
|
||
"""
|
||
|
||
from __future__ import annotations
|
||
|
||
import json
|
||
from pathlib import Path
|
||
|
||
import matplotlib.pyplot as plt
|
||
import numpy as np
|
||
from scipy.stats import gaussian_kde
|
||
|
||
# Two levels above the directory containing this script (the repository root
# if the script lives in scripts/analysis/ -- confirm if the file is moved).
ROOT = Path(__file__).resolve().parents[2]
|
||
# Input: per-request metrics (JSON Lines) for the "KVC 1P3D v2" series.
KVC = ROOT / "outputs/qwen3-30b-tp1-ts1-migration-v2/kvc_1p3d_migration_v2_run1_metrics.jsonl"
|
||
# Input: per-request metrics (JSON Lines) for the "4-way DP CA" series.
DP = ROOT / "outputs/qwen3-30b-tp1-ts1-validation/dp4_metrics.jsonl"
|
||
# Output: path of the two-panel comparison figure written by main().
OUT = ROOT / "docs/figures/ttft_pdf_comparison.png"
|
||
|
||
|
||
def load(p: Path) -> list[dict]:
    """Read a JSON Lines file and return one decoded value per record.

    Args:
        p: Path of the ``.jsonl`` file to read.

    Returns:
        The decoded JSON value of every non-blank line, in file order.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    # The context manager closes the handle deterministically; the previous
    # bare ``p.open()`` inside the comprehension never closed it explicitly.
    # JSON text is UTF-8, so do not depend on the locale's default encoding.
    with p.open(encoding="utf-8") as fh:
        # Whitespace-only lines (e.g. a trailing blank line) carry no record
        # and would otherwise make json.loads raise.
        return [json.loads(line) for line in fh if line.strip()]
|
||
|
||
|
||
def is_failed(r: dict) -> bool:
    """Report whether a request record should be excluded as failed.

    A record counts as failed when its ``error`` field is truthy, or when its
    ``finish_reason`` (compared case-insensitively, after ``str()``) contains
    ``abort`` or ``badrequest``.
    """
    if r.get("error"):
        return True

    reason = r.get("finish_reason")
    if not reason:
        # Missing / empty finish_reason is not treated as a failure.
        return False

    lowered = str(reason).lower()
    return any(marker in lowered for marker in ("abort", "badrequest"))
|
||
|
||
|
||
def pct(vals: np.ndarray, q: float) -> float:
    """Return the *q*-quantile of *vals* as a built-in ``float``.

    ``q`` is passed straight to ``np.quantile``, so it is a fraction
    (e.g. ``0.99`` for p99), not a percentage.
    """
    quantile = np.quantile(vals, q)
    return float(quantile)
|
||
|
||
|
||
_KVC_COLOR = "#1F77B4"  # blue
_DP_COLOR = "#D62728"  # red


def _valid_ttft(path: Path) -> np.ndarray:
    """Return the usable ``ttft_s`` samples (seconds) recorded in *path*.

    Failed records (per ``is_failed``) and records without ``ttft_s`` are
    dropped, as are values <= 1e-4 s.

    Raises:
        ValueError: If fewer than two samples remain (a KDE cannot be fitted).
    """
    samples = np.array(
        [
            rec["ttft_s"]
            for rec in load(path)
            if not is_failed(rec) and rec.get("ttft_s") is not None
        ],
        dtype=float,
    )
    # Trim absurdly small values (rare measurement artifacts) so the
    # log10-space KDE behaves.
    samples = samples[samples > 1e-4]
    if samples.size < 2:
        # gaussian_kde needs multiple points; fail early with the file name
        # instead of an opaque error from inside scipy.
        raise ValueError(
            f"{path}: need at least 2 valid TTFT samples to fit a KDE, "
            f"got {samples.size}"
        )
    return samples


def _draw_body_panel(ax, kvc_ttft: np.ndarray, dp_ttft: np.ndarray) -> None:
    """Draw the left panel: linear x in [0, 0.6] s, body of the distribution."""
    x_body = np.linspace(0.0, 0.6, 600)

    series = [
        (kvc_ttft, _KVC_COLOR, f"KVC 1P3D v2 (n={len(kvc_ttft)})"),
        (dp_ttft, _DP_COLOR, f"4-way DP CA (n={len(dp_ttft)})"),
    ]
    for samples, color, label in series:
        # KDE on linear TTFT values with a fixed bandwidth factor; evaluate
        # once and reuse the curve for both the line and the fill.
        density = gaussian_kde(samples, bw_method=0.15)(x_body)
        ax.plot(x_body, density, color=color, lw=2.5, label=label)
        ax.fill_between(x_body, density, alpha=0.20, color=color)

    # Vertical lines for p50 (solid) and p90 (dashed).
    for q, ls in [(0.50, "-"), (0.90, "--")]:
        ax.axvline(pct(kvc_ttft, q), color=_KVC_COLOR, ls=ls, alpha=0.55, lw=1.1)
        ax.axvline(pct(dp_ttft, q), color=_DP_COLOR, ls=ls, alpha=0.55, lw=1.1)

    # Text labels are staggered at fixed fractions of the y-range so the four
    # markers do not overlap each other.
    ymax = ax.get_ylim()[1]
    markers = [
        ("KVC p50", kvc_ttft, 0.50, 0.97, _KVC_COLOR),
        ("DP p50", dp_ttft, 0.50, 0.50, _DP_COLOR),
        ("KVC p90", kvc_ttft, 0.90, 0.30, _KVC_COLOR),
        ("DP p90", dp_ttft, 0.90, 0.18, _DP_COLOR),
    ]
    for title, samples, q, height, color in markers:
        value = pct(samples, q)
        ax.text(value, ymax * height,
                f"{title}\n{value*1000:.0f}ms",
                color=color, fontsize=9, va="top", ha="left",
                bbox=dict(facecolor="white", edgecolor="none", alpha=0.7, pad=2))

    ax.set_xlim(0, 0.6)
    ax.set_xlabel("TTFT (seconds, linear)", fontsize=11)
    ax.set_ylabel("Probability density", fontsize=11)
    ax.set_title("Body of distribution (TTFT ≤ 0.6 s)", fontsize=12, pad=10)
    ax.legend(loc="upper right", fontsize=10, framealpha=0.95)
    ax.grid(True, linestyle=":", alpha=0.4)
    ax.set_axisbelow(True)


def _draw_full_panel(ax, kvc_ttft: np.ndarray, dp_ttft: np.ndarray) -> None:
    """Draw the right panel: log x in [0.01, 10] s, full range incl. tail.

    The KDE is fitted on log10(ttft), so the plotted curve is a density per
    log10-second and integrates to 1 over log10 t.
    """
    kde_kvc_log = gaussian_kde(np.log10(kvc_ttft), bw_method="scott")
    kde_dp_log = gaussian_kde(np.log10(dp_ttft), bw_method="scott")
    log_x = np.linspace(np.log10(0.01), np.log10(10.0), 600)
    x_full = 10 ** log_x

    y_kvc = kde_kvc_log(log_x)
    y_dp = kde_dp_log(log_x)

    ax.plot(x_full, y_kvc, color=_KVC_COLOR, lw=2.5, label=f"KVC 1P3D v2 (n={len(kvc_ttft)})")
    ax.fill_between(x_full, y_kvc, alpha=0.20, color=_KVC_COLOR)
    ax.plot(x_full, y_dp, color=_DP_COLOR, lw=2.5, label=f"4-way DP CA (n={len(dp_ttft)})")
    ax.fill_between(x_full, y_dp, alpha=0.20, color=_DP_COLOR)

    ax.set_xscale("log")
    ax.set_xlim(0.01, 10.0)

    # Percentile markers: p50 solid, p90 dashed, p99 dotted.
    for q, ls in [(0.50, "-"), (0.90, "--"), (0.99, ":")]:
        ax.axvline(pct(kvc_ttft, q), color=_KVC_COLOR, ls=ls, alpha=0.55, lw=1.1)
        ax.axvline(pct(dp_ttft, q), color=_DP_COLOR, ls=ls, alpha=0.55, lw=1.1)

    # Annotate p99 specifically since this is the key reviewer-targeted callout.
    ymax = max(y_kvc.max(), y_dp.max())
    kvc_p99 = pct(kvc_ttft, 0.99)
    dp_p99 = pct(dp_ttft, 0.99)
    ax.annotate(f"KVC p99 = {kvc_p99:.2f}s\n(slow-path reseed tail)",
                xy=(kvc_p99, kde_kvc_log(np.log10(kvc_p99))[0]),
                xytext=(2.0, ymax * 0.65),
                fontsize=10, color=_KVC_COLOR, fontweight="bold",
                arrowprops=dict(arrowstyle="->", color=_KVC_COLOR, lw=1.0))
    ax.annotate(f"DP p99 = {dp_p99*1000:.0f}ms",
                xy=(dp_p99, kde_dp_log(np.log10(dp_p99))[0]),
                xytext=(0.025, ymax * 0.80),
                fontsize=10, color=_DP_COLOR, fontweight="bold",
                arrowprops=dict(arrowstyle="->", color=_DP_COLOR, lw=1.0))

    # Highlight the KVC bimodal structure.
    # NOTE(review): the percentages and the 0.05 s / 2.5 s anchor points below
    # are hard-coded, not derived from the loaded data -- re-check them
    # whenever the input metrics change.
    ax.annotate("KVC fast path\n(direct-to-D, 91.6%)",
                xy=(0.05, y_kvc[np.argmin(np.abs(x_full - 0.05))]),
                xytext=(0.012, ymax * 0.45),
                fontsize=9, color=_KVC_COLOR, style="italic",
                arrowprops=dict(arrowstyle="->", color=_KVC_COLOR, lw=0.7, alpha=0.6))
    ax.annotate("KVC slow path\n(reseed, ~3.4%)",
                xy=(2.5, y_kvc[np.argmin(np.abs(x_full - 2.5))]),
                xytext=(3.0, ymax * 0.30),
                fontsize=9, color=_KVC_COLOR, style="italic",
                arrowprops=dict(arrowstyle="->", color=_KVC_COLOR, lw=0.7, alpha=0.6))

    # Custom tick labels in time units (instead of 10^-2, 10^-1, 10^0, 10^1).
    ax.set_xticks([0.01, 0.05, 0.1, 0.5, 1.0, 5.0, 10.0])
    ax.set_xticklabels(["10ms", "50ms", "100ms", "500ms", "1s", "5s", "10s"])

    ax.set_xlabel("TTFT (log scale)", fontsize=11)
    ax.set_ylabel("Density (per log₁₀ s)", fontsize=11)
    ax.set_title("Full range (TTFT 10 ms – 10 s, log x)", fontsize=12, pad=10)
    ax.legend(loc="upper left", fontsize=10, framealpha=0.95)
    ax.grid(True, which="both", linestyle=":", alpha=0.4)
    ax.set_axisbelow(True)


def _print_summary(kvc_ttft: np.ndarray, dp_ttft: np.ndarray) -> None:
    """Print min / p10 / p50 / p90 / p99 / max (in ms) for doc cross-reference."""
    print("\n=== TTFT distribution summary ===")
    for name, arr in [("KVC v2", kvc_ttft), ("DP 4w", dp_ttft)]:
        print(f" {name} (n={len(arr)})")
        print(f" min={arr.min()*1000:.1f}ms p10={pct(arr,0.10)*1000:.1f}ms "
              f"p50={pct(arr,0.50)*1000:.1f}ms p90={pct(arr,0.90)*1000:.1f}ms "
              f"p99={pct(arr,0.99)*1000:.1f}ms max={arr.max()*1000:.1f}ms")


def main() -> None:
    """Render the two-panel TTFT density comparison and print summary stats.

    Reads the KVC and DP metrics files, drops failed requests, writes the
    figure to ``OUT`` and prints per-series percentiles to stdout.

    Raises:
        ValueError: If either input has fewer than two usable TTFT samples.
    """
    kvc_ttft = _valid_ttft(KVC)
    dp_ttft = _valid_ttft(DP)

    fig, axes = plt.subplots(1, 2, figsize=(16, 6.5))
    _draw_body_panel(axes[0], kvc_ttft, dp_ttft)
    _draw_full_panel(axes[1], kvc_ttft, dp_ttft)

    fig.suptitle(
        "TTFT probability density: KVC 1P3D v2 vs 4-way DP CA\n"
        "SWE-Bench 50sess trace · ts=1 · 4× H100 80GB · aborted/error requests excluded",
        fontsize=13, y=1.02,
    )
    plt.tight_layout()
    # savefig does not create missing directories; make sure the target
    # folder exists so a fresh checkout can run the script.
    OUT.parent.mkdir(parents=True, exist_ok=True)
    plt.savefig(OUT, dpi=150, bbox_inches="tight")
    print(f"wrote {OUT}")
    plt.close(fig)

    _print_summary(kvc_ttft, dp_ttft)
|
||
|
||
|
||
# Generate the figure only when run as a script, not when imported.
if __name__ == "__main__":
    main()
|