Add full latency grid (mean/p50/p90/p99 × TTFT/TPOT/E2E) as f6 companion

The headline f6_e2e_latency_bars only shows p90, hiding three regimes:
  - mean: unified dominates (3.3s TTFT, 7.0s E2E vs sticky 5.6s / 12.1s)
  - p50: sticky and unified are tied on first-turn TTFT (0.5s each) —
    sticky's first turn of each session is free, after which queues
    accumulate. Unified beats sticky everywhere else.
  - p99: tail amplification reveals unified's biggest gap —
    TTFT 42.3s vs sticky 74.1s; E2E 68.8s vs sticky 139.7s.

The 12-panel figure is the honest full picture; the 3-panel headline
stays for slide-friendly summary.

- analysis/characterization/window_1_results/raw_stats/{policy}.json:
  cached ttft/tpot/e2e {mean,p50,p90,p99} pulled from dash0
  /home/admin/cpfs/wjh/agentic-kv/outputs/b3_sweep_20260525_095043/
  (b3_policy_comparison.json doesn't record mean, only percentiles).
- analysis/characterization/render_window1_figures.py:
  new fig_b3_latency_full_grid renders the 4×3 grid from the cache.
- figs/f6_e2e_latency_full_grid.png: 12-panel companion.
- PAPER_OUTLINE.md §5.2: both figures embedded; main table column
  renamed from "Hotspot idx" to "Worker p90 (median / max)" to match
  the new metric convention.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
2026-05-27 11:15:18 +08:00
parent 5e6e98aee7
commit 922d79ac95
7 changed files with 145 additions and 4 deletions

View File

@@ -241,13 +241,17 @@ KV transfer 发生在触发该 migration 的 request 的 critical path 上,但
### §5.2 End-to-end Performance ### §5.2 End-to-end Performance
**Figure 6: End-to-end performance** (PARTIAL PD-disagg ) **Figure 6 (headline, p90 only)** (PARTIAL PD-disagg )
![F6 E2E latency bars — 5 policies](figs/f6_e2e_latency_bars.png) ![F6 E2E latency bars — 4 policies, p90 only](figs/f6_e2e_latency_bars.png)
> **🚧 TBD (NEW DATA)**:上图缺 `static PD-disagg` 那一列EAR 列也是 TBD需 migration validation。要再补一张同样格式但包含全 6 个 baseline 的图。 **Figure 6 full (mean / p50 / p90 / p99 × TTFT / TPOT / E2E)** 数据完备
| Scheduler | TTFT p50 | TTFT p90 | TPOT p90 | APC | Hotspot idx | Wall-clock factor | ![F6 full latency grid — 4 percentiles × 3 metrics](figs/f6_e2e_latency_full_grid.png)
> **🚧 TBD (NEW DATA)**:两张图都缺 `static PD-disagg` 那一列EAR 列也是 TBD需 migration validation。要再补同样格式但包含全 6 个 baseline 的版本。Headline 图用 p90 一行进 main paper完整 grid 可进附录或 supplementary。
| Scheduler | TTFT p50 | TTFT p90 | TPOT p90 | APC | Worker p90 (median / max) | Wall-clock factor |
|---|---|---|---|---|---|---| |---|---|---|---|---|---|---|
| load-balance | TBD | TBD | TBD | TBD | TBD | TBD | | load-balance | TBD | TBD | TBD | TBD | TBD | TBD |
| LMetric | TBD | TBD | TBD | 56.9% | 6.53 | ~8x | | LMetric | TBD | TBD | TBD | 56.9% | 6.53 | ~8x |

View File

@@ -89,6 +89,48 @@ def fig_b3_latency_bars(comp: dict, out: Path) -> None:
plt.close(fig) plt.close(fig)
def fig_b3_latency_full_grid(results_dir: Path, out: Path) -> None:
    """Draw the full per-policy latency grid and save it to ``out``.

    The figure has one row per aggregate (mean / p50 / p90 / p99) and one
    column per metric (TTFT / TPOT / E2E), with one bar per policy in each
    panel. Values come from the per-policy ``raw_stats/<policy>.json``
    caches under ``results_dir``, which carry the mean alongside the
    percentiles (b3_policy_comparison.json does not record mean).

    Policies listed in POLICY_ORDER that have no cache file are left out.
    When no cache file exists at all, the function returns without
    writing anything.
    """
    cache_dir = results_dir / "raw_stats"
    policies = []
    for name in POLICY_ORDER:
        if (cache_dir / f"{name}.json").exists():
            policies.append(name)
    if len(policies) == 0:
        return

    summaries = {}
    for name in policies:
        summaries[name] = json.loads((cache_dir / f"{name}.json").read_text())

    # The aggregate key in the JSON doubles as the row's y-axis label.
    aggregates = ("mean", "p50", "p90", "p99")
    # (column title, JSON metric key, multiplier applied before plotting);
    # TPOT is multiplied by 1000 to match its "(ms)" column title.
    metrics = (
        ("TTFT (s)", "ttft", 1.0),
        ("TPOT (ms)", "tpot", 1000.0),
        ("E2E (s)", "e2e", 1.0),
    )
    # Bar colours depend only on the policy list, so build them once.
    bar_colors = [POLICY_COLOR.get(name, "gray") for name in policies]

    fig, axes = plt.subplots(
        len(aggregates), len(metrics), figsize=(11, 11), sharex=True
    )
    for row_idx, aggregate in enumerate(aggregates):
        for col_idx, (title, metric, factor) in enumerate(metrics):
            panel = axes[row_idx][col_idx]
            heights = [
                summaries[name][metric][aggregate] * factor
                for name in policies
            ]
            panel.bar(
                policies,
                heights,
                color=bar_colors,
                edgecolor="black",
                linewidth=0.5,
            )
            # Print each bar's value just above its top edge.
            for pos, height in enumerate(heights):
                panel.text(
                    pos, height, f"{height:.1f}",
                    ha="center", va="bottom", fontsize=8,
                )
            # Only the left column names the row; only the top row names
            # the column.
            if col_idx == 0:
                panel.set_ylabel(aggregate, fontsize=11)
            if row_idx == 0:
                panel.set_title(title, fontsize=11)
            panel.grid(alpha=0.3, axis="y")
            panel.tick_params(axis="x", rotation=20, labelsize=9)
            # Extra vertical margin leaves room for the value labels.
            panel.margins(y=0.18)

    fig.suptitle("B3 latencies per policy — mean / p50 / p90 / p99")
    fig.tight_layout()
    fig.savefig(out, dpi=120)
    plt.close(fig)
def fig_b3_apc_vs_upper(comp: dict, upper: dict, out: Path) -> None: def fig_b3_apc_vs_upper(comp: dict, upper: dict, out: Path) -> None:
by = {r["policy"]: r for r in comp["rows"]} by = {r["policy"]: r for r in comp["rows"]}
pols = [p for p in POLICY_ORDER if p in by] pols = [p for p in POLICY_ORDER if p in by]
@@ -307,6 +349,9 @@ def main() -> None:
fig_b3_apc_vs_hotspot(comp, upper, args.out_dir / "fig_b3_apc_vs_hotspot.png") fig_b3_apc_vs_hotspot(comp, upper, args.out_dir / "fig_b3_apc_vs_hotspot.png")
fig_b3_latency_bars(comp, args.out_dir / "fig_b3_latency_bars.png") fig_b3_latency_bars(comp, args.out_dir / "fig_b3_latency_bars.png")
fig_b3_latency_full_grid(
args.results_dir, args.out_dir / "fig_b3_latency_full_grid.png"
)
fig_b3_apc_vs_upper(comp, upper, args.out_dir / "fig_b3_apc_vs_upper.png") fig_b3_apc_vs_upper(comp, upper, args.out_dir / "fig_b3_apc_vs_upper.png")
fig_b3_failure_breakdown(comp, args.out_dir / "fig_b3_failure_breakdown.png") fig_b3_failure_breakdown(comp, args.out_dir / "fig_b3_failure_breakdown.png")
fig_b3_per_worker_ttft(args.results_dir, comp, fig_b3_per_worker_ttft(args.results_dir, comp,

View File

@@ -0,0 +1,23 @@
{
"ttft": {
"count": 1214.0,
"mean": 5.111546324698484,
"p50": 0.9387824369769078,
"p90": 15.671339168207492,
"p99": 53.56683189840049
},
"tpot": {
"count": 1214.0,
"mean": 0.01757124870168204,
"p50": 0.008854518407308914,
"p90": 0.02122720699121469,
"p99": 0.18280341184277568
},
"e2e": {
"count": 1214.0,
"mean": 9.518126648903337,
"p50": 2.754255389008904,
"p90": 24.8209177934099,
"p99": 80.59924928059091
}
}

View File

@@ -0,0 +1,23 @@
{
"ttft": {
"count": 1214.0,
"mean": 6.268620166597892,
"p50": 1.2609447415161412,
"p90": 20.197147866390882,
"p99": 52.84285237012196
},
"tpot": {
"count": 1214.0,
"mean": 0.02406975794215626,
"p50": 0.009231464695980247,
"p90": 0.026851662550158716,
"p99": 0.3211630676943426
},
"e2e": {
"count": 1214.0,
"mean": 11.702793988628443,
"p50": 3.58568156149704,
"p90": 33.459180271782685,
"p99": 93.95083751494239
}
}

View File

@@ -0,0 +1,23 @@
{
"ttft": {
"count": 1214.0,
"mean": 5.55315460854824,
"p50": 0.5415176274836995,
"p90": 18.021296651283045,
"p99": 74.09429564891524
},
"tpot": {
"count": 1214.0,
"mean": 0.027834537397398284,
"p50": 0.008952101894096181,
"p90": 0.03641285916619554,
"p99": 0.35152006935195085
},
"e2e": {
"count": 1214.0,
"mean": 12.109200157184377,
"p50": 2.081947358994512,
"p90": 34.62592205510591,
"p99": 139.68334607904353
}
}

View File

@@ -0,0 +1,23 @@
{
"ttft": {
"count": 1213.0,
"mean": 3.2790960856202394,
"p50": 0.4997710260213353,
"p90": 7.345769894809922,
"p99": 42.34170345296613
},
"tpot": {
"count": 1213.0,
"mean": 0.012493800538265787,
"p50": 0.008079791456705824,
"p90": 0.017110194704198407,
"p99": 0.12655874612209597
},
"e2e": {
"count": 1213.0,
"mean": 6.961301470549104,
"p50": 1.7495028690318577,
"p90": 18.033410895219994,
"p99": 68.80023987947489
}
}

Binary file not shown.

After

Width:  |  Height:  |  Size: 110 KiB