Add full latency grid (mean/p50/p90/p99 × TTFT/TPOT/E2E) as f6 companion
The headline f6_e2e_latency_bars only shows p90, hiding three regimes:
- mean: unified dominates (3.3s TTFT, 7.0s E2E vs sticky 5.6s / 12.1s)
- p50: sticky and unified are tied on first-turn TTFT (0.5s each) —
sticky's first turn of each session is free, after which queues
accumulate. Unified beats sticky everywhere else.
- p99: tail amplification reveals unified's biggest gap —
TTFT 42.3s vs sticky 74.1s; E2E 68.8s vs sticky 139.7s.
The 12-panel figure is the honest full picture; the 3-panel headline
stays for slide-friendly summary.
- analysis/characterization/window_1_results/raw_stats/{policy}.json:
cached ttft/tpot/e2e {mean,p50,p90,p99} pulled from dash0
/home/admin/cpfs/wjh/agentic-kv/outputs/b3_sweep_20260525_095043/
(b3_policy_comparison.json doesn't record mean, only percentiles).
- analysis/characterization/render_window1_figures.py:
new fig_b3_latency_full_grid renders the 4×3 grid from the cache.
- figs/f6_e2e_latency_full_grid.png: 12-panel companion.
- PAPER_OUTLINE.md §5.2: both figures embedded; main table column
renamed from "Hotspot idx" to "Worker p90 (median / max)" to match
the new metric convention.
Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
@@ -241,13 +241,17 @@ KV transfer 发生在触发该 migration 的 request 的 critical path 上,但
|
|||||||
|
|
||||||
### §5.2 End-to-end Performance
|
### §5.2 End-to-end Performance
|
||||||
|
|
||||||
**Figure 6: End-to-end performance** — ✅ (PARTIAL,缺 PD-disagg 列)
|
**Figure 6 (headline, p90 only)** — ✅ (PARTIAL,缺 PD-disagg 列)
|
||||||
|
|
||||||

|

|
||||||
|
|
||||||
> **🚧 TBD (NEW DATA)**:上图缺 `static PD-disagg` 那一列;EAR 列也是 TBD(需 migration validation)。要再补一张同样格式但包含全 6 个 baseline 的图。
|
**Figure 6 full (mean / p50 / p90 / p99 × TTFT / TPOT / E2E)** — ✅ 数据完备:
|
||||||
|
|
||||||
| Scheduler | TTFT p50 | TTFT p90 | TPOT p90 | APC | Hotspot idx | Wall-clock factor |
|

|
||||||
|
|
||||||
|
> **🚧 TBD (NEW DATA)**:两张图都缺 `static PD-disagg` 那一列;EAR 列也是 TBD(需 migration validation)。要再补同样格式但包含全 6 个 baseline 的版本。Headline 图用 p90 一行进 main paper,完整 grid 可进附录或 supplementary。
|
||||||
|
|
||||||
|
| Scheduler | TTFT p50 | TTFT p90 | TPOT p90 | APC | Worker p90 (median / max) | Wall-clock factor |
|
||||||
|---|---|---|---|---|---|---|
|
|---|---|---|---|---|---|---|
|
||||||
| load-balance | TBD | TBD | TBD | TBD | TBD | TBD |
|
| load-balance | TBD | TBD | TBD | TBD | TBD | TBD |
|
||||||
| LMetric | TBD | TBD | TBD | 56.9% | 6.53 | ~8x |
|
| LMetric | TBD | TBD | TBD | 56.9% | 6.53 | ~8x |
|
||||||
|
|||||||
@@ -89,6 +89,48 @@ def fig_b3_latency_bars(comp: dict, out: Path) -> None:
|
|||||||
plt.close(fig)
|
plt.close(fig)
|
||||||
|
|
||||||
|
|
||||||
|
def fig_b3_latency_full_grid(results_dir: Path, out: Path) -> None:
    """Draw a 4x3 grid of per-policy latency bars and save it to ``out``.

    Grid rows are the aggregates (mean / p50 / p90 / p99) and grid columns
    are the metrics (TTFT / TPOT / E2E). Each panel holds one bar per policy,
    annotated with its value to one decimal place.

    Input is one JSON file per policy at ``<results_dir>/raw_stats/<policy>.json``,
    indexed as ``stats[metric][aggregate]``. These cached files are used
    because they carry ``mean`` alongside the percentiles
    (b3_policy_comparison.json does not record mean).

    Policies listed in POLICY_ORDER that have no cache file are left out.
    When no policy has a cache file the function returns without writing
    anything.
    """
    stats_dir = results_dir / "raw_stats"

    # Keep POLICY_ORDER ordering, restricted to policies with a cached file.
    policies = [
        name for name in POLICY_ORDER if (stats_dir / f"{name}.json").exists()
    ]
    if not policies:
        return

    per_policy = {}
    for name in policies:
        per_policy[name] = json.loads((stats_dir / f"{name}.json").read_text())

    bar_colors = [POLICY_COLOR.get(name, "gray") for name in policies]

    # The aggregate name doubles as the row label and the JSON key.
    aggregates = ["mean", "p50", "p90", "p99"]
    # (column title, JSON key, multiplier applied before plotting).
    # TPOT is multiplied by 1000 to match its "ms" axis title; this presumes
    # the cached values are in seconds -- TODO confirm against the producer.
    metrics = [
        ("TTFT (s)", "ttft", 1.0),
        ("TPOT (ms)", "tpot", 1000.0),
        ("E2E (s)", "e2e", 1.0),
    ]

    fig, axes = plt.subplots(
        len(aggregates), len(metrics), figsize=(11, 11), sharex=True
    )
    for r, agg in enumerate(aggregates):
        for c, (title, metric, factor) in enumerate(metrics):
            panel = axes[r, c]
            heights = [per_policy[name][metric][agg] * factor for name in policies]
            panel.bar(
                policies,
                heights,
                color=bar_colors,
                edgecolor="black",
                linewidth=0.5,
            )
            # Print each bar's value just above its top edge.
            for x_pos, height in enumerate(heights):
                panel.text(
                    x_pos, height, f"{height:.1f}",
                    ha="center", va="bottom", fontsize=8,
                )
            # Only the left-most column names the aggregate and only the top
            # row names the metric, so the grid reads like a table.
            if c == 0:
                panel.set_ylabel(agg, fontsize=11)
            if r == 0:
                panel.set_title(title, fontsize=11)
            panel.grid(alpha=0.3, axis="y")
            panel.tick_params(axis="x", rotation=20, labelsize=9)
            # Extra vertical headroom so the value labels fit inside the axes.
            panel.margins(y=0.18)

    fig.suptitle("B3 latencies per policy — mean / p50 / p90 / p99")
    fig.tight_layout()
    fig.savefig(out, dpi=120)
    plt.close(fig)
|
||||||
|
|
||||||
|
|
||||||
def fig_b3_apc_vs_upper(comp: dict, upper: dict, out: Path) -> None:
|
def fig_b3_apc_vs_upper(comp: dict, upper: dict, out: Path) -> None:
|
||||||
by = {r["policy"]: r for r in comp["rows"]}
|
by = {r["policy"]: r for r in comp["rows"]}
|
||||||
pols = [p for p in POLICY_ORDER if p in by]
|
pols = [p for p in POLICY_ORDER if p in by]
|
||||||
@@ -307,6 +349,9 @@ def main() -> None:
|
|||||||
|
|
||||||
fig_b3_apc_vs_hotspot(comp, upper, args.out_dir / "fig_b3_apc_vs_hotspot.png")
|
fig_b3_apc_vs_hotspot(comp, upper, args.out_dir / "fig_b3_apc_vs_hotspot.png")
|
||||||
fig_b3_latency_bars(comp, args.out_dir / "fig_b3_latency_bars.png")
|
fig_b3_latency_bars(comp, args.out_dir / "fig_b3_latency_bars.png")
|
||||||
|
fig_b3_latency_full_grid(
|
||||||
|
args.results_dir, args.out_dir / "fig_b3_latency_full_grid.png"
|
||||||
|
)
|
||||||
fig_b3_apc_vs_upper(comp, upper, args.out_dir / "fig_b3_apc_vs_upper.png")
|
fig_b3_apc_vs_upper(comp, upper, args.out_dir / "fig_b3_apc_vs_upper.png")
|
||||||
fig_b3_failure_breakdown(comp, args.out_dir / "fig_b3_failure_breakdown.png")
|
fig_b3_failure_breakdown(comp, args.out_dir / "fig_b3_failure_breakdown.png")
|
||||||
fig_b3_per_worker_ttft(args.results_dir, comp,
|
fig_b3_per_worker_ttft(args.results_dir, comp,
|
||||||
|
|||||||
@@ -0,0 +1,23 @@
|
|||||||
|
{
|
||||||
|
"ttft": {
|
||||||
|
"count": 1214.0,
|
||||||
|
"mean": 5.111546324698484,
|
||||||
|
"p50": 0.9387824369769078,
|
||||||
|
"p90": 15.671339168207492,
|
||||||
|
"p99": 53.56683189840049
|
||||||
|
},
|
||||||
|
"tpot": {
|
||||||
|
"count": 1214.0,
|
||||||
|
"mean": 0.01757124870168204,
|
||||||
|
"p50": 0.008854518407308914,
|
||||||
|
"p90": 0.02122720699121469,
|
||||||
|
"p99": 0.18280341184277568
|
||||||
|
},
|
||||||
|
"e2e": {
|
||||||
|
"count": 1214.0,
|
||||||
|
"mean": 9.518126648903337,
|
||||||
|
"p50": 2.754255389008904,
|
||||||
|
"p90": 24.8209177934099,
|
||||||
|
"p99": 80.59924928059091
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -0,0 +1,23 @@
|
|||||||
|
{
|
||||||
|
"ttft": {
|
||||||
|
"count": 1214.0,
|
||||||
|
"mean": 6.268620166597892,
|
||||||
|
"p50": 1.2609447415161412,
|
||||||
|
"p90": 20.197147866390882,
|
||||||
|
"p99": 52.84285237012196
|
||||||
|
},
|
||||||
|
"tpot": {
|
||||||
|
"count": 1214.0,
|
||||||
|
"mean": 0.02406975794215626,
|
||||||
|
"p50": 0.009231464695980247,
|
||||||
|
"p90": 0.026851662550158716,
|
||||||
|
"p99": 0.3211630676943426
|
||||||
|
},
|
||||||
|
"e2e": {
|
||||||
|
"count": 1214.0,
|
||||||
|
"mean": 11.702793988628443,
|
||||||
|
"p50": 3.58568156149704,
|
||||||
|
"p90": 33.459180271782685,
|
||||||
|
"p99": 93.95083751494239
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -0,0 +1,23 @@
|
|||||||
|
{
|
||||||
|
"ttft": {
|
||||||
|
"count": 1214.0,
|
||||||
|
"mean": 5.55315460854824,
|
||||||
|
"p50": 0.5415176274836995,
|
||||||
|
"p90": 18.021296651283045,
|
||||||
|
"p99": 74.09429564891524
|
||||||
|
},
|
||||||
|
"tpot": {
|
||||||
|
"count": 1214.0,
|
||||||
|
"mean": 0.027834537397398284,
|
||||||
|
"p50": 0.008952101894096181,
|
||||||
|
"p90": 0.03641285916619554,
|
||||||
|
"p99": 0.35152006935195085
|
||||||
|
},
|
||||||
|
"e2e": {
|
||||||
|
"count": 1214.0,
|
||||||
|
"mean": 12.109200157184377,
|
||||||
|
"p50": 2.081947358994512,
|
||||||
|
"p90": 34.62592205510591,
|
||||||
|
"p99": 139.68334607904353
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -0,0 +1,23 @@
|
|||||||
|
{
|
||||||
|
"ttft": {
|
||||||
|
"count": 1213.0,
|
||||||
|
"mean": 3.2790960856202394,
|
||||||
|
"p50": 0.4997710260213353,
|
||||||
|
"p90": 7.345769894809922,
|
||||||
|
"p99": 42.34170345296613
|
||||||
|
},
|
||||||
|
"tpot": {
|
||||||
|
"count": 1213.0,
|
||||||
|
"mean": 0.012493800538265787,
|
||||||
|
"p50": 0.008079791456705824,
|
||||||
|
"p90": 0.017110194704198407,
|
||||||
|
"p99": 0.12655874612209597
|
||||||
|
},
|
||||||
|
"e2e": {
|
||||||
|
"count": 1213.0,
|
||||||
|
"mean": 6.961301470549104,
|
||||||
|
"p50": 1.7495028690318577,
|
||||||
|
"p90": 18.033410895219994,
|
||||||
|
"p99": 68.80023987947489
|
||||||
|
}
|
||||||
|
}
|
||||||
BIN
figs/f6_e2e_latency_full_grid.png
Normal file
BIN
figs/f6_e2e_latency_full_grid.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 110 KiB |
Reference in New Issue
Block a user