def fig_b3_latency_full_grid(results_dir: Path, out: Path) -> None:
    """Render a grid of per-policy latency bar charts and save it to *out*.

    The grid has one row per aggregate (mean / p50 / p90 / p99) and one
    column per metric (TTFT / TPOT / E2E); every panel holds one bar per
    policy, annotated with its value.

    Data is read from ``<results_dir>/raw_stats/<policy>.json``. These
    per-policy files are used because they expose ``mean`` alongside the
    percentiles (b3_policy_comparison.json does not record mean). Each file
    must map ``ttft`` / ``tpot`` / ``e2e`` to a dict containing ``mean``,
    ``p50``, ``p90`` and ``p99``.

    Policies are drawn in ``POLICY_ORDER``; a policy without a stats file is
    left out. If no stats file exists at all, the function returns without
    writing anything.

    Args:
        results_dir: Directory that contains the ``raw_stats`` sub-directory.
        out: Destination path of the rendered image.

    Raises:
        KeyError: A stats file lacks one of the expected metric or
            aggregate keys.
        json.JSONDecodeError: A stats file is not valid JSON.
    """
    raw_dir = results_dir / "raw_stats"
    pols = [p for p in POLICY_ORDER if (raw_dir / f"{p}.json").exists()]
    if not pols:
        return

    # JSON is UTF-8 by specification; do not depend on the locale default.
    stats = {
        p: json.loads((raw_dir / f"{p}.json").read_text(encoding="utf-8"))
        for p in pols
    }

    # The aggregate key doubles as the row label.
    aggregates = ("mean", "p50", "p90", "p99")
    # (column title, key in the stats file, factor applied before plotting)
    # TPOT is multiplied by 1000 to match its "(ms)" column title.
    metrics = (
        ("TTFT (s)", "ttft", 1.0),
        ("TPOT (ms)", "tpot", 1000.0),
        ("E2E (s)", "e2e", 1.0),
    )
    # Bar colours depend only on the policy list, so build them once.
    colors = [POLICY_COLOR.get(p, "gray") for p in pols]

    # squeeze=False keeps `axes` two-dimensional whatever the grid size is.
    fig, axes = plt.subplots(
        len(aggregates),
        len(metrics),
        figsize=(11, 11),
        sharex=True,
        squeeze=False,
    )
    try:
        for i, agg_key in enumerate(aggregates):
            for j, (col_label, metric_key, scale) in enumerate(metrics):
                ax = axes[i][j]
                vals = [stats[p][metric_key][agg_key] * scale for p in pols]
                ax.bar(pols, vals, color=colors,
                       edgecolor="black", linewidth=0.5)
                for k, v in enumerate(vals):
                    ax.text(k, v, f"{v:.1f}",
                            ha="center", va="bottom", fontsize=8)
                if j == 0:
                    ax.set_ylabel(agg_key, fontsize=11)
                if i == 0:
                    ax.set_title(col_label, fontsize=11)
                ax.grid(alpha=0.3, axis="y")
                ax.tick_params(axis="x", rotation=20, labelsize=9)
                # Head-room so the value labels are not clipped at the top.
                ax.margins(y=0.18)
        fig.suptitle("B3 latencies per policy — mean / p50 / p90 / p99")
        fig.tight_layout()
        fig.savefig(out, dpi=120)
    finally:
        # Release the figure even when plotting or saving fails; pyplot
        # otherwise keeps it registered for the lifetime of the process.
        plt.close(fig)
b/analysis/characterization/window_1_results/raw_stats/lmetric.json new file mode 100644 index 0000000..9ecfb29 --- /dev/null +++ b/analysis/characterization/window_1_results/raw_stats/lmetric.json @@ -0,0 +1,23 @@ +{ + "ttft": { + "count": 1214.0, + "mean": 5.111546324698484, + "p50": 0.9387824369769078, + "p90": 15.671339168207492, + "p99": 53.56683189840049 + }, + "tpot": { + "count": 1214.0, + "mean": 0.01757124870168204, + "p50": 0.008854518407308914, + "p90": 0.02122720699121469, + "p99": 0.18280341184277568 + }, + "e2e": { + "count": 1214.0, + "mean": 9.518126648903337, + "p50": 2.754255389008904, + "p90": 24.8209177934099, + "p99": 80.59924928059091 + } +} diff --git a/analysis/characterization/window_1_results/raw_stats/load_only.json b/analysis/characterization/window_1_results/raw_stats/load_only.json new file mode 100644 index 0000000..a1c38e0 --- /dev/null +++ b/analysis/characterization/window_1_results/raw_stats/load_only.json @@ -0,0 +1,23 @@ +{ + "ttft": { + "count": 1214.0, + "mean": 6.268620166597892, + "p50": 1.2609447415161412, + "p90": 20.197147866390882, + "p99": 52.84285237012196 + }, + "tpot": { + "count": 1214.0, + "mean": 0.02406975794215626, + "p50": 0.009231464695980247, + "p90": 0.026851662550158716, + "p99": 0.3211630676943426 + }, + "e2e": { + "count": 1214.0, + "mean": 11.702793988628443, + "p50": 3.58568156149704, + "p90": 33.459180271782685, + "p99": 93.95083751494239 + } +} diff --git a/analysis/characterization/window_1_results/raw_stats/sticky.json b/analysis/characterization/window_1_results/raw_stats/sticky.json new file mode 100644 index 0000000..02ba4dc --- /dev/null +++ b/analysis/characterization/window_1_results/raw_stats/sticky.json @@ -0,0 +1,23 @@ +{ + "ttft": { + "count": 1214.0, + "mean": 5.55315460854824, + "p50": 0.5415176274836995, + "p90": 18.021296651283045, + "p99": 74.09429564891524 + }, + "tpot": { + "count": 1214.0, + "mean": 0.027834537397398284, + "p50": 0.008952101894096181, + "p90": 0.03641285916619554, 
+ "p99": 0.35152006935195085 + }, + "e2e": { + "count": 1214.0, + "mean": 12.109200157184377, + "p50": 2.081947358994512, + "p90": 34.62592205510591, + "p99": 139.68334607904353 + } +} diff --git a/analysis/characterization/window_1_results/raw_stats/unified.json b/analysis/characterization/window_1_results/raw_stats/unified.json new file mode 100644 index 0000000..d9e66bc --- /dev/null +++ b/analysis/characterization/window_1_results/raw_stats/unified.json @@ -0,0 +1,23 @@ +{ + "ttft": { + "count": 1213.0, + "mean": 3.2790960856202394, + "p50": 0.4997710260213353, + "p90": 7.345769894809922, + "p99": 42.34170345296613 + }, + "tpot": { + "count": 1213.0, + "mean": 0.012493800538265787, + "p50": 0.008079791456705824, + "p90": 0.017110194704198407, + "p99": 0.12655874612209597 + }, + "e2e": { + "count": 1213.0, + "mean": 6.961301470549104, + "p50": 1.7495028690318577, + "p90": 18.033410895219994, + "p99": 68.80023987947489 + } +} diff --git a/figs/f6_e2e_latency_full_grid.png b/figs/f6_e2e_latency_full_grid.png new file mode 100644 index 0000000..34d8cb6 Binary files /dev/null and b/figs/f6_e2e_latency_full_grid.png differ