Add characterization result figures

2026-05-25 15:15:10 +08:00
parent 0f64fb3261
commit 5ed6f6fe5b
8 changed files with 382 additions and 6 deletions
--- a/analysis/characterization/current_results/all_figures_index.md
+++ b/analysis/characterization/current_results/all_figures_index.md
@@ -1,10 +1,54 @@
 # Figures Index

-No generated figures are committed by this script. Batch-specific figures should be generated from:
+Generated by:

- `analysis/characterization/analyze.py` for Batch 0/1 trace figures.
- future Batch 2 step-timeline artifacts for interference plots.
- future Batch 3 per-worker/session artifacts for hot-spot plots.
- future Batch 4 arrival-rate sweep artifacts for SRR curves.
+```bash
+.venv/bin/python analysis/characterization/plot_current_results.py
+```

-This file exists so the audit package has a stable placeholder until fresh figures are generated.
+| Figure | Intended Claim |
+|---|---|
+| [fig_full_trace_workload.png](figures/fig_full_trace_workload.png) | Full GLM-5.1 trace is long-input, short-output, and high input/output ratio. |
+| [fig_session_skew.png](figures/fig_session_skew.png) | Session input-token mass is highly skewed; top sessions dominate work. |
+| [fig_pdsep_vs_combined.png](figures/fig_pdsep_vs_combined.png) | Existing static PD-sep A/B regresses TTFT/E2E vs combined. |
+| [fig_elastic_vs_baseline.png](figures/fig_elastic_vs_baseline.png) | Existing elastic transfer-based run does not improve TTFT/TPOT over high-contention baseline. |
+| [fig_gpu_balance.png](figures/fig_gpu_balance.png) | Existing runs show GPU-util imbalance, but more data is needed for hot-spot causality. |
+| [fig_claim_status.png](figures/fig_claim_status.png) | Current audit separates supported, partial, and unsupported claims. |
+
+## Figure Previews
+
+### Full Trace Workload
+
+Full GLM-5.1 trace is long-input, short-output, and high input/output ratio.
+
+![Full Trace Workload](figures/fig_full_trace_workload.png)
+
+### Session Skew
+
+Session input-token mass is highly skewed; top sessions dominate work.
+
+![Session Skew](figures/fig_session_skew.png)
+
+### PD-Sep vs Combined
+
+Existing static PD-sep A/B regresses TTFT/E2E vs combined.
+
+![PD-Sep vs Combined](figures/fig_pdsep_vs_combined.png)
+
+### Elastic vs Baseline
+
+Existing elastic transfer-based run does not improve TTFT/TPOT over high-contention baseline.
+
+![Elastic vs Baseline](figures/fig_elastic_vs_baseline.png)
+
+### GPU Balance
+
+Existing runs show GPU-util imbalance, but more data is needed for hot-spot causality.
+
+![GPU Balance](figures/fig_gpu_balance.png)
+
+### Claim Status
+
+Current audit separates supported, partial, and unsupported claims.
+
+![Claim Status](figures/fig_claim_status.png)
--- a/analysis/characterization/current_results/figures/fig_claim_status.png
+++ b/analysis/characterization/current_results/figures/fig_claim_status.png
--- a/analysis/characterization/current_results/figures/fig_elastic_vs_baseline.png
+++ b/analysis/characterization/current_results/figures/fig_elastic_vs_baseline.png
--- a/analysis/characterization/current_results/figures/fig_full_trace_workload.png
+++ b/analysis/characterization/current_results/figures/fig_full_trace_workload.png
--- a/analysis/characterization/current_results/figures/fig_gpu_balance.png
+++ b/analysis/characterization/current_results/figures/fig_gpu_balance.png
--- a/analysis/characterization/current_results/figures/fig_pdsep_vs_combined.png
+++ b/analysis/characterization/current_results/figures/fig_pdsep_vs_combined.png
--- a/analysis/characterization/current_results/figures/fig_session_skew.png
+++ b/analysis/characterization/current_results/figures/fig_session_skew.png
--- a/analysis/characterization/plot_current_results.py
+++ b/analysis/characterization/plot_current_results.py
@@ -0,0 +1,332 @@
+#!/usr/bin/env python3
+"""Generate matplotlib figures for the current characterization package."""
+
+from __future__ import annotations
+
+import json
+from pathlib import Path
+from typing import Any
+
+import matplotlib
+
+matplotlib.use("Agg")
+import matplotlib.pyplot as plt
+
+
+# Directory holding the JSON inputs read by main() and the generated
+# all_figures_index.md.
+# NOTE(review): this is a relative path, so it resolves against the current
+# working directory — presumably the script is meant to be launched from the
+# repository root; confirm before running it from elsewhere.
+ROOT = Path("analysis/characterization/current_results")
+# Directory the PNG figures are written to by save().
+FIG_DIR = ROOT / "figures"
+
+
+def main() -> None:
+    """Render every figure, rewrite the figures index, and print each path.
+
+    Creates ``FIG_DIR`` if needed, then reads the three JSON summaries that
+    live under ``ROOT`` and feeds them to the individual plot functions.
+    """
+    FIG_DIR.mkdir(parents=True, exist_ok=True)
+    trace_summary = load_json(ROOT / "full_trace_summary.json")
+    run_summaries = load_json(ROOT / "run_summaries.json")
+    claim_matrix = load_json(ROOT / "claim_matrix.json")
+
+    # The order here is also the row order of the generated index.
+    figure_paths = [
+        plot_full_trace_workload(trace_summary),
+        plot_session_skew(trace_summary),
+        plot_pdsep_vs_combined(run_summaries),
+        plot_elastic_vs_baseline(run_summaries),
+        plot_gpu_balance(run_summaries),
+        plot_claim_status(claim_matrix),
+    ]
+    write_figures_index(figure_paths)
+    print(*figure_paths, sep="\n")
+
+
+def load_json(path: Path) -> Any:
+    """Read *path* as UTF-8 text and return the decoded JSON value."""
+    raw_text = path.read_text(encoding="utf-8")
+    return json.loads(raw_text)
+
+
+def plot_full_trace_workload(summary: dict[str, Any]) -> str:
+    """Plot p50/p90/p99 of input tokens, output tokens and their ratio.
+
+    Reads the ``"input"``, ``"output"`` and ``"input_output_ratio"`` entries
+    of *summary* (each indexed by ``p50``/``p90``/``p99``) and draws them as
+    three side-by-side bar series on a log-scaled y axis. The footer line
+    uses ``summary["records"]``, ``summary["sessions"]`` and
+    ``summary["trace_span_s"]``.
+
+    Returns the path of the saved PNG as a string.
+    """
+    percentiles = ["p50", "p90", "p99"]
+    # (legend label, summary key, bar colour) for each series, left to right.
+    metrics = [
+        ("input tokens", "input", "#2f6fab"),
+        ("output tokens", "output", "#dd8452"),
+        ("input/output", "input_output_ratio", "#4c995c"),
+    ]
+    series = [
+        (legend, [summary[field][pct] for pct in percentiles], color)
+        for legend, field, color in metrics
+    ]
+    fig, ax = plt.subplots(figsize=(9, 5.5))
+    bar_width = 0.24
+    group_positions = list(range(len(percentiles)))
+    for slot, (legend, heights, color) in enumerate(series):
+        # The middle series (slot 1) sits on the tick; the others flank it.
+        shift = (slot - 1) * bar_width
+        bar_positions = [pos + shift for pos in group_positions]
+        ax.bar(bar_positions, heights, width=bar_width, label=legend, color=color)
+        for xpos, height in zip(bar_positions, heights):
+            # Multiplicative offset keeps the label gap constant on a log axis.
+            ax.text(xpos, height * 1.08, short_num(height), ha="center", va="bottom", fontsize=9)
+    ax.set_yscale("log")
+    ax.set_xticks(group_positions, percentiles)
+    ax.set_ylabel("value, log scale")
+    ax.set_title("Full Trace Workload Shape")
+    footnote = (
+        f"Requests={summary['records']:,}; sessions={summary['sessions']:,}; span={summary['trace_span_s']:.1f}s"
+    )
+    ax.text(0.01, -0.22, footnote, transform=ax.transAxes, fontsize=10, color="#555")
+    ax.grid(True, axis="y", alpha=0.25)
+    ax.legend()
+    return save(fig, "fig_full_trace_workload.png")
+
+
+def plot_session_skew(summary: dict[str, Any]) -> str:
+    """Plot the share of input-token mass held by the top 1%/5%/10% of sessions.
+
+    The fractions come from ``summary["top_session_input_fraction"]`` and are
+    scaled by 100 for display. The footer lists the p50/p90/p99/max values of
+    ``summary["session_input_tokens"]``.
+
+    Returns the path of the saved PNG as a string.
+    """
+    shares = summary["top_session_input_fraction"]
+    # (tick label, key in the fraction mapping) for each bar, left to right.
+    buckets = [("top 1%", "top1pct"), ("top 5%", "top5pct"), ("top 10%", "top10pct")]
+    tick_labels = [label for label, _ in buckets]
+    percents = [shares[key] * 100 for _, key in buckets]
+    fig, ax = plt.subplots(figsize=(8, 5))
+    rects = ax.bar(tick_labels, percents, color=["#c44e52", "#dd8452", "#2f6fab"])
+    for rect, pct in zip(rects, percents):
+        centre_x = rect.get_x() + rect.get_width() / 2
+        ax.text(centre_x, pct + 1.5, f"{pct:.1f}%", ha="center")
+    ax.set_ylim(0, 100)
+    ax.set_ylabel("% of input-token mass")
+    ax.set_title("Session Token-Mass Skew")
+    footnote = (
+        "Session input-token p50/p90/p99/max = "
+        f"{short_num(summary['session_input_tokens']['p50'])} / "
+        f"{short_num(summary['session_input_tokens']['p90'])} / "
+        f"{short_num(summary['session_input_tokens']['p99'])} / "
+        f"{short_num(summary['session_input_tokens']['max'])}"
+    )
+    ax.text(0.01, -0.20, footnote, transform=ax.transAxes, fontsize=10, color="#555")
+    ax.grid(True, axis="y", alpha=0.25)
+    return save(fig, "fig_session_skew.png")
+
+
+def plot_pdsep_vs_combined(runs: list[dict[str, Any]]) -> str:
+    """Plot TTFT and end-to-end latency percentiles for the static PD-sep A/B pair.
+
+    Looks up the ``outputs/gpu_ab_combined`` and ``outputs/gpu_ab_pdsep``
+    entries of *runs* by their ``"run"`` key, draws their p50/p90 values as
+    grouped bars, and annotates the error counts and the wall-clock delta of
+    PD-sep relative to combined.
+
+    Returns the path of the saved PNG as a string.
+    """
+    runs_by_name = {entry["run"]: entry for entry in runs}
+    combined = runs_by_name["outputs/gpu_ab_combined"]
+    pdsep = runs_by_name["outputs/gpu_ab_pdsep"]
+    # (tick label, stats group, percentile) for each bar group, left to right.
+    metrics = [
+        ("TTFT p50", "ttft_stats_s", "p50"),
+        ("TTFT p90", "ttft_stats_s", "p90"),
+        ("E2E p50", "latency_stats_s", "p50"),
+        ("E2E p90", "latency_stats_s", "p90"),
+    ]
+    labels = [label for label, _, _ in metrics]
+    combined_vals = [stat(combined, group, pct) for _, group, pct in metrics]
+    pdsep_vals = [stat(pdsep, group, pct) for _, group, pct in metrics]
+    fig, ax = plt.subplots(figsize=(9, 5))
+    grouped_bars(
+        ax,
+        labels,
+        [("combined", combined_vals), ("PD-sep", pdsep_vals)],
+        ["#2f6fab", "#c44e52"],
+    )
+    ax.set_ylabel("seconds")
+    ax.set_title("Static PD-Sep vs Combined Baseline")
+    footnote = (
+        f"Errors: combined={combined['error_count']}, PD-sep={pdsep['error_count']}; "
+        f"wall-clock delta={pct_delta(combined['wall_clock_s'], pdsep['wall_clock_s']):+.1f}%"
+    )
+    ax.text(0.01, -0.22, footnote, transform=ax.transAxes, fontsize=10, color="#555")
+    ax.grid(True, axis="y", alpha=0.25)
+    ax.legend()
+    return save(fig, "fig_pdsep_vs_combined.png")
+
+
+def plot_elastic_vs_baseline(runs: list[dict[str, Any]]) -> str:
+    """Plot latency percentiles for the elastic run against its baseline.
+
+    Looks up the ``outputs/contention_16s_ts10`` (baseline) and
+    ``outputs/contention_16s_elastic`` entries of *runs* by their ``"run"``
+    key, draws TTFT/E2E p50 and p90 plus TPOT p90 as grouped bars, and
+    annotates each run's ``gpu_summary.max_min_ratio``.
+
+    Returns the path of the saved PNG as a string.
+    """
+    runs_by_name = {entry["run"]: entry for entry in runs}
+    baseline = runs_by_name["outputs/contention_16s_ts10"]
+    elastic = runs_by_name["outputs/contention_16s_elastic"]
+    # (tick label, stats group, percentile) for each bar group, left to right.
+    metrics = [
+        ("TTFT p50", "ttft_stats_s", "p50"),
+        ("TTFT p90", "ttft_stats_s", "p90"),
+        ("E2E p50", "latency_stats_s", "p50"),
+        ("E2E p90", "latency_stats_s", "p90"),
+        ("TPOT p90", "tpot_stats_s", "p90"),
+    ]
+    labels = [label for label, _, _ in metrics]
+    baseline_vals = [stat(baseline, group, pct) for _, group, pct in metrics]
+    elastic_vals = [stat(elastic, group, pct) for _, group, pct in metrics]
+    fig, ax = plt.subplots(figsize=(10, 5))
+    grouped_bars(
+        ax,
+        labels,
+        [("baseline", baseline_vals), ("elastic", elastic_vals)],
+        ["#2f6fab", "#dd8452"],
+    )
+    ax.set_ylabel("seconds")
+    ax.set_title("Elastic Transfer-Based Migration vs High-Contention Baseline")
+    footnote = (
+        f"GPU imbalance ratio: baseline={nested(baseline, ['gpu_summary', 'max_min_ratio']):.2f}x, "
+        f"elastic={nested(elastic, ['gpu_summary', 'max_min_ratio']):.2f}x"
+    )
+    ax.text(0.01, -0.22, footnote, transform=ax.transAxes, fontsize=10, color="#555")
+    ax.grid(True, axis="y", alpha=0.25)
+    ax.legend()
+    return save(fig, "fig_elastic_vs_baseline.png")
+
+
+def plot_gpu_balance(runs: list[dict[str, Any]]) -> str:
+    """Plot mean GPU utilisation and the max/min imbalance ratio for four runs.
+
+    For each selected run the values are read from ``gpu_summary.mean_util_pct``
+    and ``gpu_summary.max_min_ratio``; the two quantities are drawn as
+    separate bar panels sharing the same run labels.
+
+    Returns the path of the saved PNG as a string.
+    """
+    # (tick label, "run" key) for each bar, left to right.
+    selected = [
+        ("combined", "outputs/gpu_ab_combined"),
+        ("PD-sep", "outputs/gpu_ab_pdsep"),
+        ("16s base", "outputs/contention_16s_ts10"),
+        ("16s elastic", "outputs/contention_16s_elastic"),
+    ]
+    runs_by_name = {entry["run"]: entry for entry in runs}
+    labels = [label for label, _ in selected]
+    mean_util = [nested(runs_by_name[key], ["gpu_summary", "mean_util_pct"]) for _, key in selected]
+    imbalance = [nested(runs_by_name[key], ["gpu_summary", "max_min_ratio"]) for _, key in selected]
+    fig, axes = plt.subplots(1, 2, figsize=(11, 4.8))
+    # (axes, bar heights, colour, y label, panel title) for the two panels.
+    panels = [
+        (axes[0], mean_util, "#4c995c", "mean GPU util (%)", "Mean Utilization"),
+        (axes[1], imbalance, "#76619c", "max/min mean util", "Imbalance Ratio"),
+    ]
+    for panel, heights, color, ylabel, title in panels:
+        panel.bar(labels, heights, color=color)
+        panel.set_ylabel(ylabel)
+        panel.set_title(title)
+        panel.tick_params(axis="x", rotation=20)
+        panel.grid(True, axis="y", alpha=0.25)
+    fig.suptitle("GPU Utilization Balance in Existing Runs")
+    fig.text(
+        0.02,
+        0.01,
+        "GPU util imbalance is suggestive only; hot-spot causality still needs per-worker queue and session mapping.",
+        fontsize=10,
+        color="#555",
+    )
+    return save(fig, "fig_gpu_balance.png")
+
+
+def plot_claim_status(claims: list[dict[str, Any]]) -> str:
+    """Plot how many claims fall under each support status.
+
+    Each entry of *claims* is counted by its ``"status"`` value. The four
+    known statuses are drawn in a fixed order, each with its own fixed colour;
+    statuses with a zero count are omitted. Any status outside the known set
+    is appended after them in first-seen order with a neutral grey bar, so it
+    remains visible instead of being counted and then dropped.
+
+    Returns the path of the saved PNG as a string.
+    """
+    # Colour is keyed by status rather than by bar position, so a status keeps
+    # its colour even when an earlier status has no claims and is skipped.
+    status_colors = {
+        "supported_by_existing_artifact": "#4c995c",
+        "supported_for_trace_shape": "#2f6fab",
+        "partially_supported": "#dd8452",
+        "not_yet_supported": "#c44e52",
+    }
+    unknown_color = "#8c8c8c"
+    counts: dict[Any, int] = {}
+    for claim in claims:
+        counts[claim["status"]] = counts.get(claim["status"], 0) + 1
+    shown = [status for status in status_colors if counts.get(status)]
+    shown.extend(status for status in counts if status not in status_colors)
+    # str() guards the label against non-string status values in the JSON.
+    labels = [str(status).replace("_", "\n") for status in shown]
+    values = [counts[status] for status in shown]
+    bar_colors = [status_colors.get(status, unknown_color) for status in shown]
+    fig, ax = plt.subplots(figsize=(9, 5))
+    bars = ax.bar(labels, values, color=bar_colors)
+    for bar, value in zip(bars, values):
+        ax.text(bar.get_x() + bar.get_width() / 2, value + 0.05, str(value), ha="center")
+    ax.set_ylabel("claim count")
+    ax.set_title("Current Claim Support Status")
+    ax.grid(True, axis="y", alpha=0.25)
+    return save(fig, "fig_claim_status.png")
+
+
+def grouped_bars(ax: Any, labels: list[str], series: list[tuple[str, list[float]]], colors: list[str]) -> None:
+    """Draw *series* as side-by-side bars centred on one tick per label.
+
+    Each ``(name, values)`` pair in *series* is paired with the colour at the
+    same index in *colors*; every bar gets a ``short_num`` value label just
+    above its top. The x ticks are set to *labels* afterwards.
+    """
+    tick_positions = list(range(len(labels)))
+    bar_width = 0.35
+    # Index of the (possibly fractional) middle series, which sits on the tick.
+    centre_slot = (len(series) - 1) / 2
+    for slot, ((legend, heights), color) in enumerate(zip(series, colors)):
+        shift = (slot - centre_slot) * bar_width
+        shifted = [pos + shift for pos in tick_positions]
+        rects = ax.bar(shifted, heights, width=bar_width, label=legend, color=color)
+        for rect, height in zip(rects, heights):
+            centre_x = rect.get_x() + rect.get_width() / 2
+            ax.text(centre_x, height * 1.02, short_num(height), ha="center", va="bottom", fontsize=8)
+    ax.set_xticks(tick_positions, labels)
+
+
+def stat(run: dict[str, Any], stat_name: str, key: str) -> float:
+    """Return ``run[stat_name][key]`` converted to ``float``."""
+    stats_group = run[stat_name]
+    return float(stats_group[key])
+
+
+def nested(run: dict[str, Any], keys: list[str]) -> float:
+    """Follow *keys* one level at a time into *run* and return the leaf as ``float``."""
+    node: Any = run
+    for step in keys:
+        node = node[step]
+    return float(node)
+
+
+def pct_delta(base: float, variant: float) -> float:
+    """Return the change from *base* to *variant* as a percentage of *base*."""
+    change = variant - base
+    return change / base * 100.0
+
+
+def short_num(value: float) -> str:
+    """Format *value* compactly for bar labels.
+
+    Magnitudes of a million or more use an ``M`` suffix, thousands use ``k``
+    (one decimal from 10,000 up, two below that), and smaller values are
+    printed with zero, one or two decimals as the magnitude shrinks.
+    """
+    magnitude = abs(value)
+    # (lower bound, divisor, format spec, suffix), checked largest first.
+    scaled_tiers = (
+        (1_000_000, 1_000_000, ".1f", "M"),
+        (10_000, 1000, ".1f", "k"),
+        (1000, 1000, ".2f", "k"),
+    )
+    for lower_bound, divisor, spec, suffix in scaled_tiers:
+        if magnitude >= lower_bound:
+            return format(value / divisor, spec) + suffix
+    if magnitude >= 100:
+        return format(value, ".0f")
+    if magnitude >= 10:
+        return format(value, ".1f")
+    return format(value, ".2f")
+
+
+def save(fig: Any, name: str) -> str:
+    """Write *fig* to ``FIG_DIR / name`` at 180 dpi, close it, and return the path.
+
+    The layout rectangle leaves a margin at the bottom and top of the figure
+    (below 0.04 and above 0.95 in figure coordinates).
+    """
+    destination = FIG_DIR / name
+    fig.tight_layout(rect=(0, 0.04, 1, 0.95))
+    fig.savefig(destination, dpi=180)
+    # Close explicitly so figures do not accumulate in pyplot's registry.
+    plt.close(fig)
+    return str(destination)
+
+
+def write_figures_index(paths: list[str]) -> None:
+    """Write ``all_figures_index.md`` under ``ROOT`` for the given figure paths.
+
+    The file contains a table row and a preview section per figure, in the
+    order of *paths*. Only the file name of each path is used; it must be one
+    of the known figure names below, otherwise the lookup raises ``KeyError``.
+    """
+    # File name -> (preview heading, intended claim).
+    catalog = {
+        "fig_full_trace_workload.png": (
+            "Full Trace Workload",
+            "Full GLM-5.1 trace is long-input, short-output, and high input/output ratio.",
+        ),
+        "fig_session_skew.png": (
+            "Session Skew",
+            "Session input-token mass is highly skewed; top sessions dominate work.",
+        ),
+        "fig_pdsep_vs_combined.png": (
+            "PD-Sep vs Combined",
+            "Existing static PD-sep A/B regresses TTFT/E2E vs combined.",
+        ),
+        "fig_elastic_vs_baseline.png": (
+            "Elastic vs Baseline",
+            "Existing elastic transfer-based run does not improve TTFT/TPOT over high-contention baseline.",
+        ),
+        "fig_gpu_balance.png": (
+            "GPU Balance",
+            "Existing runs show GPU-util imbalance, but more data is needed for hot-spot causality.",
+        ),
+        "fig_claim_status.png": (
+            "Claim Status",
+            "Current audit separates supported, partial, and unsupported claims.",
+        ),
+    }
+    # Resolve every path once so the table and the previews share one lookup.
+    entries = []
+    for path in paths:
+        file_name = Path(path).name
+        heading, claim_text = catalog[file_name]
+        entries.append((file_name, heading, claim_text, f"figures/{file_name}"))
+
+    header = [
+        "# Figures Index",
+        "",
+        "Generated by:",
+        "",
+        "```bash",
+        ".venv/bin/python analysis/characterization/plot_current_results.py",
+        "```",
+        "",
+        "| Figure | Intended Claim |",
+        "|---|---|",
+    ]
+    table_rows = [
+        f"| [{file_name}]({link}) | {claim_text} |"
+        for file_name, _, claim_text, link in entries
+    ]
+    previews: list[str] = []
+    for _, heading, claim_text, link in entries:
+        previews += [f"### {heading}", "", claim_text, "", f"![{heading}]({link})", ""]
+
+    document = header + table_rows + ["", "## Figure Previews", ""] + previews
+    # Strip trailing blank lines so the file ends with exactly one newline.
+    (ROOT / "all_figures_index.md").write_text("\n".join(document).rstrip() + "\n", encoding="utf-8")
+
+
+if __name__ == "__main__":
+    main()