Add characterization result figures
This commit is contained in:
@@ -1,10 +1,54 @@
|
||||
# Figures Index
|
||||
|
||||
No generated figures are committed by this script. Batch-specific figures should be generated from:
|
||||
Generated by:
|
||||
|
||||
- `analysis/characterization/analyze.py` for Batch 0/1 trace figures.
|
||||
- future Batch 2 step-timeline artifacts for interference plots.
|
||||
- future Batch 3 per-worker/session artifacts for hot-spot plots.
|
||||
- future Batch 4 arrival-rate sweep artifacts for SRR curves.
|
||||
```bash
|
||||
.venv/bin/python analysis/characterization/plot_current_results.py
|
||||
```
|
||||
|
||||
This file exists so the audit package has a stable placeholder until fresh figures are generated.
|
||||
| Figure | Intended Claim |
|
||||
|---|---|
|
||||
| [fig_full_trace_workload.png](figures/fig_full_trace_workload.png) | Full GLM-5.1 trace is long-input, short-output, and high input/output ratio. |
|
||||
| [fig_session_skew.png](figures/fig_session_skew.png) | Session input-token mass is highly skewed; top sessions dominate work. |
|
||||
| [fig_pdsep_vs_combined.png](figures/fig_pdsep_vs_combined.png) | Existing static PD-sep A/B regresses TTFT/E2E vs combined. |
|
||||
| [fig_elastic_vs_baseline.png](figures/fig_elastic_vs_baseline.png) | Existing elastic transfer-based run does not improve TTFT/TPOT over high-contention baseline. |
|
||||
| [fig_gpu_balance.png](figures/fig_gpu_balance.png) | Existing runs show GPU-util imbalance, but more data is needed for hot-spot causality. |
|
||||
| [fig_claim_status.png](figures/fig_claim_status.png) | Current audit separates supported, partial, and unsupported claims. |
|
||||
|
||||
## Figure Previews
|
||||
|
||||
### Full Trace Workload
|
||||
|
||||
Full GLM-5.1 trace is long-input, short-output, and high input/output ratio.
|
||||
|
||||
![Full Trace Workload](figures/fig_full_trace_workload.png)
|
||||
|
||||
### Session Skew
|
||||
|
||||
Session input-token mass is highly skewed; top sessions dominate work.
|
||||
|
||||
![Session Skew](figures/fig_session_skew.png)
|
||||
|
||||
### PD-Sep vs Combined
|
||||
|
||||
Existing static PD-sep A/B regresses TTFT/E2E vs combined.
|
||||
|
||||
![PD-Sep vs Combined](figures/fig_pdsep_vs_combined.png)
|
||||
|
||||
### Elastic vs Baseline
|
||||
|
||||
Existing elastic transfer-based run does not improve TTFT/TPOT over high-contention baseline.
|
||||
|
||||
![Elastic vs Baseline](figures/fig_elastic_vs_baseline.png)
|
||||
|
||||
### GPU Balance
|
||||
|
||||
Existing runs show GPU-util imbalance, but more data is needed for hot-spot causality.
|
||||
|
||||
![GPU Balance](figures/fig_gpu_balance.png)
|
||||
|
||||
### Claim Status
|
||||
|
||||
Current audit separates supported, partial, and unsupported claims.
|
||||
|
||||
![Claim Status](figures/fig_claim_status.png)
|
||||
|
||||
Binary file not shown.
|
After Width: | Height: | Size: 52 KiB |
Binary file not shown.
|
After Width: | Height: | Size: 65 KiB |
Binary file not shown.
|
After Width: | Height: | Size: 66 KiB |
Binary file not shown.
|
After Width: | Height: | Size: 82 KiB |
Binary file not shown.
|
After Width: | Height: | Size: 59 KiB |
Binary file not shown.
|
After Width: | Height: | Size: 55 KiB |
332
analysis/characterization/plot_current_results.py
Normal file
332
analysis/characterization/plot_current_results.py
Normal file
@@ -0,0 +1,332 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Generate matplotlib figures for the current characterization package."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
import matplotlib
|
||||
|
||||
matplotlib.use("Agg")
|
||||
import matplotlib.pyplot as plt
|
||||
|
||||
|
||||
# Directory holding the characterization JSON inputs. The path is relative,
# so it resolves against the working directory the script is launched from.
ROOT = Path("analysis/characterization/current_results")
# Rendered PNG figures are written to a "figures" folder beneath ROOT.
FIG_DIR = ROOT.joinpath("figures")
|
||||
|
||||
|
||||
def main() -> None:
    """Render every figure, rewrite the figures index, and print the saved paths.

    Reads ``full_trace_summary.json``, ``run_summaries.json`` and
    ``claim_matrix.json`` from ``ROOT``, creating ``FIG_DIR`` first if needed.
    """
    FIG_DIR.mkdir(parents=True, exist_ok=True)
    full_trace, runs, claims = (
        load_json(ROOT / filename)
        for filename in ("full_trace_summary.json", "run_summaries.json", "claim_matrix.json")
    )

    # Each plotter is paired with the dataset it consumes; list order fixes
    # both the rendering order and the order of entries in the index.
    jobs = [
        (plot_full_trace_workload, full_trace),
        (plot_session_skew, full_trace),
        (plot_pdsep_vs_combined, runs),
        (plot_elastic_vs_baseline, runs),
        (plot_gpu_balance, runs),
        (plot_claim_status, claims),
    ]
    paths = [plotter(data) for plotter, data in jobs]

    write_figures_index(paths)
    print(*paths, sep="\n")
|
||||
|
||||
|
||||
def load_json(path: Path) -> Any:
    """Parse the UTF-8 encoded JSON document stored at ``path``.

    Args:
        path: Location of the JSON file to read.

    Returns:
        The decoded JSON value (dict, list, or scalar).
    """
    with path.open(encoding="utf-8") as handle:
        return json.load(handle)
|
||||
|
||||
|
||||
def plot_full_trace_workload(summary: dict[str, Any]) -> str:
    """Draw p50/p90/p99 bars for input tokens, output tokens and their ratio.

    Args:
        summary: Trace summary dict. Reads ``input``, ``output`` and
            ``input_output_ratio`` (each indexed by ``p50``/``p90``/``p99``)
            plus ``records``, ``sessions`` and ``trace_span_s`` for the footnote.

    Returns:
        Path of the saved ``fig_full_trace_workload.png`` as a string.
    """
    percentiles = ["p50", "p90", "p99"]
    # (legend label, summary key, bar colour), drawn left to right in each group.
    groups = [
        ("input tokens", "input", "#2f6fab"),
        ("output tokens", "output", "#dd8452"),
        ("input/output", "input_output_ratio", "#4c995c"),
    ]
    series = [
        (legend, [summary[key][pct] for pct in percentiles], colour)
        for legend, key, colour in groups
    ]
    bar_width = 0.24
    ticks = list(range(len(percentiles)))

    fig, ax = plt.subplots(figsize=(9, 5.5))
    for slot, (legend, heights, colour) in enumerate(series):
        # Slot 1 sits on the tick; slots 0 and 2 flank it by one bar width.
        shift = (slot - 1) * bar_width
        centres = [tick + shift for tick in ticks]
        ax.bar(centres, heights, width=bar_width, label=legend, color=colour)
        for centre, height in zip(centres, heights):
            # Multiplicative lift keeps the label gap visually even on a log axis.
            ax.text(centre, height * 1.08, short_num(height), ha="center", va="bottom", fontsize=9)

    ax.set_yscale("log")
    ax.set_xticks(ticks, percentiles)
    ax.set_ylabel("value, log scale")
    ax.set_title("Full Trace Workload Shape")
    # Footnote is placed below the axes (negative y in axes coordinates).
    ax.text(
        0.01,
        -0.22,
        f"Requests={summary['records']:,}; sessions={summary['sessions']:,}; span={summary['trace_span_s']:.1f}s",
        transform=ax.transAxes,
        fontsize=10,
        color="#555",
    )
    ax.grid(True, axis="y", alpha=0.25)
    ax.legend()
    return save(fig, "fig_full_trace_workload.png")
|
||||
|
||||
|
||||
def plot_session_skew(summary: dict[str, Any]) -> str:
    """Draw the share of input-token mass held by the top 1%/5%/10% of sessions.

    Args:
        summary: Trace summary dict. Reads ``top_session_input_fraction``
            (keys ``top1pct``, ``top5pct``, ``top10pct``; multiplied by 100
            for display) and ``session_input_tokens`` (keys ``p50``, ``p90``,
            ``p99``, ``max``) for the footnote.

    Returns:
        Path of the saved ``fig_session_skew.png`` as a string.
    """
    shares = summary["top_session_input_fraction"]
    # (tick label, key in the fraction dict, bar colour)
    buckets = [
        ("top 1%", "top1pct", "#c44e52"),
        ("top 5%", "top5pct", "#dd8452"),
        ("top 10%", "top10pct", "#2f6fab"),
    ]
    tick_labels = [label for label, _, _ in buckets]
    percentages = [shares[key] * 100 for _, key, _ in buckets]
    palette = [colour for _, _, colour in buckets]

    fig, ax = plt.subplots(figsize=(8, 5))
    rects = ax.bar(tick_labels, percentages, color=palette)
    for rect, pct in zip(rects, percentages):
        centre = rect.get_x() + rect.get_width() / 2
        ax.text(centre, pct + 1.5, f"{pct:.1f}%", ha="center")
    ax.set_ylim(0, 100)
    ax.set_ylabel("% of input-token mass")
    ax.set_title("Session Token-Mass Skew")

    tokens = summary["session_input_tokens"]
    quantiles = " / ".join(short_num(tokens[q]) for q in ("p50", "p90", "p99", "max"))
    # Footnote is placed below the axes (negative y in axes coordinates).
    ax.text(
        0.01,
        -0.20,
        "Session input-token p50/p90/p99/max = " + quantiles,
        transform=ax.transAxes,
        fontsize=10,
        color="#555",
    )
    ax.grid(True, axis="y", alpha=0.25)
    return save(fig, "fig_session_skew.png")
|
||||
|
||||
|
||||
def plot_pdsep_vs_combined(runs: list[dict[str, Any]]) -> str:
    """Plot TTFT and end-to-end latency percentiles for the combined and PD-sep runs.

    Args:
        runs: Run summary dicts, each carrying a ``"run"`` key. The entries
            named ``outputs/gpu_ab_combined`` and ``outputs/gpu_ab_pdsep`` are
            read for ``ttft_stats_s``, ``latency_stats_s``, ``error_count``
            and ``wall_clock_s``.

    Returns:
        Path of the saved ``fig_pdsep_vs_combined.png`` as a string.

    Raises:
        KeyError: If either run or a required field is missing.
    """
    # (tick label, stats block, percentile) for each bar group, left to right.
    metrics = [
        ("TTFT p50", "ttft_stats_s", "p50"),
        ("TTFT p90", "ttft_stats_s", "p90"),
        ("E2E p50", "latency_stats_s", "p50"),
        ("E2E p90", "latency_stats_s", "p90"),
    ]
    index = {entry["run"]: entry for entry in runs}
    combined = index["outputs/gpu_ab_combined"]
    pdsep = index["outputs/gpu_ab_pdsep"]
    tick_labels = [label for label, _, _ in metrics]
    combined_vals = [stat(combined, block, pct) for _, block, pct in metrics]
    pdsep_vals = [stat(pdsep, block, pct) for _, block, pct in metrics]

    fig, ax = plt.subplots(figsize=(9, 5))
    grouped_bars(
        ax,
        tick_labels,
        [("combined", combined_vals), ("PD-sep", pdsep_vals)],
        ["#2f6fab", "#c44e52"],
    )
    ax.set_ylabel("seconds")
    ax.set_title("Static PD-Sep vs Combined Baseline")
    # Footnote below the axes: error counts and PD-sep wall-clock change
    # relative to the combined run.
    ax.text(
        0.01,
        -0.22,
        f"Errors: combined={combined['error_count']}, PD-sep={pdsep['error_count']}; "
        f"wall-clock delta={pct_delta(combined['wall_clock_s'], pdsep['wall_clock_s']):+.1f}%",
        transform=ax.transAxes,
        fontsize=10,
        color="#555",
    )
    ax.grid(True, axis="y", alpha=0.25)
    ax.legend()
    return save(fig, "fig_pdsep_vs_combined.png")
|
||||
|
||||
|
||||
def plot_elastic_vs_baseline(runs: list[dict[str, Any]]) -> str:
    """Plot TTFT, end-to-end and TPOT percentiles for the baseline and elastic runs.

    Args:
        runs: Run summary dicts, each carrying a ``"run"`` key. The entries
            named ``outputs/contention_16s_ts10`` (baseline) and
            ``outputs/contention_16s_elastic`` are read for ``ttft_stats_s``,
            ``latency_stats_s``, ``tpot_stats_s`` and
            ``gpu_summary.max_min_ratio``.

    Returns:
        Path of the saved ``fig_elastic_vs_baseline.png`` as a string.

    Raises:
        KeyError: If either run or a required field is missing.
    """
    # (tick label, stats block, percentile) for each bar group, left to right.
    metrics = [
        ("TTFT p50", "ttft_stats_s", "p50"),
        ("TTFT p90", "ttft_stats_s", "p90"),
        ("E2E p50", "latency_stats_s", "p50"),
        ("E2E p90", "latency_stats_s", "p90"),
        ("TPOT p90", "tpot_stats_s", "p90"),
    ]
    index = {entry["run"]: entry for entry in runs}
    baseline = index["outputs/contention_16s_ts10"]
    elastic = index["outputs/contention_16s_elastic"]
    tick_labels = [label for label, _, _ in metrics]
    baseline_vals = [stat(baseline, block, pct) for _, block, pct in metrics]
    elastic_vals = [stat(elastic, block, pct) for _, block, pct in metrics]

    fig, ax = plt.subplots(figsize=(10, 5))
    grouped_bars(
        ax,
        tick_labels,
        [("baseline", baseline_vals), ("elastic", elastic_vals)],
        ["#2f6fab", "#dd8452"],
    )
    ax.set_ylabel("seconds")
    ax.set_title("Elastic Transfer-Based Migration vs High-Contention Baseline")
    # Footnote below the axes: GPU max/min utilisation ratio for both runs.
    ax.text(
        0.01,
        -0.22,
        f"GPU imbalance ratio: baseline={nested(baseline, ['gpu_summary', 'max_min_ratio']):.2f}x, "
        f"elastic={nested(elastic, ['gpu_summary', 'max_min_ratio']):.2f}x",
        transform=ax.transAxes,
        fontsize=10,
        color="#555",
    )
    ax.grid(True, axis="y", alpha=0.25)
    ax.legend()
    return save(fig, "fig_elastic_vs_baseline.png")
|
||||
|
||||
|
||||
def plot_gpu_balance(runs: list[dict[str, Any]]) -> str:
    """Plot mean GPU utilisation and the max/min imbalance ratio for four runs.

    Args:
        runs: Run summary dicts, each carrying a ``"run"`` key and a
            ``gpu_summary`` dict with ``mean_util_pct`` and ``max_min_ratio``.

    Returns:
        Path of the saved ``fig_gpu_balance.png`` as a string.

    Raises:
        KeyError: If one of the four selected runs or a field is missing.
    """
    # Display label -> run identifier; dict order fixes the bar order.
    selected = {
        "combined": "outputs/gpu_ab_combined",
        "PD-sep": "outputs/gpu_ab_pdsep",
        "16s base": "outputs/contention_16s_ts10",
        "16s elastic": "outputs/contention_16s_elastic",
    }
    # (gpu_summary field, bar colour, y-axis label, panel title), one per subplot.
    panels = [
        ("mean_util_pct", "#4c995c", "mean GPU util (%)", "Mean Utilization"),
        ("max_min_ratio", "#76619c", "max/min mean util", "Imbalance Ratio"),
    ]
    index = {entry["run"]: entry for entry in runs}
    tick_labels = list(selected)
    # Resolve all values before creating the figure, as the lookups may raise.
    panel_values = [
        [nested(index[run_id], ["gpu_summary", field]) for run_id in selected.values()]
        for field, _, _, _ in panels
    ]

    fig, axes = plt.subplots(1, 2, figsize=(11, 4.8))
    for axis, heights, (_, colour, ylabel, title) in zip(axes, panel_values, panels):
        axis.bar(tick_labels, heights, color=colour)
        axis.set_ylabel(ylabel)
        axis.set_title(title)
        axis.tick_params(axis="x", rotation=20)
        axis.grid(True, axis="y", alpha=0.25)

    fig.suptitle("GPU Utilization Balance in Existing Runs")
    # Caveat anchored near the bottom-left corner of the figure.
    fig.text(
        0.02,
        0.01,
        "GPU util imbalance is suggestive only; hot-spot causality still needs per-worker queue and session mapping.",
        fontsize=10,
        color="#555",
    )
    return save(fig, "fig_gpu_balance.png")
|
||||
|
||||
|
||||
def plot_claim_status(claims: list[dict[str, Any]]) -> str:
    """Draw a bar chart of how many claims fall under each support status.

    Known statuses are drawn in a fixed order, each with a fixed colour, so a
    status keeps its colour even when another status has no claims. Statuses
    outside the known set are appended after the known ones (in first-seen
    order) in a neutral grey rather than being dropped from the chart.

    Args:
        claims: Claim dicts; only the ``"status"`` key is read.

    Returns:
        Path of the saved ``fig_claim_status.png`` as a string.

    Raises:
        KeyError: If a claim has no ``"status"`` key.
    """
    # Status -> bar colour. Dict order is also the left-to-right plot order.
    status_colors = {
        "supported_by_existing_artifact": "#4c995c",
        "supported_for_trace_shape": "#2f6fab",
        "partially_supported": "#dd8452",
        "not_yet_supported": "#c44e52",
    }
    fallback_color = "#8c8c8c"

    counts: dict[Any, int] = {}
    for claim in claims:
        status = claim["status"]
        counts[status] = counts.get(status, 0) + 1

    # Only statuses that actually occur are plotted; unexpected ones go last.
    known = [status for status in status_colors if counts.get(status)]
    unexpected = [status for status in counts if status not in status_colors]
    shown = known + unexpected

    labels = [str(status).replace("_", "\n") for status in shown]
    values = [counts[status] for status in shown]
    colors = [status_colors.get(status, fallback_color) for status in shown]

    fig, ax = plt.subplots(figsize=(9, 5))
    bars = ax.bar(labels, values, color=colors)
    for bar, value in zip(bars, values):
        ax.text(bar.get_x() + bar.get_width() / 2, value + 0.05, str(value), ha="center")
    ax.set_ylabel("claim count")
    ax.set_title("Current Claim Support Status")
    ax.grid(True, axis="y", alpha=0.25)
    return save(fig, "fig_claim_status.png")
|
||||
|
||||
|
||||
def grouped_bars(ax: Any, labels: list[str], series: list[tuple[str, list[float]]], colors: list[str]) -> None:
    """Draw side-by-side bars for several series and label each bar with its value.

    Args:
        ax: Axes-like object providing ``bar``, ``text`` and ``set_xticks``.
        labels: Tick labels, one per bar group.
        series: ``(legend name, values)`` pairs; values align with ``labels``.
        colors: One bar colour per series. Series and colours are paired with
            ``zip``, so any surplus on either side is ignored.
    """
    bar_width = 0.35
    ticks = list(range(len(labels)))
    # Centre the whole cluster of bars on each tick position.
    middle_slot = (len(series) - 1) / 2
    for slot, ((legend, heights), colour) in enumerate(zip(series, colors)):
        shift = (slot - middle_slot) * bar_width
        centres = [tick + shift for tick in ticks]
        rects = ax.bar(centres, heights, width=bar_width, label=legend, color=colour)
        for rect, height in zip(rects, heights):
            label_x = rect.get_x() + rect.get_width() / 2
            # Place the value label 2% above the bar top.
            ax.text(label_x, height * 1.02, short_num(height), ha="center", va="bottom", fontsize=8)
    ax.set_xticks(ticks, labels)
|
||||
|
||||
|
||||
def stat(run: dict[str, Any], stat_name: str, key: str) -> float:
    """Return ``run[stat_name][key]`` converted to ``float``.

    Args:
        run: Run summary dict.
        stat_name: Name of the statistics block inside ``run``.
        key: Entry to read from that block (e.g. a percentile label).

    Raises:
        KeyError: If the block or the entry is missing.
    """
    block = run[stat_name]
    return float(block[key])
|
||||
|
||||
|
||||
def nested(run: dict[str, Any], keys: list[str]) -> float:
    """Follow ``keys`` through nested mappings and return the final value as ``float``.

    Args:
        run: Outermost mapping to start from.
        keys: Keys to apply in order, one per nesting level.

    Raises:
        KeyError: If any key along the path is missing.
    """
    # Base case: no keys left, so ``run`` is the value that was asked for.
    if not keys:
        return float(run)
    first, *remaining = keys
    return nested(run[first], remaining)
|
||||
|
||||
|
||||
def pct_delta(base: float, variant: float) -> float:
    """Return the percentage change of ``variant`` relative to ``base``.

    Args:
        base: Reference value.
        variant: Value being compared against the reference.

    Returns:
        ``(variant - base) / base * 100``. When ``base`` is zero the relative
        change is undefined, so NaN is returned instead of raising
        ``ZeroDivisionError``; a degenerate baseline then shows up as ``nan``
        in a label rather than aborting figure generation.
    """
    if base == 0:
        return float("nan")
    return (variant - base) / base * 100.0
|
||||
|
||||
|
||||
def short_num(value: float) -> str:
    """Format ``value`` compactly for use as a bar label.

    Magnitudes of a million or more use an ``M`` suffix and magnitudes of a
    thousand or more use ``k``; smaller values are printed with fewer decimals
    as they grow (two below 10, one below 100, none below 1000).
    """
    magnitude = abs(value)
    # (lower bound, divisor, decimals, suffix), tested from largest to smallest.
    scaled_tiers = (
        (1_000_000, 1_000_000, 1, "M"),
        (10_000, 1000, 1, "k"),
        (1000, 1000, 2, "k"),
    )
    for lower_bound, divisor, decimals, suffix in scaled_tiers:
        if magnitude >= lower_bound:
            return f"{value / divisor:.{decimals}f}{suffix}"

    if magnitude >= 100:
        decimals = 0
    elif magnitude >= 10:
        decimals = 1
    else:
        decimals = 2
    return f"{value:.{decimals}f}"
|
||||
|
||||
|
||||
def save(fig: Any, name: str) -> str:
    """Write ``fig`` to ``FIG_DIR / name`` at 180 dpi, close it, and return the path.

    Args:
        fig: Figure object to lay out, save and close.
        name: File name (including extension) inside ``FIG_DIR``.

    Returns:
        The output path as a string.
    """
    destination = FIG_DIR.joinpath(name)
    # Keep the layout inside y in [0.04, 0.95] of the figure; presumably this
    # leaves room for bottom footnotes and a suptitle.
    fig.tight_layout(rect=(0, 0.04, 1, 0.95))
    fig.savefig(destination, dpi=180)
    # Close the figure so pyplot does not keep it registered after saving.
    plt.close(fig)
    return str(destination)
|
||||
|
||||
|
||||
def write_figures_index(paths: list[str]) -> None:
    """Write ``all_figures_index.md`` under ``ROOT`` for the given figure paths.

    The index has a table linking each figure to its intended claim, followed
    by a "Figure Previews" section that embeds every figure as a Markdown
    image beneath its title and claim.

    Args:
        paths: Saved figure paths. Only the file name of each is used, and it
            must be one of the names known to this function.

    Raises:
        KeyError: If a path's file name has no registered title/claim.
    """
    # File name -> (preview heading, claim the figure is meant to support).
    claims = {
        "fig_full_trace_workload.png": (
            "Full Trace Workload",
            "Full GLM-5.1 trace is long-input, short-output, and high input/output ratio.",
        ),
        "fig_session_skew.png": (
            "Session Skew",
            "Session input-token mass is highly skewed; top sessions dominate work.",
        ),
        "fig_pdsep_vs_combined.png": (
            "PD-Sep vs Combined",
            "Existing static PD-sep A/B regresses TTFT/E2E vs combined.",
        ),
        "fig_elastic_vs_baseline.png": (
            "Elastic vs Baseline",
            "Existing elastic transfer-based run does not improve TTFT/TPOT over high-contention baseline.",
        ),
        "fig_gpu_balance.png": (
            "GPU Balance",
            "Existing runs show GPU-util imbalance, but more data is needed for hot-spot causality.",
        ),
        "fig_claim_status.png": (
            "Claim Status",
            "Current audit separates supported, partial, and unsupported claims.",
        ),
    }
    lines = [
        "# Figures Index",
        "",
        "Generated by:",
        "",
        "```bash",
        ".venv/bin/python analysis/characterization/plot_current_results.py",
        "```",
        "",
        "| Figure | Intended Claim |",
        "|---|---|",
    ]

    # Resolve every figure once so the table and the previews cannot diverge.
    entries = []
    for path in paths:
        name = Path(path).name
        title, claim = claims[name]
        # Links are relative to the index file, which sits next to "figures/".
        entries.append((name, title, claim, f"figures/{name}"))

    for name, _, claim, rel_path in entries:
        lines.append(f"| [{name}]({rel_path}) | {claim} |")

    lines.extend(["", "## Figure Previews", ""])
    for _, title, claim, rel_path in entries:
        # Embed the image itself; an empty line here would leave the preview
        # section with headings and claims but no figure.
        lines.extend([f"### {title}", "", claim, "", f"![{title}]({rel_path})", ""])

    (ROOT / "all_figures_index.md").write_text("\n".join(lines).rstrip() + "\n", encoding="utf-8")
|
||||
|
||||
|
||||
# Run the figure pipeline only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user