#!/usr/bin/env python3
|
|
"""Generate matplotlib figures for the current characterization package."""
|
|
|
|
from __future__ import annotations
|
|
|
|
import json
|
|
from pathlib import Path
|
|
from typing import Any
|
|
|
|
import matplotlib
|
|
|
|
matplotlib.use("Agg")
|
|
import matplotlib.pyplot as plt
|
|
|
|
|
|
# Directory holding the JSON summaries this script reads; the figures index is
# written here as well. The path is relative, so it resolves against the
# current working directory.
ROOT = Path("analysis") / "characterization" / "current_results"

# Output directory for the rendered PNG figures.
FIG_DIR = ROOT / "figures"
|
|
|
|
|
|
def main() -> None:
    """Render every characterization figure and write the Markdown index.

    Reads the three JSON summaries under ``ROOT``, draws each figure into
    ``FIG_DIR``, writes the figures index, and prints each saved path on its
    own line.
    """
    FIG_DIR.mkdir(parents=True, exist_ok=True)

    # All inputs are loaded before any figure is drawn.
    full_trace = load_json(ROOT / "full_trace_summary.json")
    runs = load_json(ROOT / "run_summaries.json")
    claims = load_json(ROOT / "claim_matrix.json")

    # (plot function, its input). The order here is the order of the entries
    # in the generated index and of the printed paths.
    jobs = (
        (plot_full_trace_workload, full_trace),
        (plot_session_skew, full_trace),
        (plot_pdsep_vs_combined, runs),
        (plot_elastic_vs_baseline, runs),
        (plot_gpu_balance, runs),
        (plot_claim_status, claims),
    )
    paths = [plot(data) for plot, data in jobs]

    write_figures_index(paths)
    print("\n".join(paths))
|
|
|
|
|
|
def load_json(path: Path) -> Any:
    """Parse the UTF-8 encoded JSON file at ``path`` and return its content.

    Args:
        path: Location of the JSON document.

    Returns:
        Whatever Python value the document decodes to.
    """
    with path.open(encoding="utf-8") as handle:
        return json.load(handle)
|
|
|
|
|
|
def plot_full_trace_workload(summary: dict[str, Any]) -> str:
    """Draw grouped log-scale bars of the p50/p90/p99 token statistics.

    Args:
        summary: Parsed trace summary. The keys read here are ``input``,
            ``output`` and ``input_output_ratio`` (each indexed by ``p50``,
            ``p90`` and ``p99``), plus ``records``, ``sessions`` and
            ``trace_span_s`` for the footnote.

    Returns:
        The path of the saved PNG, as returned by ``save``.
    """
    percentiles = ["p50", "p90", "p99"]
    # (legend label, key in ``summary``, bar colour) for each bar group.
    metrics = [
        ("input tokens", "input", "#2f6fab"),
        ("output tokens", "output", "#dd8452"),
        ("input/output", "input_output_ratio", "#4c995c"),
    ]
    # Every value is read before the figure is created.
    groups = [
        (legend, [summary[field][pct] for pct in percentiles], colour)
        for legend, field, colour in metrics
    ]

    figure, axis = plt.subplots(figsize=(9, 5.5))
    bar_width = 0.24
    ticks = list(range(len(percentiles)))
    for slot, (legend, heights, colour) in enumerate(groups):
        # Slot 1 sits on the tick; slots 0 and 2 sit one bar-width either side.
        shift = (slot - 1) * bar_width
        centres = [tick + shift for tick in ticks]
        axis.bar(centres, heights, width=bar_width, label=legend, color=colour)
        for centre, height in zip(centres, heights):
            # Labels are placed 8% above the bar top (multiplicative, since
            # the y axis is logarithmic).
            axis.text(centre, height * 1.08, short_num(height), ha="center", va="bottom", fontsize=9)

    axis.set_yscale("log")
    axis.set_xticks(ticks, percentiles)
    axis.set_ylabel("value, log scale")
    axis.set_title("Full Trace Workload Shape")

    # Footnote in axes coordinates; the negative y places it below the plot.
    footnote = f"Requests={summary['records']:,}; sessions={summary['sessions']:,}; span={summary['trace_span_s']:.1f}s"
    axis.text(0.01, -0.22, footnote, transform=axis.transAxes, fontsize=10, color="#555")

    axis.grid(True, axis="y", alpha=0.25)
    axis.legend()
    return save(figure, "fig_full_trace_workload.png")
|
|
|
|
|
|
def plot_session_skew(summary: dict[str, Any]) -> str:
    """Draw the share of input-token mass held by the top 1%/5%/10% of sessions.

    Args:
        summary: Parsed trace summary. Reads ``top_session_input_fraction``
            (keys ``top1pct``, ``top5pct``, ``top10pct``; each is multiplied
            by 100 for display) and ``session_input_tokens`` (keys ``p50``,
            ``p90``, ``p99``, ``max``) for the footnote.

    Returns:
        The path of the saved PNG, as returned by ``save``.
    """
    shares = summary["top_session_input_fraction"]
    # (tick label, key in ``shares``, bar colour).
    tiers = (
        ("top 1%", "top1pct", "#c44e52"),
        ("top 5%", "top5pct", "#dd8452"),
        ("top 10%", "top10pct", "#2f6fab"),
    )
    tick_labels = [label for label, _, _ in tiers]
    percents = [shares[key] * 100 for _, key, _ in tiers]
    palette = [colour for _, _, colour in tiers]

    figure, axis = plt.subplots(figsize=(8, 5))
    rects = axis.bar(tick_labels, percents, color=palette)
    for rect, percent in zip(rects, percents):
        centre = rect.get_x() + rect.get_width() / 2
        axis.text(centre, percent + 1.5, f"{percent:.1f}%", ha="center")

    axis.set_ylim(0, 100)
    axis.set_ylabel("% of input-token mass")
    axis.set_title("Session Token-Mass Skew")

    # Footnote in axes coordinates; the negative y places it below the plot.
    tokens = summary["session_input_tokens"]
    quantiles = " / ".join(short_num(tokens[key]) for key in ("p50", "p90", "p99", "max"))
    axis.text(
        0.01,
        -0.20,
        "Session input-token p50/p90/p99/max = " + quantiles,
        transform=axis.transAxes,
        fontsize=10,
        color="#555",
    )

    axis.grid(True, axis="y", alpha=0.25)
    return save(figure, "fig_session_skew.png")
|
|
|
|
|
|
def plot_pdsep_vs_combined(runs: list[dict[str, Any]]) -> str:
    """Compare TTFT and end-to-end latency of the PD-sep run against the combined run.

    Args:
        runs: Run summaries, each carrying a ``run`` identifier. The entries
            ``outputs/gpu_ab_combined`` and ``outputs/gpu_ab_pdsep`` must be
            present, with ``ttft_stats_s``, ``latency_stats_s``,
            ``error_count`` and ``wall_clock_s``.

    Returns:
        The path of the saved PNG, as returned by ``save``.
    """
    index = {entry["run"]: entry for entry in runs}
    combined = index["outputs/gpu_ab_combined"]
    pdsep = index["outputs/gpu_ab_pdsep"]

    # (tick label, stats group, percentile key) for each bar pair.
    metrics = (
        ("TTFT p50", "ttft_stats_s", "p50"),
        ("TTFT p90", "ttft_stats_s", "p90"),
        ("E2E p50", "latency_stats_s", "p50"),
        ("E2E p90", "latency_stats_s", "p90"),
    )
    tick_labels = [label for label, _, _ in metrics]
    combined_vals = [stat(combined, group, key) for _, group, key in metrics]
    pdsep_vals = [stat(pdsep, group, key) for _, group, key in metrics]

    figure, axis = plt.subplots(figsize=(9, 5))
    grouped_bars(
        axis,
        tick_labels,
        [("combined", combined_vals), ("PD-sep", pdsep_vals)],
        ["#2f6fab", "#c44e52"],
    )
    axis.set_ylabel("seconds")
    axis.set_title("Static PD-Sep vs Combined Baseline")

    # Footnote in axes coordinates; the negative y places it below the plot.
    # The wall-clock delta is PD-sep relative to the combined run.
    footnote = (
        f"Errors: combined={combined['error_count']}, PD-sep={pdsep['error_count']}; "
        f"wall-clock delta={pct_delta(combined['wall_clock_s'], pdsep['wall_clock_s']):+.1f}%"
    )
    axis.text(0.01, -0.22, footnote, transform=axis.transAxes, fontsize=10, color="#555")

    axis.grid(True, axis="y", alpha=0.25)
    axis.legend()
    return save(figure, "fig_pdsep_vs_combined.png")
|
|
|
|
|
|
def plot_elastic_vs_baseline(runs: list[dict[str, Any]]) -> str:
    """Compare latency percentiles of the elastic run against the contention baseline.

    Args:
        runs: Run summaries, each carrying a ``run`` identifier. The entries
            ``outputs/contention_16s_ts10`` and
            ``outputs/contention_16s_elastic`` must be present, with
            ``ttft_stats_s``, ``latency_stats_s``, ``tpot_stats_s`` and
            ``gpu_summary.max_min_ratio``.

    Returns:
        The path of the saved PNG, as returned by ``save``.
    """
    index = {entry["run"]: entry for entry in runs}
    baseline = index["outputs/contention_16s_ts10"]
    elastic = index["outputs/contention_16s_elastic"]

    # (tick label, stats group, percentile key) for each bar pair.
    metrics = (
        ("TTFT p50", "ttft_stats_s", "p50"),
        ("TTFT p90", "ttft_stats_s", "p90"),
        ("E2E p50", "latency_stats_s", "p50"),
        ("E2E p90", "latency_stats_s", "p90"),
        ("TPOT p90", "tpot_stats_s", "p90"),
    )
    tick_labels = [label for label, _, _ in metrics]
    baseline_vals = [stat(baseline, group, key) for _, group, key in metrics]
    elastic_vals = [stat(elastic, group, key) for _, group, key in metrics]

    figure, axis = plt.subplots(figsize=(10, 5))
    grouped_bars(
        axis,
        tick_labels,
        [("baseline", baseline_vals), ("elastic", elastic_vals)],
        ["#2f6fab", "#dd8452"],
    )
    axis.set_ylabel("seconds")
    axis.set_title("Elastic Transfer-Based Migration vs High-Contention Baseline")

    # Footnote in axes coordinates; the negative y places it below the plot.
    footnote = (
        f"GPU imbalance ratio: baseline={nested(baseline, ['gpu_summary', 'max_min_ratio']):.2f}x, "
        f"elastic={nested(elastic, ['gpu_summary', 'max_min_ratio']):.2f}x"
    )
    axis.text(0.01, -0.22, footnote, transform=axis.transAxes, fontsize=10, color="#555")

    axis.grid(True, axis="y", alpha=0.25)
    axis.legend()
    return save(figure, "fig_elastic_vs_baseline.png")
|
|
|
|
|
|
def plot_gpu_balance(runs: list[dict[str, Any]]) -> str:
    """Draw mean GPU utilization and the max/min imbalance ratio for four runs.

    Args:
        runs: Run summaries, each carrying a ``run`` identifier and a
            ``gpu_summary`` with ``mean_util_pct`` and ``max_min_ratio``. The
            four run identifiers listed in ``chosen`` must be present.

    Returns:
        The path of the saved PNG, as returned by ``save``.
    """
    # (tick label, run identifier), in left-to-right bar order.
    chosen = (
        ("combined", "outputs/gpu_ab_combined"),
        ("PD-sep", "outputs/gpu_ab_pdsep"),
        ("16s base", "outputs/contention_16s_ts10"),
        ("16s elastic", "outputs/contention_16s_elastic"),
    )
    index = {entry["run"]: entry for entry in runs}
    tick_labels = [label for label, _ in chosen]
    mean_util = [nested(index[run_id], ["gpu_summary", "mean_util_pct"]) for _, run_id in chosen]
    imbalance = [nested(index[run_id], ["gpu_summary", "max_min_ratio"]) for _, run_id in chosen]

    figure, (util_axis, ratio_axis) = plt.subplots(1, 2, figsize=(11, 4.8))
    # One entry per panel: (axes, bar heights, bar colour, y label, title).
    panels = (
        (util_axis, mean_util, "#4c995c", "mean GPU util (%)", "Mean Utilization"),
        (ratio_axis, imbalance, "#76619c", "max/min mean util", "Imbalance Ratio"),
    )
    for panel, heights, colour, y_label, title in panels:
        panel.bar(tick_labels, heights, color=colour)
        panel.set_ylabel(y_label)
        panel.set_title(title)
        panel.tick_params(axis="x", rotation=20)
        panel.grid(True, axis="y", alpha=0.25)

    figure.suptitle("GPU Utilization Balance in Existing Runs")
    # Caveat printed in figure coordinates, near the bottom-left corner.
    figure.text(
        0.02,
        0.01,
        "GPU util imbalance is suggestive only; hot-spot causality still needs per-worker queue and session mapping.",
        fontsize=10,
        color="#555",
    )
    return save(figure, "fig_gpu_balance.png")
|
|
|
|
|
|
def plot_claim_status(claims: list[dict[str, Any]]) -> str:
    """Draw a bar chart of how many claims fall into each support status.

    Statuses with a zero count are omitted. Each known status always gets the
    same colour, whichever other statuses happen to be present. A status that
    is not one of the four known ones is still plotted, after the known ones
    and in a neutral grey, rather than being counted and then dropped.

    Args:
        claims: Claim records; only the ``status`` key of each is read.

    Returns:
        The path of the saved PNG, as returned by ``save``.
    """
    # Known statuses in display order, each tied to a fixed colour.
    status_colors = {
        "supported_by_existing_artifact": "#4c995c",
        "supported_for_trace_shape": "#2f6fab",
        "partially_supported": "#dd8452",
        "not_yet_supported": "#c44e52",
    }
    unknown_color = "#8c8c8c"

    # Seeding with the known statuses fixes their order; dict insertion order
    # then appends any unexpected status in first-seen order.
    counts = dict.fromkeys(status_colors, 0)
    for claim in claims:
        status = claim["status"]
        counts[status] = counts.get(status, 0) + 1

    shown = [(status, count) for status, count in counts.items() if count]
    # Underscores become line breaks so long status names fit under the bars.
    labels = [status.replace("_", "\n") for status, _ in shown]
    values = [count for _, count in shown]
    colors = [status_colors.get(status, unknown_color) for status, _ in shown]

    fig, ax = plt.subplots(figsize=(9, 5))
    bars = ax.bar(labels, values, color=colors)
    for bar, value in zip(bars, values):
        ax.text(bar.get_x() + bar.get_width() / 2, value + 0.05, str(value), ha="center")
    ax.set_ylabel("claim count")
    ax.set_title("Current Claim Support Status")
    ax.grid(True, axis="y", alpha=0.25)
    return save(fig, "fig_claim_status.png")
|
|
|
|
|
|
def grouped_bars(ax: Any, labels: list[str], series: list[tuple[str, list[float]]], colors: list[str]) -> None:
    """Draw side-by-side bars for several series, centred on each tick.

    Args:
        ax: Axes-like object to draw on (``bar``, ``text`` and ``set_xticks``
            are called on it).
        labels: One tick label per group.
        series: ``(legend label, values)`` pairs, one value per tick.
        colors: One colour per series; pairing is done with ``zip``, so
            surplus entries on either side are ignored.
    """
    ticks = list(range(len(labels)))
    bar_width = 0.35
    # Position of the (possibly fractional) middle series; offsets are taken
    # relative to it so the whole group straddles the tick.
    middle = (len(series) - 1) / 2
    for slot, ((legend, heights), colour) in enumerate(zip(series, colors)):
        shift = (slot - middle) * bar_width
        centres = [tick + shift for tick in ticks]
        rects = ax.bar(centres, heights, width=bar_width, label=legend, color=colour)
        for rect, height in zip(rects, heights):
            label_x = rect.get_x() + rect.get_width() / 2
            # Value label sits 2% above the bar top.
            ax.text(label_x, height * 1.02, short_num(height), ha="center", va="bottom", fontsize=8)
    ax.set_xticks(ticks, labels)
|
|
|
|
|
|
def stat(run: dict[str, Any], stat_name: str, key: str) -> float:
    """Return ``run[stat_name][key]`` converted to ``float``.

    Args:
        run: A run summary mapping.
        stat_name: Name of the statistics group inside ``run``.
        key: Entry to read from that group.

    Raises:
        KeyError: If either lookup fails.
    """
    group = run[stat_name]
    return float(group[key])
|
|
|
|
|
|
def nested(run: dict[str, Any], keys: list[str]) -> float:
    """Follow ``keys`` down through nested mappings and return the leaf as ``float``.

    Args:
        run: The outermost mapping.
        keys: Successive keys to index with, outermost first.

    Raises:
        KeyError: If any key along the path is missing.
    """
    node: Any = run
    for step in keys:
        # Descend one level per key.
        node = node[step]
    return float(node)
|
|
|
|
|
|
def pct_delta(base: float, variant: float) -> float:
    """Return the change from ``base`` to ``variant`` as a percentage of ``base``.

    A positive result means ``variant`` is larger than ``base``.

    Raises:
        ZeroDivisionError: If ``base`` is zero.
    """
    difference = variant - base
    relative = difference / base
    return relative * 100.0
|
|
|
|
|
|
def short_num(value: float) -> str:
    """Format ``value`` compactly for use as a bar label.

    Larger magnitudes are scaled to ``M`` or ``k`` and get fewer decimals;
    anything with magnitude below 10 is shown with two decimals.
    """
    # (minimum magnitude, divisor, format spec, suffix), largest first; the
    # first row whose minimum is met decides the formatting.
    scales = (
        (1_000_000, 1_000_000, ".1f", "M"),
        (10_000, 1000, ".1f", "k"),
        (1000, 1000, ".2f", "k"),
        (100, 1, ".0f", ""),
        (10, 1, ".1f", ""),
    )
    magnitude = abs(value)
    for minimum, divisor, spec, suffix in scales:
        if magnitude >= minimum:
            return f"{value / divisor:{spec}}{suffix}"
    return f"{value:.2f}"
|
|
|
|
|
|
def save(fig: Any, name: str) -> str:
    """Write ``fig`` to ``FIG_DIR / name`` and close it.

    Args:
        fig: The figure to save.
        name: File name (including extension) inside ``FIG_DIR``.

    Returns:
        The destination path as a string.
    """
    destination = FIG_DIR / name
    # The layout rectangle leaves a margin at the bottom and top of the figure.
    fig.tight_layout(rect=(0, 0.04, 1, 0.95))
    fig.savefig(destination, dpi=180)
    plt.close(fig)
    return str(destination)
|
|
|
|
|
|
def write_figures_index(paths: list[str]) -> None:
    """Write ``all_figures_index.md`` under ``ROOT``, listing and previewing the figures.

    The index has a table mapping each figure to its intended claim, followed
    by a "Figure Previews" section with a heading, the claim and an embedded
    image per figure.

    Args:
        paths: Saved figure paths; only the file name of each is used, and the
            entries appear in the index in this order.

    Raises:
        KeyError: If a file name has no entry in the claims catalogue below.
    """
    # File name -> (section title, intended claim).
    claims = {
        "fig_full_trace_workload.png": (
            "Full Trace Workload",
            "Full GLM-5.1 trace is long-input, short-output, and high input/output ratio.",
        ),
        "fig_session_skew.png": (
            "Session Skew",
            "Session input-token mass is highly skewed; top sessions dominate work.",
        ),
        "fig_pdsep_vs_combined.png": (
            "PD-Sep vs Combined",
            "Existing static PD-sep A/B regresses TTFT/E2E vs combined.",
        ),
        "fig_elastic_vs_baseline.png": (
            "Elastic vs Baseline",
            "Existing elastic transfer-based run does not improve TTFT/TPOT over high-contention baseline.",
        ),
        "fig_gpu_balance.png": (
            "GPU Balance",
            "Existing runs show GPU-util imbalance, but more data is needed for hot-spot causality.",
        ),
        "fig_claim_status.png": (
            "Claim Status",
            "Current audit separates supported, partial, and unsupported claims.",
        ),
    }
    header = [
        "# Figures Index",
        "",
        "Generated by:",
        "",
        "```bash",
        ".venv/bin/python analysis/characterization/plot_current_results.py",
        "```",
        "",
        "| Figure | Intended Claim |",
        "|---|---|",
    ]

    table_rows: list[str] = []
    previews: list[str] = []
    for path in paths:
        name = Path(path).name
        title, claim = claims[name]
        # The index is written to ROOT and the figures live in ROOT / "figures",
        # so links are relative to the index file.
        rel_path = f"figures/{name}"
        table_rows.append(f"| [{name}]({rel_path}) | {claim} |")
        # Embed the image itself so the preview section shows the figure.
        previews.extend([f"### {title}", "", claim, "", f"![{title}]({rel_path})", ""])

    lines = [*header, *table_rows, "", "## Figure Previews", "", *previews]
    # Strip trailing blank lines and end the file with exactly one newline.
    (ROOT / "all_figures_index.md").write_text("\n".join(lines).rstrip() + "\n", encoding="utf-8")
|
|
|
|
|
|
# Run the generator only when this file is executed as a script.
if __name__ == "__main__":
    main()
|