Files
agentic-kvc/analysis/characterization/plot_current_results.py

333 lines
12 KiB
Python

#!/usr/bin/env python3
"""Generate matplotlib figures for the current characterization package."""
from __future__ import annotations
import json
from pathlib import Path
from typing import Any
import matplotlib
matplotlib.use("Agg")
import matplotlib.pyplot as plt
# Directory holding the JSON inputs read by main() and the generated Markdown
# index. The path is relative, so it is resolved against the current working
# directory of the process.
ROOT = Path("analysis/characterization/current_results")
# Directory the rendered figure files are saved into.
FIG_DIR = ROOT / "figures"
def main() -> None:
    """Render every characterization figure and write the Markdown index.

    Creates ``FIG_DIR``, loads the three JSON inputs found under ``ROOT``,
    renders one figure per plot function, writes the figures index, and
    prints the path of each saved figure on its own line.
    """
    FIG_DIR.mkdir(parents=True, exist_ok=True)

    # Every input is loaded before the first plot call, so a missing or
    # malformed file is reported before any figure is rendered.
    full_trace, runs, claims = (
        load_json(ROOT / filename)
        for filename in (
            "full_trace_summary.json",
            "run_summaries.json",
            "claim_matrix.json",
        )
    )

    # (plot function, input data); this order is also the order of the
    # entries written to the figures index.
    jobs = (
        (plot_full_trace_workload, full_trace),
        (plot_session_skew, full_trace),
        (plot_pdsep_vs_combined, runs),
        (plot_elastic_vs_baseline, runs),
        (plot_gpu_balance, runs),
        (plot_claim_status, claims),
    )
    paths = [render(data) for render, data in jobs]

    write_figures_index(paths)
    print(*paths, sep="\n")
def load_json(path: Path) -> Any:
    """Parse the UTF-8 encoded JSON document at ``path`` and return the result."""
    with path.open(encoding="utf-8") as handle:
        return json.load(handle)
def plot_full_trace_workload(summary: dict[str, Any]) -> str:
    """Plot the p50/p90/p99 token statistics of the full trace.

    For each percentile, three bars are drawn side by side on a log-scale
    axis: ``summary["input"]``, ``summary["output"]`` and
    ``summary["input_output_ratio"]``. A footnote below the axes reports the
    record count, session count and trace span read from ``summary``.

    Returns:
        The path of the saved figure, as a string.
    """
    percentiles = ["p50", "p90", "p99"]
    # (legend label, key in ``summary``, bar color), in left-to-right order.
    metrics = [
        ("input tokens", "input", "#2f6fab"),
        ("output tokens", "output", "#dd8452"),
        ("input/output", "input_output_ratio", "#4c995c"),
    ]
    # Read every value before the figure is created.
    series = [
        (legend, [summary[key][pct] for pct in percentiles], color)
        for legend, key, color in metrics
    ]

    fig, ax = plt.subplots(figsize=(9, 5.5))
    bar_width = 0.24
    centers = range(len(percentiles))
    for slot, (legend, heights, color) in enumerate(series):
        # The middle series sits on the tick; the other two are shifted one
        # bar width to either side.
        shift = (slot - 1) * bar_width
        positions = [center + shift for center in centers]
        ax.bar(positions, heights, width=bar_width, label=legend, color=color)
        for xpos, height in zip(positions, heights):
            # Place the value label slightly above the bar top.
            ax.text(xpos, height * 1.08, short_num(height), ha="center", va="bottom", fontsize=9)

    ax.set_yscale("log")
    ax.set_xticks(list(centers), percentiles)
    ax.set_ylabel("value, log scale")
    ax.set_title("Full Trace Workload Shape")
    # Footnote in axes coordinates, positioned below the x axis.
    ax.text(
        0.01,
        -0.22,
        f"Requests={summary['records']:,}; sessions={summary['sessions']:,}; span={summary['trace_span_s']:.1f}s",
        transform=ax.transAxes,
        fontsize=10,
        color="#555",
    )
    ax.grid(True, axis="y", alpha=0.25)
    ax.legend()
    return save(fig, "fig_full_trace_workload.png")
def plot_session_skew(summary: dict[str, Any]) -> str:
    """Plot the share of input-token mass held by the top sessions.

    Reads the ``top1pct``/``top5pct``/``top10pct`` fractions from
    ``summary["top_session_input_fraction"]`` and draws them as percentages.
    A footnote lists the p50/p90/p99/max values found in
    ``summary["session_input_tokens"]``.

    Returns:
        The path of the saved figure, as a string.
    """
    shares = summary["top_session_input_fraction"]
    # (tick label, key in ``shares``) for each bar.
    buckets = [("top 1%", "top1pct"), ("top 5%", "top5pct"), ("top 10%", "top10pct")]
    tick_labels = [label for label, _ in buckets]
    # Stored values are fractions; the axis is in percent.
    percentages = [shares[key] * 100 for _, key in buckets]

    fig, ax = plt.subplots(figsize=(8, 5))
    rects = ax.bar(tick_labels, percentages, color=["#c44e52", "#dd8452", "#2f6fab"])
    for rect, pct in zip(rects, percentages):
        center_x = rect.get_x() + rect.get_width() / 2
        ax.text(center_x, pct + 1.5, f"{pct:.1f}%", ha="center")
    ax.set_ylim(0, 100)
    ax.set_ylabel("% of input-token mass")
    ax.set_title("Session Token-Mass Skew")

    session_tokens = summary["session_input_tokens"]
    quantile_text = " / ".join(short_num(session_tokens[key]) for key in ("p50", "p90", "p99", "max"))
    # Footnote in axes coordinates, positioned below the x axis.
    ax.text(
        0.01,
        -0.20,
        "Session input-token p50/p90/p99/max = " + quantile_text,
        transform=ax.transAxes,
        fontsize=10,
        color="#555",
    )
    ax.grid(True, axis="y", alpha=0.25)
    return save(fig, "fig_session_skew.png")
def plot_pdsep_vs_combined(runs: list[dict[str, Any]]) -> str:
    """Plot TTFT and E2E latency of the PD-sep run against the combined run.

    Looks up the runs named ``outputs/gpu_ab_combined`` and
    ``outputs/gpu_ab_pdsep`` in ``runs`` (matched on each entry's ``"run"``
    field) and draws their p50/p90 values as grouped bars. A footnote shows
    both error counts and the relative wall-clock difference.

    Returns:
        The path of the saved figure, as a string.
    """
    index = {entry["run"]: entry for entry in runs}
    combined = index["outputs/gpu_ab_combined"]
    pdsep = index["outputs/gpu_ab_pdsep"]

    # (tick label, stats section, percentile) for each bar group.
    metrics = [
        ("TTFT p50", "ttft_stats_s", "p50"),
        ("TTFT p90", "ttft_stats_s", "p90"),
        ("E2E p50", "latency_stats_s", "p50"),
        ("E2E p90", "latency_stats_s", "p90"),
    ]
    labels = [label for label, _, _ in metrics]
    combined_vals = [stat(combined, section, pct) for _, section, pct in metrics]
    pdsep_vals = [stat(pdsep, section, pct) for _, section, pct in metrics]

    fig, ax = plt.subplots(figsize=(9, 5))
    grouped_bars(
        ax,
        labels,
        [("combined", combined_vals), ("PD-sep", pdsep_vals)],
        ["#2f6fab", "#c44e52"],
    )
    ax.set_ylabel("seconds")
    ax.set_title("Static PD-Sep vs Combined Baseline")
    # Footnote in axes coordinates, positioned below the x axis.
    ax.text(
        0.01,
        -0.22,
        f"Errors: combined={combined['error_count']}, PD-sep={pdsep['error_count']}; "
        f"wall-clock delta={pct_delta(combined['wall_clock_s'], pdsep['wall_clock_s']):+.1f}%",
        transform=ax.transAxes,
        fontsize=10,
        color="#555",
    )
    ax.grid(True, axis="y", alpha=0.25)
    ax.legend()
    return save(fig, "fig_pdsep_vs_combined.png")
def plot_elastic_vs_baseline(runs: list[dict[str, Any]]) -> str:
    """Plot latency percentiles of the elastic run against its baseline.

    Looks up the runs named ``outputs/contention_16s_ts10`` (baseline) and
    ``outputs/contention_16s_elastic`` in ``runs`` (matched on each entry's
    ``"run"`` field) and draws TTFT, E2E and TPOT percentiles as grouped
    bars. A footnote shows each run's ``gpu_summary.max_min_ratio``.

    Returns:
        The path of the saved figure, as a string.
    """
    index = {entry["run"]: entry for entry in runs}
    baseline = index["outputs/contention_16s_ts10"]
    elastic = index["outputs/contention_16s_elastic"]

    # (tick label, stats section, percentile) for each bar group.
    metrics = [
        ("TTFT p50", "ttft_stats_s", "p50"),
        ("TTFT p90", "ttft_stats_s", "p90"),
        ("E2E p50", "latency_stats_s", "p50"),
        ("E2E p90", "latency_stats_s", "p90"),
        ("TPOT p90", "tpot_stats_s", "p90"),
    ]
    labels = [label for label, _, _ in metrics]
    baseline_vals = [stat(baseline, section, pct) for _, section, pct in metrics]
    elastic_vals = [stat(elastic, section, pct) for _, section, pct in metrics]

    fig, ax = plt.subplots(figsize=(10, 5))
    grouped_bars(
        ax,
        labels,
        [("baseline", baseline_vals), ("elastic", elastic_vals)],
        ["#2f6fab", "#dd8452"],
    )
    ax.set_ylabel("seconds")
    ax.set_title("Elastic Transfer-Based Migration vs High-Contention Baseline")
    # Footnote in axes coordinates, positioned below the x axis.
    ax.text(
        0.01,
        -0.22,
        f"GPU imbalance ratio: baseline={nested(baseline, ['gpu_summary', 'max_min_ratio']):.2f}x, "
        f"elastic={nested(elastic, ['gpu_summary', 'max_min_ratio']):.2f}x",
        transform=ax.transAxes,
        fontsize=10,
        color="#555",
    )
    ax.grid(True, axis="y", alpha=0.25)
    ax.legend()
    return save(fig, "fig_elastic_vs_baseline.png")
def plot_gpu_balance(runs: list[dict[str, Any]]) -> str:
    """Plot mean GPU utilization and the imbalance ratio for four runs.

    The left panel shows ``gpu_summary.mean_util_pct`` and the right panel
    ``gpu_summary.max_min_ratio`` for the combined, PD-sep, 16s baseline and
    16s elastic runs, each looked up in ``runs`` by its ``"run"`` field.

    Returns:
        The path of the saved figure, as a string.
    """
    # (tick label, run name) in display order.
    selected = [
        ("combined", "outputs/gpu_ab_combined"),
        ("PD-sep", "outputs/gpu_ab_pdsep"),
        ("16s base", "outputs/contention_16s_ts10"),
        ("16s elastic", "outputs/contention_16s_elastic"),
    ]
    index = {entry["run"]: entry for entry in runs}
    labels = [label for label, _ in selected]
    mean_util = [nested(index[run_name], ["gpu_summary", "mean_util_pct"]) for _, run_name in selected]
    imbalance = [nested(index[run_name], ["gpu_summary", "max_min_ratio"]) for _, run_name in selected]

    fig, axes = plt.subplots(1, 2, figsize=(11, 4.8))
    # Both panels share the same styling; only data, color and text differ.
    panels = [
        (axes[0], mean_util, "#4c995c", "mean GPU util (%)", "Mean Utilization"),
        (axes[1], imbalance, "#76619c", "max/min mean util", "Imbalance Ratio"),
    ]
    for panel, heights, color, ylabel, title in panels:
        panel.bar(labels, heights, color=color)
        panel.set_ylabel(ylabel)
        panel.set_title(title)
        panel.tick_params(axis="x", rotation=20)
        panel.grid(True, axis="y", alpha=0.25)

    fig.suptitle("GPU Utilization Balance in Existing Runs")
    # Caveat in figure coordinates, near the bottom-left corner.
    fig.text(
        0.02,
        0.01,
        "GPU util imbalance is suggestive only; hot-spot causality still needs per-worker queue and session mapping.",
        fontsize=10,
        color="#555",
    )
    return save(fig, "fig_gpu_balance.png")
def plot_claim_status(claims: list[dict[str, Any]]) -> str:
    """Plot how many claims fall into each support status.

    Each entry of ``claims`` is counted by its ``"status"`` field. Known
    statuses are drawn in a fixed order, each with its own fixed color;
    statuses with no claims are omitted. A status that is not in the known
    list is still drawn (after the known ones, in first-seen order) in a
    neutral gray, so every claim is represented in the chart.

    Returns:
        The path of the saved figure, as a string.

    Raises:
        KeyError: If a claim has no ``"status"`` field.
    """
    # The color is tied to the status itself rather than to the bar position,
    # so a bar keeps its color even when an earlier status has no claims.
    status_colors = {
        "supported_by_existing_artifact": "#4c995c",
        "supported_for_trace_shape": "#2f6fab",
        "partially_supported": "#dd8452",
        "not_yet_supported": "#c44e52",
    }
    unknown_color = "#8c8c8c"

    # Seeding with the known statuses fixes their display order; dicts keep
    # insertion order, so unknown statuses follow in first-seen order.
    counts = dict.fromkeys(status_colors, 0)
    for claim in claims:
        status = claim["status"]
        counts[status] = counts.get(status, 0) + 1

    shown = [(status, count) for status, count in counts.items() if count]
    # Underscores become line breaks so long status names fit under a bar.
    labels = [status.replace("_", "\n") for status, _ in shown]
    values = [count for _, count in shown]
    colors = [status_colors.get(status, unknown_color) for status, _ in shown]

    fig, ax = plt.subplots(figsize=(9, 5))
    bars = ax.bar(labels, values, color=colors)
    for bar, value in zip(bars, values):
        ax.text(bar.get_x() + bar.get_width() / 2, value + 0.05, str(value), ha="center")
    ax.set_ylabel("claim count")
    ax.set_title("Current Claim Support Status")
    ax.grid(True, axis="y", alpha=0.25)
    return save(fig, "fig_claim_status.png")
def grouped_bars(ax: Any, labels: list[str], series: list[tuple[str, list[float]]], colors: list[str]) -> None:
    """Draw side-by-side bar groups on ``ax`` with a value label on each bar.

    Args:
        ax: Axes-like object providing ``bar``, ``text`` and ``set_xticks``.
        labels: Tick label for each group; groups sit at 0, 1, 2, ...
        series: ``(legend name, values)`` pairs, one value per group.
        colors: Bar color for each series, paired with ``series`` by index.
    """
    ticks = list(range(len(labels)))
    bar_width = 0.35
    # Midpoint of the series indices; subtracting it centers each group of
    # bars on its tick.
    middle = (len(series) - 1) / 2
    for slot, ((legend, heights), color) in enumerate(zip(series, colors)):
        shift = (slot - middle) * bar_width
        rects = ax.bar(
            [tick + shift for tick in ticks],
            heights,
            width=bar_width,
            label=legend,
            color=color,
        )
        for rect, height in zip(rects, heights):
            center_x = rect.get_x() + rect.get_width() / 2
            # Place the value label slightly above the bar top.
            ax.text(center_x, height * 1.02, short_num(height), ha="center", va="bottom", fontsize=8)
    ax.set_xticks(ticks, labels)
def stat(run: dict[str, Any], stat_name: str, key: str) -> float:
    """Return ``run[stat_name][key]`` converted to ``float``."""
    section = run[stat_name]
    return float(section[key])
def nested(run: dict[str, Any], keys: list[str]) -> float:
    """Follow ``keys`` into ``run`` one level at a time and return the value as ``float``.

    ``nested(run, ["a", "b"])`` is equivalent to ``float(run["a"]["b"])``.
    """
    node: Any = run
    for step in keys:
        # Descend one level per key.
        node = node[step]
    return float(node)
def pct_delta(base: float, variant: float) -> float:
    """Return the change from ``base`` to ``variant`` as a percentage of ``base``.

    A positive result means ``variant`` is larger than ``base``.

    Returns:
        ``(variant - base) / base * 100``, or NaN when ``base`` is zero,
        because a relative change against zero is undefined.
    """
    if base == 0:
        # Report "undefined" instead of raising ZeroDivisionError, so one
        # degenerate measurement does not abort figure generation.
        return float("nan")
    return (variant - base) / base * 100.0
def short_num(value: float) -> str:
    """Format ``value`` compactly for use as a chart label.

    The magnitude selects the format: millions get one decimal and an ``M``
    suffix, thousands get a ``k`` suffix (one decimal from 10,000 up, two
    below), and smaller values get zero, one or two decimals as they shrink.
    The sign of ``value`` is preserved.
    """
    magnitude = abs(value)
    # (minimum magnitude, divisor or None for no scaling, decimals, suffix),
    # checked from the largest tier down.
    tiers = (
        (1_000_000, 1_000_000, 1, "M"),
        (10_000, 1000, 1, "k"),
        (1000, 1000, 2, "k"),
        (100, None, 0, ""),
        (10, None, 1, ""),
    )
    for floor, divisor, decimals, suffix in tiers:
        if magnitude >= floor:
            scaled = value if divisor is None else value / divisor
            return f"{scaled:.{decimals}f}{suffix}"
    return f"{value:.2f}"
def save(fig: Any, name: str) -> str:
    """Save ``fig`` to ``FIG_DIR / name`` at 180 dpi, close it, and return the path.

    Args:
        fig: Figure to lay out and write.
        name: File name of the figure inside ``FIG_DIR``.

    Returns:
        The path of the written file, as a string.
    """
    path = FIG_DIR / name
    # Create the output directory here so a plot function also works when it
    # is called directly rather than through main().
    path.parent.mkdir(parents=True, exist_ok=True)
    try:
        # The rect keeps the layout out of a strip at the bottom and the top
        # of the figure.
        fig.tight_layout(rect=(0, 0.04, 1, 0.95))
        fig.savefig(path, dpi=180)
    finally:
        # Close the figure even if layout or saving fails, so it does not
        # stay registered with pyplot.
        plt.close(fig)
    return str(path)
def write_figures_index(paths: list[str]) -> None:
    """Write ``all_figures_index.md`` under ``ROOT`` for the given figures.

    The Markdown file contains a table linking each figure to its intended
    claim, followed by a preview section per figure. Figures appear in the
    order of ``paths``.

    Raises:
        KeyError: If the file name of a path has no catalog entry.
    """
    # File name -> (section title, intended claim).
    catalog = {
        "fig_full_trace_workload.png": (
            "Full Trace Workload",
            "Full GLM-5.1 trace is long-input, short-output, and high input/output ratio.",
        ),
        "fig_session_skew.png": (
            "Session Skew",
            "Session input-token mass is highly skewed; top sessions dominate work.",
        ),
        "fig_pdsep_vs_combined.png": (
            "PD-Sep vs Combined",
            "Existing static PD-sep A/B regresses TTFT/E2E vs combined.",
        ),
        "fig_elastic_vs_baseline.png": (
            "Elastic vs Baseline",
            "Existing elastic transfer-based run does not improve TTFT/TPOT over high-contention baseline.",
        ),
        "fig_gpu_balance.png": (
            "GPU Balance",
            "Existing runs show GPU-util imbalance, but more data is needed for hot-spot causality.",
        ),
        "fig_claim_status.png": (
            "Claim Status",
            "Current audit separates supported, partial, and unsupported claims.",
        ),
    }

    # Resolve every figure up front; an unknown file name raises here, before
    # anything is written.
    entries = []
    for path in paths:
        name = Path(path).name
        title, claim = catalog[name]
        # Links are written relative to the index file's own directory.
        entries.append((name, title, claim, f"figures/{name}"))

    header = [
        "# Figures Index",
        "",
        "Generated by:",
        "",
        "```bash",
        ".venv/bin/python analysis/characterization/plot_current_results.py",
        "```",
        "",
        "| Figure | Intended Claim |",
        "|---|---|",
    ]
    table_rows = [f"| [{name}]({link}) | {claim} |" for name, _, claim, link in entries]
    previews = []
    for _, title, claim, link in entries:
        previews += [f"### {title}", "", claim, "", f"![{title}]({link})", ""]

    lines = header + table_rows + ["", "## Figure Previews", ""] + previews
    # Strip trailing blank lines and end the file with exactly one newline.
    (ROOT / "all_figures_index.md").write_text("\n".join(lines).rstrip() + "\n", encoding="utf-8")
# Generate all figures and the index when this file is run as a script.
if __name__ == "__main__":
    main()