# agentic-kvc/analysis/characterization/plot_current_results.py

#!/usr/bin/env python3
"""Generate matplotlib figures for the current characterization package."""

from __future__ import annotations

import json
from pathlib import Path
from typing import Any

import matplotlib

matplotlib.use("Agg")
import matplotlib.pyplot as plt


# Directory holding the JSON summaries read by main() and the generated
# markdown index. The path is relative, so it resolves against the current
# working directory of the process running this script.
ROOT = Path("analysis/characterization/current_results")
# Destination directory for the PNG figures written by save().
FIG_DIR = ROOT / "figures"


def main() -> None:
    """Render every characterization figure and refresh the markdown index.

    Creates ``FIG_DIR`` if needed, loads the three JSON summaries stored under
    ``ROOT``, produces one figure per plot function, rewrites the figures
    index, and finally prints the path of each figure on its own line.
    """
    FIG_DIR.mkdir(parents=True, exist_ok=True)

    # All inputs are loaded up front so a missing file fails before plotting.
    full_trace, runs, claims = (
        load_json(ROOT / filename)
        for filename in (
            "full_trace_summary.json",
            "run_summaries.json",
            "claim_matrix.json",
        )
    )

    figure_paths = [
        plot_full_trace_workload(full_trace),
        plot_session_skew(full_trace),
        plot_pdsep_vs_combined(runs),
        plot_elastic_vs_baseline(runs),
        plot_gpu_balance(runs),
        plot_claim_status(claims),
    ]
    write_figures_index(figure_paths)
    print(*figure_paths, sep="\n")


def load_json(path: Path) -> Any:
    """Parse the UTF-8 encoded JSON document at *path* and return its content."""
    with path.open(encoding="utf-8") as handle:
        return json.load(handle)


def plot_full_trace_workload(summary: dict[str, Any]) -> str:
    """Plot p50/p90/p99 of input tokens, output tokens and their ratio.

    Draws three bars per percentile on a log-scaled y-axis, annotates each
    bar with its abbreviated value, and adds a footer with the request count,
    session count and trace span read from *summary*.

    Returns:
        The path of the saved figure, as returned by ``save``.
    """
    percentiles = ["p50", "p90", "p99"]
    # (legend label, key in summary, bar color), left to right within a group.
    metrics = [
        ("input tokens", "input", "#2f6fab"),
        ("output tokens", "output", "#dd8452"),
        ("input/output", "input_output_ratio", "#4c995c"),
    ]
    # Read every value before creating the figure.
    series = [
        (legend, [summary[key][pct] for pct in percentiles], color)
        for legend, key, color in metrics
    ]

    fig, ax = plt.subplots(figsize=(9, 5.5))
    bar_width = 0.24
    ticks = range(len(percentiles))
    for slot, (legend, heights, color) in enumerate(series):
        # The middle series sits on the tick; the others flank it.
        shift = (slot - 1) * bar_width
        centers = [tick + shift for tick in ticks]
        ax.bar(centers, heights, width=bar_width, label=legend, color=color)
        for center, height in zip(centers, heights):
            ax.text(center, height * 1.08, short_num(height), ha="center", va="bottom", fontsize=9)

    ax.set_yscale("log")
    ax.set_xticks(list(ticks), percentiles)
    ax.set_ylabel("value, log scale")
    ax.set_title("Full Trace Workload Shape")

    footer = (
        f"Requests={summary['records']:,}; sessions={summary['sessions']:,}; "
        f"span={summary['trace_span_s']:.1f}s"
    )
    ax.text(0.01, -0.22, footer, transform=ax.transAxes, fontsize=10, color="#555")
    ax.grid(True, axis="y", alpha=0.25)
    ax.legend()
    return save(fig, "fig_full_trace_workload.png")


def plot_session_skew(summary: dict[str, Any]) -> str:
    """Plot the share of input-token mass held by the top 1%/5%/10% sessions.

    The fractions in ``summary["top_session_input_fraction"]`` are scaled to
    percentages and shown as labelled bars on a 0-100 axis. A footer lists
    the p50/p90/p99/max values from ``summary["session_input_tokens"]``.

    Returns:
        The path of the saved figure, as returned by ``save``.
    """
    shares = summary["top_session_input_fraction"]
    # (tick label, key in the fraction mapping, bar color)
    buckets = [
        ("top 1%", "top1pct", "#c44e52"),
        ("top 5%", "top5pct", "#dd8452"),
        ("top 10%", "top10pct", "#2f6fab"),
    ]
    names = [name for name, _, _ in buckets]
    percents = [shares[key] * 100 for _, key, _ in buckets]
    palette = [color for _, _, color in buckets]

    fig, ax = plt.subplots(figsize=(8, 5))
    rects = ax.bar(names, percents, color=palette)
    for rect, percent in zip(rects, percents):
        middle = rect.get_x() + rect.get_width() / 2
        ax.text(middle, percent + 1.5, f"{percent:.1f}%", ha="center")
    ax.set_ylim(0, 100)
    ax.set_ylabel("% of input-token mass")
    ax.set_title("Session Token-Mass Skew")

    session_tokens = summary["session_input_tokens"]
    quantiles = " / ".join(
        short_num(session_tokens[name]) for name in ("p50", "p90", "p99", "max")
    )
    ax.text(
        0.01,
        -0.20,
        "Session input-token p50/p90/p99/max = " + quantiles,
        transform=ax.transAxes,
        fontsize=10,
        color="#555",
    )
    ax.grid(True, axis="y", alpha=0.25)
    return save(fig, "fig_session_skew.png")


def plot_pdsep_vs_combined(runs: list[dict[str, Any]]) -> str:
    """Compare TTFT and end-to-end latency of the combined and PD-sep runs.

    The two runs are looked up in *runs* by their ``"run"`` value. Their
    p50/p90 TTFT and latency statistics are drawn as grouped bars, and a
    footer reports both error counts plus the wall-clock delta of PD-sep
    relative to combined.

    Returns:
        The path of the saved figure, as returned by ``save``.
    """
    index = {entry["run"]: entry for entry in runs}
    combined = index["outputs/gpu_ab_combined"]
    pdsep = index["outputs/gpu_ab_pdsep"]

    # (tick label, stats block, percentile key)
    metrics = [
        ("TTFT p50", "ttft_stats_s", "p50"),
        ("TTFT p90", "ttft_stats_s", "p90"),
        ("E2E p50", "latency_stats_s", "p50"),
        ("E2E p90", "latency_stats_s", "p90"),
    ]
    tick_labels = [label for label, _, _ in metrics]
    combined_vals = [stat(combined, block, pct) for _, block, pct in metrics]
    pdsep_vals = [stat(pdsep, block, pct) for _, block, pct in metrics]

    fig, ax = plt.subplots(figsize=(9, 5))
    grouped_bars(
        ax,
        tick_labels,
        [("combined", combined_vals), ("PD-sep", pdsep_vals)],
        ["#2f6fab", "#c44e52"],
    )
    ax.set_ylabel("seconds")
    ax.set_title("Static PD-Sep vs Combined Baseline")

    footer = (
        f"Errors: combined={combined['error_count']}, PD-sep={pdsep['error_count']}; "
        f"wall-clock delta={pct_delta(combined['wall_clock_s'], pdsep['wall_clock_s']):+.1f}%"
    )
    ax.text(0.01, -0.22, footer, transform=ax.transAxes, fontsize=10, color="#555")
    ax.grid(True, axis="y", alpha=0.25)
    ax.legend()
    return save(fig, "fig_pdsep_vs_combined.png")


def plot_elastic_vs_baseline(runs: list[dict[str, Any]]) -> str:
    """Compare latency statistics of the elastic run against its baseline.

    The two runs are looked up in *runs* by their ``"run"`` value. TTFT and
    end-to-end p50/p90 plus TPOT p90 are drawn as grouped bars, and a footer
    reports each run's ``gpu_summary.max_min_ratio``.

    Returns:
        The path of the saved figure, as returned by ``save``.
    """
    index = {entry["run"]: entry for entry in runs}
    baseline = index["outputs/contention_16s_ts10"]
    elastic = index["outputs/contention_16s_elastic"]

    # (tick label, stats block, percentile key)
    metrics = [
        ("TTFT p50", "ttft_stats_s", "p50"),
        ("TTFT p90", "ttft_stats_s", "p90"),
        ("E2E p50", "latency_stats_s", "p50"),
        ("E2E p90", "latency_stats_s", "p90"),
        ("TPOT p90", "tpot_stats_s", "p90"),
    ]
    tick_labels = [label for label, _, _ in metrics]
    baseline_vals = [stat(baseline, block, pct) for _, block, pct in metrics]
    elastic_vals = [stat(elastic, block, pct) for _, block, pct in metrics]

    fig, ax = plt.subplots(figsize=(10, 5))
    grouped_bars(
        ax,
        tick_labels,
        [("baseline", baseline_vals), ("elastic", elastic_vals)],
        ["#2f6fab", "#dd8452"],
    )
    ax.set_ylabel("seconds")
    ax.set_title("Elastic Transfer-Based Migration vs High-Contention Baseline")

    ratio_path = ["gpu_summary", "max_min_ratio"]
    footer = (
        f"GPU imbalance ratio: baseline={nested(baseline, ratio_path):.2f}x, "
        f"elastic={nested(elastic, ratio_path):.2f}x"
    )
    ax.text(0.01, -0.22, footer, transform=ax.transAxes, fontsize=10, color="#555")
    ax.grid(True, axis="y", alpha=0.25)
    ax.legend()
    return save(fig, "fig_elastic_vs_baseline.png")


def plot_gpu_balance(runs: list[dict[str, Any]]) -> str:
    """Plot mean GPU utilization and max/min imbalance for four selected runs.

    The left panel shows ``gpu_summary.mean_util_pct`` and the right panel
    shows ``gpu_summary.max_min_ratio`` for each selected run. A caveat about
    interpreting the imbalance is printed beneath both panels.

    Returns:
        The path of the saved figure, as returned by ``save``.
    """
    # (tick label, value of the run's "run" field)
    selected = [
        ("combined", "outputs/gpu_ab_combined"),
        ("PD-sep", "outputs/gpu_ab_pdsep"),
        ("16s base", "outputs/contention_16s_ts10"),
        ("16s elastic", "outputs/contention_16s_elastic"),
    ]
    index = {entry["run"]: entry for entry in runs}
    tick_labels = [label for label, _ in selected]
    mean_util = [nested(index[run_id], ["gpu_summary", "mean_util_pct"]) for _, run_id in selected]
    imbalance = [nested(index[run_id], ["gpu_summary", "max_min_ratio"]) for _, run_id in selected]

    fig, axes = plt.subplots(1, 2, figsize=(11, 4.8))
    # (bar heights, bar color, y-axis label, panel title), left panel first.
    panels = [
        (mean_util, "#4c995c", "mean GPU util (%)", "Mean Utilization"),
        (imbalance, "#76619c", "max/min mean util", "Imbalance Ratio"),
    ]
    for axis, (heights, color, ylabel, title) in zip(axes, panels):
        axis.bar(tick_labels, heights, color=color)
        axis.set_ylabel(ylabel)
        axis.set_title(title)
        axis.tick_params(axis="x", rotation=20)
        axis.grid(True, axis="y", alpha=0.25)

    fig.suptitle("GPU Utilization Balance in Existing Runs")
    fig.text(
        0.02,
        0.01,
        "GPU util imbalance is suggestive only; hot-spot causality still needs per-worker queue and session mapping.",
        fontsize=10,
        color="#555",
    )
    return save(fig, "fig_gpu_balance.png")


def plot_claim_status(claims: list[dict[str, Any]]) -> str:
    """Plot how many claims fall into each support status.

    Known statuses are drawn in a fixed order, each with its own fixed color,
    and statuses with no claims are omitted. Statuses that are not in the
    known set are appended after the known ones, in first-seen order, with a
    neutral grey color so every claim is represented in the chart.

    Args:
        claims: Claim records; each must provide a ``"status"`` entry.

    Returns:
        The path of the saved figure, as returned by ``save``.

    Raises:
        KeyError: If a claim has no ``"status"`` entry.
    """
    # Insertion order of this mapping is the display order of known statuses.
    status_colors = {
        "supported_by_existing_artifact": "#4c995c",
        "supported_for_trace_shape": "#2f6fab",
        "partially_supported": "#dd8452",
        "not_yet_supported": "#c44e52",
    }
    unknown_color = "#8c8c8c"

    counts: dict[Any, int] = {}
    for claim in claims:
        status = claim["status"]
        counts[status] = counts.get(status, 0) + 1

    shown = [status for status in status_colors if counts.get(status)]
    # Unexpected statuses are plotted too; dropping them would undercount.
    shown.extend(status for status in counts if status not in status_colors)

    labels = [str(status).replace("_", "\n") for status in shown]
    values = [counts[status] for status in shown]
    # Color is looked up per status, so a status keeps its color even when an
    # earlier status has no claims and is left out of the chart.
    colors = [status_colors.get(status, unknown_color) for status in shown]

    fig, ax = plt.subplots(figsize=(9, 5))
    bars = ax.bar(labels, values, color=colors)
    for bar, value in zip(bars, values):
        ax.text(bar.get_x() + bar.get_width() / 2, value + 0.05, str(value), ha="center")
    ax.set_ylabel("claim count")
    ax.set_title("Current Claim Support Status")
    ax.grid(True, axis="y", alpha=0.25)
    return save(fig, "fig_claim_status.png")


def grouped_bars(ax: Any, labels: list[str], series: list[tuple[str, list[float]]], colors: list[str]) -> None:
    """Draw side-by-side bars for each series, centered on integer ticks.

    Args:
        ax: Axes-like object providing ``bar``, ``text`` and ``set_xticks``.
        labels: Tick labels, one per group of bars.
        series: ``(legend name, values)`` pairs, one value per label.
        colors: Bar color for each series, paired with *series* by position.
    """
    bar_width = 0.35
    ticks = list(range(len(labels)))
    # Offsets are measured from the middle of the group so it stays centered.
    middle_slot = (len(series) - 1) / 2
    for slot, ((legend, heights), color) in enumerate(zip(series, colors)):
        shift = (slot - middle_slot) * bar_width
        centers = [tick + shift for tick in ticks]
        rects = ax.bar(centers, heights, width=bar_width, label=legend, color=color)
        for rect, height in zip(rects, heights):
            ax.text(
                rect.get_x() + rect.get_width() / 2,
                height * 1.02,
                short_num(height),
                ha="center",
                va="bottom",
                fontsize=8,
            )
    ax.set_xticks(ticks, labels)


def stat(run: dict[str, Any], stat_name: str, key: str) -> float:
    """Return ``run[stat_name][key]`` converted to ``float``."""
    stats_block = run[stat_name]
    return float(stats_block[key])


def nested(run: dict[str, Any], keys: list[str]) -> float:
    """Follow *keys* through nested containers and return the leaf as ``float``."""
    if not keys:
        return float(run)
    head, *rest = keys
    return nested(run[head], rest)


def pct_delta(base: float, variant: float) -> float:
    """Return the change from *base* to *variant* as a percentage of *base*."""
    difference = variant - base
    relative = difference / base
    return relative * 100.0


def short_num(value: float) -> str:
    """Format *value* compactly, with precision chosen by its magnitude.

    Magnitudes of a million or more use an ``M`` suffix, magnitudes of a
    thousand or more use a ``k`` suffix, and smaller values are printed with
    zero, one or two decimals depending on their size.
    """
    magnitude = abs(value)
    # (lower bound on |value|, divisor or None for no scaling, decimals,
    # suffix), ordered from the largest bound down so the first match wins.
    tiers = (
        (1_000_000, 1_000_000, 1, "M"),
        (10_000, 1000, 1, "k"),
        (1000, 1000, 2, "k"),
        (100, None, 0, ""),
        (10, None, 1, ""),
    )
    for lower_bound, divisor, decimals, suffix in tiers:
        if magnitude >= lower_bound:
            scaled = value if divisor is None else value / divisor
            return f"{scaled:.{decimals}f}{suffix}"
    return f"{value:.2f}"


def save(fig: Any, name: str) -> str:
    """Write *fig* to ``FIG_DIR / name``, close it, and return the path as a string.

    The layout is tightened inside a rectangle that leaves a margin at the
    bottom and top of the figure before the file is written at 180 dpi.
    """
    destination = FIG_DIR / name
    layout_box = (0, 0.04, 1, 0.95)  # (left, bottom, right, top) in figure fractions
    fig.tight_layout(rect=layout_box)
    fig.savefig(destination, dpi=180)
    # Close the figure once it has been written to disk.
    plt.close(fig)
    return str(destination)


def write_figures_index(paths: list[str]) -> None:
    """Write ``all_figures_index.md`` under ``ROOT`` for the given figures.

    The document has a table linking each figure to its intended claim,
    followed by a preview section with a heading, the claim and an embedded
    image per figure. Figures appear in the order of *paths*.

    Args:
        paths: Figure paths; only the file name of each is used, and it must
            be one of the known figure names.

    Raises:
        KeyError: If a file name in *paths* has no catalog entry.
    """
    # File name -> (preview heading, intended claim).
    catalog = {
        "fig_full_trace_workload.png": (
            "Full Trace Workload",
            "Full GLM-5.1 trace is long-input, short-output, and high input/output ratio.",
        ),
        "fig_session_skew.png": (
            "Session Skew",
            "Session input-token mass is highly skewed; top sessions dominate work.",
        ),
        "fig_pdsep_vs_combined.png": (
            "PD-Sep vs Combined",
            "Existing static PD-sep A/B regresses TTFT/E2E vs combined.",
        ),
        "fig_elastic_vs_baseline.png": (
            "Elastic vs Baseline",
            "Existing elastic transfer-based run does not improve TTFT/TPOT over high-contention baseline.",
        ),
        "fig_gpu_balance.png": (
            "GPU Balance",
            "Existing runs show GPU-util imbalance, but more data is needed for hot-spot causality.",
        ),
        "fig_claim_status.png": (
            "Claim Status",
            "Current audit separates supported, partial, and unsupported claims.",
        ),
    }

    header = [
        "# Figures Index",
        "",
        "Generated by:",
        "",
        "```bash",
        ".venv/bin/python analysis/characterization/plot_current_results.py",
        "```",
        "",
        "| Figure | Intended Claim |",
        "|---|---|",
    ]

    # One pass collects both the table rows and the preview sections.
    table_rows: list[str] = []
    previews: list[str] = []
    for figure_path in paths:
        file_name = Path(figure_path).name
        heading, claim = catalog[file_name]
        link = f"figures/{file_name}"
        table_rows.append(f"| [{file_name}]({link}) | {claim} |")
        previews += [f"### {heading}", "", claim, "", f"![{heading}]({link})", ""]

    document = header + table_rows + ["", "## Figure Previews", ""] + previews
    index_file = ROOT / "all_figures_index.md"
    # Trailing blank lines are trimmed so the file ends with a single newline.
    index_file.write_text("\n".join(document).rstrip() + "\n", encoding="utf-8")


# Generate the figures only when this file is executed as a script, so that
# importing the module has no plotting or file-writing side effects.
if __name__ == "__main__":
    main()