# agentic-kvc/microbench/connector_tax/plot_connector_tax.py

#!/usr/bin/env python3
"""Plot Figures 1-5 from connector_tax aggregate.

Requires aggregate.json from analyze.py (the accompanying aggregate.csv is
not read by this script).

Figure 1: TTFT p90 vs send rate, line per config (Phase A)
Figure 2: TPOT p90 vs send rate
Figure 3: Achieved throughput vs requested
Figure 4: Substrate tax bar at ref_safe and ref_load
Figure 5: Shape-dependent tax heatmap (Phase B)
"""

import argparse
import json
from collections import defaultdict
from pathlib import Path

import numpy as np
import matplotlib
matplotlib.use("Agg")
import matplotlib.pyplot as plt


# Line color (hex RGB) for each known connector configuration name.
CONFIG_COLORS = {
    "plain": "#000000",
    "noop_connector": "#7f7f7f",
    "mooncake_producer": "#1f77b4",
    "mooncake_consumer": "#17becf",
    "mooncake_both": "#d62728",
    "nixl_both": "#ff7f0e",
    "lmcache_only": "#2ca02c",
    "multi_mooncake_lmcache": "#9467bd",
}


def load(root: Path) -> dict:
    """Load the aggregate results document from ``root``.

    Args:
        root: Directory containing ``aggregate.json``.

    Returns:
        The decoded JSON document.

    Raises:
        FileNotFoundError: If ``aggregate.json`` does not exist under ``root``.
        json.JSONDecodeError: If the file is not valid JSON.
    """
    # JSON text is UTF-8 by specification; decode it explicitly rather than
    # with the platform's locale-dependent default encoding.
    return json.loads((root / "aggregate.json").read_text(encoding="utf-8"))


def _plot_phase_a_metric(ax, configs, metric, mark_saturated=False):
    """Plot one Phase A line per config: ``metric`` against target send rate.

    Args:
        ax: Matplotlib axes to draw on.
        configs: Mapping of config name to its data; each value must hold a
            ``"phase_a"`` list of cells carrying ``"rate_target"``.
        metric: Cell key plotted on the y axis. It is read with ``.get``, so
            a cell without it contributes ``None`` instead of raising.
        mark_saturated: When true, overlay an "x" marker on every cell whose
            ``"saturated"`` entry is truthy.
    """
    for cfg, data in configs.items():
        cells = sorted(data["phase_a"], key=lambda c: c["rate_target"])
        xs = [c["rate_target"] for c in cells]
        ys = [c.get(metric) for c in cells]
        ax.plot(xs, ys, "o-", label=cfg,
                color=CONFIG_COLORS.get(cfg), linewidth=2)
        if not mark_saturated:
            continue
        for x, y, cell in zip(xs, ys, cells):
            # A saturated cell may lack the metric; skip the marker rather
            # than fail on the missing key.
            if cell.get("saturated") and y is not None:
                ax.plot([x], [y], "x", markersize=12, mew=2,
                        color=CONFIG_COLORS.get(cfg, "red"))


def _use_log2_rate_axis(ax, rates):
    """Put the x axis on a base-2 log scale with one labelled tick per rate."""
    ax.set_xscale("log", base=2)
    ax.set_xticks(rates)
    ax.set_xticklabels([str(r) for r in rates])


def _save_and_close(fig, path):
    """Tighten the layout, write ``fig`` to ``path`` at 160 dpi and close it."""
    fig.tight_layout()
    fig.savefig(path, dpi=160)
    plt.close(fig)


def fig1_2_3(agg, out_dir):
    """Render the three Phase A rate-sweep figures into ``out_dir``.

    Writes ``fig1_ttft_vs_rate.png`` (TTFT p90, log-log, with saturation
    markers), ``fig2_tpot_vs_rate.png`` (TPOT p90) and
    ``fig3_throughput_vs_rate.png`` (effective throughput against the ideal
    ``y = x`` line).

    Args:
        agg: Aggregate document; ``agg["configs"]`` maps config names to
            dicts with a ``"phase_a"`` cell list and ``agg["rates_swept"]``
            lists the swept send rates.
        out_dir: Output directory; must support the ``/`` path operator.

    Raises:
        KeyError: If ``"configs"``, ``"rates_swept"``, a config's
            ``"phase_a"`` or a cell's ``"rate_target"`` is missing.
    """
    configs = agg["configs"]
    rates = agg["rates_swept"]

    # ----- fig 1: TTFT p90 -----
    fig, ax = plt.subplots(figsize=(10, 5.5))
    _plot_phase_a_metric(ax, configs, "ttft_ms_p90", mark_saturated=True)
    _use_log2_rate_axis(ax, rates)
    ax.set_yscale("log")
    ax.set_xlabel("Send rate (req/s)")
    ax.set_ylabel("TTFT p90 (ms, log)")
    ax.set_title("Figure 1 — TTFT p90 vs send rate (Phase A)\n"
                 "× = saturation criterion fired")
    ax.grid(True, which="both", linestyle="--", alpha=0.4)
    ax.legend(fontsize=8, loc="upper left")
    _save_and_close(fig, out_dir / "fig1_ttft_vs_rate.png")

    # ----- fig 2: TPOT p90 -----
    fig, ax = plt.subplots(figsize=(10, 5.5))
    _plot_phase_a_metric(ax, configs, "tpot_ms_p90")
    _use_log2_rate_axis(ax, rates)
    ax.set_xlabel("Send rate (req/s)")
    ax.set_ylabel("TPOT p90 (ms)")
    ax.set_title("Figure 2 — TPOT p90 vs send rate (Phase A)")
    ax.grid(True, linestyle="--", alpha=0.4)
    ax.legend(fontsize=8, loc="upper left")
    _save_and_close(fig, out_dir / "fig2_tpot_vs_rate.png")

    # ----- fig 3: throughput -----
    fig, ax = plt.subplots(figsize=(10, 5.5))
    # Diagonal reference: the server keeps up exactly with the send rate.
    max_x = max(rates) if rates else 1
    ax.plot([0, max_x], [0, max_x], "k--", alpha=0.4, label="ideal y=x")
    _plot_phase_a_metric(ax, configs, "throughput_effective_rps")
    ax.set_xlabel("Send rate (req/s)")
    ax.set_ylabel("Effective throughput (req/s)")
    ax.set_title("Figure 3 — Throughput tracking (Phase A)")
    ax.grid(True, linestyle="--", alpha=0.4)
    ax.legend(fontsize=8, loc="upper left")
    _save_and_close(fig, out_dir / "fig3_throughput_vs_rate.png")


def fig4(agg, out_dir):
    """Render the substrate-tax bar chart at the reference send rates.

    For each reference rate (``agg["ref_safe"]`` and, when present and
    different, ``agg["ref_load"]``) one panel shows, per non-plain config,
    the relative increase of TTFT p90 and TPOT p90 over the "plain" config
    at that rate. The figure is saved as ``fig4_substrate_tax.png``.

    Nothing is written when there is no "plain" config or no reference rate.

    Args:
        agg: Aggregate document; reads ``"configs"``, ``"ref_safe"`` and
            ``"ref_load"``. Each config must hold a ``"phase_a"`` cell list.
        out_dir: Output directory; must support the ``/`` path operator.
    """
    configs = agg["configs"]
    if "plain" not in configs:
        return
    plain = configs["plain"]

    def cell_at(d, r):
        """Return the Phase A cell of ``d`` whose target rate is ``r``."""
        for c in d["phase_a"]:
            if abs(c["rate_target"] - r) < 1e-6:
                return c
        return None

    def tax(c_cfg, c_plain, key):
        """Return ``cfg / plain - 1`` for ``key``, or NaN when unavailable."""
        if c_cfg is None or c_plain is None:
            return np.nan
        a, b = c_cfg.get(key), c_plain.get(key)
        # Missing or zero measurements cannot yield a meaningful ratio.
        if not a or not b:
            return np.nan
        return a / b - 1

    rates_used = []
    if agg.get("ref_safe") is not None:
        rates_used.append(("ref_safe", agg["ref_safe"]))
    if agg.get("ref_load") is not None and agg["ref_load"] != agg.get("ref_safe"):
        rates_used.append(("ref_load", agg["ref_load"]))

    if not rates_used:
        return

    cfg_names = [c for c in configs if c != "plain"]
    # One panel per reference rate, so a single rate does not leave an empty
    # framed subplot in the saved image. squeeze=False keeps `axes` 2-D.
    n_panels = len(rates_used)
    fig, axes = plt.subplots(1, n_panels, figsize=(6.5 * n_panels, 5),
                             squeeze=False)
    x = np.arange(len(cfg_names))
    w = 0.4
    for ax, (label, r) in zip(axes[0], rates_used):
        plain_cell = cell_at(plain, r)
        cfg_cells = [cell_at(configs[cfg], r) for cfg in cfg_names]
        ttft_pct = np.array(
            [tax(c, plain_cell, "ttft_ms_p90") for c in cfg_cells],
            dtype=float) * 100
        tpot_pct = np.array(
            [tax(c, plain_cell, "tpot_ms_p90") for c in cfg_cells],
            dtype=float) * 100

        ax.bar(x - w/2, ttft_pct, width=w,
               label="TTFT p90 tax %", color="#d62728", alpha=0.85)
        ax.bar(x + w/2, tpot_pct, width=w,
               label="TPOT p90 tax %", color="#1f77b4", alpha=0.85)
        # Unavailable values are NaN (no bar drawn); label them so they are
        # not mistaken for a measured 0 % tax.
        for offset, values in ((-w/2, ttft_pct), (w/2, tpot_pct)):
            for xpos in x[np.isnan(values)]:
                ax.text(xpos + offset, 0, "n/a", ha="center", va="bottom",
                        fontsize=7)
        ax.axhline(0, color="black", linewidth=0.5)
        ax.set_xticks(x)
        ax.set_xticklabels(cfg_names, rotation=30, ha="right", fontsize=8)
        ax.set_ylabel("Tax vs plain (%)")
        ax.set_title(f"{label} (rate={r} req/s)")
        ax.grid(True, axis="y", linestyle="--", alpha=0.4)
        ax.legend(fontsize=8)
    fig.suptitle("Figure 4 — Substrate tax (TTFT p90 + TPOT p90) "
                 "at reference rates", fontweight="bold")
    fig.tight_layout()
    fig.savefig(out_dir / "fig4_substrate_tax.png", dpi=160)
    plt.close(fig)


def fig5(agg, out_dir):
    """Render the Phase B shape-dependent TTFT p90 tax heatmaps.

    One heatmap per non-plain config, laid out in a grid of at most three
    columns. Each cell is ``cfg_ttft_p90 / plain_ttft_p90 - 1`` in percent
    for one (input_tokens, output_tokens) shape; shapes without a usable
    measurement on either side stay NaN. The figure is saved as
    ``fig5_shape_tax_heatmap.png``.

    Nothing is written when there is no "plain" config, no other config to
    compare against it, or no Phase B data for "plain".

    Args:
        agg: Aggregate document; reads ``agg["configs"]``, where each config
            may hold a ``"phase_b"`` list of cells with ``"input_tokens"``,
            ``"output_tokens"`` and ``"ttft_ms_p90"``.
        out_dir: Output directory; must support the ``/`` path operator.
    """
    configs = agg["configs"]
    if "plain" not in configs:
        return

    cfg_names = [c for c in configs if c != "plain"]
    if not cfg_names:
        # Nothing to compare against plain; a zero-column grid would also
        # make the row computation below divide by zero.
        return

    def shape_map(d):
        """Map (input_tokens, output_tokens) -> TTFT p90 for one config."""
        return {
            (c["input_tokens"], c["output_tokens"]): c.get("ttft_ms_p90")
            for c in d.get("phase_b", [])
        }

    def token_label(t):
        """Abbreviate exact multiples of 1024 as ``Nk``; else print as-is."""
        if t >= 1024 and t % 1024 == 0:
            return f"{t // 1024}k"
        return str(t)

    plain_map = shape_map(configs["plain"])
    if not plain_map:
        return

    # Axes of the heatmap come from the shapes measured for "plain".
    inputs = sorted({k[0] for k in plain_map})
    outputs = sorted({k[1] for k in plain_map})

    n = len(cfg_names)
    cols = min(3, n)
    rows = (n + cols - 1) // cols
    # squeeze=False always yields a 2-D axes array, whatever the grid shape.
    fig, axes = plt.subplots(rows, cols, figsize=(5 * cols, 4 * rows),
                             squeeze=False)

    for ax, cfg in zip(axes.flat, cfg_names):
        cfg_map = shape_map(configs[cfg])
        # Rows are output sizes, columns are input sizes.
        mat = np.full((len(outputs), len(inputs)), np.nan)
        for i, ip in enumerate(inputs):
            for j, op in enumerate(outputs):
                a = cfg_map.get((ip, op))
                b = plain_map.get((ip, op))
                if a and b:
                    mat[j, i] = a / b - 1
        im = ax.imshow(mat * 100, cmap="YlOrRd", aspect="auto")
        ax.set_xticks(range(len(inputs)))
        ax.set_xticklabels([token_label(x) for x in inputs])
        ax.set_yticks(range(len(outputs)))
        ax.set_yticklabels([str(y) for y in outputs])
        ax.set_xlabel("input")
        ax.set_ylabel("output")
        ax.set_title(cfg, fontsize=10)
        for (j, i), v in np.ndenumerate(mat):
            if np.isnan(v):
                continue
            # White text on the darker (higher-tax) cells for legibility.
            ax.text(i, j, f"{v*100:.0f}%", ha="center", va="center",
                    fontsize=9,
                    color="white" if v * 100 > 30 else "black")
        plt.colorbar(im, ax=ax, fraction=0.04, pad=0.02)

    # Hide leftover axes
    for ax in axes.flat[n:]:
        ax.axis("off")

    fig.suptitle("Figure 5 — TTFT p90 substrate tax (%) by shape (Phase B)",
                 fontweight="bold")
    fig.tight_layout()
    fig.savefig(out_dir / "fig5_shape_tax_heatmap.png", dpi=160)
    plt.close(fig)


def main():
    """CLI entry point: load the aggregate under ``--root`` and plot into it.

    The directory given by ``--root`` is both where ``aggregate.json`` is
    read from and where every figure is written.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--root", type=Path, required=True)
    root = parser.parse_args().root

    agg = load(root)
    root.mkdir(parents=True, exist_ok=True)

    # Each renderer takes the aggregate and the output directory.
    for render in (fig1_2_3, fig4, fig5):
        render(agg, root)
    print(f"Saved figures into {root}")


# Run the CLI only when executed as a script, not when imported as a module.
if __name__ == "__main__":
    main()