# agentic-kvc/microbench/fresh_setup/plot_fig4_linear_vs_lmetric.py

"""Linear vs LMetric routing on the real agentic trace (first600s).

Visualizes the wall-cap finding: with the 2x-colo-wall cap on PD-disagg arms,
linear and LMetric reach the *same* success-rate ceiling -- the static P:D
split has a structural completion ceiling that does not depend on the routing
policy or on how long you keep retrying.  Routing affects only how much wall
time is wasted on requests that will never succeed.

Inputs : analysis/v2/fig4l_lmetric.json   (8 arms, both traces; we use first600s)
         analysis/v2/fig4r_linear.json    (4 arms, first600s, PD wall-capped)
Output : figs/v2/fig4_linear_vs_lmetric.png
"""
from __future__ import annotations

import json
from pathlib import Path

import matplotlib
matplotlib.use("Agg")
import matplotlib.pyplot as plt
import numpy as np

# Directory three levels above this file (parents[0] is the file's own
# directory); presumably the repository root -- confirm if the script moves.
ROOT = Path(__file__).resolve().parents[2]
# Directory holding the input JSON result files read by main().
DATA = ROOT / "analysis" / "v2"
# Destination of the rendered figure.
OUT = ROOT / "figs" / "v2" / "fig4_linear_vs_lmetric.png"

# Arms plotted on the x-axis, in left-to-right order.
ARMS = ["colo", "6P+2D", "4P+4D", "2P+6D"]
# Bar colour per routing policy.
POLICY_COLOR = {"linear": "#9467bd", "lmetric": "#2ca02c"}
# Legend label per routing policy.
POLICY_LABEL = {"linear": "linear (cache-aware + session-affinity)",
                "lmetric": "LMetric (P_tokens × BS)"}


def pick(rows, arm, trace="first600s"):
    """Return the first row matching ``arm`` and ``trace``, or ``None``.

    Args:
        rows: Iterable of mappings that carry an ``"arm"`` key and, for rows
            whose arm matches, a ``"trace"`` key.
        arm: Value the row's ``"arm"`` field must equal.
        trace: Value the row's ``"trace"`` field must equal.

    Returns:
        The first matching row in iteration order, or ``None`` when no row
        matches.
    """
    # ``and`` keeps the lookup lazy: "trace" is read only once the arm matches.
    return next(
        (row for row in rows if row["arm"] == arm and row["trace"] == trace),
        None,
    )


def _load_rows(path):
    """Read one JSON result file, closing the file handle afterwards."""
    with open(path, encoding="utf-8") as fh:
        return json.load(fh)


def _require_row(rows, arm, policy):
    """Return the default-trace row for ``arm`` or fail with a clear message.

    Raises:
        ValueError: if ``pick`` finds no row for ``arm`` in ``rows``.
    """
    row = pick(rows, arm)
    if row is None:
        raise ValueError(f"no row for arm {arm!r} in the {policy} results")
    return row


def main():
    """Render the three-panel linear-vs-LMetric figure and save it to ``OUT``.

    Reads ``fig4r_linear.json`` and ``fig4l_lmetric.json`` from ``DATA``,
    draws one grouped bar per (arm, policy) in each panel -- (a) success
    rate, (b) wall-clock on a log axis with the 2x-colo cap line, (c)
    successful requests per minute of wall -- then writes the PNG and prints
    its path.

    Raises:
        ValueError: if either result file lacks a row for one of ``ARMS``.
    """
    lin = _load_rows(DATA / "fig4r_linear.json")
    lme = _load_rows(DATA / "fig4l_lmetric.json")

    # colo wall (linear) sets the 2x cap reference
    cap = 2 * _require_row(lin, "colo", "linear")["wall"]

    # Resolve every (policy, arm) row once; all three panels reuse them.
    series = [
        (pol, [_require_row(rows, arm, pol) for arm in ARMS])
        for pol, rows in (("linear", lin), ("lmetric", lme))
    ]

    fig, axes = plt.subplots(1, 3, figsize=(15, 4.5))
    x = np.arange(len(ARMS))
    w = 0.38

    # (a) success rate
    ax = axes[0]
    for i, (pol, arm_rows) in enumerate(series):
        centers = x + (i - 0.5) * w  # side-by-side bars around each tick
        vals = [r["n"] / r["req"] * 100 for r in arm_rows]
        ax.bar(centers, vals, w, color=POLICY_COLOR[pol], label=POLICY_LABEL[pol])
        for bx, bv in zip(centers, vals):
            ax.annotate(f"{bv:.0f}%", (bx, bv + 1.5), ha="center", fontsize=8)
    ax.axhline(100, color="grey", ls=":", lw=1)
    ax.set_xticks(x)
    ax.set_xticklabels(ARMS)
    ax.set_ylabel("success rate (% of trace)")
    ax.set_ylim(0, 115)
    ax.set_title("(a) success ceiling is policy-invariant")
    ax.legend(fontsize=8, loc="upper right")
    ax.grid(alpha=.3, axis="y")

    # (b) wall (log y) with cap line
    ax = axes[1]
    for i, (pol, arm_rows) in enumerate(series):
        centers = x + (i - 0.5) * w
        vals = [r["wall"] for r in arm_rows]
        ax.bar(centers, vals, w, color=POLICY_COLOR[pol],
               label=POLICY_LABEL[pol])
        for bx, bv, r in zip(centers, vals, arm_rows):
            mark = " ⊗" if r.get("capped") else ""
            # Multiplicative offset keeps the label gap constant on a log axis.
            ax.annotate(f"{bv:.0f}s{mark}", (bx, bv * 1.05), ha="center", fontsize=7)
    ax.axhline(cap, color="red", ls="--", lw=1.5,
               label=f"2× colo wall cap = {cap:.0f}s")
    ax.set_xticks(x)
    ax.set_xticklabels(ARMS)
    ax.set_ylabel("wall-clock (s, log)")
    ax.set_yscale("log")
    ax.set_title("(b) linear w/ cap vs lmetric w/o cap — ⊗ = cap-killed")
    ax.legend(fontsize=8, loc="upper left")
    ax.grid(alpha=.3, which="both", axis="y")

    # (c) goodput: successful requests per minute of wall (n / wall × 60)
    ax = axes[2]
    for i, (pol, arm_rows) in enumerate(series):
        centers = x + (i - 0.5) * w
        vals = [r["n"] / r["wall"] * 60 for r in arm_rows]
        ax.bar(centers, vals, w, color=POLICY_COLOR[pol], label=POLICY_LABEL[pol])
        pad = max(vals) * 0.02  # label gap scaled to this policy's tallest bar
        for bx, bv in zip(centers, vals):
            ax.annotate(f"{bv:.1f}", (bx, bv + pad),
                        ha="center", fontsize=8)
    ax.set_xticks(x)
    ax.set_xticklabels(ARMS)
    ax.set_ylabel("goodput (successful req / min)")
    ax.set_title("(c) linear+cap is 1.5–17× more wall-efficient on PD")
    ax.legend(fontsize=8, loc="upper right")
    ax.grid(alpha=.3, axis="y")

    fig.suptitle("Fig 4r — Linear vs LMetric on the real agentic trace (first600s, "
                 "PD-disagg wall-capped at 2× colo)",
                 fontsize=12, y=1.0)
    fig.tight_layout()
    # savefig does not create missing directories; make sure the target exists.
    OUT.parent.mkdir(parents=True, exist_ok=True)
    fig.savefig(OUT, dpi=130, bbox_inches="tight")
    plt.close(fig)  # release the figure once it is on disk
    print(f"wrote {OUT}")

# Render the figure only when this file is executed as a script.
if __name__ == "__main__":
    main()