"""Linear vs LMetric routing on the real agentic trace (first600s). Visualizes the wall-cap finding: with the 2x-colo-wall cap on PD-disagg arms, linear and LMetric reach the *same* success-rate ceiling -- the static P:D split has a structural completion ceiling that does not depend on the routing policy or on how long you keep retrying. Routing affects only how much wall time is wasted on requests that will never succeed. Inputs : analysis/v2/fig4l_lmetric.json (8 arms, both traces; we use first600s) analysis/v2/fig4r_linear.json (4 arms, first600s, PD wall-capped) Output : figs/v2/fig4_linear_vs_lmetric.png """ from __future__ import annotations import json from pathlib import Path import matplotlib matplotlib.use("Agg") import matplotlib.pyplot as plt import numpy as np ROOT = Path(__file__).resolve().parents[2] DATA = ROOT / "analysis" / "v2" OUT = ROOT / "figs" / "v2" / "fig4_linear_vs_lmetric.png" ARMS = ["colo", "6P+2D", "4P+4D", "2P+6D"] POLICY_COLOR = {"linear": "#9467bd", "lmetric": "#2ca02c"} POLICY_LABEL = {"linear": "linear (cache-aware + session-affinity)", "lmetric": "LMetric (P_tokens × BS)"} def pick(rows, arm, trace="first600s"): for r in rows: if r["arm"] == arm and r["trace"] == trace: return r return None def main(): lin = json.load(open(DATA / "fig4r_linear.json")) lme = json.load(open(DATA / "fig4l_lmetric.json")) # colo wall (linear) sets the 2x cap reference colo_lin_wall = pick(lin, "colo")["wall"] cap = 2 * colo_lin_wall fig, axes = plt.subplots(1, 3, figsize=(15, 4.5)) x = np.arange(len(ARMS)) w = 0.38 # (a) success rate ax = axes[0] for i, (pol, rows) in enumerate([("linear", lin), ("lmetric", lme)]): vals = [pick(rows, a)["n"] / pick(rows, a)["req"] * 100 for a in ARMS] bars = ax.bar(x + (i - 0.5) * w, vals, w, color=POLICY_COLOR[pol], label=POLICY_LABEL[pol]) for bx, bv in zip(x + (i - 0.5) * w, vals): ax.annotate(f"{bv:.0f}%", (bx, bv + 1.5), ha="center", fontsize=8) ax.axhline(100, color="grey", ls=":", lw=1) ax.set_xticks(x); ax.set_xticklabels(ARMS) ax.set_ylabel("success rate (% of trace)"); ax.set_ylim(0, 115) ax.set_title("(a) success ceiling is policy-invariant") ax.legend(fontsize=8, loc="upper right"); ax.grid(alpha=.3, axis="y") # (b) wall (log y) with cap line ax = axes[1] for i, (pol, rows) in enumerate([("linear", lin), ("lmetric", lme)]): vals = [pick(rows, a)["wall"] for a in ARMS] bars = ax.bar(x + (i - 0.5) * w, vals, w, color=POLICY_COLOR[pol], label=POLICY_LABEL[pol]) for bx, bv, r in zip(x + (i - 0.5) * w, vals, [pick(rows, a) for a in ARMS]): mark = " ⊗" if r.get("capped") else "" ax.annotate(f"{bv:.0f}s{mark}", (bx, bv * 1.05), ha="center", fontsize=7) ax.axhline(cap, color="red", ls="--", lw=1.5, label=f"2× colo wall cap = {cap:.0f}s") ax.set_xticks(x); ax.set_xticklabels(ARMS) ax.set_ylabel("wall-clock (s, log)"); ax.set_yscale("log") ax.set_title("(b) linear w/ cap vs lmetric w/o cap — ⊗ = cap-killed") ax.legend(fontsize=8, loc="upper left"); ax.grid(alpha=.3, which="both", axis="y") # (c) goodput per minute of wall (success rate / wall × 60) ax = axes[2] for i, (pol, rows) in enumerate([("linear", lin), ("lmetric", lme)]): vals = [pick(rows, a)["n"] / pick(rows, a)["wall"] * 60 for a in ARMS] bars = ax.bar(x + (i - 0.5) * w, vals, w, color=POLICY_COLOR[pol], label=POLICY_LABEL[pol]) for bx, bv in zip(x + (i - 0.5) * w, vals): ax.annotate(f"{bv:.1f}", (bx, bv + max(vals) * 0.02), ha="center", fontsize=8) ax.set_xticks(x); ax.set_xticklabels(ARMS) ax.set_ylabel("goodput (successful req / min)") ax.set_title("(c) linear+cap is 1.5–17× more wall-efficient on PD") ax.legend(fontsize=8, loc="upper right"); ax.grid(alpha=.3, axis="y") fig.suptitle("Fig 4r — Linear vs LMetric on the real agentic trace (first600s, " "PD-disagg wall-capped at 2× colo)", fontsize=12, y=1.0) fig.tight_layout() fig.savefig(OUT, dpi=130, bbox_inches="tight") print(f"wrote {OUT}") if __name__ == "__main__": main()