# agentic-kvc/scripts/plot_inter_turn_gap.py

#!/usr/bin/env python3
"""Plot the production trace inter-turn gap distribution.

Inter-turn gap = next_turn.request_ready_time_ms - prev_turn.request_end_time_ms
(i.e. T_external: the wall-clock between a turn finishing and the next turn
of the same session arriving). This is the tool-call latency + any pause,
not the conflated arrival-to-arrival interval.

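The agentic data is pre-computed on dash0 by scripts/agentic_gap.py and cached
under ``analysis/characterization/data/agentic_inter_turn_gap.json`` (~23 KB).
The chatbot comparison curve is read from
``analysis/characterization/data/chatbot_inter_turn_gap.json``. Both paths can
be overridden with ``--agentic-data`` / ``--chatbot-data``.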
"""
from __future__ import annotations

import argparse
import json
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np


def load(cache_path: Path) -> tuple[np.ndarray, np.ndarray, dict]:
    """Read a cached gap-distribution JSON file and return its CDF series.

    Args:
        cache_path: Path to a JSON file whose top-level object contains a
            ``"cdf_samples"`` list; every entry must provide ``"gap_s"`` and
            ``"rank_pct"``.

    Returns:
        ``(xs, ys, d)`` where ``xs`` holds each sample's ``gap_s``, ``ys``
        holds each sample's ``rank_pct`` (both in file order), and ``d`` is
        the complete decoded JSON object.

    Raises:
        KeyError: If ``"cdf_samples"`` or one of the per-sample keys is
            missing.
    """
    # JSON text is UTF-8; decode explicitly so the result does not depend on
    # the platform's locale encoding.
    d = json.loads(cache_path.read_text(encoding="utf-8"))
    samples = d["cdf_samples"]
    xs = np.array([s["gap_s"] for s in samples])
    ys = np.array([s["rank_pct"] for s in samples])
    return xs, ys, d


def main() -> None:
    """Draw the agentic-vs-chatbot inter-turn gap CDF and save it as an image.

    Command-line options:
        --agentic-data: JSON cache used for the agentic curve.
        --chatbot-data: JSON cache used for the chatbot curve.
        --out: Output image path; missing parent directories are created.

    Besides what ``load`` reads, each cache must provide ``n_gaps``,
    ``n_sessions``, ``stats_s["p50"]`` and ``fraction_below["1.0s"]`` /
    ``fraction_below["5.0s"]``.
    """
    cli = argparse.ArgumentParser()
    cli.add_argument(
        "--agentic-data",
        default="analysis/characterization/data/agentic_inter_turn_gap.json",
    )
    cli.add_argument(
        "--chatbot-data",
        default="analysis/characterization/data/chatbot_inter_turn_gap.json",
    )
    cli.add_argument("--out", default="figs/f3a_inter_turn_gap.png")
    opts = cli.parse_args()

    agentic_x, agentic_y, agentic = load(Path(opts.agentic_data))
    chatbot_x, chatbot_y, chatbot = load(Path(opts.chatbot_data))

    fig, ax = plt.subplots(figsize=(10, 5.5))

    # One CDF curve per workload; the legend entry carries the sample counts.
    agentic_label = (
        f"agentic   (n={agentic['n_gaps']:,} gaps, "
        f"{agentic['n_sessions']:,} multi-turn sessions, Qwen3-Coder)"
    )
    ax.plot(agentic_x, agentic_y, color="#1f77b4", lw=2.4, label=agentic_label)
    chatbot_label = (
        f"chatbot  (n={chatbot['n_gaps']:,} gaps, "
        f"{chatbot['n_sessions']:,} multi-turn sessions, qwen3-max)"
    )
    ax.plot(chatbot_x, chatbot_y, color="#c44e52", lw=2.4, label=chatbot_label)

    # Mark each curve's median; the text offsets (in points) push the two
    # labels to opposite sides of their markers.
    median_markers = (
        (agentic, "#1f4e79", (8, -10)),
        (chatbot, "#7a1d1d", (-110, 8)),
    )
    for dataset, shade, text_offset in median_markers:
        median = dataset["stats_s"]["p50"]
        ax.scatter([median], [50], color=shade, s=55, zorder=5)
        ax.annotate(
            f"p50 = {median:.2g}s",
            xy=(median, 50),
            xytext=text_offset,
            textcoords="offset points",
            fontsize=10,
            color=shade,
        )

    # Dotted vertical reference lines with rotated captions just to the right.
    for caption, x_ref, shade in (
        ("lmetric TTFT p90 = 15.7s", 15.7, "#888"),
        ("unified TTFT p90 = 7.3s", 7.3, "#444"),
    ):
        ax.axvline(x_ref, color=shade, ls=":", lw=1.2, alpha=0.7)
        ax.text(
            x_ref * 1.05,
            5,
            caption,
            fontsize=8.5,
            color=shade,
            rotation=90,
            va="bottom",
        )

    ax.set_xscale("log")
    ax.set_xlim(0.05, 5000)
    ax.set_ylim(0, 102)
    ax.set_xlabel(
        "Inter-turn gap T_external (s, log scale) — next.ready − prev.end"
    )
    ax.set_ylabel("Cumulative % of inter-turn intervals")

    # Title: headline on the first line, per-workload summary on the second.
    agentic_stats, chatbot_stats = agentic["stats_s"], chatbot["stats_s"]
    agentic_frac = agentic["fraction_below"]
    chatbot_frac = chatbot["fraction_below"]
    headline = (
        "Agentic vs chatbot inter-turn external gap — agentic has a "
        "sub-second tool-call mode chatbot lacks"
    )
    agentic_summary = (
        f"agentic p50={agentic_stats['p50']:.2g}s, "
        f"frac<1s={agentic_frac['1.0s']*100:.0f}%, "
        f"frac<5s={agentic_frac['5.0s']*100:.0f}%"
    )
    chatbot_summary = (
        f"chatbot p50={chatbot_stats['p50']:.2g}s, "
        f"frac<1s={chatbot_frac['1.0s']*100:.0f}%, "
        f"frac<5s={chatbot_frac['5.0s']*100:.0f}%"
    )
    ax.set_title(f"{headline}\n{agentic_summary} · {chatbot_summary}")

    ax.grid(True, which="both", alpha=0.3)
    ax.legend(loc="lower right", framealpha=0.92, fontsize=9)

    destination = Path(opts.out)
    destination.parent.mkdir(parents=True, exist_ok=True)
    fig.savefig(destination, dpi=150, bbox_inches="tight")
    print(f"wrote {destination}")


# Run the plot only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()