agentic-kvc/microbench/connector_tax/cache_sweep/analyze_trace_replay.py

#!/usr/bin/env python3
"""Compute TTFT/TPOT/E2E mean/p50/p90/p99 from trace-replay metrics.jsonl.

Usage:
  python analyze_trace_replay.py --root <outroot>
where <outroot>/{unified,unified_kv_both,unified_kv_both_drfix}/metrics.jsonl
each contain one JSONL object per request (with ttft_s, tpot_s, latency_s).
"""

import argparse
import json
import statistics
from pathlib import Path


def pct(xs, p):
    """Return the ``p``-th percentile of ``xs`` using a floor-rank lookup.

    The samples are sorted and the element at index
    ``floor(p / 100 * (len(xs) - 1))`` is returned, with the index clamped
    to the valid range. No interpolation is performed, so the result is
    always one of the input values.

    Args:
        xs: Collection of mutually comparable samples; may be empty.
        p: Percentile in percent (0 selects the minimum, 100 the maximum).
            Values outside [0, 100] are clamped to the nearest end.

    Returns:
        The selected sample, or ``None`` when ``xs`` is empty.
    """
    if not xs:
        return None
    ordered = sorted(xs)
    last = len(ordered) - 1
    # Multiply before dividing: ``p / 100.0`` is inexact for most p, so
    # ``p / 100.0 * last`` can land just below a whole number (for example
    # 0.29 * 100 == 28.999999999999996) and truncate one rank too low.
    rank = int(p * last / 100.0)
    return ordered[max(0, min(last, rank))]


def summarise(rows):
    """Aggregate per-request records into count and latency statistics.

    A record counts as failed when its ``"error"`` entry is truthy; only the
    remaining records contribute latency samples. ``ttft_s`` and
    ``latency_s`` are used whenever they are not ``None``, while ``tpot_s``
    is used only when truthy (so a missing, ``None`` or zero value is
    skipped). Every sample is multiplied by 1000 before aggregation
    (seconds to milliseconds, going by the ``_s`` / ``_ms`` key suffixes).

    Returns:
        A dict with ``n_total``, ``n_ok``, ``n_err`` and, for each of the
        ``ttft``, ``tpot`` and ``e2e`` prefixes, ``<prefix>_mean_ms``,
        ``<prefix>_p50_ms``, ``<prefix>_p90_ms`` and ``<prefix>_p99_ms``.
        A statistic is ``None`` when its metric has no samples.
    """
    succeeded = []
    for row in rows:
        if not row.get("error"):
            succeeded.append(row)

    def is_present(value):
        return value is not None

    def to_ms(field, keep):
        # Collect one field from the successful rows, scaled by 1000.
        return [row[field] * 1000 for row in succeeded if keep(row.get(field))]

    samples = (
        ("ttft", to_ms("ttft_s", is_present)),
        # tpot_s is filtered on truthiness, so 0 / 0.0 is dropped as well.
        ("tpot", to_ms("tpot_s", bool)),
        ("e2e", to_ms("latency_s", is_present)),
    )

    summary = {
        "n_total": len(rows),
        "n_ok": len(succeeded),
        "n_err": len(rows) - len(succeeded),
    }
    # Insertion order matters: it is the key order of the JSON written later.
    for prefix, values in samples:
        summary[f"{prefix}_mean_ms"] = statistics.mean(values) if values else None
        for quantile in (50, 90, 99):
            summary[f"{prefix}_p{quantile}_ms"] = pct(values, quantile)
    return summary


def main():
    """Summarise each config's metrics.jsonl, print tables, and save JSON.

    For every name in ``--configs`` the file
    ``<root>/<config>/metrics.jsonl`` is read (one JSON object per non-blank
    line) and passed to ``summarise``. Configs whose file does not exist are
    reported as ``MISSING`` and shown as ``-`` in the overview table.

    Output:
        * An overview table with one column per config.
        * When at least two configs were requested and all of them were
          found: one comparison table per config against the first config,
          plus a ``unified_kv_both`` vs ``unified_kv_both_drfix`` table when
          both are present.
        * ``<root>/trace_replay_summary.json`` holding every summary.
    """
    ap = argparse.ArgumentParser()
    ap.add_argument("--root", type=Path, required=True)
    ap.add_argument("--configs", nargs="+",
                    default=["unified", "unified_kv_both", "unified_kv_both_drfix"])
    args = ap.parse_args()

    results = {}
    for cfg in args.configs:
        p = args.root / cfg / "metrics.jsonl"
        if not p.exists():
            print(f"MISSING: {p}")
            continue
        # Use a context manager so the handle is closed promptly instead of
        # being left to the garbage collector, and decode as UTF-8 rather
        # than whatever the locale default happens to be.
        with p.open(encoding="utf-8") as f:
            rows = [json.loads(line) for line in f if line.strip()]
        results[cfg] = summarise(rows)

    def fmt(v):
        # Whole-number rendering; "-" marks a statistic that has no samples.
        return f"{v:.0f}" if v is not None else "-"

    def pctd(a, b):
        # Relative change of b against baseline a; undefined without a
        # non-zero baseline.
        if a is None or b is None or a == 0:
            return "-"
        return f"{(b/a-1)*100:+.1f}%"

    percentile_metrics = ["ttft_p50_ms", "ttft_p90_ms", "ttft_p99_ms",
                          "tpot_p50_ms", "tpot_p90_ms", "tpot_p99_ms",
                          "e2e_p50_ms",  "e2e_p90_ms",  "e2e_p99_ms"]

    def print_comparison(title, base_cfg, other_cfg):
        # One "baseline → other (delta)" line per percentile metric.
        print(title)
        for m in percentile_metrics:
            a = results[base_cfg][m]
            b = results[other_cfg][m]
            print(f"  {m:<14} {fmt(a):>10} → {fmt(b):>10}  ({pctd(a, b)})")

    print(f"{'metric':<14}", end="")
    for cfg in args.configs:
        print(f"{cfg[:20]:>22}", end="")
    print()
    print("-" * (14 + 22 * len(args.configs)))

    for m in ["n_ok", "ttft_mean_ms", "ttft_p50_ms", "ttft_p90_ms", "ttft_p99_ms",
              "tpot_mean_ms", "tpot_p50_ms", "tpot_p90_ms", "tpot_p99_ms",
              "e2e_mean_ms",  "e2e_p50_ms",  "e2e_p90_ms",  "e2e_p99_ms"]:
        print(f"{m:<14}", end="")
        for cfg in args.configs:
            if cfg not in results:
                print(f"{'-':>22}", end="")
            else:
                print(f"{fmt(results[cfg][m]):>22}", end="")
        print()

    # Tax tables: only printed when every requested config was loaded.
    if len(args.configs) >= 2 and all(c in results for c in args.configs):
        plain = args.configs[0]
        for ref_cfg in args.configs[1:]:
            print_comparison(f"\n=== {ref_cfg} vs {plain} ===", plain, ref_cfg)
        if "unified_kv_both" in results and "unified_kv_both_drfix" in results:
            print_comparison(
                "\n=== DR-fix improvement: unified_kv_both_drfix vs unified_kv_both ===",
                "unified_kv_both",
                "unified_kv_both_drfix",
            )

    # Save machine-readable
    with open(args.root / "trace_replay_summary.json", "w", encoding="utf-8") as f:
        json.dump(results, f, indent=2)
    print(f"\nWrote {args.root}/trace_replay_summary.json")


# Run the analysis only when this file is executed as a script, so that
# importing the module does not parse arguments or touch the filesystem.
if __name__ == "__main__":
    main()