# agentic-kvc/microbench/fresh_setup/partial_summary.py

"""Compute a per-run summary directly from replay_metrics.jsonl (for partial / in-flight runs).

Used when the replayer hasn't completed (so replay_metrics.summary.json doesn't exist
yet) but enough records have streamed to disk to read out the per-arm result.

Also accepts a finished run's directory and prints the same one-line summary for
apples-to-apples comparison.
"""
from __future__ import annotations

import json
import re
import statistics
import sys
from pathlib import Path


def stats(xs):
    """Summarize a collection of numbers as count, mean and p50/p90/p99.

    All three percentiles use the same rank rule,
    ``sorted_xs[int(q * (n - 1))]``, so the reported values are always
    non-decreasing (p50 <= p90 <= p99). Mixing ``n // 2`` for the median with
    ``int(q * (n - 1))`` for the tails could report p50 > p90 on small
    samples (e.g. n == 2), which is common for partial runs.

    Args:
        xs: Iterable of mutually comparable numbers. It is not modified.

    Returns:
        ``None`` when ``xs`` is empty, otherwise a dict with the keys
        ``n``, ``mean``, ``p50``, ``p90`` and ``p99``.
    """
    ordered = sorted(xs)
    n = len(ordered)
    if n == 0:
        return None

    def rank(q):
        # Truncating q * (n - 1) always yields an index in [0, n - 1].
        return ordered[int(q * (n - 1))]

    return {
        "n": n,
        "mean": statistics.fmean(ordered),
        "p50": rank(0.5),
        "p90": rank(0.9),
        "p99": rank(0.99),
    }


def apc(run: Path, producer_ports):
    """Return the aggregate hit rate (hits / queries) for the given ports.

    Reads ``instance_apc.txt`` inside ``run``. Every line is scanned for
    ``key=value`` tokens; lines whose ``port`` value is in ``producer_ports``
    contribute their ``queries`` and ``hits`` values to the totals.

    Args:
        run: Run directory expected to contain ``instance_apc.txt``.
        producer_ports: Container of integer ports to include.

    Returns:
        ``hits / queries`` summed over the matching lines, or ``None`` when
        the file does not exist or the matching lines total zero queries.
    """
    f = run / "instance_apc.txt"
    if not f.exists():
        return None
    total_queries = total_hits = 0.0
    with open(f, encoding="utf-8") as fh:
        for line in fh:
            fields = dict(re.findall(r"(\w+)=(\S+)", line))
            try:
                port = int(fields.get("port", -1))
                if port not in producer_ports:
                    continue
                # Parse both numbers before accumulating so a malformed line
                # is skipped as a whole instead of aborting the summary or
                # counting queries without their hits.
                line_queries = float(fields.get("queries", 0))
                line_hits = float(fields.get("hits", 0))
            except ValueError:
                continue
            total_queries += line_queries
            total_hits += line_hits
    return (total_hits / total_queries) if total_queries else None


def _load_records(path):
    """Parse a JSONL file into dict records, tolerating in-flight writes.

    Returns a ``(records, skipped)`` tuple where ``skipped`` counts non-blank
    lines that were not a complete JSON object (e.g. a half-written last line).
    """
    records = []
    skipped = 0
    # errors="replace" keeps a multi-byte character cut off at the end of a
    # still-growing file from raising; such a line then fails JSON parsing
    # and is counted as skipped.
    with open(path, encoding="utf-8", errors="replace") as fh:
        for line in fh:
            if not line.strip():
                continue
            try:
                rec = json.loads(line)
            except json.JSONDecodeError:
                skipped += 1
                continue
            if isinstance(rec, dict):
                records.append(rec)
            else:
                skipped += 1
    return records, skipped


def _field_values(records, key):
    """Collect ``key`` from every record where it is present and not null."""
    return [r[key] for r in records if r.get(key) is not None]


def _output_tokens(rec):
    """Return a record's output token count, preferring the measured value."""
    tokens = rec.get("actual_output_tokens")
    if tokens is None:
        tokens = rec.get("output_length")
    return tokens or 0


def main():
    """Print a text summary for every run directory named on the command line.

    For each directory the first existing file among
    ``replay_metrics.partial.jsonl``, ``replay_metrics.jsonl`` and
    ``metrics.jsonl`` is read. Records whose ``error`` field is missing or
    null count as successful and feed the latency/TTFT/TPOT statistics, the
    output-token total and the tokens-per-second figure. The producer APC
    line is printed only when ``apc`` returns a value.
    """
    # Lookup order: the partial file first (so in-flight runs work), then the
    # regular replay file, then metrics.jsonl as a last resort.
    candidates = ("replay_metrics.partial.jsonl", "replay_metrics.jsonl", "metrics.jsonl")
    # producer ports by arm tag in dirname; first matching tag wins
    arm_ports = (
        ("_colo_", range(8000, 8008)),
        ("_pd6_", range(8000, 8006)),
        ("_pd2_", range(8000, 8002)),
    )
    default_ports = range(8000, 8004)
    target = 1214

    for d in sys.argv[1:]:
        run = Path(d)
        rec_path = next((run / fn for fn in candidates if (run / fn).exists()), None)
        if rec_path is None:
            print(f"{run.name}: no records")
            continue

        recs, skipped = _load_records(rec_path)
        if skipped:
            # Report on stderr so the stdout summary format stays unchanged.
            print(f"{run.name}: skipped {skipped} unparsable line(s) in {rec_path.name}",
                  file=sys.stderr)

        oks = [r for r in recs if r.get("error") is None]
        lat = stats(_field_values(oks, "latency_s"))
        ttft = stats(_field_values(oks, "ttft_s"))
        tpot = stats(_field_values(oks, "tpot_s"))
        out = sum(_output_tokens(r) for r in oks)
        ts = _field_values(oks, "t_dispatch_unix")
        tf = _field_values(oks, "t_finish_unix")
        # Wall-clock window from the earliest dispatch to the latest finish.
        span = max(tf) - min(ts) if ts and tf else 0
        tps = out / span if span else 0

        n = run.name
        ports = next((p for tag, p in arm_ports if tag in n), default_ports)
        a = apc(run, ports)

        print(f"{run.name}")
        print(f"  n_ok={len(oks)}/{len(recs)}"
              + (f"  (target={target} -> {len(oks)*100/target:.1f}%)" if len(recs) < target else ""))
        if lat:
            print(f"  E2E   mean={lat['mean']:.2f}  p50={lat['p50']:.2f}  p90={lat['p90']:.2f}  p99={lat['p99']:.2f}")
        if ttft:
            print(f"  TTFT  mean={ttft['mean']:.2f}  p50={ttft['p50']:.2f}  p90={ttft['p90']:.2f}  p99={ttft['p99']:.2f}")
        if tpot:
            print(f"  TPOT  mean={tpot['mean']*1000:.1f}ms  p90={tpot['p90']*1000:.1f}ms  p99={tpot['p99']*1000:.1f}ms")
        print(f"  output_tokens={out:.0f}  span={span:.0f}s  TPS={tps:.0f}")
        if a is not None:
            print(f"  producer APC={a*100:.1f}%")


# Script entry point: summarize every run directory passed on the command line.
if __name__ == "__main__":
    main()