# agentic-kvc/scripts/analyze_breakdown.py

"""Analyze per-request breakdown data from the proxy."""
import json, statistics, sys

# Source of the breakdown records: the first command-line argument if given,
# otherwise the default HTTP endpoint below.
url = sys.argv[1] if len(sys.argv) > 1 else "http://localhost:9090/breakdown"

# Only explicit http(s) URLs are fetched over the network; anything else is
# treated as a local file path. A bare "http" prefix test would misroute a
# local file named e.g. "http_dump.json" to urlopen.
if url.startswith(("http://", "https://")):
    import urllib.request
    # The context manager closes the HTTP response once the body is read.
    with urllib.request.urlopen(url, timeout=10) as resp:
        data = json.loads(resp.read())
else:
    # JSON text is UTF-8; do not depend on the platform's locale encoding,
    # and make sure the file handle is closed after parsing.
    with open(url, encoding="utf-8") as fh:
        data = json.load(fh)

print("Total records: %d" % len(data))

# Fields a record must carry before its stage durations can be computed.
# "input_length" is listed as well: it is read unconditionally below, so a
# record lacking it would otherwise abort the whole run with a KeyError.
required = (
    "input_length",
    "t_proxy_recv",
    "t_prefill_sent",
    "t_prefill_done",
    "t_decode_sent",
    "t_first_token",
)

results = []
for d in data:
    # Skip incomplete records: a field that is absent or null cannot be used
    # in the subtractions below.
    if any(d.get(k) is None for k in required):
        continue
    results.append({
        "input": d["input_length"],
        # Each duration is the difference between two recorded timestamps.
        "prefill": d["t_prefill_done"] - d["t_prefill_sent"],
        "proxy_gap": d["t_decode_sent"] - d["t_prefill_done"],
        "kv_decode": d["t_first_token"] - d["t_decode_sent"],
        "ttft": d["t_first_token"] - d["t_proxy_recv"],
    })

# Order rows by input length so the table printed later reads smallest first.
results.sort(key=lambda x: x["input"])
print("Complete breakdown: %d" % len(results))

# Nothing to summarize: exit cleanly rather than failing on empty statistics.
if not results:
    print("No complete records yet")
    sys.exit(0)

# Per-request table: a header, a separator, then one line per entry for the
# first 25 entries of `results`.
header_fmt = "  %8s %9s %9s %9s %9s"
row_fmt = "  %8d %9.3f %9.3f %9.3f %9.3f"
columns = ("input", "prefill", "proxy_gap", "kv_decode", "ttft")

print()
print(header_fmt % ("input", "prefill", "proxy", "kv+dec", "TTFT"))
print(header_fmt % ("-----", "-------", "-----", "------", "----"))
for row in results[:25]:
    print(row_fmt % tuple(row[col] for col in columns))

# Summary per stage over all entries of `results`: p50, p90 and mean.
print()
for stage in ("prefill", "proxy_gap", "kv_decode", "ttft"):
    ordered = sorted(row[stage] for row in results)
    count = len(ordered)
    # Percentile q is read at index int(q * count), clamped to the last item.
    p50 = ordered[min(int(.5 * count), count - 1)]
    p90 = ordered[min(int(.9 * count), count - 1)]
    average = statistics.fmean(ordered)
    print("  %s: p50=%.3fs p90=%.3fs mean=%.3fs" % (stage, p50, p90, average))

# Share of TTFT spent in each stage, as the mean of per-request percentages.
print()
print("  TTFT breakdown (fraction of total):")
# Only requests whose TTFT exceeds 0.01 take part; this also keeps the
# division below away from zero.
usable = [row for row in results if row["ttft"] > 0.01]
for stage in ("prefill", "proxy_gap", "kv_decode"):
    shares = [row[stage] / row["ttft"] * 100 for row in usable]
    if not shares:
        continue
    print("    %s: mean=%.1f%% of TTFT" % (stage, statistics.fmean(shares)))