Files
agentic-kvc/scripts/analyze_breakdown.py
Gahow Wang 645b067dd4 Fix review bugs: PD-sep counter leaks, hardcoded paths, missing deps
Critical:
- cache_aware_proxy: _handle_pd_sep leaked p_inst.num_requests (never
  decremented) and never managed d_inst.num_requests; fix media_type
  from application/json to text/event-stream for SSE stream

High:
- b3_sweep/b3_isolated_policy/b3_analyze: replace hardcoded
  /home/admin/cpfs/wjh/ ROOT with script-relative $(dirname "$0")/..
- b3_analyze: replace hardcoded 8-port WORKER_MAP with dynamic
  generation from BASE_PORT and N_INSTANCES

Medium:
- analyze_breakdown: warn on stderr when records are skipped (was silent)
- deploy_vllm_patches: fail-fast on SSH/SCP errors instead of
  continuing with empty VENV_SITE
- pyproject.toml: declare fastapi and uvicorn as runtime dependencies
- launch_elastic_p2p: kill EngineCore and proxy in trap handler to
  prevent GPU memory leaks on exit
2026-05-26 15:54:55 +08:00

60 lines
2.0 KiB
Python

"""Analyze per-request breakdown data from the proxy."""
import json
import statistics
import sys
import urllib.request

# Endpoint queried when no URL or file path is given on the command line.
DEFAULT_URL = "http://localhost:9090/breakdown"

# Timestamps a record must carry before any stage duration can be derived.
TIMESTAMP_KEYS = (
    "t_proxy_recv",
    "t_prefill_sent",
    "t_prefill_done",
    "t_decode_sent",
    "t_first_token",
)

# Every field read while building a row; "input_length" is dereferenced too,
# so it has to be validated together with the timestamps.
REQUIRED_KEYS = TIMESTAMP_KEYS + ("input_length",)

# Stages whose durations are reported as a share of TTFT.
STAGES = ("prefill", "proxy_gap", "kv_decode")

# Number of per-request rows shown in the table.
MAX_TABLE_ROWS = 25

# Requests whose TTFT is at or below this value are left out of the fraction
# summary so that a near-zero denominator cannot dominate the mean.
MIN_TTFT_FOR_FRACTION = 0.01


def load_records(source):
    """Load the breakdown records from an HTTP(S) URL or a local JSON file.

    Args:
        source: A string starting with "http" (fetched with a 10 s timeout),
            otherwise a filesystem path to a JSON document.

    Returns:
        The decoded JSON payload (the rest of the script iterates over it
        and indexes each element like a dict).
    """
    if source.startswith("http"):
        # Context managers guarantee the response / file handle is closed.
        with urllib.request.urlopen(source, timeout=10) as resp:
            return json.loads(resp.read())
    with open(source, encoding="utf-8") as fh:
        return json.load(fh)


def build_results(records):
    """Turn raw records into per-stage durations.

    A record is skipped when any field in REQUIRED_KEYS is absent or null,
    because the durations below cannot be computed from it.

    Args:
        records: Iterable of dict records as returned by ``load_records``.

    Returns:
        A ``(results, skipped)`` tuple: ``results`` is a list of dicts with
        keys "input", "prefill", "proxy_gap", "kv_decode" and "ttft", sorted
        by "input" ascending; ``skipped`` is the number of rejected records.
    """
    results = []
    skipped = 0
    for rec in records:
        if any(rec.get(key) is None for key in REQUIRED_KEYS):
            skipped += 1
            continue
        results.append({
            "input": rec["input_length"],
            "prefill": rec["t_prefill_done"] - rec["t_prefill_sent"],
            "proxy_gap": rec["t_decode_sent"] - rec["t_prefill_done"],
            "kv_decode": rec["t_first_token"] - rec["t_decode_sent"],
            "ttft": rec["t_first_token"] - rec["t_proxy_recv"],
        })
    results.sort(key=lambda row: row["input"])
    return results, skipped


def percentile(sorted_vals, q):
    """Return the element at rank ``int(q * n)`` of an ascending, non-empty list.

    The index is clamped to the last element so that ``q == 1.0`` is valid.
    """
    last = len(sorted_vals) - 1
    return sorted_vals[min(int(q * len(sorted_vals)), last)]


def print_report(results):
    """Print the per-request table, per-stage percentiles and TTFT shares.

    Args:
        results: Non-empty list of rows as produced by ``build_results``.
    """
    print()
    print(" %8s %9s %9s %9s %9s" % ("input", "prefill", "proxy", "kv+dec", "TTFT"))
    print(" %8s %9s %9s %9s %9s" % ("-----", "-------", "-----", "------", "----"))
    for row in results[:MAX_TABLE_ROWS]:
        print(" %8d %9.3f %9.3f %9.3f %9.3f" % (
            row["input"], row["prefill"], row["proxy_gap"],
            row["kv_decode"], row["ttft"]))

    print()
    for key in STAGES + ("ttft",):
        vals = sorted(row[key] for row in results)
        print(" %s: p50=%.3fs p90=%.3fs mean=%.3fs" % (
            key, percentile(vals, .5), percentile(vals, .9),
            statistics.fmean(vals)))

    # Fraction of TTFT by stage
    print()
    print(" TTFT breakdown (fraction of total):")
    for key in STAGES:
        fracs = [row[key] / row["ttft"] * 100
                 for row in results if row["ttft"] > MIN_TTFT_FOR_FRACTION]
        if fracs:
            print(" %s: mean=%.1f%% of TTFT" % (key, statistics.fmean(fracs)))


def main(argv=None):
    """Run the analysis; ``argv`` defaults to ``sys.argv[1:]``.

    Returns:
        Process exit status (always 0; load/parse errors propagate as
        exceptions, as they did when this was a flat script).
    """
    args = sys.argv[1:] if argv is None else list(argv)
    source = args[0] if args else DEFAULT_URL

    data = load_records(source)
    print("Total records: %d" % len(data))

    results, skipped = build_results(data)
    print("Complete breakdown: %d" % len(results))
    if skipped:
        # Warn on stderr so the stdout report stays clean.
        print("WARNING: %d records skipped (missing breakdown timestamps)" % skipped,
              file=sys.stderr)
    if not results:
        print("No complete records yet")
        return 0

    print_report(results)
    return 0


if __name__ == "__main__":
    sys.exit(main())