Files
agentic-kvc/scripts/legacy/analyze_h4_results.py
Gahow Wang 547611e022 scripts: archive obsolete one-off shell/python scripts to legacy/ (D2, D3)
D2: run_benchmark.sh and run_experiments.sh still pass --time-scale and
--max-inflight-sessions to the replayer, but those flags were removed when
the project moved to trace-driven dispatch. The scripts cannot run as-is.

D3: ~25 ad-hoc analyze_* / compare_* / profile_* / final_* scripts and a
handful of single-experiment run_*.sh point at /home/admin/cpfs paths,
deleted output directories, or a sampled trace file that no longer exists.
Keep them in scripts/legacy/ for historical reference; the scripts that
remain in scripts/ (analyze_trace, analyze_breakdown, analyze_cache_hit,
analyze_eviction, compare_results, compute_roofline, sample_trace,
analyze_agentic_patterns, simulate_cache_policies, plus launch_*.sh,
gpu_monitor.sh, bench.sh) cover the current workflow.

Adds scripts/legacy/README.md to document the archival policy.
2026-05-23 20:57:32 +08:00

97 lines
4.5 KiB
Python

"""Analyze H4 cache-aware gate experiment results."""
import json
import sys
from collections import Counter
DEFAULT_OUTDIR = "outputs/h4_cache_gate"
# (inclusive lower bound, exclusive upper bound, label) buckets on "input_length".
LENGTH_CLASSES = [(0, 5000, "WARM"), (5000, 20000, "MED"), (20000, 200000, "HEAVY")]
# Cache-ratio cut-off used when counting HEAVY requests on either side of the gate.
CACHE_RATIO_GATE = 0.3
RULE = "=" * 70


def percentile(values, q):
    """Return the element at rank ``int(q * len(values))`` of sorted *values*.

    The rank is clamped to the last index. An empty input yields 0 so that
    callers can format the result without checking for data first.
    """
    if not values:
        return 0
    ordered = sorted(values)
    return ordered[min(int(q * len(ordered)), len(ordered) - 1)]


def load_jsonl(path):
    """Return the JSON objects stored one per line in *path*.

    Blank or whitespace-only lines are skipped instead of being handed to
    ``json.loads`` (which would raise), and the file is closed on return.
    """
    with open(path, encoding="utf-8") as fh:
        return [json.loads(line) for line in fh if line.strip()]


def _latency_columns(records):
    """Return sorted ``(ttft_s, tpot_s, latency_s)`` value lists for *records*.

    Falsy ``ttft_s`` values and non-positive ``tpot_s`` values are dropped;
    records without ``latency_s`` are left out of the last list.
    """
    ttfts = sorted(r["ttft_s"] for r in records if r.get("ttft_s"))
    tpots = sorted(r["tpot_s"] for r in records if r.get("tpot_s") and r["tpot_s"] > 0)
    e2es = sorted(r["latency_s"] for r in records if r.get("latency_s") is not None)
    return ttfts, tpots, e2es


def _headline(ok, rows):
    """Return the one-line overall summary and its E2E90 suffix as a pair."""
    ttfts, tpots, e2es = _latency_columns(ok)
    core = (
        f"OK={len(ok)}/{len(rows)} TTFT50={percentile(ttfts, .5):.3f} "
        f"TTFT90={percentile(ttfts, .9):.3f} TPOT90={percentile(tpots, .9):.4f} "
        f"E2E50={percentile(e2es, .5):.3f}"
    )
    return core, f" E2E90={percentile(e2es, .9):.3f}"


def _in_length_class(record, lo, hi):
    """Tell whether *record* has a TTFT and an ``input_length`` in ``[lo, hi)``."""
    length = record.get("input_length")
    return bool(record.get("ttft_s")) and length is not None and lo <= length < hi


def report_length_classes(ok):
    """Print one percentile line per non-empty ``LENGTH_CLASSES`` bucket."""
    for lo, hi, label in LENGTH_CLASSES:
        bucket = [r for r in ok if _in_length_class(r, lo, hi)]
        if not bucket:
            continue
        ttfts, tpots, e2es = _latency_columns(bucket)
        print(
            f" {label:6s} n={len(bucket):3d} TTFT50={percentile(ttfts, .5):.3f} "
            f"TTFT90={percentile(ttfts, .9):.3f} TPOT90={percentile(tpots, .9):.4f} "
            f"E2E50={percentile(e2es, .5):.3f} E2E90={percentile(e2es, .9):.3f}"
        )


def _route_class(record):
    """Return the record's ``route_class`` as a string ("" when missing or None)."""
    return record.get("route_class") or ""


def _cache_ratio(record):
    """Return ``cache_ratio`` if set, else ``cache_hit / max(input_length, 1)``.

    The fallback division is only evaluated when ``cache_ratio`` is absent or
    None, and missing/None operands fall back to 0 and 1 respectively.
    """
    ratio = record.get("cache_ratio")
    if ratio is not None:
        return ratio
    return (record.get("cache_hit") or 0) / max(record.get("input_length") or 1, 1)


def _durations(records, start_key, end_key):
    """Return the sorted ``end - start`` spans of *records*.

    A record is skipped when either timestamp is falsy (missing, None or 0).
    The original filters did this for every key except ``t_proxy_recv``,
    whose absence aborted the whole breakdown report with ``KeyError``.
    """
    return sorted(
        rec[end_key] - rec[start_key]
        for rec in records
        if rec.get(end_key) and rec.get(start_key)
    )


def _print_span(label, spans):
    """Print p50/p90 of *spans* under *label*; print nothing when it is empty."""
    if spans:
        print(f"{label}: p50={percentile(spans, .5):.2f}s p90={percentile(spans, .9):.2f}s")


def report_breakdown(outdir):
    """Print routing and timing statistics read from ``<outdir>/breakdown.json``.

    Raises whatever ``open``/``json.load`` raise when the file is missing or
    malformed; ``main`` turns that into a one-line diagnostic.
    """
    with open(f"{outdir}/breakdown.json", encoding="utf-8") as fh:
        records = json.load(fh)

    by_route = Counter(_route_class(rec) for rec in records)
    print("\nRoute class distribution:")
    for route, count in sorted(by_route.items()):
        print(f" {route}: {count}")

    heavy = [rec for rec in records if _route_class(rec).startswith("HEAVY")]
    reasons = Counter(rec.get("offload_reason", "") for rec in heavy)
    print(f"\nHEAVY offload reasons: {dict(reasons)}")

    colo = [rec for rec in records if rec.get("route_class") == "HEAVY_COLO"]
    offloaded = [rec for rec in records if rec.get("route_class") == "HEAVY_OFFLOAD"]
    print(f"\nHEAVY_COLO (cold, no RDMA): {len(colo)}")
    print(f"HEAVY_OFFLOAD (cached, RDMA): {len(offloaded)}")

    # Only the aggregate is reported; individual HEAVY records are not printed.
    print("\nCache ratio distribution for HEAVY:")
    ratios = sorted(_cache_ratio(rec) for rec in heavy)
    if ratios:
        print(
            f" min={ratios[0]:.2f} p50={percentile(ratios, .5):.2f} "
            f"mean={sum(ratios) / len(ratios):.2f} max={ratios[-1]:.2f}"
        )
        above = sum(1 for ratio in ratios if ratio >= CACHE_RATIO_GATE)
        print(f" >={CACHE_RATIO_GATE} (would offload): {above}")
        print(f" <{CACHE_RATIO_GATE} (stays colo): {len(ratios) - above}")

    _print_span("\n HEAVY_COLO TTFT", _durations(colo, "t_proxy_recv", "t_first_token"))
    _print_span(" HEAVY_OFFLOAD TTFT", _durations(offloaded, "t_proxy_recv", "t_first_token"))
    _print_span(" Offload prefill", _durations(offloaded, "t_prefill_sent", "t_prefill_done"))
    _print_span(" Offload KV xfer", _durations(offloaded, "t_prefill_done", "t_first_token"))


def report_failures(fail, limit=5):
    """Print the first *limit* failed requests with a truncated error message."""
    if not fail:
        return
    print(f"\nFailed requests ({len(fail)}):")
    for rec in fail[:limit]:
        # .get(): a failed request is not guaranteed to carry input_length.
        print(f" input={rec.get('input_length')} error={str(rec['error'])[:80]}")


def main(argv=None):
    """Run the full report; ``argv[1]`` (optional) is the results directory.

    Returns 0. A missing or unreadable ``metrics.jsonl`` propagates as an
    exception, while any problem with ``breakdown.json`` is reported on
    stdout and the rest of the report still prints.
    """
    argv = sys.argv if argv is None else argv
    outdir = argv[1] if len(argv) > 1 else DEFAULT_OUTDIR

    rows = load_jsonl(f"{outdir}/metrics.jsonl")
    ok = [r for r in rows if not r.get("error")]
    fail = [r for r in rows if r.get("error")]
    core, e2e90 = _headline(ok, rows)

    print(RULE)
    print("H4 Cache-Aware Offload Gate Results")
    print(RULE)
    print(core + e2e90)

    report_length_classes(ok)

    # The breakdown file is optional extra detail: report the problem and
    # carry on rather than losing the rest of the output.
    try:
        report_breakdown(outdir)
    except Exception as exc:
        print(f"Breakdown analysis error: {exc}")

    report_failures(fail)

    print()
    print(RULE)
    print("Comparison with all prior experiments")
    print(RULE)
    print("Baseline 8C plain: OK=198/200 TTFT50=1.075 TTFT90=9.384 TPOT90=0.0761 E2E50=5.075")
    print("Phase0A 7C kv_both: OK=198/200 TTFT50=1.073 TPOT90=0.0738 E2E50=5.096")
    print("V2 all-offload: OK=179/185 TTFT50=0.762 TPOT90=0.0746 E2E50=4.628")
    print(f"H4 cache-aware gate: {core}")
    return 0


if __name__ == "__main__":
    sys.exit(main())