Files
agentic-kvc/scripts/legacy/analyze_h5_rdma.py
Gahow Wang 547611e022 scripts: archive obsolete one-off shell/python scripts to legacy/ (D2, D3)
D2: run_benchmark.sh and run_experiments.sh still pass --time-scale and
--max-inflight-sessions to the replayer, but those flags were removed when
the project moved to trace-driven dispatch. The scripts cannot run as-is.

D3: ~25 ad-hoc analyze_* / compare_* / profile_* / final_* scripts and a
handful of single-experiment run_*.sh point at /home/admin/cpfs paths,
deleted output directories, or a sampled trace file that no longer exists.
Keep them in scripts/legacy/ for historical reference; the scripts that
remain in scripts/ (analyze_trace, analyze_breakdown, analyze_cache_hit,
analyze_eviction, compare_results, compute_roofline, sample_trace,
analyze_agentic_patterns, simulate_cache_policies, plus launch_*.sh,
gpu_monitor.sh, bench.sh) cover the current workflow.

Adds scripts/legacy/README.md to document the archival policy.
2026-05-23 20:57:32 +08:00

61 lines
2.6 KiB
Python

"""H5: RDMA transfer breakdown analysis from V2 offload data."""
import json
import statistics
import sys
# Breakdown file analysed when no path is given on the command line.
DEFAULT_BREAKDOWN_PATH = "outputs/v2_offload/breakdown.json"

# Only entries routed through this class are analysed.
OFFLOAD_ROUTE_CLASS = "HEAVY_OFFLOAD"

# Timestamps an entry must carry before any interval can be derived from it.
TIMING_KEYS = (
    "t_prefill_sent",
    "t_prefill_done",
    "t_first_token",
    "t_done",
    "t_proxy_recv",
)

# KV-transfer duration separating the "low" and "high" groups; the report
# labels this value as seconds.
KV_SPLIT_SECONDS = 1.5

# A bin needs at least this many samples before its CV is reported
# (statistics.stdev itself needs two or more).
MIN_BIN_SAMPLES = 3

# Half-open [lo, hi) input-length buckets used for the per-1k CV analysis.
INPUT_LENGTH_BINS = (
    (20000, 35000, "20-35k"),
    (35000, 50000, "35-50k"),
    (50000, 75000, "50-75k"),
    (75000, 120000, "75-120k"),
)


def load_breakdown(path):
    """Return the parsed JSON content of the breakdown file at *path*.

    The file is opened in a ``with`` block so the handle is closed even when
    parsing fails.
    """
    with open(path, encoding="utf-8") as handle:
        return json.load(handle)


def build_records(entries):
    """Derive per-request interval records from raw breakdown entries.

    Entries are kept only when their ``route_class`` is ``HEAVY_OFFLOAD`` and
    they carry ``input_length`` plus every key in ``TIMING_KEYS``; anything
    else is skipped rather than aborting the whole analysis.

    Each returned dict holds:
      il    input_length
      ch    cache_hit (0 when absent)
      kv    t_first_token - t_prefill_done
      pf    t_prefill_done - t_prefill_sent
      dc    t_done - t_first_token
      ttft  t_first_token - t_proxy_recv
    """
    records = []
    for entry in entries:
        if entry.get("route_class") != OFFLOAD_ROUTE_CLASS:
            continue
        if "input_length" not in entry:
            continue
        if not all(key in entry for key in TIMING_KEYS):
            continue
        records.append({
            "il": entry["input_length"],
            "ch": entry.get("cache_hit", 0),
            "kv": entry["t_first_token"] - entry["t_prefill_done"],
            "pf": entry["t_prefill_done"] - entry["t_prefill_sent"],
            "dc": entry["t_done"] - entry["t_first_token"],
            "ttft": entry["t_first_token"] - entry["t_proxy_recv"],
        })
    return records


def per_1k(record):
    """Return the record's ``kv`` duration per 1000 units of input length.

    A record with zero input length has no meaningful rate; NaN is returned
    instead of raising ZeroDivisionError so one bad row cannot kill the report.
    """
    if not record["il"]:
        return float("nan")
    return record["kv"] / record["il"] * 1000


def report_lines(records):
    """Build the report for *records* as a list of lines, in print order."""
    lines = [f"Records with full timing: {len(records)}"]

    # Concurrency effect: split on KV duration and compare mean input size.
    low_kv = [r for r in records if r["kv"] < KV_SPLIT_SECONDS]
    high_kv = [r for r in records if r["kv"] >= KV_SPLIT_SECONDS]
    lines.append("\n=== Concurrency Effect on KV Transfer ===")
    if low_kv:
        lines.append(f" Low KV (<1.5s): n={len(low_kv)} mean_input={statistics.mean([r['il'] for r in low_kv])/1000:.0f}k")
    if high_kv:
        lines.append(f" High KV (>=1.5s): n={len(high_kv)} mean_input={statistics.mean([r['il'] for r in high_kv])/1000:.0f}k")

    # Block transfer pattern: coefficient of variation of per-1k time per bin.
    lines.append("\n=== Block Transfer Pattern (CV analysis) ===")
    for lo, hi, label in INPUT_LENGTH_BINS:
        subset = [r for r in records if lo <= r["il"] < hi]
        if len(subset) < MIN_BIN_SAMPLES:
            continue
        ratios = [per_1k(r) for r in subset]
        mean_ratio = statistics.mean(ratios)  # computed once, reused below
        cv = statistics.stdev(ratios) / mean_ratio if mean_ratio > 0 else 0
        lines.append(f" [{label:8s}] n={len(subset):2d} per_1k: mean={mean_ratio:.4f}s CV={cv:.2f}")

    # Extremes of the KV-transfer distribution.
    lines.append("\n=== Top 5 Slowest KV Transfers ===")
    for r in sorted(records, key=lambda rec: rec["kv"], reverse=True)[:5]:
        lines.append(f" input={r['il']:6d} kv={r['kv']:.2f}s prefill={r['pf']:.1f}s per1k={per_1k(r):.4f}s")
    lines.append("\n=== Top 5 Fastest KV Transfers ===")
    for r in sorted(records, key=lambda rec: rec["kv"])[:5]:
        lines.append(f" input={r['il']:6d} kv={r['kv']:.3f}s per1k={per_1k(r):.4f}s")

    # NOTE(review): these conclusions are hard-coded text, not computed from
    # the loaded data; they presumably describe the original V2 offload run.
    lines.append("\n=== Summary ===")
    lines.append(" R^2=0.095: KV transfer time poorly predicted by input length alone")
    lines.append(" Fixed setup overhead ~0.08s (negligible, ~3% of median KV time)")
    lines.append(" High per-1k CV (0.5-1.3) suggests variable contention, not stepwise block transfer")
    lines.append(" Mooncake likely does batched block transfer (smooth, not per-block)")
    return lines


def main(argv=None):
    """Load the breakdown file named in *argv* (default: sys.argv) and print the report.

    ``argv[1]`` is the breakdown path; when absent, DEFAULT_BREAKDOWN_PATH is used.
    """
    args = sys.argv if argv is None else argv
    bd_path = args[1] if len(args) > 1 else DEFAULT_BREAKDOWN_PATH
    for line in report_lines(build_records(load_breakdown(bd_path))):
        print(line)


if __name__ == "__main__":
    main()