"""Per-GPU utilization report from gpu_util.csv (companion to bench_report.py).

bench_report's per-worker GPU util needs request routing (breakdown.json),
which the MB5 proxy doesn't log. But worker == GPU by index, and the
prefill/decode role split is fixed by config, so per-GPU util from
gpu_util.csv directly answers "GPU utils by workers" — and for PD it exposes
the key signal: are the prefill-side GPUs saturated while the decode-side
idles (or vice versa, or stalled at ~0)?

Usage:
    gpu_util_report.py RUN_DIR [--prefill-gpus 0,1,2,3 --decode-gpus 4,5,6,7]
"""
from __future__ import annotations

import argparse
import csv
import json
import statistics
from pathlib import Path


def pct(xs, p):
    """Return the nearest-rank ``p``-th percentile of ``xs``.

    Args:
        xs: Iterable of comparable values (sorted internally, not mutated).
        p: Percentile in the range 0..100.

    Returns:
        The element at the rounded rank, or ``None`` when ``xs`` is empty.
    """
    ordered = sorted(xs)
    if not ordered:
        return None
    last = len(ordered) - 1
    # Clamp so that p outside 0..100 still yields the min/max element.
    return ordered[max(0, min(last, int(round(p / 100 * last))))]


def _gpu_list(text: str) -> list[int]:
    """argparse ``type=``: parse ``"0,1,2"`` into ``[0, 1, 2]``, ignoring blank entries."""
    try:
        return [int(tok) for tok in text.split(",") if tok.strip()]
    except ValueError:
        raise argparse.ArgumentTypeError(
            f"expected comma-separated GPU indices, got {text!r}"
        ) from None


def _read_window(run_dir: Path) -> tuple[float | None, float | None]:
    """Return ``(t_start_unix, t_end_unix)`` from run_window.json; ``None`` marks an unset bound."""
    window_path = run_dir / "run_window.json"
    if not window_path.exists():
        return None, None
    with open(window_path, encoding="utf-8") as fh:
        window = json.load(fh)
    # `or None`: a missing, null, or 0 bound all mean "not recorded".
    return window.get("t_start_unix") or None, window.get("t_end_unix") or None


def _read_samples(csv_path: Path, t0, t1) -> dict[int, dict[str, list[float]]]:
    """Collect per-GPU util ("u") and memory ("m") samples that fall inside ``[t0, t1]``."""
    by_gpu: dict[int, dict[str, list[float]]] = {}
    with open(csv_path, newline="") as fh:
        for row in csv.DictReader(fh):
            try:
                ts = float(row["timestamp"])
                gpu = int(row["gpu"])
                util = float(row["util_pct"])
                mem = float(row["mem_used_mb"])
            except (KeyError, TypeError, ValueError):
                # Malformed or truncated row (short rows yield None fields,
                # which raise TypeError): skip it rather than abort the report.
                continue
            # Each bound is applied on its own, so a window with only one
            # side recorded still filters. `not (a <= b)` also drops NaN.
            if t0 is not None and not (t0 <= ts):
                continue
            if t1 is not None and not (ts <= t1):
                continue
            samples = by_gpu.setdefault(gpu, {"u": [], "m": []})
            samples["u"].append(util)
            samples["m"].append(mem)
    return by_gpu


def _print_group(by_gpu, gpus, label):
    """Print pooled util stats for the GPUs in ``gpus``; silent when none of them has samples."""
    pooled = [v for g in gpus for v in by_gpu.get(g, {}).get("u", [])]
    if pooled:
        print(f" {label:<14} gpus={gpus} util mean={statistics.fmean(pooled):.1f}% p90={pct(pooled,90):.1f}% max={max(pooled):.1f}%")


def main(argv=None):
    """Print the per-GPU utilization table for one run directory.

    Reads ``RUN_DIR/gpu_util.csv`` (columns ``timestamp``, ``gpu``,
    ``util_pct``, ``mem_used_mb``). If ``RUN_DIR/run_window.json`` exists,
    samples are restricted to its ``t_start_unix``..``t_end_unix`` range.
    With ``--prefill-gpus`` / ``--decode-gpus``, a pooled summary line is
    printed for each side.

    Args:
        argv: Argument list to parse; ``None`` means ``sys.argv[1:]``.
    """
    ap = argparse.ArgumentParser()
    ap.add_argument("run_dir", type=Path)
    ap.add_argument("--prefill-gpus", type=_gpu_list, default="")
    ap.add_argument("--decode-gpus", type=_gpu_list, default="")
    a = ap.parse_args(argv)

    t0, t1 = _read_window(a.run_dir)

    csvp = a.run_dir / "gpu_util.csv"
    if not csvp.exists():
        print(f"{a.run_dir.name}: gpu_util.csv absent")
        return

    by_gpu = _read_samples(csvp, t0, t1)

    print(f"=== {a.run_dir.name}: per-GPU util over replay window ({sum(len(d['u']) for d in by_gpu.values())} samples) ===")
    print(f"{'gpu':>4}{'util_mean':>11}{'util_p90':>10}{'util_max':>10}{'mem_max_GB':>12}")
    for g in sorted(by_gpu):
        u, m = by_gpu[g]["u"], by_gpu[g]["m"]
        print(f"{g:>4}{statistics.fmean(u):>11.1f}{pct(u,90):>10.1f}{max(u):>10.1f}{max(m)/1024:>12.1f}")

    if a.prefill_gpus:
        _print_group(by_gpu, a.prefill_gpus, "prefill-side")
    if a.decode_gpus:
        _print_group(by_gpu, a.decode_gpus, "decode-side")


if __name__ == "__main__":
    main()