# agentic-kvc/microbench/fresh_setup/gpu_util_report.py

"""Per-GPU utilization report from gpu_util.csv (companion to bench_report.py).

bench_report's per-worker GPU util needs request routing (breakdown.json), which
the MB5 proxy doesn't log. But worker == GPU by index, and the prefill/decode role
split is fixed by config, so per-GPU util from gpu_util.csv directly answers
"GPU utils by workers" — and for PD it exposes the key signal: are the prefill-side
GPUs saturated while the decode-side idles (or vice versa, or stalled at ~0)?

Usage:
  gpu_util_report.py <run_dir> [--prefill-gpus 0,1,2,3 --decode-gpus 4,5,6,7]
"""
from __future__ import annotations

import argparse
import csv
import json
import statistics
from pathlib import Path


def pct(xs, p):
    """Return the nearest-rank ``p``-th percentile of ``xs``, or None if empty.

    The rank is ``round(p / 100 * (n - 1))`` on the sorted values, clamped to
    the valid index range so out-of-range ``p`` yields the min or the max.
    """
    ordered = sorted(xs)
    if not ordered:
        return None
    last = len(ordered) - 1
    rank = int(round(p / 100 * last))
    if rank > last:
        rank = last
    if rank < 0:
        rank = 0
    return ordered[rank]


def _load_window(run_dir):
    """Return ``(t_start_unix, t_end_unix)`` from ``run_window.json``.

    Either bound is None when the file or the corresponding key is absent.
    """
    window_path = run_dir / "run_window.json"
    if not window_path.exists():
        return None, None
    with open(window_path, encoding="utf-8") as fh:
        win = json.load(fh)
    return win.get("t_start_unix"), win.get("t_end_unix")


def _load_samples(csv_path, t0, t1):
    """Group in-window samples from ``gpu_util.csv`` by GPU index.

    Returns ``{gpu: {"u": [util_pct, ...], "m": [mem_used_mb, ...]}}``.
    Rows that cannot be parsed are skipped. A bound of None means "unbounded"
    on that side.
    """
    by_gpu = {}
    # newline="" is what the csv module expects for correct line handling.
    with open(csv_path, newline="") as fh:
        for row in csv.DictReader(fh):
            try:
                ts = float(row["timestamp"])
                g = int(row["gpu"])
                u = float(row["util_pct"])
                m = float(row["mem_used_mb"])
            except (ValueError, KeyError, TypeError):
                # TypeError: a short/truncated row leaves missing fields as
                # None (e.g. the sampler was killed mid-write).
                continue
            # Written as "not (a >= b)" so a NaN timestamp is still rejected
            # whenever a bound is set.
            if t0 is not None and not (ts >= t0):
                continue
            if t1 is not None and not (ts <= t1):
                continue
            bucket = by_gpu.setdefault(g, {"u": [], "m": []})
            bucket["u"].append(u)
            bucket["m"].append(m)
    return by_gpu


def _print_side(by_gpu, gpu_list, label):
    """Print pooled util stats for the comma-separated GPU indices in ``gpu_list``.

    Prints nothing when none of the listed GPUs has samples.
    """
    gpus = [int(x) for x in gpu_list.split(",") if x.strip()]
    us = [v for g in gpus for v in by_gpu.get(g, {}).get("u", [])]
    if us:
        print(f"  {label:<14} gpus={gpus} util mean={statistics.fmean(us):.1f}% p90={pct(us,90):.1f}% max={max(us):.1f}%")


def main(argv=None):
    """Print a per-GPU utilization table for one run directory.

    Reads ``<run_dir>/gpu_util.csv``, optionally restricted to the time window
    in ``<run_dir>/run_window.json``, and prints mean / p90 / max utilization
    and peak memory per GPU. With ``--prefill-gpus`` / ``--decode-gpus`` it
    also prints stats pooled over each listed group of GPUs.

    Args:
        argv: Argument list to parse; None (the default) uses ``sys.argv[1:]``.
    """
    ap = argparse.ArgumentParser()
    ap.add_argument("run_dir", type=Path)
    ap.add_argument("--prefill-gpus", default="")
    ap.add_argument("--decode-gpus", default="")
    a = ap.parse_args(argv)

    t0, t1 = _load_window(a.run_dir)

    csvp = a.run_dir / "gpu_util.csv"
    if not csvp.exists():
        print(f"{a.run_dir.name}: gpu_util.csv absent")
        return
    by_gpu = _load_samples(csvp, t0, t1)

    print(f"=== {a.run_dir.name}: per-GPU util over replay window ({sum(len(d['u']) for d in by_gpu.values())} samples) ===")
    print(f"{'gpu':>4}{'util_mean':>11}{'util_p90':>10}{'util_max':>10}{'mem_max_GB':>12}")
    for g in sorted(by_gpu):
        u, m = by_gpu[g]["u"], by_gpu[g]["m"]
        print(f"{g:>4}{statistics.fmean(u):>11.1f}{pct(u,90):>10.1f}{max(u):>10.1f}{max(m)/1024:>12.1f}")

    if a.prefill_gpus:
        _print_side(by_gpu, a.prefill_gpus, "prefill-side")
    if a.decode_gpus:
        _print_side(by_gpu, a.decode_gpus, "decode-side")


# Script entry point: run the report only when executed directly, not on import.
if __name__ == "__main__":
    main()