"""Aggregate a set of MB5 run dirs into one comparison table. Pulls the three core metrics the analysis cares about, per run: - E2E latency (from replay_metrics.summary.json: latency_stats_s) - TPS (output tokens / wall_clock_s) - GPU util by workers (gpu_util.csv over run_window, split prefill/decode by role) plus honest reuse (producer-side APC from instance_apc.txt) and TTFT/TPOT for logs. Arm + GPU role split + producer APC ports are inferred from the dir name: *_colo_* -> 8 kv_both ; apc ports 8000-8007 (all keep prefix) *_pd6_* -> 6P+2D P0-5/D6-7 ; apc 8000-8005 *_pd_* -> 4P+4D P0-3/D4-7 ; apc 8000-8003 (note: "pd" not "pd4") *_pd2_* -> 2P+6D P0-1/D2-7 ; apc 8000-8001 Usage: fig_agg.py [ ...] """ from __future__ import annotations import csv import json import re import statistics import sys from pathlib import Path def arm_of(name: str): # New driver naming (run_conc.sh / run_reuse_fixed.sh): "...__rep". if "8C-proxy" in name: return "colo", list(range(8)), [], list(range(8000, 8008)) if "6P+2D" in name: return "6P+2D", [0, 1, 2, 3, 4, 5], [6, 7], list(range(8000, 8006)) if "2P+6D" in name: return "2P+6D", [0, 1], [2, 3, 4, 5, 6, 7], list(range(8000, 8002)) if "4P+4D" in name: return "4P+4D", [0, 1, 2, 3], [4, 5, 6, 7], list(range(8000, 8004)) # Legacy naming (original May-30 corrected runs). if "_colo_" in name or name.endswith("_colo"): return "colo", list(range(8)), [], list(range(8000, 8008)) if "_pd6_" in name: return "6P+2D", [0, 1, 2, 3, 4, 5], [6, 7], list(range(8000, 8006)) if "_pd2_" in name: return "2P+6D", [0, 1], [2, 3, 4, 5, 6, 7], list(range(8000, 8002)) if "_pd4_" in name or "_pd_" in name: return "4P+4D", [0, 1, 2, 3], [4, 5, 6, 7], list(range(8000, 8004)) return "?", list(range(8)), [], list(range(8000, 8008)) def util_split(run: Path, pgpus, dgpus): win = {} wp = run / "run_window.json" if wp.exists(): win = json.load(open(wp)) t0, t1 = win.get("t_start_unix"), win.get("t_end_unix") csvp = run / "gpu_util.csv" if not csvp.exists(): return None, None by = {} for row in csv.DictReader(open(csvp)): try: ts = float(row["timestamp"]); g = int(row["gpu"]); u = float(row["util_pct"]) except (ValueError, KeyError): continue if t0 and not (t0 <= ts <= t1): continue by.setdefault(g, []).append(u) pm = [v for g in pgpus for v in by.get(g, [])] dm = [v for g in dgpus for v in by.get(g, [])] return (statistics.fmean(pm) if pm else None, statistics.fmean(dm) if dm else None) def apc(run: Path, ports): f = run / "instance_apc.txt" if not f.exists(): return None q = h = 0 for line in open(f): m = dict(re.findall(r"(\w+)=(\S+)", line)) try: p = int(m.get("port", -1)) except ValueError: continue if p in ports: q += float(m.get("queries", 0)); h += float(m.get("hits", 0)) return (h / q) if q else None def main(): args = sys.argv[1:] as_json = False if "--json" in args: as_json = True args = [a for a in args if a != "--json"] rows = [] for d in args: run = Path(d) sp = run / "replay_metrics.summary.json" if not sp.exists(): continue s = json.load(open(sp)) arm, pg, dg, ports = arm_of(run.name) lat = s.get("latency_stats_s", {}) ttft = s.get("ttft_stats_s", {}) tpot = s.get("tpot_stats_s", {}) wall = s.get("wall_clock_s") or 1.0 out = s.get("actual_output_tokens_stats", {}) n = s.get("success_count", 0); req = s.get("request_count", 0) tot_out = out.get("count", 0) * out.get("mean", 0) tps = tot_out / wall pu, du = util_split(run, pg, dg) a = apc(run, ports) rows.append({ "name": run.name, "arm": arm, "n": n, "req": req, "e2e_p50": lat.get("p50"), "e2e_p90": lat.get("p90"), "e2e_p99": lat.get("p99"), "e2e_mean": lat.get("mean"), "ttft_p90": ttft.get("p90"), "tpot_p99": tpot.get("p99"), "tps": tps, "wall": wall, "pu": pu, "du": du, "apc": a, }) if as_json: print(json.dumps(rows)) return def f(x, w=7, p=1): return f"{x:>{w}.{p}f}" if isinstance(x, (int, float)) else f"{'-':>{w}}" hdr = (f"{'run':<34}{'arm':>7}{'ok/req':>9}{'E2Ep50':>8}{'E2Ep90':>8}{'E2Ep99':>8}" f"{'TPS':>8}{'Putil':>7}{'Dutil':>7}{'APC%':>7}{'TTFTp90':>9}{'TPOTp99ms':>10}") print(hdr); print("-" * len(hdr)) for r in sorted(rows, key=lambda r: r["name"]): print(f"{r['name']:<34}{r['arm']:>7}{str(r['n'])+'/'+str(r['req']):>9}" f"{f(r['e2e_p50'])}{f(r['e2e_p90'])}{f(r['e2e_p99'])}" f"{f(r['tps'],8,1)}{f(r['pu'])}{f(r['du'])}" f"{f((r['apc'] or 0)*100)}{f(r['ttft_p90'],9,2)}" f"{f((r['tpot_p99'] or 0)*1000,10,1)}") if __name__ == "__main__": main()