"""Final GPU util + latency comparison across all tested configs.

Script path: scripts/final_gpu_comparison.py

For every known config this reads ``<root>/<config>/gpu_util.csv`` and
``<root>/<config>/metrics.jsonl`` and prints one summary row.  Configs for
which either file is missing are silently left out of the table.  Relative
paths are resolved against the current working directory.
"""
import csv
import json
import os
import statistics

# (output sub-directory, table label) for each config, in display order.
CONFIGS = (
    ("gpu_ab_combined", "Combined (old cache-aware)"),
    ("gpu_ab_hybrid", "Combined (hybrid routing)"),
    ("gpu_ab_pdsep", "PD-Sep 4P+4D"),
    ("gpu_ab_6p2d", "PD-Sep 6P+2D"),
)


def _percentile(sorted_vals, q):
    """Return the element at index ``int(q * n)`` (clamped to the last index).

    ``sorted_vals`` must already be sorted ascending.  Returns 0 when the
    list is empty so callers can format the result without special-casing.
    """
    if not sorted_vals:
        return 0
    last = len(sorted_vals) - 1
    return sorted_vals[min(int(q * len(sorted_vals)), last)]


def gpu_s(path):
    """Summarise the ``util_pct`` column of a GPU utilisation CSV.

    Args:
        path: CSV file with a header row containing a ``util_pct`` column
            whose values parse as floats.

    Returns:
        Dict with ``mean`` (arithmetic mean), ``p50`` (see ``_percentile``)
        and ``active`` (floor of the percentage of samples that are > 0).
        All three are zero when the file has no data rows.

    Raises:
        KeyError: a row has no ``util_pct`` field.
        ValueError: a ``util_pct`` value is not a number.
    """
    # newline="" is what the csv module expects for files it reads.
    with open(path, newline="", encoding="utf-8") as fh:
        vals = [float(row["util_pct"]) for row in csv.DictReader(fh)]

    # A header-only file would otherwise make fmean() raise; report zeros,
    # matching how lat_s reports empty series.
    if not vals:
        return {"mean": 0.0, "p50": 0.0, "active": 0}

    nonzero = sum(1 for v in vals if v > 0)
    return {
        "mean": statistics.fmean(vals),
        "p50": _percentile(sorted(vals), 0.5),
        "active": nonzero * 100 // len(vals),
    }


def lat_s(path):
    """Summarise per-request latency records from a JSON-lines file.

    Each non-blank line must be a JSON object.  Records with a truthy
    ``error`` field count towards ``n`` but are excluded from every
    percentile.

    Args:
        path: JSON-lines file of request records.

    Returns:
        Dict with ``ok`` (records without error), ``n`` (all records),
        ``t50``/``t90`` (percentiles of ``ttft_s``), ``p50``/``p90``
        (percentiles of ``tpot_s``) and ``e50`` (median of ``latency_s``).
        A percentile over an empty series is 0.
    """
    with open(path, encoding="utf-8") as fh:
        # Skip blank lines (e.g. a trailing newline) instead of failing to parse them.
        rows = [json.loads(line) for line in fh if line.strip()]

    ok = [r for r in rows if not r.get("error")]
    # Truthiness filter: missing, None and 0 values of ttft_s are all excluded.
    ttfts = sorted(r["ttft_s"] for r in ok if r.get("ttft_s"))
    # Only strictly positive tpot_s values take part.
    tpots = sorted(r["tpot_s"] for r in ok if r.get("tpot_s") and r["tpot_s"] > 0)
    # Records lacking latency_s are left out of e50 rather than raising KeyError.
    lats = sorted(r["latency_s"] for r in ok if r.get("latency_s") is not None)

    return {
        "ok": len(ok),
        "n": len(rows),
        "t50": _percentile(ttfts, 0.5),
        "t90": _percentile(ttfts, 0.9),
        "p50": _percentile(tpots, 0.5),
        "p90": _percentile(tpots, 0.9),
        "e50": _percentile(lats, 0.5),
    }


def main(root="outputs"):
    """Print the comparison table for every config found under ``root``.

    Args:
        root: directory holding one sub-directory per config.
    """
    # Size the label column from the labels themselves so the longest one
    # (26 characters) no longer overflows and shifts its row.
    label_w = max(len(label) for _, label in CONFIGS)
    fmt = "%%-%ds %%6s %%8s %%8s %%8s %%7s %%7s" % label_w
    header = fmt % ("Config", "OK/N", "TTFT50", "TPOT90", "E2E50", "GPU%", "Active")
    width = len(header)  # every header cell fits its column, so this is the table width

    print("COMPLETE COMPARISON (200 req, time_scale=20, GPU monitoring)")
    print("=" * width)
    print(header)
    print("-" * width)

    for subdir, label in CONFIGS:
        gpu_path = os.path.join(root, subdir, "gpu_util.csv")
        metrics_path = os.path.join(root, subdir, "metrics.jsonl")
        # Configs that were not run (or only partially recorded) are skipped.
        if not os.path.exists(gpu_path) or not os.path.exists(metrics_path):
            continue
        gpu = gpu_s(gpu_path)
        lat = lat_s(metrics_path)
        print(fmt % (
            label,
            "%d/%d" % (lat["ok"], lat["n"]),
            "%.3f" % lat["t50"],
            "%.3f" % lat["p90"],
            "%.3f" % lat["e50"],
            "%.1f" % gpu["mean"],
            "%d%%" % gpu["active"],
        ))


if __name__ == "__main__":
    main()