"""Compare benchmark results between PD-combined and PD-separated modes. Reads summary JSON files and per-request metrics to produce a detailed comparison report including TTFT, TPOT, E2E, cache hit ratio, and throughput analysis. Usage: python scripts/compare_results.py \ --combined outputs/combined_1000req/metrics.summary.json \ --separated outputs/separated_1000req/metrics.summary.json """ from __future__ import annotations import argparse import json import sys from pathlib import Path def load_summary(path: Path) -> dict: return json.loads(path.read_text()) def load_metrics(path: Path) -> list[dict]: rows = [] with path.open() as fh: for line in fh: rows.append(json.loads(line)) return rows def fmt_stat(stat: dict | None, unit: str = "s") -> str: if stat is None: return "N/A" return (f"mean={stat['mean']:.3f}{unit} " f"p50={stat['p50']:.3f}{unit} " f"p90={stat['p90']:.3f}{unit} " f"p99={stat['p99']:.3f}{unit}") def compare(combined: dict, separated: dict) -> None: print("=" * 70) print("PD-Combined vs PD-Separated Performance Comparison") print("=" * 70) for label, s in [("PD-Combined", combined), ("PD-Separated", separated)]: print(f"\n--- {label} ---") print(f" Requests: {s['request_count']} (success: {s['success_count']}, errors: {s['error_count']})") print(f" Wall clock: {s.get('wall_clock_s', 0):.1f}s") print(f" TTFT: {fmt_stat(s.get('ttft_stats_s'))}") print(f" TPOT: {fmt_stat(s.get('tpot_stats_s'))}") print(f" E2E: {fmt_stat(s.get('latency_stats_s'))}") hit_ratio = s.get('prefix_cache_hit_ratio', 0) print(f" Prefix cache hit ratio: {hit_ratio*100:.1f}%") queries = s.get('prefix_cache_queries_tokens', 0) hits = s.get('prefix_cache_hits_tokens', 0) print(f" ({hits}/{queries} tokens)") print("\n--- Comparison (Separated vs Combined) ---") for metric_key, label in [ ("ttft_stats_s", "TTFT"), ("tpot_stats_s", "TPOT"), ("latency_stats_s", "E2E"), ]: c = combined.get(metric_key, {}) s = separated.get(metric_key, {}) if c and s: for pct in ["mean", "p50", "p90", "p99"]: cv, sv = c.get(pct, 0), s.get(pct, 0) if cv > 0: change = (sv - cv) / cv * 100 direction = "slower" if change > 0 else "faster" print(f" {label} {pct}: {abs(change):.1f}% {direction} " f"({cv:.3f}s → {sv:.3f}s)") c_ratio = combined.get("prefix_cache_hit_ratio", 0) s_ratio = separated.get("prefix_cache_hit_ratio", 0) print(f" Cache hit ratio: {c_ratio*100:.1f}% → {s_ratio*100:.1f}%") c_wall = combined.get("wall_clock_s", 1) s_wall = separated.get("wall_clock_s", 1) c_tput = combined["success_count"] / c_wall s_tput = separated["success_count"] / s_wall print(f" Throughput: {c_tput:.1f} → {s_tput:.1f} req/s " f"({(s_tput/c_tput - 1)*100:+.1f}%)") def main(): p = argparse.ArgumentParser(description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter) p.add_argument("--combined", type=Path, required=True) p.add_argument("--separated", type=Path, required=True) args = p.parse_args() combined = load_summary(args.combined) separated = load_summary(args.separated) compare(combined, separated) if __name__ == "__main__": main()