# agentic-pd-hybrid/scripts/analysis/analyze_backpressure_smoke.py

#!/usr/bin/env python3
"""Analyze backpressure smoke sweep outputs.

For each run dir with a `request-metrics.jsonl` and the new `structural/`
subdir (admission-events.jsonl, backpressure-events.jsonl,
session-d-binding.jsonl), report:

- Headline (errors, latency, ttft, direct-to-D rate)
- Backpressure pause histogram (count, p50/p90 sleep, total pause time per D)
- Admission probe stats (RPC count, mean RTT, queue_depth distribution,
  pause_ms distribution)
- Session pinning (distinct D per session, bimodal direct-to-D rate)
"""
from __future__ import annotations

import argparse
import json
import statistics
from collections import Counter, defaultdict
from pathlib import Path


def load_jsonl(path: Path) -> list[dict]:
    """Load a JSON Lines file into a list of decoded values.

    Args:
        path: File to read. Every non-blank line must hold one JSON document.

    Returns:
        The decoded lines in file order, or an empty list when ``path`` does
        not exist. Whitespace-only lines are skipped.

    Raises:
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    if not path.exists():
        return []
    # Use a context manager so the handle is closed deterministically instead
    # of being left for the garbage collector (which emits ResourceWarning).
    with path.open("r", encoding="utf-8") as handle:
        return [json.loads(line) for line in handle if line.strip()]


def _nearest_rank(values: list, fraction: float):
    """Return the sorted element at index ``int(len(values) * fraction)``.

    ``values`` must be non-empty. The index is clamped to the last element so
    a fraction at or near 1.0 can never run past the end of the list.
    """
    ordered = sorted(values)
    return ordered[min(len(ordered) - 1, int(len(ordered) * fraction))]


def summarize_run(run_dir: Path) -> dict:
    """Build a summary dict for a single run directory.

    The first ``request-metrics.jsonl`` found anywhere under ``run_dir`` is
    used as the per-request record file. Headline numbers are read from the
    sibling ``request-metrics.jsonl.summary.json`` when that file exists.
    Structural event logs are read from ``run_dir/structural`` or, when that
    directory is absent, from a ``structural`` directory next to the metrics
    file. Missing event logs are treated as empty.

    Args:
        run_dir: Directory holding the outputs of one run.

    Returns:
        A dict that always contains ``run_dir``. When no metrics file is
        found it additionally contains only ``error``. Otherwise it contains
        the headline fields copied from the summary file,
        ``direct_to_d_rate``, ``session_count``,
        ``avg_distinct_d_per_session``, ``backpressure``, and -- only when the
        corresponding data exists -- ``direct_to_d_rate_buckets`` and
        ``admission_probes``.

    Raises:
        KeyError: If a request record lacks ``session_id``, a binding event
            lacks ``session_id``/``decode_worker_index``, a backpressure event
            lacks ``sleep_s``/``server_url``, or an admission event lacks
            ``rtt_s``.
        json.JSONDecodeError: If the summary file or an event line is not
            valid JSON.
    """
    metrics_path = next(run_dir.rglob("request-metrics.jsonl"), None)
    if metrics_path is None:
        return {"run_dir": str(run_dir), "error": "no request-metrics.jsonl"}

    summary_path = metrics_path.with_suffix(metrics_path.suffix + ".summary.json")
    summary: dict = {}
    if summary_path.exists():
        # Close the handle explicitly and pin the encoding so the result does
        # not depend on the platform default.
        with summary_path.open("r", encoding="utf-8") as handle:
            summary = json.load(handle)

    structural_dir = run_dir / "structural"
    if not structural_dir.exists():
        # Fall back to a structural/ directory next to the metrics file.
        structural_dir = metrics_path.parent / "structural"

    admission_events = load_jsonl(structural_dir / "admission-events.jsonl")
    backpressure_events = load_jsonl(structural_dir / "backpressure-events.jsonl")
    binding_events = load_jsonl(structural_dir / "session-d-binding.jsonl")

    out: dict = {"run_dir": str(run_dir)}

    # Headline metrics copied straight from summary.json (None when absent).
    out["request_count"] = summary.get("request_count")
    out["error_count"] = summary.get("error_count")
    out["latency"] = summary.get("latency_stats_s")
    out["ttft"] = summary.get("ttft_stats_s")
    out["execution_modes"] = summary.get("execution_modes")
    out["per_decode_load"] = summary.get("per_decode_load")
    out["per_prefill_load"] = summary.get("per_prefill_load")

    # Overall direct-to-D rate from the execution-mode counts. ``or 1`` keeps
    # the division defined when there are no counts at all.
    em = summary.get("execution_modes", {}) or {}
    direct = em.get("kvcache-direct-to-d-session", 0)
    total = sum(em.values()) or 1
    out["direct_to_d_rate"] = direct / total

    # Session pinning: distinct decode_worker_index values seen per session.
    bind_per_session: dict[str, set[int]] = defaultdict(set)
    for ev in binding_events:
        bind_per_session[ev["session_id"]].add(ev["decode_worker_index"])
    if bind_per_session:
        out["session_count"] = len(bind_per_session)
        out["avg_distinct_d_per_session"] = (
            sum(len(v) for v in bind_per_session.values()) / len(bind_per_session)
        )
    else:
        out["session_count"] = 0
        out["avg_distinct_d_per_session"] = None

    # Per-session direct-to-D rate, bucketed into five 20% bins.
    sess_records: dict[str, list[dict]] = defaultdict(list)
    for record in load_jsonl(metrics_path):
        sess_records[record["session_id"]].append(record)
    rates = [
        sum(
            1 for t in turns if t.get("execution_mode") == "kvcache-direct-to-d-session"
        )
        / len(turns)
        for turns in sess_records.values()
    ]
    if rates:
        buckets = [0, 0, 0, 0, 0]
        for rate in rates:
            # A rate of exactly 1.0 maps to index 5; clamp it into the top bin.
            buckets[min(4, int(rate * 5))] += 1
        out["direct_to_d_rate_buckets"] = {
            "0-20%": buckets[0],
            "20-40%": buckets[1],
            "40-60%": buckets[2],
            "60-80%": buckets[3],
            "80-100%": buckets[4],
        }

    # Backpressure events
    if backpressure_events:
        sleeps = [ev["sleep_s"] for ev in backpressure_events]
        out["backpressure"] = {
            "event_count": len(backpressure_events),
            "total_sleep_s": round(sum(sleeps), 2),
            "sleep_p50_s": round(statistics.median(sleeps), 4),
            "sleep_p90_s": round(_nearest_rank(sleeps, 0.9), 4),
            "events_per_d": dict(
                Counter(ev["server_url"] for ev in backpressure_events).most_common()
            ),
        }
    else:
        out["backpressure"] = {"event_count": 0, "note": "no backpressure events"}

    # Admission probe stats
    if admission_events:
        rtts = [ev["rtt_s"] for ev in admission_events]
        depths = [ev.get("queue_depth", 0) for ev in admission_events]
        pauses = [ev.get("recommended_pause_ms", 0) for ev in admission_events]
        out["admission_probes"] = {
            "count": len(admission_events),
            "mean_rtt_s": round(sum(rtts) / len(rtts), 4),
            "p99_rtt_s": round(_nearest_rank(rtts, 0.99), 4),
            "queue_depth_p50": int(statistics.median(depths)),
            "queue_depth_p90": int(_nearest_rank(depths, 0.9)),
            "queue_depth_max": max(depths),
            "pause_ms_p50": int(statistics.median(pauses)),
            "pause_ms_p90": int(_nearest_rank(pauses, 0.9)),
            "pause_ms_max": max(pauses),
            "nonzero_pause_count": sum(1 for p in pauses if p > 0),
            # Events with a missing or empty reason are counted under "ok".
            "by_reason": dict(
                Counter(ev.get("reason") or "ok" for ev in admission_events).most_common()
            ),
        }

    return out


def _fmt_stat(value) -> str:
    """Format a numeric stat with three decimals, or ``n/a`` when it is missing."""
    return "n/a" if value is None else f"{value:.3f}"


def main() -> None:
    """Summarize every run directory under ``sweep_root`` and print the result.

    Command-line arguments:
        sweep_root: Directory whose immediate subdirectories are run dirs.
        --json: Print the list of summaries as indented JSON and nothing else.

    Without ``--json`` a human-readable section is printed per run. Stats
    missing from a run's latency/ttft dict are rendered as ``n/a`` rather
    than aborting the whole report. If ``sweep_root`` is not a directory the
    parser reports the problem and exits with status 2.
    """
    ap = argparse.ArgumentParser()
    ap.add_argument("sweep_root", type=Path)
    ap.add_argument("--json", action="store_true", help="emit JSON only")
    args = ap.parse_args()

    if not args.sweep_root.is_dir():
        ap.error(f"sweep_root is not a directory: {args.sweep_root}")

    # Sorted so the report order is stable across runs; plain files are skipped.
    summaries = [
        summarize_run(run_dir)
        for run_dir in sorted(args.sweep_root.iterdir())
        if run_dir.is_dir()
    ]

    if args.json:
        print(json.dumps(summaries, indent=2))
        return

    for s in summaries:
        print(f"\n{'=' * 70}")
        print(f"  {s['run_dir']}")
        print(f"{'=' * 70}")
        if "error" in s:
            print(f"  ERROR: {s['error']}")
            continue
        print(f"  reqs={s.get('request_count')} errors={s.get('error_count')}")
        if s.get("latency"):
            lt = s["latency"]
            print(
                f"  latency: mean={_fmt_stat(lt.get('mean'))} "
                f"p50={_fmt_stat(lt.get('p50'))} p90={_fmt_stat(lt.get('p90'))} "
                f"p99={_fmt_stat(lt.get('p99'))}"
            )
        if s.get("ttft"):
            tt = s["ttft"]
            print(
                f"  ttft:    mean={_fmt_stat(tt.get('mean'))} "
                f"p50={_fmt_stat(tt.get('p50'))} p90={_fmt_stat(tt.get('p90'))}"
            )
        print(f"  direct_to_d_rate: {s.get('direct_to_d_rate', 0) * 100:.1f}%")
        print(f"  sessions: {s.get('session_count')} | "
              f"avg distinct-D-per-session: {s.get('avg_distinct_d_per_session')}")
        if s.get("direct_to_d_rate_buckets"):
            print(f"  direct-to-D distribution by session: {s['direct_to_d_rate_buckets']}")
        if s.get("backpressure"):
            print(f"  backpressure: {s['backpressure']}")
        if s.get("admission_probes"):
            print(f"  admission probes: {s['admission_probes']}")


# Run the CLI only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()