# agentic-kvc/analysis/characterization/b2_sweep_analysis.py

"""Aggregate B2 microbench cells: same- vs different-worker prefill overlap.

For each (variant × prefill_size) cell we have:
- 240 short-prompt decode requests at qps=4
- 4 large-prompt one-token "prefill injections"

The interesting question is *not* "does any other request's prefill overlap
this decode" (the answer is always yes — every decode begins with its own
short prefill, and at qps=4 they overlap each other constantly). The
interesting question is "does an injected large prefill on the *same* worker
materially slow this decode down?".

So we:
1) extract each cell's injection windows = [(t_dispatch, t_finish)
   for r in metrics if r.workload=="prefill"];
2) label each decode request as overlap iff its
   [t_first_token, t_finish] intersects at least one injection window;
3) compute TPOT p50/p90/p99 for overlap vs clean;
4) the per-cell interference index = TPOT_p90(overlap) /
   TPOT_p90(clean). For "different" variant this should hover near 1.0;
   for "same" it should rise with prefill_size.
"""

from __future__ import annotations

import argparse
import json
from collections import defaultdict
from pathlib import Path

from analysis.characterization.joined_analysis import (
    _percentile,
    load_jsonl,
    write_json,
)


def _overlaps(a_start: float, a_end: float, b_start: float, b_end: float) -> bool:
    return a_start <= b_end and b_start <= a_end


def _analyze_cell(metrics_rows: list[dict]) -> dict:
    prefills = [r for r in metrics_rows if r.get("workload") == "prefill"
                 and r.get("error") is None]
    decodes = [r for r in metrics_rows if r.get("workload") == "decode"
                and r.get("error") is None]

    inj_windows: list[tuple[float, float]] = []
    for p in prefills:
        ts = p.get("t_dispatch_unix")
        te = p.get("t_finish_unix")
        if ts is None or te is None:
            continue
        inj_windows.append((float(ts), float(te)))

    overlap_tpots: list[float] = []
    clean_tpots: list[float] = []
    overlap_ttfts: list[float] = []
    clean_ttfts: list[float] = []
    for d in decodes:
        ts = d.get("t_dispatch_unix")
        te = d.get("t_finish_unix")
        if ts is None or te is None:
            continue
        is_overlap = any(_overlaps(ts, te, ws, we) for ws, we in inj_windows)
        tpot = d.get("tpot_s")
        ttft = d.get("ttft_s")
        if tpot is not None:
            (overlap_tpots if is_overlap else clean_tpots).append(float(tpot))
        if ttft is not None:
            (overlap_ttfts if is_overlap else clean_ttfts).append(float(ttft))

    p90_overlap = _percentile(overlap_tpots, 0.90) if overlap_tpots else None
    p90_clean = _percentile(clean_tpots, 0.90) if clean_tpots else None
    idx = (p90_overlap / p90_clean) if (p90_overlap and p90_clean) else None
    return {
        "n_prefill_injections": len(prefills),
        "n_decode_total": len(decodes),
        "n_decode_overlap": len(overlap_tpots),
        "n_decode_clean": len(clean_tpots),
        "tpot_p50_overlap_s": _percentile(overlap_tpots, 0.50),
        "tpot_p90_overlap_s": p90_overlap,
        "tpot_p99_overlap_s": _percentile(overlap_tpots, 0.99),
        "tpot_p50_clean_s": _percentile(clean_tpots, 0.50),
        "tpot_p90_clean_s": p90_clean,
        "tpot_p99_clean_s": _percentile(clean_tpots, 0.99),
        "ttft_p90_overlap_s": _percentile(overlap_ttfts, 0.90)
                                if overlap_ttfts else None,
        "ttft_p90_clean_s": _percentile(clean_ttfts, 0.90)
                                if clean_ttfts else None,
        "interference_index": idx,
    }


def main() -> None:
    """Aggregate every cell under ``--sweep-dir`` into a single JSON summary.

    Walks ``<sweep-dir>/<variant>/p*/`` in sorted order, skipping the ``logs``
    directory and any cell missing ``run_window.json`` or ``metrics.jsonl``.
    Each remaining cell is analysed with ``_analyze_cell`` and tagged with its
    variant name plus the prefill size and endpoints read from
    ``run_window.json``. The result is written to ``--out`` (default:
    ``<sweep-dir>/b2_sweep_summary.json``) and echoed to stdout.
    """
    parser = argparse.ArgumentParser(
        description="B2 sweep aggregation (window-overlap)"
    )
    parser.add_argument("--sweep-dir", type=Path, required=True)
    parser.add_argument("--out", type=Path, default=None)
    args = parser.parse_args()

    summary_rows: list[dict] = []
    for variant_dir in sorted(args.sweep_dir.glob("*/")):
        variant = variant_dir.name
        if variant == "logs":
            continue
        for cell_dir in sorted(variant_dir.glob("p*/")):
            window_file = cell_dir / "run_window.json"
            metrics_file = cell_dir / "metrics.jsonl"
            # A cell is only usable when both artefacts are present.
            if not (window_file.exists() and metrics_file.exists()):
                continue
            run_window = json.loads(window_file.read_text())
            cell_stats = _analyze_cell(load_jsonl(metrics_file))
            record = {
                "variant": variant,
                "prefill_size": int(run_window["prefill_size"]),
                "decode_endpoint": run_window["decode_endpoint"],
                "prefill_endpoint": run_window["prefill_endpoint"],
            }
            record.update(cell_stats)
            summary_rows.append(record)

    destination = args.out
    if not destination:
        destination = args.sweep_dir / "b2_sweep_summary.json"
    write_json(destination, {"rows": summary_rows})
    print(json.dumps(summary_rows, indent=2))

if __name__ == "__main__":
    main()