Files
agentic-pd-hybrid/scripts/analysis/analyze_backpressure_smoke.py
kzlin 7affb565b2 feat(kvc): add backpressure smoke sweep + analyzer (and v6 p1 profile script)
scripts/sweep_backpressure_smoke.sh: 4-run smoke matrix (KVC baseline /
KVC + backpressure / KVC + backpressure @ time-scale=1 / DP @
time-scale=1) designed to fit ~3-4h GPU budget. Validates §3 backpressure
implementation and partially probes §7 time-scale distortion.

scripts/analysis/analyze_backpressure_smoke.py: consumes the new
structural/* jsonl files plus request-metrics; emits headline metrics,
backpressure histograms, admission probe stats, and per-session pinning
distribution.

scripts/sweep_tp1_v6_p1_profile.sh: pre-existing v6 P1 profile sweep
script (was untracked; included for completeness).

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-06 21:29:56 +08:00

192 lines
7.1 KiB
Python
Executable File

#!/usr/bin/env python3
"""Analyze backpressure smoke sweep outputs.
For each run dir with a `request-metrics.jsonl` and the new `structural/`
subdir (admission-events.jsonl, backpressure-events.jsonl,
session-d-binding.jsonl), report:
- Headline (errors, latency, ttft, direct-to-D rate)
- Backpressure pause histogram (count, p50/p90 sleep, total pause time per D)
- Admission probe stats (RPC count, mean RTT, queue_depth distribution,
pause_ms distribution)
- Session pinning (distinct D per session, bimodal direct-to-D rate)
"""
from __future__ import annotations
import argparse
import json
import statistics
from collections import Counter, defaultdict
from pathlib import Path
def load_jsonl(path: Path) -> list[dict]:
    """Read a JSON-lines file and return one parsed object per non-blank line.

    Args:
        path: Location of the ``.jsonl`` file.

    Returns:
        The parsed objects in file order, or an empty list when the file
        does not exist.

    Raises:
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    try:
        # The context manager closes the handle deterministically; the
        # previous version left it open until garbage collection.
        with path.open("r", encoding="utf-8") as fh:
            return [json.loads(line) for line in fh if line.strip()]
    except FileNotFoundError:
        # A missing file is treated as "no events recorded".
        return []
# Execution-mode label that marks a request served directly by a decode worker.
_DIRECT_TO_D_MODE = "kvcache-direct-to-d-session"


def _rank_percentile(values: list, fraction: float):
    """Return the element at index ``int(len(values) * fraction)`` after sorting.

    Callers must pass a non-empty list and a fraction below 1.0 so that the
    computed index stays in range.
    """
    ordered = sorted(values)
    return ordered[int(len(ordered) * fraction)]


def _direct_to_d_buckets(records: list[dict]) -> dict:
    """Histogram sessions by their per-session direct-to-D rate.

    Records are grouped by ``session_id``; each session's rate is the share
    of its records whose ``execution_mode`` is the direct-to-D label.
    Returns an empty dict when there are no records.
    """
    turns_by_session: dict[str, list[dict]] = defaultdict(list)
    for rec in records:
        turns_by_session[rec["session_id"]].append(rec)
    if not turns_by_session:
        return {}

    counts = [0, 0, 0, 0, 0]
    for turns in turns_by_session.values():
        n_direct = sum(
            1 for t in turns if t.get("execution_mode") == _DIRECT_TO_D_MODE
        )
        rate = n_direct / len(turns)
        # A rate of exactly 1.0 would index bucket 5; clamp it into the last one.
        counts[min(4, int(rate * 5))] += 1
    labels = ("0-20%", "20-40%", "40-60%", "60-80%", "80-100%")
    return dict(zip(labels, counts))


def _backpressure_stats(events: list[dict]) -> dict:
    """Summarize backpressure events (count, sleep totals/percentiles, per-URL counts)."""
    if not events:
        return {"event_count": 0, "note": "no backpressure events"}
    sleeps = [ev["sleep_s"] for ev in events]
    return {
        "event_count": len(events),
        "total_sleep_s": round(sum(sleeps), 2),
        "sleep_p50_s": round(statistics.median(sleeps), 4),
        "sleep_p90_s": round(_rank_percentile(sleeps, 0.9), 4),
        "events_per_d": dict(Counter(ev["server_url"] for ev in events).most_common()),
    }


def _admission_stats(events: list[dict]) -> dict:
    """Summarize admission probe events; ``events`` must be non-empty."""
    rtts = [ev["rtt_s"] for ev in events]
    depths = [ev.get("queue_depth", 0) for ev in events]
    pauses = [ev.get("recommended_pause_ms", 0) for ev in events]
    return {
        "count": len(events),
        "mean_rtt_s": round(sum(rtts) / len(rtts), 4),
        "p99_rtt_s": round(_rank_percentile(rtts, 0.99), 4),
        "queue_depth_p50": int(statistics.median(depths)),
        "queue_depth_p90": int(_rank_percentile(depths, 0.9)),
        "queue_depth_max": max(depths),
        "pause_ms_p50": int(statistics.median(pauses)),
        "pause_ms_p90": int(_rank_percentile(pauses, 0.9)),
        "pause_ms_max": max(pauses),
        "nonzero_pause_count": sum(1 for p in pauses if p > 0),
        # Events without a reason are counted under "ok".
        "by_reason": dict(
            Counter(ev.get("reason") or "ok" for ev in events).most_common()
        ),
    }


def summarize_run(run_dir: Path) -> dict:
    """Build the summary dict for a single run directory.

    The first ``request-metrics.jsonl`` found anywhere under ``run_dir`` is
    used. Headline numbers come from the sibling
    ``request-metrics.jsonl.summary.json`` when it exists. Structural event
    files are read from ``run_dir/structural`` or, if that directory is
    missing, from a ``structural`` directory next to the metrics file.

    Args:
        run_dir: Directory holding the outputs of one run.

    Returns:
        ``{"run_dir", "error"}`` when no metrics file is found. Otherwise a
        dict with headline metrics, ``direct_to_d_rate``, session pinning
        stats, ``backpressure``, and -- only when data is present --
        ``direct_to_d_rate_buckets`` and ``admission_probes``.

    Raises:
        KeyError: If a record or event lacks a required field
            (``session_id``, ``decode_worker_index``, ``sleep_s``,
            ``server_url``, ``rtt_s``).
    """
    metrics_path = next(run_dir.rglob("request-metrics.jsonl"), None)
    if metrics_path is None:
        return {"run_dir": str(run_dir), "error": "no request-metrics.jsonl"}

    summary: dict = {}
    summary_path = metrics_path.with_suffix(metrics_path.suffix + ".summary.json")
    if summary_path.exists():
        # Close the handle explicitly and read as UTF-8, like the JSONL files.
        with summary_path.open("r", encoding="utf-8") as fh:
            # A literal `null` file would otherwise break the .get() calls below.
            summary = json.load(fh) or {}

    structural_dir = run_dir / "structural"
    if not structural_dir.exists():
        # Fall back to a structural/ directory next to the metrics file.
        structural_dir = metrics_path.parent / "structural"
    admission_events = load_jsonl(structural_dir / "admission-events.jsonl")
    backpressure_events = load_jsonl(structural_dir / "backpressure-events.jsonl")
    binding_events = load_jsonl(structural_dir / "session-d-binding.jsonl")

    # Headline metrics are copied straight from summary.json (None when absent).
    out: dict = {
        "run_dir": str(run_dir),
        "request_count": summary.get("request_count"),
        "error_count": summary.get("error_count"),
        "latency": summary.get("latency_stats_s"),
        "ttft": summary.get("ttft_stats_s"),
        "execution_modes": summary.get("execution_modes"),
        "per_decode_load": summary.get("per_decode_load"),
        "per_prefill_load": summary.get("per_prefill_load"),
    }

    # Overall direct-to-D rate; `or 1` avoids dividing by zero with no counts.
    modes = summary.get("execution_modes", {}) or {}
    total = sum(modes.values()) or 1
    out["direct_to_d_rate"] = modes.get(_DIRECT_TO_D_MODE, 0) / total

    # Session pinning: how many distinct decode workers each session was bound to.
    workers_by_session: dict[str, set[int]] = defaultdict(set)
    for ev in binding_events:
        workers_by_session[ev["session_id"]].add(ev["decode_worker_index"])
    out["session_count"] = len(workers_by_session)
    if workers_by_session:
        out["avg_distinct_d_per_session"] = sum(
            len(workers) for workers in workers_by_session.values()
        ) / len(workers_by_session)
    else:
        out["avg_distinct_d_per_session"] = None

    # Per-session direct-to-D distribution (bimodality check).
    buckets = _direct_to_d_buckets(load_jsonl(metrics_path))
    if buckets:
        out["direct_to_d_rate_buckets"] = buckets

    out["backpressure"] = _backpressure_stats(backpressure_events)
    if admission_events:
        out["admission_probes"] = _admission_stats(admission_events)
    return out
def _fmt_stat(value: object) -> str:
    """Format a numeric stat with three decimals, or ``n/a`` when it is missing."""
    if isinstance(value, (int, float)):
        return f"{value:.3f}"
    return "n/a"


def main() -> None:
    """Summarize every run directory under ``sweep_root`` and print the results.

    Command line:
        sweep_root: Directory whose immediate sub-directories are runs.
        --json: Print the list of summaries as JSON instead of the text report.
    """
    ap = argparse.ArgumentParser()
    ap.add_argument("sweep_root", type=Path)
    ap.add_argument("--json", action="store_true", help="emit JSON only")
    args = ap.parse_args()

    # Sorted so runs are reported in a stable order.
    summaries = [
        summarize_run(run_dir)
        for run_dir in sorted(args.sweep_root.iterdir())
        if run_dir.is_dir()
    ]

    if args.json:
        print(json.dumps(summaries, indent=2))
        return

    for s in summaries:
        print(f"\n{'=' * 70}")
        print(f" {s['run_dir']}")
        print(f"{'=' * 70}")
        if "error" in s:
            print(f" ERROR: {s['error']}")
            continue
        print(f" reqs={s.get('request_count')} errors={s.get('error_count')}")
        # Stats dicts may lack a key or hold null; formatting None with ".3f"
        # raises TypeError, so each value goes through _fmt_stat.
        if s.get("latency"):
            lt = s["latency"]
            print(
                f" latency: mean={_fmt_stat(lt.get('mean'))} "
                f"p50={_fmt_stat(lt.get('p50'))} p90={_fmt_stat(lt.get('p90'))} "
                f"p99={_fmt_stat(lt.get('p99'))}"
            )
        if s.get("ttft"):
            tt = s["ttft"]
            print(
                f" ttft: mean={_fmt_stat(tt.get('mean'))} "
                f"p50={_fmt_stat(tt.get('p50'))} p90={_fmt_stat(tt.get('p90'))}"
            )
        print(f" direct_to_d_rate: {s.get('direct_to_d_rate', 0) * 100:.1f}%")
        print(
            f" sessions: {s.get('session_count')} | "
            f"avg distinct-D-per-session: {s.get('avg_distinct_d_per_session')}"
        )
        if s.get("direct_to_d_rate_buckets"):
            print(f" direct-to-D distribution by session: {s['direct_to_d_rate_buckets']}")
        if s.get("backpressure"):
            print(f" backpressure: {s['backpressure']}")
        if s.get("admission_probes"):
            print(f" admission probes: {s['admission_probes']}")


if __name__ == "__main__":
    main()