scripts/sweep_backpressure_smoke.sh: 4-run smoke matrix (KVC baseline / KVC + backpressure / KVC + backpressure @ time-scale=1 / DP @ time-scale=1) designed to fit ~3-4h GPU budget. Validates §3 backpressure implementation and partially probes §7 time-scale distortion. scripts/analysis/analyze_backpressure_smoke.py: consumes the new structural/* jsonl files plus request-metrics; emits headline metrics, backpressure histograms, admission probe stats, and per-session pinning distribution. scripts/sweep_tp1_v6_p1_profile.sh: pre-existing v6 P1 profile sweep script (was untracked; included for completeness). Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
192 lines
7.1 KiB
Python
Executable File
192 lines
7.1 KiB
Python
Executable File
#!/usr/bin/env python3
|
|
"""Analyze backpressure smoke sweep outputs.
|
|
|
|
For each run dir with a `request-metrics.jsonl` and the new `structural/`
subdir (admission-events.jsonl, backpressure-events.jsonl,
session-d-binding.jsonl), report:

  - Headline (errors, latency, ttft, direct-to-D rate)
  - Backpressure pause histogram (event count, total and p50/p90 sleep,
    event count per D)
  - Admission probe stats (RPC count, mean/p99 RTT, queue_depth distribution,
    pause_ms distribution, counts by reason)
  - Session pinning (distinct D per session, bimodal direct-to-D rate)
|
|
"""
|
|
from __future__ import annotations
|
|
|
|
import argparse
|
|
import json
|
|
import statistics
|
|
from collections import Counter, defaultdict
|
|
from pathlib import Path
|
|
|
|
|
|
def load_jsonl(path: Path) -> list[dict]:
    """Read a JSON Lines file into a list of decoded records.

    Args:
        path: Location of the JSONL file.

    Returns:
        One decoded JSON value per non-blank line, in file order. An empty
        list when ``path`` does not exist.

    Raises:
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    if not path.exists():
        return []
    # Use a context manager so the handle is closed as soon as parsing ends
    # (or fails) instead of whenever the garbage collector gets to it.
    with path.open("r", encoding="utf-8") as handle:
        return [json.loads(line) for line in handle if line.strip()]
|
|
|
|
|
|
# Execution-mode label that marks a request as served direct-to-D.
_DIRECT_TO_D_MODE = "kvcache-direct-to-d-session"

# Labels for the five equal-width buckets of per-session direct-to-D rate.
_BUCKET_LABELS = ("0-20%", "20-40%", "40-60%", "60-80%", "80-100%")


def _rank_pick(values: list, fraction: float):
    """Return the sorted element at index ``int(len(values) * fraction)``.

    ``values`` must be non-empty and ``fraction`` must lie in ``[0, 1)`` so
    that the computed index stays in range.
    """
    ordered = sorted(values)
    return ordered[int(len(ordered) * fraction)]


def _session_pinning(binding_events: list) -> dict:
    """Count sessions and the mean number of distinct decode workers per session."""
    workers_by_session = defaultdict(set)
    for event in binding_events:
        workers_by_session[event["session_id"]].add(event["decode_worker_index"])
    if not workers_by_session:
        return {"session_count": 0, "avg_distinct_d_per_session": None}
    distinct_total = sum(len(workers) for workers in workers_by_session.values())
    return {
        "session_count": len(workers_by_session),
        "avg_distinct_d_per_session": distinct_total / len(workers_by_session),
    }


def _direct_to_d_buckets(records: list) -> dict:
    """Histogram sessions by their fraction of direct-to-D turns.

    Returns an empty dict when there are no records.
    """
    turns_by_session = defaultdict(list)
    for record in records:
        turns_by_session[record["session_id"]].append(record)
    if not turns_by_session:
        return {}
    counts = [0] * len(_BUCKET_LABELS)
    last = len(counts) - 1
    for turns in turns_by_session.values():
        direct_turns = sum(
            1 for turn in turns if turn.get("execution_mode") == _DIRECT_TO_D_MODE
        )
        rate = direct_turns / len(turns)
        # A rate of exactly 1.0 would index one past the end; clamp it into
        # the top bucket.
        counts[min(last, int(rate * len(counts)))] += 1
    return dict(zip(_BUCKET_LABELS, counts))


def _backpressure_stats(events: list) -> dict:
    """Summarize backpressure sleep durations and the event count per server URL."""
    if not events:
        return {"event_count": 0, "note": "no backpressure events"}
    sleeps = [event["sleep_s"] for event in events]
    return {
        "event_count": len(events),
        "total_sleep_s": round(sum(sleeps), 2),
        "sleep_p50_s": round(statistics.median(sleeps), 4),
        "sleep_p90_s": round(_rank_pick(sleeps, 0.9), 4),
        "events_per_d": dict(
            Counter(event["server_url"] for event in events).most_common()
        ),
    }


def _admission_stats(events: list) -> dict:
    """Summarize admission probe RTTs, queue depths, pauses and reasons.

    ``events`` must be non-empty.
    """
    rtts = [event["rtt_s"] for event in events]
    depths = [event.get("queue_depth", 0) for event in events]
    pauses = [event.get("recommended_pause_ms", 0) for event in events]
    return {
        "count": len(events),
        "mean_rtt_s": round(sum(rtts) / len(rtts), 4),
        "p99_rtt_s": round(_rank_pick(rtts, 0.99), 4),
        "queue_depth_p50": int(statistics.median(depths)),
        "queue_depth_p90": int(_rank_pick(depths, 0.9)),
        "queue_depth_max": max(depths),
        "pause_ms_p50": int(statistics.median(pauses)),
        "pause_ms_p90": int(_rank_pick(pauses, 0.9)),
        "pause_ms_max": max(pauses),
        "nonzero_pause_count": sum(1 for pause in pauses if pause > 0),
        # Events with a missing or empty reason are counted under "ok".
        "by_reason": dict(
            Counter(event.get("reason") or "ok" for event in events).most_common()
        ),
    }


def summarize_run(run_dir: Path) -> dict:
    """Build the summary dict for one run directory.

    The first ``request-metrics.jsonl`` found anywhere under ``run_dir`` is
    used. Headline fields are copied from the sibling
    ``request-metrics.jsonl.summary.json`` when it exists. Structural event
    files are read from ``run_dir/structural``, falling back to a
    ``structural`` directory next to the metrics file.

    Args:
        run_dir: Directory holding the outputs of a single run.

    Returns:
        A dict with ``run_dir`` plus headline fields, ``direct_to_d_rate``,
        session pinning stats, ``backpressure``, and, when data is available,
        ``direct_to_d_rate_buckets`` and ``admission_probes``. If no metrics
        file is found, only ``run_dir`` and ``error`` are returned.
    """
    metrics_path = next(run_dir.rglob("request-metrics.jsonl"), None)
    if metrics_path is None:
        return {"run_dir": str(run_dir), "error": "no request-metrics.jsonl"}

    summary_path = metrics_path.with_suffix(metrics_path.suffix + ".summary.json")
    summary = {}
    if summary_path.exists():
        # Close the handle deterministically and read as UTF-8, matching the
        # JSONL readers.
        with summary_path.open("r", encoding="utf-8") as handle:
            summary = json.load(handle)

    structural_dir = run_dir / "structural"
    if not structural_dir.exists():
        # Fall back to a structural/ directory alongside the metrics file.
        structural_dir = metrics_path.parent / "structural"

    admission_events = load_jsonl(structural_dir / "admission-events.jsonl")
    backpressure_events = load_jsonl(structural_dir / "backpressure-events.jsonl")
    binding_events = load_jsonl(structural_dir / "session-d-binding.jsonl")

    out: dict = {"run_dir": str(run_dir)}

    # Headline metrics copied from summary.json (None when absent).
    out["request_count"] = summary.get("request_count")
    out["error_count"] = summary.get("error_count")
    out["latency"] = summary.get("latency_stats_s")
    out["ttft"] = summary.get("ttft_stats_s")
    out["execution_modes"] = summary.get("execution_modes")
    out["per_decode_load"] = summary.get("per_decode_load")
    out["per_prefill_load"] = summary.get("per_prefill_load")

    # Overall direct-to-D rate; the `or 1` avoids dividing by zero when no
    # execution modes were recorded.
    modes = summary.get("execution_modes", {}) or {}
    total = sum(modes.values()) or 1
    out["direct_to_d_rate"] = modes.get(_DIRECT_TO_D_MODE, 0) / total

    out.update(_session_pinning(binding_events))

    # Per-session direct-to-D rate distribution (bimodal check).
    buckets = _direct_to_d_buckets(load_jsonl(metrics_path))
    if buckets:
        out["direct_to_d_rate_buckets"] = buckets

    out["backpressure"] = _backpressure_stats(backpressure_events)

    if admission_events:
        out["admission_probes"] = _admission_stats(admission_events)

    return out
|
|
|
|
|
|
def _format_stat(stats: dict, key: str) -> str:
    """Render ``stats[key]`` with three decimals, or ``n/a`` if absent or null."""
    value = stats.get(key)
    return "n/a" if value is None else f"{value:.3f}"


def main() -> None:
    """Summarize every run directory under ``sweep_root`` and print the result.

    Command-line arguments:
        sweep_root: Directory whose immediate subdirectories are run dirs.
        --json: Print the list of summaries as indented JSON and nothing else.

    Without ``--json`` a human-readable report is printed per run. Exits with
    a usage error if ``sweep_root`` is not a directory.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("sweep_root", type=Path)
    parser.add_argument("--json", action="store_true", help="emit JSON only")
    args = parser.parse_args()

    # Report a bad path as a usage error rather than a traceback from iterdir().
    if not args.sweep_root.is_dir():
        parser.error(f"sweep_root is not a directory: {args.sweep_root}")

    summaries = [
        summarize_run(run_dir)
        for run_dir in sorted(args.sweep_root.iterdir())
        if run_dir.is_dir()
    ]

    if args.json:
        print(json.dumps(summaries, indent=2))
        return

    rule = "=" * 70
    for s in summaries:
        print(f"\n{rule}")
        print(f" {s['run_dir']}")
        print(rule)
        if "error" in s:
            print(f" ERROR: {s['error']}")
            continue
        print(f" reqs={s.get('request_count')} errors={s.get('error_count')}")
        # Stats dicts come from an external summary file, so individual keys
        # may be missing; _format_stat prints "n/a" instead of raising.
        latency = s.get("latency")
        if latency:
            print(
                f" latency: mean={_format_stat(latency, 'mean')} "
                f"p50={_format_stat(latency, 'p50')} "
                f"p90={_format_stat(latency, 'p90')} "
                f"p99={_format_stat(latency, 'p99')}"
            )
        ttft = s.get("ttft")
        if ttft:
            print(
                f" ttft: mean={_format_stat(ttft, 'mean')} "
                f"p50={_format_stat(ttft, 'p50')} "
                f"p90={_format_stat(ttft, 'p90')}"
            )
        print(f" direct_to_d_rate: {s.get('direct_to_d_rate', 0) * 100:.1f}%")
        print(
            f" sessions: {s.get('session_count')} | "
            f"avg distinct-D-per-session: {s.get('avg_distinct_d_per_session')}"
        )
        if s.get("direct_to_d_rate_buckets"):
            print(f" direct-to-D distribution by session: {s['direct_to_d_rate_buckets']}")
        if s.get("backpressure"):
            print(f" backpressure: {s['backpressure']}")
        if s.get("admission_probes"):
            print(f" admission probes: {s['admission_probes']}")
|
|
|
|
|
|
# Run the CLI only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()
|