#!/usr/bin/env python3
"""Compute inter-turn T_external (next.ready - prev.end) on the raw agentic trace.

Run on dash0 (the trace is at the path below; not co-located with the repo).
Writes /tmp/agentic_inter_turn_gap.json which is then scp'd into the repo at
analysis/characterization/data/agentic_inter_turn_gap.json for figure rebuild.

Reproduce:
    scp scripts/compute_inter_turn_gap_remote.py dash0:/tmp/
    ssh dash0 'python3 /tmp/compute_inter_turn_gap_remote.py'
    scp dash0:/tmp/agentic_inter_turn_gap.json analysis/characterization/data/
"""
import json
import math
from collections import defaultdict
from operator import itemgetter

import numpy as np

TRACE_PATH = "/home/admin/cpfs/wjh/ali-trace/trace-glm5.1-formatted/051315-051317-raw.jsonl"
OUTPUT_PATH = "/tmp/agentic_inter_turn_gap.json"

# Percentiles reported under "stats_s" (as keys "p1", "p5", ...).
PERCENTILES = (1, 5, 25, 50, 75, 90, 95, 99)
# Thresholds, in seconds, for the "fraction of gaps strictly below" table.
THRESHOLDS_S = (0.1, 0.5, 1.0, 2.0, 5.0, 10.0, 30.0, 60.0)


def _is_valid_ms(value):
    """Return True if *value* is a finite, strictly positive number."""
    # The chained comparison is False for NaN and for +/-inf, so values that
    # int() could not convert are rejected here instead of crashing later.
    return isinstance(value, (int, float)) and 0 < value < math.inf


def load_sessions(trace_path):
    """Group (ready_ms, end_ms) pairs from a JSONL trace by session id.

    Lines that fail to parse as JSON are skipped and not counted. A parsed
    line is counted in ``n_total``; it is kept only when its ``meta`` object
    has a non-None ``session_id`` and finite, positive
    ``request_ready_time_ms`` / ``request_end_time_ms`` with end >= ready.

    Returns:
        (sessions, n_total, n_kept): ``sessions`` maps session id to a list
        of ``(ready_ms, end_ms)`` integer tuples in file order.
    """
    sessions = defaultdict(list)
    n_total = 0
    n_kept = 0
    with open(trace_path, encoding="utf-8") as trace_file:
        for line in trace_file:
            try:
                record = json.loads(line)
            except (ValueError, RecursionError):
                # Best effort: malformed (or absurdly nested) lines are ignored.
                continue
            n_total += 1
            # A line can be valid JSON without being an object (e.g. `null`),
            # and "meta" can be present but null; neither carries timings.
            meta = record.get("meta") if isinstance(record, dict) else None
            if not isinstance(meta, dict):
                continue
            sid = meta.get("session_id")
            ready = meta.get("request_ready_time_ms")
            end = meta.get("request_end_time_ms")
            if sid is None or not _is_valid_ms(ready) or not _is_valid_ms(end):
                continue
            if end < ready:
                continue
            # int() truncates fractional milliseconds toward zero.
            sessions[sid].append((int(ready), int(end)))
            n_kept += 1
    return sessions, n_total, n_kept


def compute_gaps_ms(sessions):
    """Return inter-turn gaps in milliseconds and the number of negative gaps.

    Within each session the turns are ordered by ready time (stable sort, so
    ties keep their input order) and the gap between consecutive turns is
    ``next.ready - prev.end``. Negative gaps are counted and excluded.
    Sessions with fewer than two turns contribute nothing.

    Returns:
        (gaps_ms, n_negative)
    """
    gaps_ms = []
    n_negative = 0
    for turns in sessions.values():
        if len(turns) < 2:
            continue
        ordered = sorted(turns, key=itemgetter(0))
        for (_, prev_end), (next_ready, _) in zip(ordered, ordered[1:]):
            gap = next_ready - prev_end
            if gap < 0:
                n_negative += 1
            else:
                gaps_ms.append(gap)
    return gaps_ms, n_negative


def cdf_sample_indices(n):
    """Pick the sorted-array indices used as CDF sample points for *n* gaps.

    Ranks 1 .. max(1, n // 100) are sampled on a geometric grid (200 points
    before de-duplication) and ranks n // 100 .. n - 1 on a linear grid
    (300 points before de-duplication); the first and last index are always
    included. Returns a sorted, de-duplicated integer array.
    """
    idx_top = np.unique(np.round(np.geomspace(1, max(1, n // 100), 200)).astype(int)) - 1
    idx_rest = np.unique(np.linspace(n // 100, n - 1, 300).astype(int))
    idx = np.unique(np.concatenate([[0], idx_top, idx_rest, [n - 1]]))
    return idx[idx < n]


def summarize_gaps(gaps_ms, n_sessions, n_negative):
    """Build the JSON-serializable summary of the gap distribution.

    Args:
        gaps_ms: non-negative gaps in milliseconds.
        n_sessions: number of sessions with at least two turns.
        n_negative: number of negative gaps that were dropped.

    Returns:
        dict with keys ``n_gaps``, ``n_sessions``, ``negative_dropped``,
        ``stats_s``, ``fraction_below`` and ``cdf_samples``; gap values are
        in seconds.

    Raises:
        ValueError: if *gaps_ms* is empty (no statistic is defined).
    """
    if len(gaps_ms) == 0:
        raise ValueError("no non-negative inter-turn gaps found; cannot compute statistics")

    gaps = np.array(gaps_ms, dtype=np.float64) / 1000.0
    n = len(gaps)
    ordered = np.sort(gaps)

    stats = {"min": float(gaps.min()), "max": float(gaps.max()), "mean": float(gaps.mean())}
    for p in PERCENTILES:
        stats[f"p{p}"] = float(np.percentile(gaps, p))

    samples = [
        {"rank_pct": float((i + 1) / n * 100), "gap_s": float(ordered[i])}
        for i in cdf_sample_indices(n)
    ]
    return {
        "n_gaps": n,
        "n_sessions": n_sessions,
        "negative_dropped": n_negative,
        "stats_s": stats,
        "fraction_below": {f"{thr}s": float((gaps < thr).sum() / n) for thr in THRESHOLDS_S},
        "cdf_samples": samples,
    }


def main(trace_path=TRACE_PATH, output_path=OUTPUT_PATH):
    """Read the trace, print the summary to stdout and write it as JSON.

    Returns the summary dict that was written to *output_path*.

    Raises:
        ValueError: if the trace yields no non-negative inter-turn gap.
    """
    sessions, n_total, n_kept = load_sessions(trace_path)
    print(f"records_total: {n_total}")
    print(f"records_kept: {n_kept}")
    print(f"sessions_total: {len(sessions)}")

    gaps_ms, n_negative = compute_gaps_ms(sessions)
    n_multi_turn = sum(1 for turns in sessions.values() if len(turns) >= 2)
    # Counters are printed before summarizing so they are visible even when
    # there is nothing to summarize.
    print(f"sessions_with_>=2_turns: {n_multi_turn}")
    print(f"gaps_kept: {len(gaps_ms)}")
    print(f"gaps_negative_dropped: {n_negative}")

    summary = summarize_gaps(gaps_ms, n_multi_turn, n_negative)
    stats = summary["stats_s"]
    ps = {f"p{p}": stats[f"p{p}"] for p in PERCENTILES}
    print(f"stats_s: min={stats['min']:.3f} mean={stats['mean']:.3f} max={stats['max']:.3f} {ps}")
    for thr in THRESHOLDS_S:
        pct = summary["fraction_below"][f"{thr}s"] * 100
        print(f"frac < {thr:5.1f}s : {pct:5.1f}%")

    with open(output_path, "w", encoding="utf-8") as out_file:
        out_file.write(json.dumps(summary))
    print(f"wrote {output_path}")
    return summary


if __name__ == "__main__":
    main()