User-requested comparison of inter-turn external gap distribution between
the production agentic trace (Qwen3-Coder) and a production chatbot trace
(qwen3-max chat). Both computed as
T_external = next_turn.start_ms - prev_turn.end_ms on the same kind of
pipeline (raw input + raw output join on request_id, session structure from
the formatted trace's parent_chat_id chains).

The chatbot trace lives as two files on dash0:
  input  : bailian-trace/qwen-trace-260321-260327/qwen3-max-input-032309-032311.jsonl
  output : bailian-trace/qwen-trace-260321-260327/qwen3-max-output-032109-032711.jsonl

The raw input has no session_id (uuid is per-record, user_id has only 4
distinct tenant values for 346 k requests). We recover session structure
from the formatted file (qwen_chat_blksz_64_032309-032311.jsonl, which groups
requests by parent_chat_id), matching each formatted record to a raw record
by (timestamp, output_length) — prompt_token_num is anonymized to 0 in this
trace, so we use generate_token_num as the join key. End time is derived from
time_to_finish_token (ms duration), not the "time" string field (which is the
log-write time, not request completion).

Numbers (chatbot, 42 228 inter-turn gaps over 32 262 multi-turn sessions):
  p25 4.85 s | p50 7.18 s | p75 8.22 s | p90 15.0 s | p99 43 s
  4% gaps < 1 s | 29% < 5 s | 78% < 10 s | 98% < 30 s

Compare to agentic (same metric, scripts/compute_inter_turn_gap_remote.py):
  p25 0.69 s | p50 1.6 s | p75 8.6 s | p90 44 s | p99 738 s
  39% gaps < 1 s | 67% < 5 s | 77% < 10 s | 87% < 30 s

Distributions differ in shape, not just location:
- Chatbot is tight, unimodal around 5–10 s (human interaction).
- Agentic is bimodal: a sub-second autonomous tool-call mode (39 % < 1 s)
  plus a long-pause tail (13 % > 30 s, p99 = 738 s) for sessions where the
  operator steps away.
- The sub-second tool-call mass is where dispatch coupling lives — those
  turns have W_turn ≫ T_external for any current scheduler.

The earlier "chatbot has T_human ≈ 30 s" hand-wave was wrong empirically.
The right framing for §2.3 is "agentic has a sub-second tool-call mode that
chatbot doesn't", not "chatbot has think-time and agentic doesn't".

Adds:
- scripts/compute_inter_turn_gap_chatbot.py: dash0-side aggregator (raw
  input/output join + formatted alignment by ts + output_length)
- analysis/characterization/data/chatbot_inter_turn_gap.json: CDF cache
- scripts/plot_inter_turn_gap.py: overlays both curves on log-x

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
104 lines
3.7 KiB
Python
104 lines
3.7 KiB
Python
#!/usr/bin/env python3
|
||
"""Plot the production trace inter-turn gap distribution.
|
||
|
||
Inter-turn gap = next_turn.request_ready_time_ms - prev_turn.request_end_time_ms
|
||
(i.e. T_external: the wall-clock between a turn finishing and the next turn
|
||
of the same session arriving). This is the tool-call latency + any pause,
|
||
not the conflated arrival-to-arrival interval.
|
||
|
||
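Data is pre-computed on dash0 and cached under
``analysis/characterization/data/``: the agentic curve by
scripts/agentic_gap.py in ``agentic_inter_turn_gap.json`` (~23 KB), and the
chatbot curve by scripts/compute_inter_turn_gap_chatbot.py in
``chatbot_inter_turn_gap.json``.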
|
||
"""
|
||
from __future__ import annotations
|
||
|
||
import argparse
|
||
import json
|
||
from pathlib import Path
|
||
|
||
import matplotlib.pyplot as plt
|
||
import numpy as np
|
||
|
||
|
||
def load(cache_path: Path) -> tuple[np.ndarray, np.ndarray, dict]:
    """Read a pre-computed inter-turn gap CDF cache from disk.

    Args:
        cache_path: JSON file whose top-level object contains a
            ``cdf_samples`` list; every entry must carry the keys ``gap_s``
            and ``rank_pct``.

    Returns:
        A ``(xs, ys, d)`` tuple: ``xs`` holds the ``gap_s`` values and ``ys``
        the ``rank_pct`` values, both as arrays in file order; ``d`` is the
        complete decoded JSON document so callers can read its other fields.

    Raises:
        KeyError: if ``cdf_samples`` or one of the per-sample keys is absent.
    """
    # JSON is UTF-8 by specification; decode explicitly instead of relying on
    # the platform's locale default, which is not UTF-8 everywhere.
    d = json.loads(cache_path.read_text(encoding="utf-8"))
    samples = d["cdf_samples"]
    xs = np.array([s["gap_s"] for s in samples])
    ys = np.array([s["rank_pct"] for s in samples])
    return xs, ys, d
|
||
|
||
|
||
def main() -> None:
    """Draw the agentic-vs-chatbot inter-turn gap CDF overlay and save it.

    Command-line flags:
        --agentic-data: JSON cache for the agentic curve.
        --chatbot-data: JSON cache for the chatbot curve.
        --out: destination image path; missing parent directories are created.

    Both caches are read through ``load``. Besides the CDF samples, each
    document must provide ``n_gaps``, ``n_sessions``, ``stats_s["p50"]`` and
    ``fraction_below["1.0s"]`` / ``fraction_below["5.0s"]``, which feed the
    legend, the median markers and the title.
    """
    cli = argparse.ArgumentParser()
    cli.add_argument(
        "--agentic-data",
        default="analysis/characterization/data/agentic_inter_turn_gap.json",
    )
    cli.add_argument(
        "--chatbot-data",
        default="analysis/characterization/data/chatbot_inter_turn_gap.json",
    )
    cli.add_argument("--out", default="figs/f3a_inter_turn_gap.png")
    opts = cli.parse_args()

    agentic_x, agentic_y, agentic = load(Path(opts.agentic_data))
    chatbot_x, chatbot_y, chatbot = load(Path(opts.chatbot_data))

    fig, ax = plt.subplots(figsize=(10, 5.5))

    # The two CDF curves; the legend text carries the sample sizes.
    agentic_label = (
        f"agentic (n={agentic['n_gaps']:,} gaps, "
        f"{agentic['n_sessions']:,} multi-turn sessions, Qwen3-Coder)"
    )
    ax.plot(agentic_x, agentic_y, color="#1f77b4", lw=2.4, label=agentic_label)
    chatbot_label = (
        f"chatbot (n={chatbot['n_gaps']:,} gaps, "
        f"{chatbot['n_sessions']:,} multi-turn sessions, qwen3-max)"
    )
    ax.plot(chatbot_x, chatbot_y, color="#c44e52", lw=2.4, label=chatbot_label)

    # Median markers. Each entry is (document, colour, annotation offset in
    # points); the two offsets differ so the two text labels are placed on
    # different sides of their dots.
    median_pct = 50
    median_markers = (
        (agentic, "#1f4e79", (8, -10)),
        (chatbot, "#7a1d1d", (-110, 8)),
    )
    for doc, shade, text_offset in median_markers:
        median = doc["stats_s"]["p50"]
        ax.scatter([median], [median_pct], color=shade, s=55, zorder=5)
        ax.annotate(
            f"p50 = {median:.2g}s",
            xy=(median, median_pct),
            xytext=text_offset,
            textcoords="offset points",
            fontsize=10,
            color=shade,
        )

    # Dotted vertical reference lines, each with a rotated caption placed
    # slightly to the right of the line.
    reference_lines = (
        ("lmetric TTFT p90 = 15.7s", 15.7, "#888"),
        ("unified TTFT p90 = 7.3s", 7.3, "#444"),
    )
    for caption, x_pos, shade in reference_lines:
        ax.axvline(x_pos, color=shade, ls=":", lw=1.2, alpha=0.7)
        ax.text(
            x_pos * 1.05,
            5,
            caption,
            fontsize=8.5,
            color=shade,
            rotation=90,
            va="bottom",
        )

    ax.set_xscale("log")
    ax.set_xlim(0.05, 5000)
    ax.set_ylim(0, 102)
    ax.set_xlabel(
        "Inter-turn gap T_external (s, log scale) — next.ready − prev.end"
    )
    ax.set_ylabel("Cumulative % of inter-turn intervals")

    # Headline numbers repeated in the title so the figure reads standalone.
    ap = agentic["stats_s"]
    cp = chatbot["stats_s"]
    af = agentic["fraction_below"]
    cf = chatbot["fraction_below"]
    title = (
        f"Agentic vs chatbot inter-turn external gap — agentic has a "
        f"sub-second tool-call mode chatbot lacks\n"
        f"agentic p50={ap['p50']:.2g}s, frac<1s={af['1.0s']*100:.0f}%, "
        f"frac<5s={af['5.0s']*100:.0f}% · "
        f"chatbot p50={cp['p50']:.2g}s, frac<1s={cf['1.0s']*100:.0f}%, "
        f"frac<5s={cf['5.0s']*100:.0f}%"
    )
    ax.set_title(title)
    ax.grid(True, which="both", alpha=0.3)
    ax.legend(loc="lower right", framealpha=0.92, fontsize=9)

    destination = Path(opts.out)
    destination.parent.mkdir(parents=True, exist_ok=True)
    fig.savefig(destination, dpi=150, bbox_inches="tight")
    print(f"wrote {destination}")
|
||
|
||
|
||
if __name__ == "__main__":
    # Script entry point: render the figure only when this file is executed
    # directly, not when it is imported.
    main()
|