import csv import gc import json import os import subprocess from collections import Counter, defaultdict from pathlib import Path import matplotlib import psutil matplotlib.use("Agg") import matplotlib.pyplot as plt from tqdm.auto import tqdm from .features import compute_features, feature_to_row from .helpers import percentile, safe_div, safe_float, safe_int, series_stats from .layout import DETAILS_DIR_NAME, DETAILS_SUMMARY_FILENAME, preferred_details_dir from .parser import path_looks_like_release_trace from trace_model_meta import resolve_tokenizer_path from .report import ensure_output_dir, write_features, write_normalized, write_report def _estimate_peak_rss_mb(current_rss_mb, peak_rss_mb, fraction_done): baseline = max(current_rss_mb, peak_rss_mb) headroom = 1.0 + 0.25 * max(0.0, 1.0 - fraction_done) return baseline * headroom def _progress_postfix(process, peak_rss_mb, fraction_done, **extra): current_rss_mb = process.memory_info().rss / (1024 * 1024) peak_rss_mb = max(peak_rss_mb, current_rss_mb) postfix = { "rss_mb": f"{current_rss_mb:.0f}", "est_peak_mb": f"{_estimate_peak_rss_mb(current_rss_mb, peak_rss_mb, fraction_done):.0f}", } postfix.update(extra) return postfix, peak_rss_mb def sort_records_for_time(records): return sorted( records, key=lambda record: ( record.meta.request_ready_time_ms or 0, record.meta.line_number, ), ) def resolve_study_tokenizer_path(tokenizer_path=None, *, model_family="auto", model_meta_dir=None, records=None): return resolve_tokenizer_path( tokenizer_path, model_family=model_family, model_meta_dir=model_meta_dir, records=records, ) def load_segmenter(segment_mode="tokenizer", tokenizer_path=None, *, model_family="auto", model_meta_dir=None, records=None): if segment_mode == "bytes": return (lambda text: list(text.encode("utf-8"))), "" if segment_mode != "tokenizer": raise ValueError(f"Unsupported segment mode: {segment_mode}") from tokenizers import Tokenizer from transformers import AutoTokenizer, PreTrainedTokenizerFast resolved_tokenizer_path = resolve_study_tokenizer_path( tokenizer_path, model_family=model_family, model_meta_dir=model_meta_dir, records=records, ) path_obj = Path(resolved_tokenizer_path) tokenizer_file = path_obj / "tokenizer.json" if path_obj.is_dir() else path_obj if tokenizer_file.exists(): try: raw_tokenizer = Tokenizer.from_file(str(tokenizer_file)) return (lambda text: raw_tokenizer.encode(text).ids), resolved_tokenizer_path except Exception: pass try: tokenizer = AutoTokenizer.from_pretrained( resolved_tokenizer_path, trust_remote_code=True, local_files_only=path_obj.exists(), use_fast=True, ) except Exception: tokenizer = PreTrainedTokenizerFast( tokenizer_file=str(tokenizer_file), ) return (lambda text: tokenizer.encode(text, add_special_tokens=False)), resolved_tokenizer_path def build_cdf(values): cleaned = sorted(value for value in values if value is not None) if not cleaned: return [] total = len(cleaned) rows = [] for index, value in enumerate(cleaned, start=1): rows.append({"value": value, "cdf": index / total}) return rows def write_csv(path, rows): if not rows: with open(path, "w", encoding="utf-8", newline="") as handle: handle.write("") return path fieldnames = list(rows[0].keys()) with open(path, "w", encoding="utf-8", newline="") as handle: writer = csv.DictWriter(handle, fieldnames=fieldnames) writer.writeheader() writer.writerows(rows) return path def plot_cdf_series(path, series, title, xlabel, ylabel="CDF"): plt.figure(figsize=(8, 5)) for label, rows in series: if not rows: continue xs = [row["value"] 
for row in rows] ys = [row["cdf"] for row in rows] plt.step(xs, ys, where="post", label=label) plt.title(title) plt.xlabel(xlabel) plt.ylabel(ylabel) plt.grid(True, alpha=0.3) if len(series) > 1: plt.legend() plt.tight_layout() plt.savefig(path, dpi=600) plt.close() return path def plot_cdf_series_with_zoom_windows( path, series, title, xlabel, zoom_windows, ylabel="CDF", start_y_at_value=None, ): nonempty_series = [(label, rows) for label, rows in series if rows] if not nonempty_series: return path all_values = sorted(row["value"] for _, rows in nonempty_series for row in rows) panel_count = 1 + len(zoom_windows) fig, axes = plt.subplots(1, panel_count, figsize=(6 * panel_count, 5), sharey=True) if panel_count == 1: axes = [axes] def _trim_rows(rows): if start_y_at_value is None: return rows, None trimmed = rows baseline_cdf = None last_match_index = -1 for index, row in enumerate(rows): if row["value"] <= start_y_at_value: last_match_index = index else: break if last_match_index >= 0: baseline_cdf = rows[last_match_index]["cdf"] trimmed = rows[last_match_index:] return trimmed, baseline_cdf def _plot(ax, panel_title, xlim=None): baseline_candidates = [] for label, rows in nonempty_series: trimmed_rows, baseline_cdf = _trim_rows(rows) xs = [row["value"] for row in trimmed_rows] ys = [row["cdf"] for row in trimmed_rows] ax.step(xs, ys, where="post", label=label) if baseline_cdf is not None: baseline_candidates.append(baseline_cdf) ax.set_title(panel_title) ax.set_xlabel(xlabel) ax.set_ylabel(ylabel) ax.grid(True, alpha=0.3) if xlim is not None: lo, hi = xlim if lo == hi: pad = max(abs(lo) * 0.05, 1e-6) lo -= pad hi += pad ax.set_xlim(lo, hi) if baseline_candidates: ymin = min(baseline_candidates) if ymin >= 1.0: ymin = 0.999 if ymin > 0.0: ax.set_ylim(ymin, 1.0) _plot(axes[0], "Full Range") for axis, (lo_pct, hi_pct, label) in zip(axes[1:], zoom_windows): lo = percentile(all_values, lo_pct) hi = percentile(all_values, hi_pct) _plot(axis, label, xlim=(lo, hi)) handles, labels = axes[0].get_legend_handles_labels() if len(nonempty_series) > 1 and handles: fig.legend(handles, labels, loc="upper center", ncol=min(len(labels), 6)) fig.suptitle(title) fig.tight_layout(rect=(0, 0, 1, 0.93)) fig.savefig(path, dpi=600) plt.close(fig) return path def plot_bar_chart(path, rows, label_key, value_key, title, xlabel, ylabel, top_n=20): limited = rows[:top_n] labels = [row[label_key] for row in limited] values = [row[value_key] for row in limited] plt.figure(figsize=(10, 6)) plt.barh(labels[::-1], values[::-1]) plt.title(title) plt.xlabel(xlabel) plt.ylabel(ylabel) plt.tight_layout() plt.savefig(path, dpi=600) plt.close() return path def plot_grouped_bar_chart(path, rows, label_key, series_keys, title, xlabel, ylabel): if not rows: return path labels = [row[label_key] for row in rows] x = range(len(labels)) width = 0.8 / max(len(series_keys), 1) plt.figure(figsize=(10, 6)) for idx, (key, label) in enumerate(series_keys): offsets = [value + (idx - (len(series_keys) - 1) / 2) * width for value in x] values = [row[key] for row in rows] plt.bar(offsets, values, width=width, label=label) plt.title(title) plt.xlabel(xlabel) plt.ylabel(ylabel) plt.xticks(list(x), labels, rotation=20) if len(series_keys) > 1: plt.legend() plt.tight_layout() plt.savefig(path, dpi=600) plt.close() return path def plot_fraction_bar_chart(path, rows, label_key, value_key, title, xlabel="Fraction", ylabel="Metric"): if not rows: return path labels = [row[label_key] for row in rows] values = [row[value_key] for row in rows] 
plt.figure(figsize=(10, 6)) plt.barh(labels[::-1], values[::-1]) plt.xlim(0.0, 1.0) plt.title(title) plt.xlabel(xlabel) plt.ylabel(ylabel) plt.tight_layout() plt.savefig(path, dpi=600) plt.close() return path def plot_time_series(path, rows, x_key, y_key, title, xlabel, ylabel): if not rows: return path xs = [row[x_key] for row in rows] ys = [row[y_key] for row in rows] plt.figure(figsize=(10, 5.5)) plt.step(xs, ys, where="post") plt.title(title) plt.xlabel(xlabel) plt.ylabel(ylabel) plt.grid(True, alpha=0.3) plt.tight_layout() plt.savefig(path, dpi=600) plt.close() return path def build_grouped_cdf_rows(series_by_group, group_key): rows = [] for group, values in series_by_group.items(): for cdf_row in build_cdf(values): rows.append( { group_key: group, "value": cdf_row["value"], "cdf": cdf_row["cdf"], } ) return rows def _normalize_request_metric_row(row): pattern_labels = row.get("pattern_labels", "") if isinstance(pattern_labels, str): pattern_labels = {label for label in pattern_labels.split(";") if label} else: pattern_labels = set(pattern_labels or []) return { "request_id": row.get("request_id", ""), "session_id": row.get("session_id", ""), "declared_tool_count": safe_int(row.get("declared_tool_count")), "tool_msg_count": safe_int(row.get("tool_msg_count")), "input_tokens": safe_int(row.get("input_tokens")), "uncached_prompt_tokens": safe_int(row.get("uncached_prompt_tokens")), "cache_hit_ratio": safe_float(row.get("cache_hit_ratio")), "request_ready_time_ms": safe_int(row.get("request_ready_time_ms")), "request_end_time_ms": safe_int(row.get("request_end_time_ms")), "theoretical_source_request_id": row.get("theoretical_source_request_id", ""), "pattern_labels": pattern_labels, } def _compute_agentic_patterns(request_metric_rows): rows = [_normalize_request_metric_row(row) for row in request_metric_rows] sessions = defaultdict(list) request_to_session = {} for row in rows: sessions[row["session_id"]].append(row) request_to_session[row["request_id"]] = row["session_id"] for session_rows in sessions.values(): session_rows.sort(key=lambda row: (row["request_ready_time_ms"], row["request_id"])) session_sizes = sorted(len(session_rows) for session_rows in sessions.values()) total_requests = len(rows) total_sessions = len(session_sizes) bucket_defs = [ ("1-2", 1, 2), ("3-9", 3, 9), ("10-19", 10, 19), ("20-49", 20, 49), ("50-99", 50, 99), ("100-149", 100, 149), ("150+", 150, None), ] session_turn_bucket_rows = [] for label, low, high in bucket_defs: matched = [size for size in session_sizes if size >= low and (high is None or size <= high)] request_count = sum(matched) session_turn_bucket_rows.append( { "bucket": label, "session_count": len(matched), "session_fraction": safe_div(len(matched), total_sessions), "request_count": request_count, "request_fraction": safe_div(request_count, total_requests), } ) request_fraction_rows = [ { "metric": "request_has_tool_msg", "fraction": safe_div(sum(row["tool_msg_count"] > 0 for row in rows), total_requests), }, { "metric": "request_declares_tools", "fraction": safe_div(sum(row["declared_tool_count"] > 0 for row in rows), total_requests), }, { "metric": "request_tool_heavy", "fraction": safe_div(sum("tool-heavy" in row["pattern_labels"] for row in rows), total_requests), }, { "metric": "request_tool_burst", "fraction": safe_div(sum("tool-burst" in row["pattern_labels"] for row in rows), total_requests), }, { "metric": "request_cache_efficient", "fraction": safe_div(sum("cache-efficient" in row["pattern_labels"] for row in rows), total_requests), }, 
] pair_gap_ready_values = [] pair_gap_end_values = [] pair_count = 0 prev_has_tool = 0 next_has_tool = 0 both_have_tool = 0 next_cache_95 = 0 next_cache_99 = 0 append_like = 0 append_like_after_tool = 0 append_like_after_tool_pairs = 0 same_or_longer_input = 0 short_gap_pairs = 0 append_like_within_30s = 0 for session_rows in sessions.values(): for previous, current in zip(session_rows, session_rows[1:]): pair_count += 1 gap_ready = max(current["request_ready_time_ms"] - previous["request_ready_time_ms"], 0) gap_end = max(current["request_ready_time_ms"] - previous["request_end_time_ms"], 0) pair_gap_ready_values.append(gap_ready) pair_gap_end_values.append(gap_end) if current["input_tokens"] >= previous["input_tokens"]: same_or_longer_input += 1 if previous["tool_msg_count"] > 0: prev_has_tool += 1 if current["tool_msg_count"] > 0: next_has_tool += 1 if previous["tool_msg_count"] > 0 and current["tool_msg_count"] > 0: both_have_tool += 1 if current["cache_hit_ratio"] >= 0.95: next_cache_95 += 1 if current["cache_hit_ratio"] >= 0.99: next_cache_99 += 1 is_append_like = ( current["input_tokens"] >= previous["input_tokens"] and current["cache_hit_ratio"] >= 0.95 and current["uncached_prompt_tokens"] <= 4096 ) if is_append_like: append_like += 1 if previous["tool_msg_count"] > 0: append_like_after_tool_pairs += 1 if is_append_like: append_like_after_tool += 1 if gap_ready <= 30000: short_gap_pairs += 1 if is_append_like: append_like_within_30s += 1 pair_fraction_rows = [ {"metric": "pair_prev_has_tool", "fraction": safe_div(prev_has_tool, pair_count)}, {"metric": "pair_next_has_tool", "fraction": safe_div(next_has_tool, pair_count)}, {"metric": "pair_both_have_tool", "fraction": safe_div(both_have_tool, pair_count)}, {"metric": "pair_next_cache_hit_ge_0_95", "fraction": safe_div(next_cache_95, pair_count)}, {"metric": "pair_next_cache_hit_ge_0_99", "fraction": safe_div(next_cache_99, pair_count)}, {"metric": "pair_same_or_longer_input", "fraction": safe_div(same_or_longer_input, pair_count)}, {"metric": "pair_append_like_proxy", "fraction": safe_div(append_like, pair_count)}, { "metric": "pair_append_like_after_tool", "fraction": safe_div(append_like_after_tool, append_like_after_tool_pairs), }, { "metric": "pair_append_like_within_30s", "fraction": safe_div(append_like_within_30s, short_gap_pairs), }, ] source_known = 0 source_same_session = 0 source_cross_session = 0 for row in rows: source_request_id = row["theoretical_source_request_id"] if not source_request_id: continue source_known += 1 if request_to_session.get(source_request_id) == row["session_id"]: source_same_session += 1 else: source_cross_session += 1 source_scope_rows = [ {"scope": "same_session", "fraction": safe_div(source_same_session, source_known)}, {"scope": "cross_session", "fraction": safe_div(source_cross_session, source_known)}, ] gap_cdf_rows = { "cdf_session_inter_request_gap_ready_ms.csv": build_cdf(pair_gap_ready_values), "cdf_session_inter_request_gap_end_ms.csv": build_cdf(pair_gap_end_values), } summary = { "request_count": total_requests, "session_count": total_sessions, "session_turn_stats": { "min": min(session_sizes) if session_sizes else 0, "max": max(session_sizes) if session_sizes else 0, "p50": percentile(session_sizes, 0.5) if session_sizes else 0.0, "p90": percentile(session_sizes, 0.9) if session_sizes else 0.0, "p99": percentile(session_sizes, 0.99) if session_sizes else 0.0, }, "session_turn_bucket_rows": session_turn_bucket_rows, "request_level_fraction_rows": request_fraction_rows, 
"pair_level_fraction_rows": pair_fraction_rows, "pair_gap_ready_ms_stats": series_stats(pair_gap_ready_values), "pair_gap_end_ms_stats": series_stats(pair_gap_end_values), "theoretical_source_scope": { "known_fraction": safe_div(source_known, total_requests), "same_session_fraction_of_known": safe_div(source_same_session, source_known), "cross_session_fraction_of_known": safe_div(source_cross_session, source_known), }, "append_like_proxy_definition": ( "next_input_tokens >= prev_input_tokens AND " "next_cache_hit_ratio >= 0.95 AND next_uncached_prompt_tokens <= 4096" ), } return { "summary": summary, "session_turn_bucket_rows": session_turn_bucket_rows, "request_fraction_rows": request_fraction_rows, "pair_fraction_rows": pair_fraction_rows, "source_scope_rows": source_scope_rows, "gap_cdf_rows": gap_cdf_rows, } def write_agentic_outputs_from_rows(request_metric_rows, advanced_dir): advanced_dir = ensure_output_dir(advanced_dir) agentic = _compute_agentic_patterns(request_metric_rows) write_csv(advanced_dir / "agentic_session_turn_buckets.csv", agentic["session_turn_bucket_rows"]) write_csv(advanced_dir / "agentic_request_level_fractions.csv", agentic["request_fraction_rows"]) write_csv(advanced_dir / "agentic_pair_level_fractions.csv", agentic["pair_fraction_rows"]) write_csv(advanced_dir / "agentic_theoretical_source_scope.csv", agentic["source_scope_rows"]) for filename, rows in agentic["gap_cdf_rows"].items(): write_csv(advanced_dir / filename, rows) plot_grouped_bar_chart( advanced_dir / "agentic_session_turn_buckets.png", agentic["session_turn_bucket_rows"], label_key="bucket", series_keys=[ ("session_fraction", "session_fraction"), ("request_fraction", "request_fraction"), ], title="Session Turn Buckets: Session Share vs Request Share", xlabel="Session turn bucket", ylabel="Fraction", ) plot_cdf_series( advanced_dir / "cdf_session_inter_request_gap_ms.png", [ ("gap_from_prev_ready_ms", agentic["gap_cdf_rows"]["cdf_session_inter_request_gap_ready_ms.csv"]), ("gap_from_prev_end_ms", agentic["gap_cdf_rows"]["cdf_session_inter_request_gap_end_ms.csv"]), ], title="CDF of Session Inter-Request Gap", xlabel="Milliseconds", ) plot_cdf_series_with_zoom_windows( advanced_dir / "cdf_session_inter_request_gap_ready_ms_zoom80.png", [ ("gap_from_prev_ready_ms", agentic["gap_cdf_rows"]["cdf_session_inter_request_gap_ready_ms.csv"]), ], title="CDF of Session Inter-Request Gap: Ready to Ready", xlabel="Milliseconds", zoom_windows=[ (0.10, 0.90, "Central 80% (p10-p90)"), ], start_y_at_value=0.0, ) plot_cdf_series_with_zoom_windows( advanced_dir / "cdf_session_inter_request_gap_end_ms_zoom80.png", [ ("gap_from_prev_end_ms", agentic["gap_cdf_rows"]["cdf_session_inter_request_gap_end_ms.csv"]), ], title="CDF of Session Inter-Request Gap: End to Ready", xlabel="Milliseconds", zoom_windows=[ (0.10, 0.90, "Central 80% (p10-p90)"), ], start_y_at_value=0.0, ) plot_cdf_series_with_zoom_windows( advanced_dir / "cdf_session_inter_request_gap_ready_ms_zoom90.png", [ ("gap_from_prev_ready_ms", agentic["gap_cdf_rows"]["cdf_session_inter_request_gap_ready_ms.csv"]), ], title="CDF of Session Inter-Request Gap: Ready to Ready", xlabel="Milliseconds", zoom_windows=[ (0.05, 0.95, "Central 90% (p05-p95)"), ], start_y_at_value=0.0, ) plot_cdf_series_with_zoom_windows( advanced_dir / "cdf_session_inter_request_gap_end_ms_zoom90.png", [ ("gap_from_prev_end_ms", agentic["gap_cdf_rows"]["cdf_session_inter_request_gap_end_ms.csv"]), ], title="CDF of Session Inter-Request Gap: End to Ready", xlabel="Milliseconds", 
zoom_windows=[ (0.05, 0.95, "Central 90% (p05-p95)"), ], start_y_at_value=0.0, ) plot_fraction_bar_chart( advanced_dir / "agentic_request_level_fractions.png", agentic["request_fraction_rows"], label_key="metric", value_key="fraction", title="Request-Level Agentic Fractions", ) plot_fraction_bar_chart( advanced_dir / "agentic_pair_level_fractions.png", agentic["pair_fraction_rows"], label_key="metric", value_key="fraction", title="Pair-Level Agentic Fractions", ) plot_fraction_bar_chart( advanced_dir / "agentic_theoretical_source_scope.png", agentic["source_scope_rows"], label_key="scope", value_key="fraction", title="Theoretical Prefix Reuse Scope", ) summary_path = advanced_dir / "agentic_patterns_summary.json" with open(summary_path, "w", encoding="utf-8") as handle: json.dump(agentic["summary"], handle, ensure_ascii=False, indent=2) return agentic["summary"], summary_path def write_agentic_outputs_from_request_metrics_path(request_metrics_path, advanced_dir): with open(request_metrics_path, "r", encoding="utf-8") as handle: rows = [] for row in csv.DictReader(handle): rows.append( { "request_id": row.get("request_id", ""), "session_id": row.get("session_id", ""), "declared_tool_count": row.get("declared_tool_count", 0), "tool_msg_count": row.get("tool_msg_count", 0), "input_tokens": row.get("input_tokens", 0), "uncached_prompt_tokens": row.get("uncached_prompt_tokens", 0), "cache_hit_ratio": row.get("cache_hit_ratio", 0.0), "request_ready_time_ms": row.get("request_ready_time_ms", 0), "request_end_time_ms": row.get("request_end_time_ms", 0), "theoretical_source_request_id": row.get("theoretical_source_request_id", ""), "pattern_labels": row.get("pattern_labels", ""), } ) return write_agentic_outputs_from_rows(rows, advanced_dir) def _normalize_request_metric_input_length_row(row): return { "request_id": row.get("request_id", ""), "input_tokens": safe_int(row.get("input_tokens")), "theoretical_prompt_unit_length": safe_int(row.get("theoretical_prompt_unit_length")), } def _build_input_length_comparison(request_metric_rows): rows = [_normalize_request_metric_input_length_row(row) for row in request_metric_rows] provider_values = [] retokenized_values = [] delta_values = [] ratio_values = [] relative_delta_values = [] same_count = 0 retokenized_gt_count = 0 retokenized_lt_count = 0 provider_zero_count = 0 retokenized_zero_count = 0 for row in rows: provider = row["input_tokens"] retokenized = row["theoretical_prompt_unit_length"] provider_values.append(provider) retokenized_values.append(retokenized) delta_values.append(retokenized - provider) if provider > 0: ratio_values.append(retokenized / provider) relative_delta_values.append((retokenized - provider) / provider) if provider == retokenized: same_count += 1 elif retokenized > provider: retokenized_gt_count += 1 else: retokenized_lt_count += 1 if provider == 0: provider_zero_count += 1 if retokenized == 0: retokenized_zero_count += 1 summary = { "request_count": len(rows), "same_count": same_count, "same_fraction": safe_div(same_count, len(rows)), "retokenized_gt_provider_count": retokenized_gt_count, "retokenized_gt_provider_fraction": safe_div(retokenized_gt_count, len(rows)), "retokenized_lt_provider_count": retokenized_lt_count, "retokenized_lt_provider_fraction": safe_div(retokenized_lt_count, len(rows)), "provider_zero_count": provider_zero_count, "retokenized_zero_count": retokenized_zero_count, "provider_input_tokens_stats": series_stats(provider_values), "retokenized_prompt_tokens_stats": series_stats(retokenized_values), 
"delta_tokens_stats": series_stats(delta_values), "ratio_stats": series_stats(ratio_values), "relative_delta_vs_provider_stats": series_stats(relative_delta_values), } cdf_rows = { "cdf_retokenized_prompt_tokens.csv": build_cdf(retokenized_values), "cdf_input_length_delta_tokens.csv": build_cdf(delta_values), "cdf_input_length_ratio_retokenized_over_provider.csv": build_cdf(ratio_values), "cdf_input_length_relative_delta_vs_provider.csv": build_cdf(relative_delta_values), } provider_cdf_rows = build_cdf(provider_values) return summary, cdf_rows, provider_cdf_rows def write_input_length_comparison_from_rows(request_metric_rows, advanced_dir): advanced_dir = ensure_output_dir(advanced_dir) summary, cdf_rows, provider_cdf_rows = _build_input_length_comparison(request_metric_rows) for filename, rows in cdf_rows.items(): write_csv(advanced_dir / filename, rows) plot_cdf_series( advanced_dir / "cdf_input_length_provider_vs_retokenized.png", [ ("provider_input_tokens", provider_cdf_rows), ("retokenized_prompt_tokens", cdf_rows["cdf_retokenized_prompt_tokens.csv"]), ], title="CDF of Provider vs Retokenized Input Length", xlabel="Tokens", ) plot_cdf_series( advanced_dir / "cdf_input_length_delta_tokens.png", [("retokenized_minus_provider", cdf_rows["cdf_input_length_delta_tokens.csv"])], title="CDF of Retokenized - Provider Input Length", xlabel="Tokens", ) plot_cdf_series( advanced_dir / "cdf_input_length_ratio.png", [("retokenized_over_provider", cdf_rows["cdf_input_length_ratio_retokenized_over_provider.csv"])], title="CDF of Retokenized / Provider Input Length", xlabel="Ratio", ) summary_path = advanced_dir / "input_length_comparison_summary.json" with open(summary_path, "w", encoding="utf-8") as handle: json.dump(summary, handle, ensure_ascii=False, indent=2) return summary, summary_path def write_input_length_comparison_from_request_metrics_path(request_metrics_path, advanced_dir): with open(request_metrics_path, "r", encoding="utf-8") as handle: rows = list(csv.DictReader(handle)) return write_input_length_comparison_from_rows(rows, advanced_dir) DEFAULT_INPUT_LENGTH_BUCKET_THRESHOLDS = [ 32 * 1024, 85 * 1024, 128 * 1024, ] def parse_input_length_bucket_thresholds(spec): if spec is None: return list(DEFAULT_INPUT_LENGTH_BUCKET_THRESHOLDS) thresholds = [] for chunk in str(spec).split(";"): text = chunk.strip() if not text: continue normalized = text.lower() multiplier = 1 if normalized.endswith("ki"): multiplier = 1024 normalized = normalized[:-2] elif normalized.endswith("mi"): multiplier = 1024 * 1024 normalized = normalized[:-2] try: value = int(normalized) * multiplier except ValueError as exc: raise ValueError( f"Invalid input-length bucket threshold `{text}`. " "Use semicolon-separated token counts such as `32768;87040;131072` or `32Ki;85Ki;128Ki`." 
) from exc if value <= 0: raise ValueError("Input-length bucket thresholds must be positive integers.") thresholds.append(value) if not thresholds: raise ValueError("At least one input-length bucket threshold is required.") return sorted(set(thresholds)) def _format_bucket_boundary(value): if value == 0: return "0" if value % (1024 * 1024) == 0: return f"{value // (1024 * 1024)}Mi" if value % 1024 == 0: return f"{value // 1024}Ki" return str(value) def build_input_length_bucket_defs(thresholds=None): parsed_thresholds = ( list(DEFAULT_INPUT_LENGTH_BUCKET_THRESHOLDS) if thresholds is None else sorted(set(int(value) for value in thresholds)) ) if not parsed_thresholds: raise ValueError("At least one input-length bucket threshold is required.") if any(value <= 0 for value in parsed_thresholds): raise ValueError("Input-length bucket thresholds must be positive integers.") if parsed_thresholds == DEFAULT_INPUT_LENGTH_BUCKET_THRESHOLDS: return [ ("0-32Ki", 0, 32 * 1024), ("32-85Ki", 32 * 1024, 85 * 1024), ("85-128Ki", 85 * 1024, 128 * 1024), ("128Ki+", 128 * 1024, None), ] bucket_defs = [] lower_bound = 0 for upper_bound in parsed_thresholds: bucket_defs.append( ( f"{_format_bucket_boundary(lower_bound)}-{_format_bucket_boundary(upper_bound)}", lower_bound, upper_bound, ) ) lower_bound = upper_bound bucket_defs.append((f"{_format_bucket_boundary(lower_bound)}+", lower_bound, None)) return bucket_defs def _rows_in_input_bucket(request_metric_rows, lower_bound, upper_bound): return [ row for row in request_metric_rows if row["input_tokens"] >= lower_bound and (upper_bound is None or row["input_tokens"] < upper_bound) ] def assign_input_length_bucket(input_tokens, bucket_defs=None): bucket_defs = bucket_defs or build_input_length_bucket_defs() for bucket_label, lower_bound, upper_bound in bucket_defs: if input_tokens >= lower_bound and (upper_bound is None or input_tokens < upper_bound): return bucket_label return bucket_defs[-1][0] def summarize_cache_reuse_by_input_length_bucket(request_metric_rows, bucket_defs=None): bucket_defs = bucket_defs or build_input_length_bucket_defs() total_requests = len(request_metric_rows) bucket_rows = [] for bucket_label, lower_bound, upper_bound in bucket_defs: matched_rows = _rows_in_input_bucket(request_metric_rows, lower_bound, upper_bound) input_token_values = [row["input_tokens"] for row in matched_rows] retokenized_values = [row["theoretical_prompt_unit_length"] for row in matched_rows] actual_ratio_values = [row["cache_hit_ratio"] for row in matched_rows] theoretical_ratio_values = [row["theoretical_prefix_hit_ratio"] for row in matched_rows] bucketed_theoretical_ratio_values = [ row.get("bucketed_theoretical_prefix_hit_ratio", row["theoretical_prefix_hit_ratio"]) for row in matched_rows ] input_token_sum = sum(input_token_values) retokenized_sum = sum(retokenized_values) cached_token_sum = sum(row["cached_tokens"] for row in matched_rows) theoretical_hit_sum = sum(row["theoretical_prefix_hit_units"] for row in matched_rows) bucketed_theoretical_hit_sum = sum( row.get("bucketed_theoretical_prefix_hit_units", row["theoretical_prefix_hit_units"]) for row in matched_rows ) bucket_rows.append( { "bucket": bucket_label, "input_tokens_min_inclusive": lower_bound, "input_tokens_max_exclusive": upper_bound if upper_bound is not None else "", "request_count": len(matched_rows), "request_fraction": safe_div(len(matched_rows), total_requests), "provider_input_tokens_mean": series_stats(input_token_values)["mean"], "provider_input_tokens_median": 
series_stats(input_token_values)["median"], "provider_input_tokens_p90": series_stats(input_token_values)["p90"], "retokenized_prompt_tokens_mean": series_stats(retokenized_values)["mean"], "retokenized_prompt_tokens_median": series_stats(retokenized_values)["median"], "retokenized_prompt_tokens_p90": series_stats(retokenized_values)["p90"], "actual_cache_hit_ratio_mean": series_stats(actual_ratio_values)["mean"], "actual_cache_hit_ratio_median": series_stats(actual_ratio_values)["median"], "actual_cache_hit_ratio_p90": series_stats(actual_ratio_values)["p90"], "theoretical_cache_hit_ratio_mean": series_stats(theoretical_ratio_values)["mean"], "theoretical_cache_hit_ratio_median": series_stats(theoretical_ratio_values)["median"], "theoretical_cache_hit_ratio_p90": series_stats(theoretical_ratio_values)["p90"], "bucketed_theoretical_cache_hit_ratio_mean": series_stats(bucketed_theoretical_ratio_values)["mean"], "bucketed_theoretical_cache_hit_ratio_median": series_stats(bucketed_theoretical_ratio_values)[ "median" ], "bucketed_theoretical_cache_hit_ratio_p90": series_stats(bucketed_theoretical_ratio_values)["p90"], "weighted_actual_cache_hit_ratio": safe_div(cached_token_sum, input_token_sum), "weighted_theoretical_cache_hit_ratio": safe_div(theoretical_hit_sum, retokenized_sum), "weighted_bucketed_theoretical_cache_hit_ratio": safe_div( bucketed_theoretical_hit_sum, retokenized_sum ), "weighted_bucket_boundary_loss_ratio": safe_div( max(theoretical_hit_sum - bucketed_theoretical_hit_sum, 0), retokenized_sum, ), "actual_reused_request_fraction": safe_div( sum(row["cached_tokens"] > 0 for row in matched_rows), len(matched_rows), ), "theoretical_reused_request_fraction": safe_div( sum(row["theoretical_prefix_hit_units"] > 0 for row in matched_rows), len(matched_rows), ), "bucketed_theoretical_reused_request_fraction": safe_div( sum( row.get("bucketed_theoretical_prefix_hit_units", row["theoretical_prefix_hit_units"]) > 0 for row in matched_rows ), len(matched_rows), ), "actual_cached_tokens_sum": cached_token_sum, "theoretical_hit_units_sum": theoretical_hit_sum, "bucketed_theoretical_hit_units_sum": bucketed_theoretical_hit_sum, } ) summary = { "bucket_definition": { "unit": "tokens", "k_definition": 1024, "buckets": [ { "bucket": bucket_label, "input_tokens_min_inclusive": lower_bound, "input_tokens_max_exclusive": upper_bound, } for bucket_label, lower_bound, upper_bound in bucket_defs ], }, "request_count": total_requests, "bucket_rows": bucket_rows, } return summary, bucket_rows def write_cache_reuse_by_input_length_bucket_from_rows(request_metric_rows, advanced_dir, bucket_defs=None): advanced_dir = ensure_output_dir(advanced_dir) summary, bucket_rows = summarize_cache_reuse_by_input_length_bucket( request_metric_rows, bucket_defs=bucket_defs, ) csv_path = write_csv(advanced_dir / "input_length_bucket_cache_reuse.csv", bucket_rows) plot_path = plot_grouped_bar_chart( advanced_dir / "input_length_bucket_cache_reuse.png", bucket_rows, label_key="bucket", series_keys=[ ("weighted_actual_cache_hit_ratio", "actual_weighted_hit_ratio"), ("weighted_theoretical_cache_hit_ratio", "theoretical_weighted_hit_ratio"), ("weighted_bucketed_theoretical_cache_hit_ratio", "bucketed_theoretical_weighted_hit_ratio"), ], title="Weighted Cache Hit Ratio by Provider Input-Length Bucket", xlabel="Input-length bucket", ylabel="Weighted hit ratio", ) summary_path = advanced_dir / "input_length_bucket_cache_reuse_summary.json" with open(summary_path, "w", encoding="utf-8") as handle: json.dump(summary, handle, 
ensure_ascii=False, indent=2) return summary, summary_path, csv_path, plot_path def write_cache_reuse_by_input_length_bucket_from_request_metrics_path( request_metrics_path, advanced_dir, bucket_defs=None, ): with open(request_metrics_path, "r", encoding="utf-8") as handle: rows = [] for row in csv.DictReader(handle): rows.append( { "input_tokens": safe_int(row.get("input_tokens")), "cached_tokens": safe_int(row.get("cached_tokens")), "cache_hit_ratio": safe_float(row.get("cache_hit_ratio")), "theoretical_prompt_unit_length": safe_int(row.get("theoretical_prompt_unit_length")), "theoretical_prefix_hit_units": safe_int(row.get("theoretical_prefix_hit_units")), "theoretical_prefix_hit_ratio": safe_float(row.get("theoretical_prefix_hit_ratio")), "bucketed_theoretical_prefix_hit_units": safe_int( row.get("bucketed_theoretical_prefix_hit_units", row.get("theoretical_prefix_hit_units")) ), "bucketed_theoretical_prefix_hit_ratio": safe_float( row.get("bucketed_theoretical_prefix_hit_ratio", row.get("theoretical_prefix_hit_ratio")) ), } ) return write_cache_reuse_by_input_length_bucket_from_rows(rows, advanced_dir, bucket_defs=bucket_defs) def summarize_session_bucket_boundary_miss(parent_child_rows, bucket_defs=None): bucket_defs = bucket_defs or build_input_length_bucket_defs() normalized_rows = [] for row in parent_child_rows: child_input_tokens = safe_int(row.get("child_input_tokens")) child_bucket = row.get("child_bucket") or assign_input_length_bucket(child_input_tokens, bucket_defs) normalized_rows.append( { "session_id": row.get("session_id", ""), "parent_request_id": row.get("parent_request_id", ""), "child_request_id": row.get("child_request_id", ""), "parent_bucket": row.get("parent_bucket", ""), "child_bucket": child_bucket, "shared_prefix_units": safe_int(row.get("shared_prefix_units")), "is_cross_bucket": safe_int(row.get("is_cross_bucket")), } ) total_edge_count = len(normalized_rows) total_reusable_edge_count = sum(row["shared_prefix_units"] > 0 for row in normalized_rows) total_cross_bucket_edge_count = sum(row["is_cross_bucket"] for row in normalized_rows) total_shared_prefix_units = sum(row["shared_prefix_units"] for row in normalized_rows) total_cross_bucket_shared_prefix_units = sum( row["shared_prefix_units"] for row in normalized_rows if row["is_cross_bucket"] ) bucket_rows = [] for bucket_label, _, _ in bucket_defs: matched_rows = [row for row in normalized_rows if row["child_bucket"] == bucket_label] shared_prefix_units = sum(row["shared_prefix_units"] for row in matched_rows) cross_bucket_shared_prefix_units = sum( row["shared_prefix_units"] for row in matched_rows if row["is_cross_bucket"] ) reusable_edge_count = sum(row["shared_prefix_units"] > 0 for row in matched_rows) cross_bucket_edge_count = sum(row["is_cross_bucket"] for row in matched_rows) bucket_rows.append( { "bucket": bucket_label, "edge_count": len(matched_rows), "edge_fraction": safe_div(len(matched_rows), total_edge_count), "reusable_edge_count": reusable_edge_count, "cross_bucket_edge_count": cross_bucket_edge_count, "cross_bucket_edge_fraction": safe_div(cross_bucket_edge_count, len(matched_rows)), "shared_prefix_units_sum": shared_prefix_units, "cross_bucket_shared_prefix_units_sum": cross_bucket_shared_prefix_units, "cross_bucket_shared_prefix_unit_fraction": safe_div( cross_bucket_shared_prefix_units, shared_prefix_units ), } ) summary = { "edge_count": total_edge_count, "reusable_edge_count": total_reusable_edge_count, "cross_bucket_edge_count": total_cross_bucket_edge_count, 
"cross_bucket_edge_fraction": safe_div(total_cross_bucket_edge_count, total_edge_count), "shared_prefix_units_sum": total_shared_prefix_units, "cross_bucket_shared_prefix_units_sum": total_cross_bucket_shared_prefix_units, "cross_bucket_shared_prefix_unit_fraction": safe_div( total_cross_bucket_shared_prefix_units, total_shared_prefix_units ), "bucket_rows": bucket_rows, } return summary, bucket_rows def write_session_bucket_boundary_miss_from_rows(parent_child_rows, advanced_dir, bucket_defs=None): advanced_dir = ensure_output_dir(advanced_dir) summary, bucket_rows = summarize_session_bucket_boundary_miss(parent_child_rows, bucket_defs=bucket_defs) csv_path = write_csv(advanced_dir / "session_bucket_boundary_miss.csv", bucket_rows) plot_path = plot_grouped_bar_chart( advanced_dir / "session_bucket_boundary_miss.png", bucket_rows, label_key="bucket", series_keys=[ ("cross_bucket_edge_fraction", "cross_bucket_edge_fraction"), ("cross_bucket_shared_prefix_unit_fraction", "cross_bucket_shared_prefix_unit_fraction"), ], title="Session Bucket Boundary Miss by Child Input-Length Bucket", xlabel="Child input-length bucket", ylabel="Fraction", ) summary_path = advanced_dir / "session_bucket_boundary_miss_summary.json" with open(summary_path, "w", encoding="utf-8") as handle: json.dump(summary, handle, ensure_ascii=False, indent=2) return summary, summary_path, csv_path, plot_path def build_alive_block_timeline(block_rows): events = Counter() for row in block_rows: start_ms = safe_int(row.get("first_seen_ms")) end_ms = safe_int(row.get("span_end_ms", row.get("last_reuse_ms", row.get("first_seen_ms")))) if start_ms <= 0 and end_ms <= 0: continue if end_ms < start_ms: end_ms = start_ms events[start_ms] += 1 events[end_ms + 1] -= 1 alive_rows = [] alive_count = 0 peak_alive_blocks = 0 for timestamp_ms in sorted(events): alive_count += events[timestamp_ms] peak_alive_blocks = max(peak_alive_blocks, alive_count) alive_rows.append( { "timestamp_ms": timestamp_ms, "delta_alive_blocks": events[timestamp_ms], "alive_block_count": alive_count, } ) summary = { "event_count": len(alive_rows), "peak_alive_blocks": peak_alive_blocks, "first_timestamp_ms": alive_rows[0]["timestamp_ms"] if alive_rows else 0, "last_timestamp_ms": alive_rows[-1]["timestamp_ms"] if alive_rows else 0, } return summary, alive_rows def _normalize_source_payload(row): meta = row.get("meta", {}) if isinstance(row.get("meta", {}), dict) else {} usage = row.get("usage", {}) if isinstance(row.get("usage", {}), dict) else {} messages = row.get("messages") if not isinstance(messages, list): messages = row.get("message_events", []) role_sequence = row.get("role_sequence", []) if not role_sequence and isinstance(messages, list): role_sequence = [str(message.get("role", "")) for message in messages if isinstance(message, dict)] declared_tools = row.get("declared_tools", []) return { "meta": meta, "usage": usage, "canonical_prompt": str(row.get("canonical_prompt", "")), "declared_tools": [tool for tool in declared_tools if isinstance(tool, dict)], "messages": [message for message in messages if isinstance(message, dict)], "role_sequence": [str(role) for role in role_sequence], } def _iter_source_message_minimal(source_path): with open(source_path, "r", encoding="utf-8") as handle: for line in handle: payload = _normalize_source_payload(json.loads(line)) meta = payload["meta"] yield { "request_id": meta["request_id"], "session_id": meta["session_id"], "request_ready_time_ms": safe_int(meta.get("request_ready_time_ms")), "request_end_time_ms": 
safe_int(meta.get("request_end_time_ms")), "messages": payload["messages"], "role_sequence": payload["role_sequence"], } def _message_signature(message): return message.get("role", "") def _common_prefix_message_count(previous_messages, current_messages): count = 0 for previous, current in zip(previous_messages, current_messages): if _message_signature(previous) != _message_signature(current): break count += 1 return count def _classify_transition(previous_messages, current_messages): common_prefix_count = _common_prefix_message_count(previous_messages, current_messages) appended_messages = current_messages[common_prefix_count:] appended_roles = [message.get("role", "unknown") for message in appended_messages] first_new_role = appended_roles[0] if appended_roles else "" if not appended_messages: if len(current_messages) < len(previous_messages): return { "common_prefix_message_count": common_prefix_count, "appended_message_count": 0, "appended_roles": "", "first_new_role": "", "trigger_group": "compaction", "trigger_detail": "context_shrunk_without_append", "appended_text_len": 0, } return { "common_prefix_message_count": common_prefix_count, "appended_message_count": 0, "appended_roles": "", "first_new_role": "", "trigger_group": "no_change", "trigger_detail": "no_new_messages", "appended_text_len": 0, } appended_text_len = sum(safe_int(message.get("text_len")) for message in appended_messages) appended_roles_head = ";".join(appended_roles[:8]) if len(appended_roles) > 8: appended_roles_head += ";..." trigger_group = "unknown" trigger_detail = "unknown" if first_new_role == "user": trigger_group = "user" trigger_detail = "user_first" elif first_new_role == "tool": trigger_group = "tool" trigger_detail = "tool_first" elif first_new_role == "assistant": roles_after_first = appended_roles[1:] if "tool" in roles_after_first and ("user" not in roles_after_first or roles_after_first.index("tool") < roles_after_first.index("user")): trigger_group = "tool" trigger_detail = "assistant_then_tool" elif "user" in roles_after_first: trigger_group = "user" trigger_detail = "assistant_then_user" else: trigger_group = "assistant" trigger_detail = "assistant_first" elif first_new_role == "system": trigger_group = "system" trigger_detail = "system_first" else: trigger_group = first_new_role or "unknown" trigger_detail = f"{trigger_group}_first" if first_new_role else "unknown" return { "common_prefix_message_count": common_prefix_count, "appended_message_count": len(appended_messages), "appended_roles": appended_roles_head, "first_new_role": first_new_role, "trigger_group": trigger_group, "trigger_detail": trigger_detail, "appended_text_len": appended_text_len, } def build_transition_markdown_section(transition_summary): if not transition_summary: return "" lines = [ "## Session Transition Analysis", "- This section analyzes each `prev_request -> next_request` transition inside a session.", f"- Transition count: {transition_summary['transition_count']}", f"- Negative signed-delta transitions: {transition_summary['negative_delta_count']} ({transition_summary['negative_delta_fraction']:.4f})", f"- Trigger proportions: {json.dumps(transition_summary['trigger_group_stats'], ensure_ascii=False)}", f"- Source scope proportions: {json.dumps(transition_summary['source_scope_stats'], ensure_ascii=False)}", f"- Signed context delta ratio stats vs current context: {json.dumps(transition_summary['delta_fraction_current_stats'], ensure_ascii=False)}", f"- Absolute context delta ratio stats vs current context: 
{json.dumps(transition_summary['abs_delta_fraction_current_stats'], ensure_ascii=False)}", f"- Uncached prompt fraction stats vs current context: {json.dumps(transition_summary['uncached_fraction_current_stats'], ensure_ascii=False)}", f"- Source gap stats (ms): {json.dumps(transition_summary['source_gap_ms_stats'], ensure_ascii=False)}", "", "Trigger groups:", "| trigger_group | count | fraction | negative_delta_fraction | p50_abs_delta_fraction_of_current | p90_abs_delta_fraction_of_current | p50_uncached_fraction_of_current | p90_uncached_fraction_of_current |", "| --- | --- | --- | --- | --- | --- | --- | --- |", ] for row in transition_summary["trigger_group_rows"]: lines.append( f"| {row['trigger_group']} | {row['count']} | {row['fraction']:.4f} | " f"{row['negative_delta_fraction']:.4f} | " f"{row['p50_abs_delta_fraction_of_current']:.4f} | {row['p90_abs_delta_fraction_of_current']:.4f} | " f"{row['p50_uncached_fraction_of_current']:.4f} | {row['p90_uncached_fraction_of_current']:.4f} |" ) lines.extend( [ "", "KV-cache source scope:", "| source_scope | count | fraction | negative_delta_fraction | p50_source_gap_ms | p90_source_gap_ms |", "| --- | --- | --- | --- | --- | --- |", ] ) for row in transition_summary["source_scope_rows"]: lines.append( f"| {row['source_scope']} | {row['count']} | {row['fraction']:.4f} | " f"{row['negative_delta_fraction']:.4f} | " f"{row['p50_source_gap_ms']:.1f} | {row['p90_source_gap_ms']:.1f} |" ) return "\n".join(lines) def build_retokenized_transition_markdown_section(retokenized_transition_summary, provider_transition_summary=None): if not retokenized_transition_summary: return "" lines = [ "## Retokenized Transition Length Analysis", "- This section recomputes session transition length change using analyzer-retokenized `canonical_prompt` length (`theoretical_prompt_unit_length`) instead of provider `usage.input_tokens`.", f"- Transition count: {retokenized_transition_summary['transition_count']}", f"- Negative signed-delta transitions: {retokenized_transition_summary['negative_delta_count']} ({retokenized_transition_summary['negative_delta_fraction']:.4f})", f"- Signed context delta ratio stats vs current retokenized context: {json.dumps(retokenized_transition_summary['delta_fraction_current_stats'], ensure_ascii=False)}", f"- Absolute context delta ratio stats vs current retokenized context: {json.dumps(retokenized_transition_summary['abs_delta_fraction_current_stats'], ensure_ascii=False)}", f"- Delta token stats (`next_retokenized - prev_retokenized`): {json.dumps(retokenized_transition_summary['delta_tokens_stats'], ensure_ascii=False)}", f"- Absolute delta token stats: {json.dumps(retokenized_transition_summary['abs_delta_tokens_stats'], ensure_ascii=False)}", ] if provider_transition_summary: lines.extend( [ f"- Provider-length negative signed-delta fraction: {provider_transition_summary['negative_delta_fraction']:.4f}", f"- Retokenized-length negative signed-delta fraction: {retokenized_transition_summary['negative_delta_fraction']:.4f}", ] ) lines.extend( [ "", "Trigger groups:", "| trigger_group | count | fraction | negative_delta_fraction | p10_signed_delta_fraction_of_current | p50_signed_delta_fraction_of_current | p90_signed_delta_fraction_of_current | p95_signed_delta_fraction_of_current | p50_abs_delta_fraction_of_current | p90_abs_delta_fraction_of_current |", "| --- | --- | --- | --- | --- | --- | --- | --- | --- | --- |", ] ) for row in retokenized_transition_summary["trigger_group_rows"]: lines.append( f"| {row['trigger_group']} 
| {row['count']} | {row['fraction']:.4f} | {row['negative_delta_fraction']:.4f} | " f"{row['p10_signed_delta_fraction_of_current']:.4f} | {row['p50_signed_delta_fraction_of_current']:.4f} | " f"{row['p90_signed_delta_fraction_of_current']:.4f} | {row['p95_signed_delta_fraction_of_current']:.4f} | " f"{row['p50_abs_delta_fraction_of_current']:.4f} | {row['p90_abs_delta_fraction_of_current']:.4f} |" ) return "\n".join(lines) def _load_request_metric_lookup(request_metrics_path): lookup = {} with open(request_metrics_path, "r", encoding="utf-8") as handle: for row in csv.DictReader(handle): lookup[row.get("request_id", "")] = { "request_id": row.get("request_id", ""), "session_id": row.get("session_id", ""), "assistant_msg_count": safe_int(row.get("assistant_msg_count")), "tool_msg_count": safe_int(row.get("tool_msg_count")), "user_msg_count": safe_int(row.get("user_msg_count")), "system_msg_count": safe_int(row.get("system_msg_count")), "declared_tool_count": safe_int(row.get("declared_tool_count")), "input_tokens": safe_int(row.get("input_tokens")), "theoretical_prompt_unit_length": safe_int(row.get("theoretical_prompt_unit_length")), "cache_hit_ratio": safe_float(row.get("cache_hit_ratio")), "uncached_prompt_tokens": safe_int(row.get("uncached_prompt_tokens")), } return lookup def _load_retokenized_transition_lookup(path): lookup = {} with open(path, "r", encoding="utf-8") as handle: for row in csv.DictReader(handle): lookup[(row.get("prev_request_id", ""), row.get("next_request_id", ""))] = { "delta_retokenized_prompt_tokens": safe_int(row.get("delta_retokenized_prompt_tokens")), "delta_retokenized_fraction_of_current": safe_float( row.get("delta_retokenized_fraction_of_current") ), "prev_retokenized_prompt_tokens": safe_int(row.get("prev_retokenized_prompt_tokens")), "next_retokenized_prompt_tokens": safe_int(row.get("next_retokenized_prompt_tokens")), } return lookup def _classify_context_change_mechanism(row): trigger_group = row["trigger_group"] prev_count = row["prev_message_count"] next_count = row["next_message_count"] common_prefix = row["common_prefix_message_count"] if trigger_group == "compaction": return "compaction" if trigger_group == "no_change" and row["appended_message_count"] == 0: return "no_change_role_stable" hard_reset_next_threshold = max(4, int(prev_count * 0.20)) hard_reset_prefix_threshold = max(2, int(prev_count * 0.10)) if next_count <= hard_reset_next_threshold and common_prefix <= hard_reset_prefix_threshold: return f"{trigger_group}_hard_reset" if next_count < prev_count: return f"{trigger_group}_history_trimmed" return f"{trigger_group}_context_rebuilt_shorter" def _message_slice_summary(messages, offset=0, top_n=5): role_counts = Counter() role_text_lens = Counter() total_text_len = 0 items = [] for index, message in enumerate(messages): role = message.get("role", "unknown") text_len = safe_int(message.get("text_len")) role_counts[role] += 1 role_text_lens[role] += text_len total_text_len += text_len items.append( { "index": offset + index, "role": role, "text_len": text_len, "content_type": message.get("content_type", ""), "item_count": safe_int(message.get("item_count")), "has_cache_control": bool(message.get("has_cache_control")), } ) items.sort(key=lambda item: item["text_len"], reverse=True) return { "count": len(messages), "total_text_len": total_text_len, "role_counts": dict(role_counts), "role_text_lens": dict(role_text_lens), "top_messages": items[:top_n], } def _format_counter(counter_dict): if not counter_dict: return "-" items = 
sorted(counter_dict.items(), key=lambda item: (-item[1], item[0])) return ", ".join(f"{key}={value}" for key, value in items) def _format_top_messages(items): if not items: return "-" parts = [] for item in items: cache_tag = " cache" if item["has_cache_control"] else "" parts.append( f"#{item['index']}:{item['role']} len={item['text_len']} " f"type={item['content_type']} items={item['item_count']}{cache_tag}" ) return " | ".join(parts) def _load_source_payloads(source_path, request_ids): source_path = Path(source_path) wanted_request_ids = {request_id for request_id in request_ids if request_id} payloads = {} if not wanted_request_ids: return payloads try: cmd = ["rg", "-F", "--no-heading"] for request_id in sorted(wanted_request_ids): cmd.extend(["-e", request_id]) cmd.append(str(source_path)) result = subprocess.run( cmd, capture_output=True, text=True, encoding="utf-8", check=False, ) if result.returncode in {0, 1}: for line in result.stdout.splitlines(): payload = _normalize_source_payload(json.loads(line)) request_id = payload["meta"].get("request_id", "") if request_id in wanted_request_ids: payloads[request_id] = payload except Exception: pass if len(payloads) >= len(wanted_request_ids): return payloads with open(source_path, "r", encoding="utf-8") as handle: for line in handle: payload = _normalize_source_payload(json.loads(line)) request_id = payload["meta"].get("request_id", "") if request_id in wanted_request_ids: payloads[request_id] = payload if len(payloads) >= len(wanted_request_ids): break return payloads def write_representative_pair_raw_messages( source_path, representative_pairs_path, output_path, ): source_path = Path(source_path) representative_pairs_path = Path(representative_pairs_path) output_path = Path(output_path) with open(representative_pairs_path, "r", encoding="utf-8") as handle: representative_pairs = json.load(handle) pairs = representative_pairs.get("pairs", []) request_to_pair_refs = defaultdict(list) for pair in pairs: prev_request_id = pair.get("prev_request_id", "") next_request_id = pair.get("next_request_id", "") if prev_request_id: request_to_pair_refs[prev_request_id].append(("prev", pair)) if next_request_id: request_to_pair_refs[next_request_id].append(("next", pair)) request_payloads = _load_source_payloads(source_path, request_to_pair_refs.keys()) output_path.parent.mkdir(parents=True, exist_ok=True) with open(output_path, "w", encoding="utf-8") as handle: for pair in pairs: prev_payload = request_payloads.get(pair.get("prev_request_id", ""), {}) next_payload = request_payloads.get(pair.get("next_request_id", ""), {}) row = { "category": pair.get("category", ""), "session_id": pair.get("session_id", ""), "trigger_group": pair.get("trigger_group", ""), "trigger_detail": pair.get("trigger_detail", ""), "source_scope": pair.get("source_scope", ""), "source_gap_ms": pair.get("source_gap_ms", 0), "gap_from_prev_ready_ms": pair.get("gap_from_prev_ready_ms", 0), "gap_from_prev_end_ms": pair.get("gap_from_prev_end_ms", 0), "prev_request_id": pair.get("prev_request_id", ""), "next_request_id": pair.get("next_request_id", ""), "provider": { "prev_input_tokens": pair.get("prev_input_tokens", 0), "next_input_tokens": pair.get("next_input_tokens", 0), "delta_input_tokens": pair.get("delta_input_tokens", 0), "delta_input_fraction_of_current": pair.get("delta_input_fraction_of_current", 0.0), }, "retokenized": { "prev_prompt_tokens": pair.get("prev_retokenized_prompt_tokens", 0), "next_prompt_tokens": pair.get("next_retokenized_prompt_tokens", 0), 
"delta_prompt_tokens": pair.get("delta_retokenized_prompt_tokens", 0), "delta_fraction_of_current": pair.get("delta_retokenized_fraction_of_current", 0.0), }, "prev": { "meta": prev_payload.get("meta", {}), "usage": prev_payload.get("usage", {}), "role_sequence": prev_payload.get("role_sequence", []), "messages": prev_payload.get("messages", []), "declared_tools": prev_payload.get("declared_tools", []), }, "next": { "meta": next_payload.get("meta", {}), "usage": next_payload.get("usage", {}), "role_sequence": next_payload.get("role_sequence", []), "messages": next_payload.get("messages", []), "declared_tools": next_payload.get("declared_tools", []), }, } handle.write(json.dumps(row, ensure_ascii=False) + "\n") return output_path def build_context_change_markdown_section(context_change_summary): if not context_change_summary: return "" lines = [ "## Context Change Deep Dive", "- This section focuses on why `input_tokens` can shrink between two consecutive requests in the same session.", "- We join provider-length transitions with retokenized transitions to separate true prompt shrink from provider-only accounting / serialization shrink.", "- Important caveat: `common_prefix_message_count` is based on message-role alignment, not exact message-content equality. The representative cases below therefore explain structural change at the message-summary level (`role`, `text_len`, `item_count`).", f"- Negative provider-length transitions: {context_change_summary['negative_provider_transition_count']} " f"({context_change_summary['negative_provider_fraction_of_all_transitions']:.4f} of all transitions)", f"- Sign agreement on those negative provider transitions: {json.dumps(context_change_summary['agreement_stats'], ensure_ascii=False)}", f"- Representative cases: `{DETAILS_DIR_NAME}/context_change_casebook.md`", "", "Generated figures:", f"- ![Context Change Mechanism Mix]({DETAILS_DIR_NAME}/context_change_mechanism_counts.png)", f"- ![Provider vs Retokenized Shrink Agreement by Mechanism]({DETAILS_DIR_NAME}/context_change_sign_agreement.png)", "", "Mechanism summary:", "| category | count | fraction_of_negative_provider | both_shrink_fraction | provider_only_shrink_fraction | p50_provider_delta_fraction_of_current | p50_retokenized_delta_fraction_of_current | p50_prev_message_count | p50_next_message_count | p50_common_prefix_message_count | p50_prev_tool_msg_count | p50_next_tool_msg_count |", "| --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- |", ] for row in context_change_summary["category_rows"]: lines.append( f"| {row['category']} | {row['count']} | {row['fraction_of_negative_provider']:.4f} | " f"{row['both_shrink_fraction']:.4f} | {row['provider_only_shrink_fraction']:.4f} | " f"{row['p50_provider_delta_fraction_of_current']:.4f} | " f"{row['p50_retokenized_delta_fraction_of_current']:.4f} | " f"{row['p50_prev_message_count']:.1f} | {row['p50_next_message_count']:.1f} | " f"{row['p50_common_prefix_message_count']:.1f} | " f"{row['p50_prev_tool_msg_count']:.1f} | {row['p50_next_tool_msg_count']:.1f} |" ) return "\n".join(lines) def write_context_change_deep_dive_from_existing(source_path, request_metrics_path, advanced_dir): advanced_dir = ensure_output_dir(Path(advanced_dir)) provider_edges_path = advanced_dir / "session_transition_edges.csv" retokenized_edges_path = advanced_dir / "session_transition_retokenized_edges.csv" request_lookup = _load_request_metric_lookup(request_metrics_path) retokenized_lookup = 
_load_retokenized_transition_lookup(retokenized_edges_path) negative_rows = [] category_values = defaultdict( lambda: { "provider_delta_fraction_of_current": [], "retokenized_delta_fraction_of_current": [], "prev_message_count": [], "next_message_count": [], "common_prefix_message_count": [], "prev_tool_msg_count": [], "next_tool_msg_count": [], "both_shrink_count": 0, "provider_only_shrink_count": 0, } ) agreement_counter = Counter() transition_count = 0 with open(provider_edges_path, "r", encoding="utf-8") as handle: for row in csv.DictReader(handle): transition_count += 1 provider_delta_fraction = safe_float(row.get("delta_input_fraction_of_current")) if provider_delta_fraction >= 0: continue prev_request_id = row.get("prev_request_id", "") next_request_id = row.get("next_request_id", "") retokenized = retokenized_lookup.get((prev_request_id, next_request_id), {}) retokenized_delta_fraction = safe_float(retokenized.get("delta_retokenized_fraction_of_current")) retokenized_negative = retokenized_delta_fraction < 0 category = _classify_context_change_mechanism( { "trigger_group": row.get("trigger_group", ""), "prev_message_count": safe_int(row.get("prev_message_count")), "next_message_count": safe_int(row.get("next_message_count")), "common_prefix_message_count": safe_int(row.get("common_prefix_message_count")), "appended_message_count": safe_int(row.get("appended_message_count")), } ) prev_metrics = request_lookup.get(prev_request_id, {}) next_metrics = request_lookup.get(next_request_id, {}) joined = { "session_id": row.get("session_id", ""), "prev_request_id": prev_request_id, "next_request_id": next_request_id, "trigger_group": row.get("trigger_group", ""), "trigger_detail": row.get("trigger_detail", ""), "source_scope": row.get("source_scope", ""), "source_gap_ms": safe_int(row.get("source_gap_ms")), "gap_from_prev_ready_ms": safe_int(row.get("gap_from_prev_ready_ms")), "gap_from_prev_end_ms": safe_int(row.get("gap_from_prev_end_ms")), "prev_input_tokens": safe_int(row.get("prev_input_tokens")), "next_input_tokens": safe_int(row.get("next_input_tokens")), "delta_input_tokens": safe_int(row.get("delta_input_tokens")), "delta_input_fraction_of_current": provider_delta_fraction, "next_cache_hit_ratio": safe_float(row.get("next_cache_hit_ratio")), "uncached_fraction_of_current": safe_float(row.get("uncached_fraction_of_current")), "prev_message_count": safe_int(row.get("prev_message_count")), "next_message_count": safe_int(row.get("next_message_count")), "common_prefix_message_count": safe_int(row.get("common_prefix_message_count")), "appended_message_count": safe_int(row.get("appended_message_count")), "appended_roles": row.get("appended_roles", ""), "category": category, "prev_tool_msg_count": prev_metrics.get("tool_msg_count", 0), "next_tool_msg_count": next_metrics.get("tool_msg_count", 0), "prev_assistant_msg_count": prev_metrics.get("assistant_msg_count", 0), "next_assistant_msg_count": next_metrics.get("assistant_msg_count", 0), "prev_user_msg_count": prev_metrics.get("user_msg_count", 0), "next_user_msg_count": next_metrics.get("user_msg_count", 0), "prev_system_msg_count": prev_metrics.get("system_msg_count", 0), "next_system_msg_count": next_metrics.get("system_msg_count", 0), "prev_retokenized_prompt_tokens": retokenized.get("prev_retokenized_prompt_tokens", 0), "next_retokenized_prompt_tokens": retokenized.get("next_retokenized_prompt_tokens", 0), "delta_retokenized_prompt_tokens": retokenized.get("delta_retokenized_prompt_tokens", 0), "delta_retokenized_fraction_of_current": 
retokenized_delta_fraction, "retokenized_negative": 1 if retokenized_negative else 0, } negative_rows.append(joined) bucket = "both_shrink" if retokenized_negative else "provider_only_shrink" agreement_counter[bucket] += 1 values = category_values[category] values["provider_delta_fraction_of_current"].append(provider_delta_fraction) values["retokenized_delta_fraction_of_current"].append(retokenized_delta_fraction) values["prev_message_count"].append(joined["prev_message_count"]) values["next_message_count"].append(joined["next_message_count"]) values["common_prefix_message_count"].append(joined["common_prefix_message_count"]) values["prev_tool_msg_count"].append(joined["prev_tool_msg_count"]) values["next_tool_msg_count"].append(joined["next_tool_msg_count"]) if retokenized_negative: values["both_shrink_count"] += 1 else: values["provider_only_shrink_count"] += 1 category_rows = [] for category, values in sorted( category_values.items(), key=lambda item: len(item[1]["provider_delta_fraction_of_current"]), reverse=True, ): count = len(values["provider_delta_fraction_of_current"]) category_rows.append( { "category": category, "count": count, "fraction_of_negative_provider": safe_div(count, len(negative_rows)), "both_shrink_fraction": safe_div(values["both_shrink_count"], count), "provider_only_shrink_fraction": safe_div(values["provider_only_shrink_count"], count), "p50_provider_delta_fraction_of_current": percentile( values["provider_delta_fraction_of_current"], 0.50 ), "p50_retokenized_delta_fraction_of_current": percentile( values["retokenized_delta_fraction_of_current"], 0.50 ), "p50_prev_message_count": percentile(values["prev_message_count"], 0.50), "p50_next_message_count": percentile(values["next_message_count"], 0.50), "p50_common_prefix_message_count": percentile( values["common_prefix_message_count"], 0.50 ), "p50_prev_tool_msg_count": percentile(values["prev_tool_msg_count"], 0.50), "p50_next_tool_msg_count": percentile(values["next_tool_msg_count"], 0.50), } ) write_csv(advanced_dir / "context_change_mechanism_summary.csv", category_rows) plot_bar_chart( advanced_dir / "context_change_mechanism_counts.png", category_rows, label_key="category", value_key="count", title="Negative Provider-Length Transition Mechanisms", xlabel="Transition count", ylabel="Mechanism", top_n=min(12, len(category_rows)), ) plot_grouped_bar_chart( advanced_dir / "context_change_sign_agreement.png", category_rows[: min(12, len(category_rows))], label_key="category", series_keys=[ ("both_shrink_fraction", "both_shrink_fraction"), ("provider_only_shrink_fraction", "provider_only_shrink_fraction"), ], title="Provider vs Retokenized Shrink Agreement by Mechanism", xlabel="Mechanism", ylabel="Fraction of negative provider transitions", ) chosen_rows = {} for category in [row["category"] for row in category_rows]: matched = [row for row in negative_rows if row["category"] == category] if not matched: continue median_delta = percentile( [row["delta_input_fraction_of_current"] for row in matched], 0.50, ) chosen_rows[category] = min( matched, key=lambda row: abs(row["delta_input_fraction_of_current"] - median_delta), ) wanted_request_ids = set() for row in chosen_rows.values(): wanted_request_ids.add(row["prev_request_id"]) wanted_request_ids.add(row["next_request_id"]) request_messages = { request_id: payload.get("messages", []) for request_id, payload in _load_source_payloads(source_path, wanted_request_ids).items() } casebook_lines = [ "# Context Change Deep Dive", "", "This file explains why provider-length 
context shrinks happen between consecutive requests inside the same session.", "", "- Scope: only transitions with `delta_input_fraction_of_current < 0` under provider `usage.input_tokens`.", "- Comparison: each transition is joined with retokenized prompt length to separate structural shrink from provider-only shrink.", "- Caveat: the prefix comparison is role-based, not content-hash based, so the case explanations are structural summaries rather than exact diff hunks.", "", "## Mechanism Summary", "| category | count | fraction_of_negative_provider | both_shrink_fraction | provider_only_shrink_fraction | p50_provider_delta_fraction_of_current | p50_retokenized_delta_fraction_of_current | p50_prev_message_count | p50_next_message_count | p50_common_prefix_message_count |", "| --- | --- | --- | --- | --- | --- | --- | --- | --- | --- |", ] for row in category_rows: casebook_lines.append( f"| {row['category']} | {row['count']} | {row['fraction_of_negative_provider']:.4f} | " f"{row['both_shrink_fraction']:.4f} | {row['provider_only_shrink_fraction']:.4f} | " f"{row['p50_provider_delta_fraction_of_current']:.4f} | " f"{row['p50_retokenized_delta_fraction_of_current']:.4f} | " f"{row['p50_prev_message_count']:.1f} | {row['p50_next_message_count']:.1f} | " f"{row['p50_common_prefix_message_count']:.1f} |" ) for row in category_rows: category = row["category"] chosen = chosen_rows.get(category) if not chosen: continue prev_messages = request_messages.get(chosen["prev_request_id"], []) next_messages = request_messages.get(chosen["next_request_id"], []) common_prefix = min(chosen["common_prefix_message_count"], len(prev_messages), len(next_messages)) prev_prefix_summary = _message_slice_summary(prev_messages[:common_prefix], 0) removed_summary = _message_slice_summary(prev_messages[common_prefix:], common_prefix) added_summary = _message_slice_summary(next_messages[common_prefix:], common_prefix) prev_full_summary = _message_slice_summary(prev_messages, 0) next_full_summary = _message_slice_summary(next_messages, 0) casebook_lines.extend( [ "", f"## {category}", f"- Pair: `{chosen['prev_request_id']}` -> `{chosen['next_request_id']}` in session `{chosen['session_id']}`", f"- Trigger: `{chosen['trigger_group']}` / `{chosen['trigger_detail']}`; source scope: `{chosen['source_scope']}`; source gap: `{chosen['source_gap_ms']} ms`", f"- Provider tokens: `{chosen['prev_input_tokens']}` -> `{chosen['next_input_tokens']}` (`{chosen['delta_input_tokens']}`), signed delta/current=`{chosen['delta_input_fraction_of_current']:.4f}`", f"- Retokenized tokens: `{chosen['prev_retokenized_prompt_tokens']}` -> `{chosen['next_retokenized_prompt_tokens']}` (`{chosen['delta_retokenized_prompt_tokens']}`), signed delta/current=`{chosen['delta_retokenized_fraction_of_current']:.4f}`", f"- Message counts: `{chosen['prev_message_count']}` -> `{chosen['next_message_count']}`, role-prefix-aligned common prefix=`{chosen['common_prefix_message_count']}`, appended count=`{chosen['appended_message_count']}`", f"- Cache on next request: hit=`{chosen['next_cache_hit_ratio']:.4f}`, uncached/current=`{chosen['uncached_fraction_of_current']:.4f}`, gap ready->ready=`{chosen['gap_from_prev_ready_ms']} ms`, end->ready=`{chosen['gap_from_prev_end_ms']} ms`", f"- Prev role counts: `{_format_counter(prev_full_summary['role_counts'])}`; next role counts: `{_format_counter(next_full_summary['role_counts'])}`", f"- Prev role text lens: `{_format_counter(prev_full_summary['role_text_lens'])}`; next role text lens: 
`{_format_counter(next_full_summary['role_text_lens'])}`", f"- Prefix kept: `{prev_prefix_summary['count']}` msgs, total_text_len=`{prev_prefix_summary['total_text_len']}`, roles=`{_format_counter(prev_prefix_summary['role_counts'])}`", f"- Removed tail from previous prompt: `{removed_summary['count']}` msgs, total_text_len=`{removed_summary['total_text_len']}`, roles=`{_format_counter(removed_summary['role_counts'])}`", f"- Added tail in next prompt: `{added_summary['count']}` msgs, total_text_len=`{added_summary['total_text_len']}`, roles=`{_format_counter(added_summary['role_counts'])}`", f"- Largest removed messages: `{_format_top_messages(removed_summary['top_messages'])}`", f"- Largest added messages: `{_format_top_messages(added_summary['top_messages'])}`", ] ) casebook_path = advanced_dir / "context_change_casebook.md" casebook_path.write_text("\n".join(casebook_lines) + "\n", encoding="utf-8") representative_pairs = { "pair_count": len(chosen_rows), "pairs": [ { "category": category, **chosen_rows[category], } for category in [row["category"] for row in category_rows] if category in chosen_rows ], } representative_pairs_path = advanced_dir / "context_change_representative_pairs.json" representative_pairs_path.write_text( json.dumps(representative_pairs, ensure_ascii=False, indent=2), encoding="utf-8", ) summary = { "negative_provider_transition_count": len(negative_rows), "negative_provider_fraction_of_all_transitions": safe_div(len(negative_rows), transition_count), "agreement_stats": { "both_shrink_fraction": safe_div(agreement_counter["both_shrink"], len(negative_rows)), "provider_only_shrink_fraction": safe_div( agreement_counter["provider_only_shrink"], len(negative_rows) ), }, "category_rows": category_rows, "casebook_path": str(casebook_path), "representative_pairs_path": str(representative_pairs_path), } summary_path = advanced_dir / "context_change_summary.json" with open(summary_path, "w", encoding="utf-8") as handle: json.dump(summary, handle, ensure_ascii=False, indent=2) return summary, summary_path, casebook_path, representative_pairs_path def write_transition_outputs_from_existing(source_path, request_metrics_path, advanced_dir): advanced_dir = ensure_output_dir(advanced_dir) transition_edges_path = advanced_dir / "session_transition_edges.csv" request_lookup = {} session_last = {} trigger_group_counter = Counter() source_scope_counter = Counter() negative_delta_count = 0 negative_delta_by_trigger = Counter() negative_delta_by_source_scope = Counter() trigger_group_values = defaultdict( lambda: { "delta_fraction_of_current": [], "abs_delta_fraction_of_current": [], "uncached_fraction_of_current": [], } ) source_scope_gaps = defaultdict(list) delta_fraction_current_values = [] abs_delta_fraction_current_values = [] uncached_fraction_current_values = [] source_gap_ms_values = [] with open(request_metrics_path, "r", encoding="utf-8") as metrics_handle, open( transition_edges_path, "w", encoding="utf-8", newline="" ) as output_handle: metrics_reader = csv.DictReader(metrics_handle) normalized_iter = _iter_source_message_minimal(source_path) writer = None for normalized_row, metric_row in zip(normalized_iter, metrics_reader): if normalized_row["request_id"] != metric_row.get("request_id", ""): raise ValueError( f"request order mismatch between source trace and request metrics: " f"{normalized_row['request_id']} != {metric_row.get('request_id', '')}" ) current = { "request_id": metric_row["request_id"], "session_id": metric_row["session_id"], "request_ready_time_ms": 
safe_int(metric_row.get("request_ready_time_ms")), "request_end_time_ms": safe_int(metric_row.get("request_end_time_ms")), "input_tokens": safe_int(metric_row.get("input_tokens")), "uncached_prompt_tokens": safe_int(metric_row.get("uncached_prompt_tokens")), "cache_hit_ratio": safe_float(metric_row.get("cache_hit_ratio")), "theoretical_source_request_id": metric_row.get("theoretical_source_request_id", ""), "messages": normalized_row["messages"], } request_lookup[current["request_id"]] = { "session_id": current["session_id"], "request_ready_time_ms": current["request_ready_time_ms"], } previous = session_last.get(current["session_id"]) if previous is not None: transition = _classify_transition(previous["messages"], current["messages"]) delta_input_tokens = current["input_tokens"] - previous["input_tokens"] abs_delta_fraction_of_current = safe_div(abs(delta_input_tokens), current["input_tokens"]) delta_fraction_of_prev = safe_div(delta_input_tokens, previous["input_tokens"]) delta_fraction_of_current = safe_div(delta_input_tokens, current["input_tokens"]) uncached_fraction_of_current = safe_div(current["uncached_prompt_tokens"], current["input_tokens"]) source_request_id = current["theoretical_source_request_id"] source_info = request_lookup.get(source_request_id) source_gap_ms = 0 if not source_request_id: source_scope = "none" elif source_request_id == previous["request_id"]: source_scope = "prev_round" source_gap_ms = max(current["request_ready_time_ms"] - previous["request_ready_time_ms"], 0) elif source_info is None: source_scope = "unknown" elif source_info["session_id"] == current["session_id"]: source_scope = "same_session_earlier" source_gap_ms = max(current["request_ready_time_ms"] - source_info["request_ready_time_ms"], 0) else: source_scope = "cross_session" source_gap_ms = max(current["request_ready_time_ms"] - source_info["request_ready_time_ms"], 0) row = { "session_id": current["session_id"], "prev_request_id": previous["request_id"], "next_request_id": current["request_id"], "prev_request_ready_time_ms": previous["request_ready_time_ms"], "prev_request_end_time_ms": previous["request_end_time_ms"], "next_request_ready_time_ms": current["request_ready_time_ms"], "gap_from_prev_ready_ms": max(current["request_ready_time_ms"] - previous["request_ready_time_ms"], 0), "gap_from_prev_end_ms": max(current["request_ready_time_ms"] - previous["request_end_time_ms"], 0), "prev_input_tokens": previous["input_tokens"], "next_input_tokens": current["input_tokens"], "delta_input_tokens": delta_input_tokens, "delta_input_fraction_of_prev": delta_fraction_of_prev, "delta_input_fraction_of_current": delta_fraction_of_current, "abs_delta_input_fraction_of_current": abs_delta_fraction_of_current, "next_uncached_prompt_tokens": current["uncached_prompt_tokens"], "uncached_fraction_of_current": uncached_fraction_of_current, "next_cache_hit_ratio": current["cache_hit_ratio"], "prev_message_count": len(previous["messages"]), "next_message_count": len(current["messages"]), **transition, "theoretical_source_request_id": source_request_id, "source_scope": source_scope, "source_gap_ms": source_gap_ms, } if writer is None: writer = csv.DictWriter(output_handle, fieldnames=list(row.keys())) writer.writeheader() writer.writerow(row) trigger_group_counter[row["trigger_group"]] += 1 source_scope_counter[source_scope] += 1 if delta_fraction_of_current < 0: negative_delta_count += 1 negative_delta_by_trigger[row["trigger_group"]] += 1 negative_delta_by_source_scope[source_scope] += 1 
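# Accumulate per-trigger-group and per-source-scope distributions for the
# summary tables and grouped CDFs written below. Signed and absolute deltas
# are normalized by the next request's input_tokens, so -0.5 means the prompt
# shrank by half of its new length.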
trigger_group_values[row["trigger_group"]]["delta_fraction_of_current"].append(delta_fraction_of_current) trigger_group_values[row["trigger_group"]]["abs_delta_fraction_of_current"].append(abs_delta_fraction_of_current) trigger_group_values[row["trigger_group"]]["uncached_fraction_of_current"].append(uncached_fraction_of_current) source_scope_gaps[source_scope].append(source_gap_ms) delta_fraction_current_values.append(delta_fraction_of_current) abs_delta_fraction_current_values.append(abs_delta_fraction_of_current) uncached_fraction_current_values.append(uncached_fraction_of_current) if source_scope != "none": source_gap_ms_values.append(source_gap_ms) session_last[current["session_id"]] = current transition_count = sum(trigger_group_counter.values()) trigger_group_rows = [] for trigger_group, count in trigger_group_counter.most_common(): abs_delta_values = trigger_group_values[trigger_group]["abs_delta_fraction_of_current"] uncached_values = trigger_group_values[trigger_group]["uncached_fraction_of_current"] trigger_group_rows.append( { "trigger_group": trigger_group, "count": count, "fraction": safe_div(count, transition_count), "negative_delta_count": negative_delta_by_trigger[trigger_group], "negative_delta_fraction": safe_div(negative_delta_by_trigger[trigger_group], count), "p50_abs_delta_fraction_of_current": percentile(abs_delta_values, 0.5) if abs_delta_values else 0.0, "p90_abs_delta_fraction_of_current": percentile(abs_delta_values, 0.9) if abs_delta_values else 0.0, "p50_uncached_fraction_of_current": percentile(uncached_values, 0.5) if uncached_values else 0.0, "p90_uncached_fraction_of_current": percentile(uncached_values, 0.9) if uncached_values else 0.0, } ) source_scope_rows = [] for source_scope, count in source_scope_counter.most_common(): gap_values = [value for value in source_scope_gaps[source_scope] if value is not None] source_scope_rows.append( { "source_scope": source_scope, "count": count, "fraction": safe_div(count, transition_count), "negative_delta_count": negative_delta_by_source_scope[source_scope], "negative_delta_fraction": safe_div(negative_delta_by_source_scope[source_scope], count), "p50_source_gap_ms": percentile(gap_values, 0.5) if gap_values else 0.0, "p90_source_gap_ms": percentile(gap_values, 0.9) if gap_values else 0.0, } ) write_csv(advanced_dir / "transition_trigger_groups.csv", trigger_group_rows) write_csv(advanced_dir / "transition_source_scope.csv", source_scope_rows) write_csv( advanced_dir / "cdf_transition_delta_fraction_of_current.csv", build_cdf(delta_fraction_current_values), ) write_csv( advanced_dir / "cdf_transition_source_gap_ms.csv", build_cdf(source_gap_ms_values), ) write_csv( advanced_dir / "cdf_transition_abs_delta_fraction_of_current.csv", build_cdf(abs_delta_fraction_current_values), ) write_csv( advanced_dir / "cdf_transition_delta_fraction_by_trigger.csv", build_grouped_cdf_rows( { row["trigger_group"]: trigger_group_values[row["trigger_group"]]["delta_fraction_of_current"] for row in trigger_group_rows }, "trigger_group", ), ) write_csv( advanced_dir / "cdf_transition_abs_delta_fraction_by_trigger.csv", build_grouped_cdf_rows( { row["trigger_group"]: trigger_group_values[row["trigger_group"]]["abs_delta_fraction_of_current"] for row in trigger_group_rows }, "trigger_group", ), ) write_csv( advanced_dir / "cdf_transition_uncached_fraction_by_trigger.csv", build_grouped_cdf_rows( { row["trigger_group"]: trigger_group_values[row["trigger_group"]]["uncached_fraction_of_current"] for row in trigger_group_rows }, 
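# "trigger_group" is the group-key label passed to build_grouped_cdf_rows, so
# each trigger group contributes its own CDF series in the *_by_trigger CSVs.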
"trigger_group", ), ) write_csv( advanced_dir / "cdf_transition_source_gap_ms_by_scope.csv", build_grouped_cdf_rows( { row["source_scope"]: source_scope_gaps[row["source_scope"]] for row in source_scope_rows if row["source_scope"] not in {"none"} and source_scope_gaps[row["source_scope"]] }, "source_scope", ), ) plot_bar_chart( advanced_dir / "transition_trigger_groups.png", trigger_group_rows, label_key="trigger_group", value_key="count", title="Session Transition Trigger Groups", xlabel="Transition count", ylabel="Trigger", top_n=min(12, len(trigger_group_rows)), ) plot_grouped_bar_chart( advanced_dir / "transition_context_change_by_trigger.png", trigger_group_rows[: min(8, len(trigger_group_rows))], label_key="trigger_group", series_keys=[ ("p50_abs_delta_fraction_of_current", "p50_abs_delta/current"), ("p90_abs_delta_fraction_of_current", "p90_abs_delta/current"), ("p50_uncached_fraction_of_current", "p50_uncached/current"), ], title="Context Change by Trigger Group", xlabel="Trigger group", ylabel="Fraction", ) plot_fraction_bar_chart( advanced_dir / "transition_source_scope.png", source_scope_rows, label_key="source_scope", value_key="fraction", title="KV-Cache Source Scope by Transition", ) plot_cdf_series( advanced_dir / "cdf_transition_delta_fraction_by_trigger.png", [ ( row["trigger_group"], build_cdf(trigger_group_values[row["trigger_group"]]["delta_fraction_of_current"]), ) for row in trigger_group_rows if trigger_group_values[row["trigger_group"]]["delta_fraction_of_current"] ], title="CDF of Signed Context Delta by Trigger Group", xlabel="(next_input_tokens - prev_input_tokens) / next_input_tokens", ) plot_cdf_series_with_zoom_windows( advanced_dir / "cdf_transition_delta_fraction_by_trigger_zoom.png", [ ( row["trigger_group"], build_cdf(trigger_group_values[row["trigger_group"]]["delta_fraction_of_current"]), ) for row in trigger_group_rows if trigger_group_values[row["trigger_group"]]["delta_fraction_of_current"] ], title="CDF of Signed Context Delta by Trigger Group with Central 80% / 90% Zoom", xlabel="(next_input_tokens - prev_input_tokens) / next_input_tokens", zoom_windows=[ (0.10, 0.90, "Central 80% (p10-p90)"), (0.05, 0.95, "Central 90% (p05-p95)"), ], ) plot_cdf_series( advanced_dir / "cdf_transition_abs_delta_fraction_by_trigger.png", [ ( row["trigger_group"], build_cdf(trigger_group_values[row["trigger_group"]]["abs_delta_fraction_of_current"]), ) for row in trigger_group_rows if trigger_group_values[row["trigger_group"]]["abs_delta_fraction_of_current"] ], title="CDF of Absolute Context Delta by Trigger Group", xlabel="abs(next_input_tokens - prev_input_tokens) / next_input_tokens", ) plot_cdf_series( advanced_dir / "cdf_transition_uncached_fraction_by_trigger.png", [ ( row["trigger_group"], build_cdf(trigger_group_values[row["trigger_group"]]["uncached_fraction_of_current"]), ) for row in trigger_group_rows if trigger_group_values[row["trigger_group"]]["uncached_fraction_of_current"] ], title="CDF of Uncached Prompt Fraction by Trigger Group", xlabel="uncached_prompt_tokens / next_input_tokens", ) plot_cdf_series( advanced_dir / "cdf_transition_source_gap_ms_by_scope.png", [ (row["source_scope"], build_cdf(source_scope_gaps[row["source_scope"]])) for row in source_scope_rows if row["source_scope"] not in {"none"} and source_scope_gaps[row["source_scope"]] ], title="CDF of KV-Cache Source Gap by Scope", xlabel="Milliseconds", ) summary = { "transition_count": transition_count, "negative_delta_count": negative_delta_count, "negative_delta_fraction": 
safe_div(negative_delta_count, transition_count), "trigger_group_stats": {row["trigger_group"]: row["fraction"] for row in trigger_group_rows}, "source_scope_stats": {row["source_scope"]: row["fraction"] for row in source_scope_rows}, "delta_fraction_current_stats": series_stats(delta_fraction_current_values), "abs_delta_fraction_current_stats": series_stats(abs_delta_fraction_current_values), "uncached_fraction_current_stats": series_stats(uncached_fraction_current_values), "source_gap_ms_stats": series_stats(source_gap_ms_values), "trigger_group_rows": trigger_group_rows, "source_scope_rows": source_scope_rows, } summary_path = advanced_dir / "transition_patterns_summary.json" with open(summary_path, "w", encoding="utf-8") as handle: json.dump(summary, handle, ensure_ascii=False, indent=2) return summary, summary_path, transition_edges_path def write_retokenized_transition_outputs_from_existing(source_path, request_metrics_path, advanced_dir): advanced_dir = ensure_output_dir(advanced_dir) transition_edges_path = advanced_dir / "session_transition_retokenized_edges.csv" session_last = {} trigger_group_counter = Counter() negative_delta_count = 0 negative_delta_by_trigger = Counter() trigger_group_values = defaultdict( lambda: { "delta_fraction_of_current": [], "abs_delta_fraction_of_current": [], } ) delta_fraction_current_values = [] abs_delta_fraction_current_values = [] delta_tokens_values = [] abs_delta_tokens_values = [] with open(request_metrics_path, "r", encoding="utf-8") as metrics_handle, open( transition_edges_path, "w", encoding="utf-8", newline="" ) as output_handle: metrics_reader = csv.DictReader(metrics_handle) normalized_iter = _iter_source_message_minimal(source_path) writer = None for normalized_row, metric_row in zip(normalized_iter, metrics_reader): if normalized_row["request_id"] != metric_row.get("request_id", ""): raise ValueError( f"request order mismatch between source trace and request metrics: " f"{normalized_row['request_id']} != {metric_row.get('request_id', '')}" ) current = { "request_id": metric_row["request_id"], "session_id": metric_row["session_id"], "request_ready_time_ms": safe_int(metric_row.get("request_ready_time_ms")), "request_end_time_ms": safe_int(metric_row.get("request_end_time_ms")), "retokenized_prompt_tokens": safe_int(metric_row.get("theoretical_prompt_unit_length")), "messages": normalized_row["messages"], } previous = session_last.get(current["session_id"]) if previous is not None: transition = _classify_transition(previous["messages"], current["messages"]) delta_tokens = current["retokenized_prompt_tokens"] - previous["retokenized_prompt_tokens"] delta_fraction_of_prev = safe_div(delta_tokens, previous["retokenized_prompt_tokens"]) delta_fraction_of_current = safe_div(delta_tokens, current["retokenized_prompt_tokens"]) abs_delta_fraction_of_current = safe_div(abs(delta_tokens), current["retokenized_prompt_tokens"]) row = { "session_id": current["session_id"], "prev_request_id": previous["request_id"], "next_request_id": current["request_id"], "prev_request_ready_time_ms": previous["request_ready_time_ms"], "prev_request_end_time_ms": previous["request_end_time_ms"], "next_request_ready_time_ms": current["request_ready_time_ms"], "gap_from_prev_ready_ms": max(current["request_ready_time_ms"] - previous["request_ready_time_ms"], 0), "gap_from_prev_end_ms": max(current["request_ready_time_ms"] - previous["request_end_time_ms"], 0), "prev_retokenized_prompt_tokens": previous["retokenized_prompt_tokens"], "next_retokenized_prompt_tokens": 
current["retokenized_prompt_tokens"], "delta_retokenized_prompt_tokens": delta_tokens, "delta_retokenized_fraction_of_prev": delta_fraction_of_prev, "delta_retokenized_fraction_of_current": delta_fraction_of_current, "abs_delta_retokenized_fraction_of_current": abs_delta_fraction_of_current, "prev_message_count": len(previous["messages"]), "next_message_count": len(current["messages"]), **transition, } if writer is None: writer = csv.DictWriter(output_handle, fieldnames=list(row.keys())) writer.writeheader() writer.writerow(row) trigger_group_counter[row["trigger_group"]] += 1 if delta_fraction_of_current < 0: negative_delta_count += 1 negative_delta_by_trigger[row["trigger_group"]] += 1 trigger_group_values[row["trigger_group"]]["delta_fraction_of_current"].append(delta_fraction_of_current) trigger_group_values[row["trigger_group"]]["abs_delta_fraction_of_current"].append(abs_delta_fraction_of_current) delta_fraction_current_values.append(delta_fraction_of_current) abs_delta_fraction_current_values.append(abs_delta_fraction_of_current) delta_tokens_values.append(delta_tokens) abs_delta_tokens_values.append(abs(delta_tokens)) session_last[current["session_id"]] = current transition_count = sum(trigger_group_counter.values()) trigger_group_rows = [] for trigger_group, count in trigger_group_counter.most_common(): signed_values = trigger_group_values[trigger_group]["delta_fraction_of_current"] abs_values = trigger_group_values[trigger_group]["abs_delta_fraction_of_current"] trigger_group_rows.append( { "trigger_group": trigger_group, "count": count, "fraction": safe_div(count, transition_count), "negative_delta_count": negative_delta_by_trigger[trigger_group], "negative_delta_fraction": safe_div(negative_delta_by_trigger[trigger_group], count), "p10_signed_delta_fraction_of_current": percentile(signed_values, 0.10) if signed_values else 0.0, "p50_signed_delta_fraction_of_current": percentile(signed_values, 0.50) if signed_values else 0.0, "p90_signed_delta_fraction_of_current": percentile(signed_values, 0.90) if signed_values else 0.0, "p95_signed_delta_fraction_of_current": percentile(signed_values, 0.95) if signed_values else 0.0, "p50_abs_delta_fraction_of_current": percentile(abs_values, 0.50) if abs_values else 0.0, "p90_abs_delta_fraction_of_current": percentile(abs_values, 0.90) if abs_values else 0.0, } ) write_csv(advanced_dir / "transition_retokenized_trigger_groups.csv", trigger_group_rows) write_csv( advanced_dir / "cdf_transition_retokenized_delta_fraction_of_current.csv", build_cdf(delta_fraction_current_values), ) write_csv( advanced_dir / "cdf_transition_retokenized_abs_delta_fraction_of_current.csv", build_cdf(abs_delta_fraction_current_values), ) write_csv( advanced_dir / "cdf_transition_retokenized_delta_fraction_by_trigger.csv", build_grouped_cdf_rows( { row["trigger_group"]: trigger_group_values[row["trigger_group"]]["delta_fraction_of_current"] for row in trigger_group_rows }, "trigger_group", ), ) write_csv( advanced_dir / "cdf_transition_retokenized_abs_delta_fraction_by_trigger.csv", build_grouped_cdf_rows( { row["trigger_group"]: trigger_group_values[row["trigger_group"]]["abs_delta_fraction_of_current"] for row in trigger_group_rows }, "trigger_group", ), ) plot_cdf_series( advanced_dir / "cdf_transition_retokenized_delta_fraction_by_trigger.png", [ ( row["trigger_group"], build_cdf(trigger_group_values[row["trigger_group"]]["delta_fraction_of_current"]), ) for row in trigger_group_rows if trigger_group_values[row["trigger_group"]]["delta_fraction_of_current"] ], 
title="CDF of Retokenized Signed Context Delta by Trigger Group", xlabel="(next_retokenized_prompt_tokens - prev_retokenized_prompt_tokens) / next_retokenized_prompt_tokens", ) plot_cdf_series_with_zoom_windows( advanced_dir / "cdf_transition_retokenized_delta_fraction_by_trigger_zoom.png", [ ( row["trigger_group"], build_cdf(trigger_group_values[row["trigger_group"]]["delta_fraction_of_current"]), ) for row in trigger_group_rows if trigger_group_values[row["trigger_group"]]["delta_fraction_of_current"] ], title="CDF of Retokenized Signed Context Delta by Trigger Group with Central 80% / 90% Zoom", xlabel="(next_retokenized_prompt_tokens - prev_retokenized_prompt_tokens) / next_retokenized_prompt_tokens", zoom_windows=[ (0.10, 0.90, "Central 80% (p10-p90)"), (0.05, 0.95, "Central 90% (p05-p95)"), ], ) plot_cdf_series( advanced_dir / "cdf_transition_retokenized_abs_delta_fraction_by_trigger.png", [ ( row["trigger_group"], build_cdf(trigger_group_values[row["trigger_group"]]["abs_delta_fraction_of_current"]), ) for row in trigger_group_rows if trigger_group_values[row["trigger_group"]]["abs_delta_fraction_of_current"] ], title="CDF of Retokenized Absolute Context Delta by Trigger Group", xlabel="abs(next_retokenized_prompt_tokens - prev_retokenized_prompt_tokens) / next_retokenized_prompt_tokens", ) summary = { "transition_count": transition_count, "negative_delta_count": negative_delta_count, "negative_delta_fraction": safe_div(negative_delta_count, transition_count), "delta_fraction_current_stats": series_stats(delta_fraction_current_values), "abs_delta_fraction_current_stats": series_stats(abs_delta_fraction_current_values), "delta_tokens_stats": series_stats(delta_tokens_values), "abs_delta_tokens_stats": series_stats(abs_delta_tokens_values), "trigger_group_rows": trigger_group_rows, } summary_path = advanced_dir / "transition_retokenized_summary.json" with open(summary_path, "w", encoding="utf-8") as handle: json.dump(summary, handle, ensure_ascii=False, indent=2) return summary, summary_path, transition_edges_path def chunk_units(units, block_size): return [tuple(units[index:index + block_size]) for index in range(0, len(units), block_size)] def compute_theoretical_cache( records, block_size=256, segment_mode="tokenizer", tokenizer_path=None, model_family="auto", model_meta_dir=None, show_progress=False, ): segment, resolved_tokenizer_path = load_segmenter( segment_mode=segment_mode, tokenizer_path=tokenizer_path, model_family=model_family, model_meta_dir=model_meta_dir, records=records, ) sorted_records = sort_records_for_time(records) cache = {} request_rows = [] reuse_gap_rows = [] process = psutil.Process(os.getpid()) if show_progress else None peak_rss_mb = 0.0 iterator = sorted_records progress = None if show_progress: progress = tqdm( total=len(sorted_records), desc="Theoretical KV reuse", unit="req", dynamic_ncols=True, ) for record in iterator: units = segment(record.canonical_prompt) blocks = chunk_units(units, block_size) prev_hash = None prefix_match_blocks = 0 prefix_active = True longest_source_request_id = "" for block_index, block in enumerate(blocks): block_hash = hash((prev_hash, *block)) meta = cache.get(block_hash) if meta and prefix_active: prefix_match_blocks += 1 longest_source_request_id = meta["last_request_id"] reuse_gap_rows.append( { "request_id": record.meta.request_id, "session_id": record.meta.session_id, "block_index": block_index, "source_request_id": meta["last_request_id"], "reuse_gap_ms": max(record.meta.request_ready_time_ms - meta["last_seen_ms"], 
0), "age_from_first_seen_ms": max(record.meta.request_ready_time_ms - meta["first_seen_ms"], 0), } ) meta["last_seen_ms"] = record.meta.request_ready_time_ms meta["last_reuse_ms"] = record.meta.request_ready_time_ms meta["last_request_id"] = record.meta.request_id meta["last_reuse_request_id"] = record.meta.request_id meta["reuse_count"] += 1 else: prefix_active = False if not meta: cache[block_hash] = { "hash": block_hash, "first_seen_ms": record.meta.request_ready_time_ms, "last_seen_ms": record.meta.request_ready_time_ms, "last_reuse_ms": 0, "first_request_id": record.meta.request_id, "last_request_id": record.meta.request_id, "last_reuse_request_id": "", "reuse_count": 0, "block_index": block_index, } else: meta["last_seen_ms"] = record.meta.request_ready_time_ms meta["last_request_id"] = record.meta.request_id prev_hash = block_hash total_units = len(units) theoretical_hit_units = min(prefix_match_blocks * block_size, total_units) request_rows.append( { "request_id": record.meta.request_id, "session_id": record.meta.session_id, "request_ready_time_ms": record.meta.request_ready_time_ms, "segment_mode": segment_mode, "tokenizer_path": resolved_tokenizer_path, "block_size": block_size, "prompt_unit_length": total_units, "prompt_block_count": len(blocks), "theoretical_prefix_hit_blocks": prefix_match_blocks, "theoretical_prefix_hit_units": theoretical_hit_units, "theoretical_prefix_hit_ratio": safe_div(theoretical_hit_units, total_units), "theoretical_source_request_id": longest_source_request_id, } ) if progress is not None: progress.update(1) postfix, peak_rss_mb = _progress_postfix( process, peak_rss_mb, progress.n / progress.total if progress.total else 0.0, cache_blocks=len(cache), reuse_edges=len(reuse_gap_rows), ) progress.set_postfix(postfix) if progress is not None: progress.close() block_rows = [] for meta in cache.values(): lifecycle_end_ms = meta["last_reuse_ms"] if meta["reuse_count"] > 0 else meta["first_seen_ms"] lifetime_ms = max(lifecycle_end_ms - meta["first_seen_ms"], 0) span_end_ms = lifecycle_end_ms span_ms = max(span_end_ms - meta["first_seen_ms"], 0) block_rows.append( { "hash": meta["hash"], "first_request_id": meta["first_request_id"], "last_request_id": meta["last_request_id"], "first_seen_ms": meta["first_seen_ms"], "last_seen_ms": meta["last_seen_ms"], "last_reuse_ms": meta["last_reuse_ms"], "last_reuse_request_id": meta["last_reuse_request_id"], "reuse_count": meta["reuse_count"], "lifetime_ms": lifetime_ms, "span_end_ms": span_end_ms, "span_ms": span_ms, "reused": 1 if meta["reuse_count"] > 0 else 0, } ) return { "request_rows": request_rows, "reuse_gap_rows": reuse_gap_rows, "block_rows": block_rows, "resolved_tokenizer_path": resolved_tokenizer_path, } def build_tool_timing(records, features): features_by_request = {feature.request_id: feature for feature in features} catalog = Counter() session_edges = [] per_tool_edge_counter = Counter() per_tool_gap_sum = Counter() for record in records: for tool in record.declared_tools: if tool.name: catalog[tool.name] += 1 records_by_session = defaultdict(list) for record in records: records_by_session[record.meta.session_id].append(record) for session_id, session_records in records_by_session.items(): ordered = sort_records_for_time(session_records) for previous, current in zip(ordered, ordered[1:]): current_feature = features_by_request[current.meta.request_id] previous_feature = features_by_request[previous.meta.request_id] gap_from_prev_ready_ms = max(current.meta.request_ready_time_ms - 
previous.meta.request_ready_time_ms, 0) gap_from_prev_end_ms = max(current.meta.request_ready_time_ms - previous.meta.request_end_time_ms, 0) row = { "session_id": session_id, "prev_request_id": previous.meta.request_id, "next_request_id": current.meta.request_id, "prev_request_ready_time_ms": previous.meta.request_ready_time_ms, "prev_request_end_time_ms": previous.meta.request_end_time_ms, "next_request_ready_time_ms": current.meta.request_ready_time_ms, "gap_from_prev_ready_ms": gap_from_prev_ready_ms, "gap_from_prev_end_ms": gap_from_prev_end_ms, "next_tool_msg_count": current_feature.tool_msg_count, "next_assistant_msg_count": current_feature.assistant_msg_count, "next_declared_tool_count": current_feature.declared_tool_count, "next_declared_tool_names": ";".join(tool.name for tool in current.declared_tools if tool.name), "prev_declared_tool_names": ";".join(tool.name for tool in previous.declared_tools if tool.name), "is_tool_round": 1 if current_feature.tool_msg_count > 0 else 0, } session_edges.append(row) if row["is_tool_round"]: for tool in current.declared_tools: if tool.name: per_tool_edge_counter[tool.name] += 1 per_tool_gap_sum[tool.name] += gap_from_prev_ready_ms catalog_rows = [] total_requests = len(records) for tool_name, count in catalog.most_common(): catalog_rows.append( { "tool_name": tool_name, "declared_count": count, "request_fraction": safe_div(count, total_requests), "tool_round_count": per_tool_edge_counter.get(tool_name, 0), "avg_tool_round_gap_ms": safe_div( per_tool_gap_sum.get(tool_name, 0), per_tool_edge_counter.get(tool_name, 0), ), } ) return { "catalog_rows": catalog_rows, "session_edges": session_edges, } def augment_request_metrics(records, features, theoretical_rows): theoretical_by_request = {row["request_id"]: row for row in theoretical_rows} rows = [] for record, feature in zip(records, features): row = feature_to_row(feature) theory = theoretical_by_request.get(record.meta.request_id, {}) row.update( { "line_number": record.meta.line_number, "request_ready_time_ms": record.meta.request_ready_time_ms, "request_end_time_ms": record.meta.request_end_time_ms, "declared_tool_names": ";".join(tool.name for tool in record.declared_tools if tool.name), "canonical_prompt_chars": len(record.canonical_prompt), "theoretical_prompt_unit_length": theory.get("prompt_unit_length", 0), "theoretical_prefix_hit_blocks": theory.get("theoretical_prefix_hit_blocks", 0), "theoretical_prefix_hit_units": theory.get("theoretical_prefix_hit_units", 0), "theoretical_prefix_hit_ratio": theory.get("theoretical_prefix_hit_ratio", 0.0), "theoretical_source_request_id": theory.get("theoretical_source_request_id", ""), } ) rows.append(row) return rows def write_study_outputs( records, features, output_dir, source_path, block_size=256, segment_mode="tokenizer", tokenizer_path=None, input_length_bucket_thresholds=None, show_progress=False, ): output_dir = ensure_output_dir(output_dir) advanced_dir = ensure_output_dir(preferred_details_dir(output_dir)) process = psutil.Process(os.getpid()) if show_progress else None peak_rss_mb = 0.0 if show_progress: tqdm.write("Stage 1/4: theoretical cache analysis") theoretical = compute_theoretical_cache( records, block_size=block_size, segment_mode=segment_mode, tokenizer_path=tokenizer_path, show_progress=show_progress, ) if show_progress: current_rss_mb = process.memory_info().rss / (1024 * 1024) peak_rss_mb = max(peak_rss_mb, current_rss_mb) tqdm.write( f"Stage 1/4 done: rss_mb={current_rss_mb:.0f} " 
f"est_peak_mb={_estimate_peak_rss_mb(current_rss_mb, peak_rss_mb, 1.0):.0f}" ) tqdm.write("Stage 2/4: tool timing") tool_stats = build_tool_timing(records, features) if show_progress: current_rss_mb = process.memory_info().rss / (1024 * 1024) peak_rss_mb = max(peak_rss_mb, current_rss_mb) tqdm.write( f"Stage 2/4 done: rss_mb={current_rss_mb:.0f} " f"est_peak_mb={_estimate_peak_rss_mb(current_rss_mb, peak_rss_mb, 1.0):.0f}" ) tqdm.write("Stage 3/4: merge request metrics") request_metric_rows = augment_request_metrics(records, features, theoretical["request_rows"]) if show_progress: current_rss_mb = process.memory_info().rss / (1024 * 1024) peak_rss_mb = max(peak_rss_mb, current_rss_mb) tqdm.write( f"Stage 3/4 done: rss_mb={current_rss_mb:.0f} " f"est_peak_mb={_estimate_peak_rss_mb(current_rss_mb, peak_rss_mb, 1.0):.0f}" ) tqdm.write("Stage 4/4: writing outputs and plots") paths = {} paths["request_metrics"] = write_csv(advanced_dir / "request_metrics.csv", request_metric_rows) paths["theoretical_request_cache"] = write_csv( advanced_dir / "theoretical_request_cache.csv", theoretical["request_rows"], ) paths["theoretical_reuse_gaps"] = write_csv( advanced_dir / "theoretical_reuse_gaps.csv", theoretical["reuse_gap_rows"], ) paths["theoretical_block_lifetimes"] = write_csv( advanced_dir / "theoretical_block_lifetimes.csv", theoretical["block_rows"], ) paths["tools_catalog"] = write_csv(advanced_dir / "tools_catalog.csv", tool_stats["catalog_rows"]) paths["tool_round_edges"] = write_csv(advanced_dir / "tool_round_edges.csv", tool_stats["session_edges"]) cdf_specs = { "cdf_input_tokens.csv": [row["input_tokens"] for row in request_metric_rows], "cdf_output_tokens.csv": [row["output_tokens"] for row in request_metric_rows], "cdf_actual_cache_hit_ratio.csv": [row["cache_hit_ratio"] for row in request_metric_rows], "cdf_theoretical_cache_hit_ratio.csv": [ row["theoretical_prefix_hit_ratio"] for row in request_metric_rows ], "cdf_theoretical_reuse_gap_ms.csv": [ row["reuse_gap_ms"] for row in theoretical["reuse_gap_rows"] ], "cdf_theoretical_block_lifetime_ms.csv": [ row["lifetime_ms"] for row in theoretical["block_rows"] if row["reuse_count"] > 0 ], "cdf_tool_round_gap_from_prev_ready_ms.csv": [ row["gap_from_prev_ready_ms"] for row in tool_stats["session_edges"] if row["is_tool_round"] ], } cdf_rows = {} for filename, values in cdf_specs.items(): rows = build_cdf(values) cdf_rows[filename] = rows paths[filename] = write_csv(advanced_dir / filename, rows) paths["cdf_lengths_png"] = plot_cdf_series( advanced_dir / "cdf_lengths.png", [ ("input_tokens", cdf_rows["cdf_input_tokens.csv"]), ("output_tokens", cdf_rows["cdf_output_tokens.csv"]), ], title="CDF of Input / Output Length", xlabel="Tokens", ) paths["cdf_cache_png"] = plot_cdf_series( advanced_dir / "cdf_cache_hit_ratio.png", [ ("actual_cache_hit_ratio", cdf_rows["cdf_actual_cache_hit_ratio.csv"]), ("theoretical_cache_hit_ratio", cdf_rows["cdf_theoretical_cache_hit_ratio.csv"]), ], title="CDF of Actual vs Theoretical Cache Hit Ratio", xlabel="Hit Ratio", ) paths["cdf_reuse_gap_png"] = plot_cdf_series( advanced_dir / "cdf_theoretical_reuse_gap_ms.png", [("reuse_gap_ms", cdf_rows["cdf_theoretical_reuse_gap_ms.csv"])], title="CDF of Theoretical Cache Reuse Gap", xlabel="Milliseconds", ) paths["cdf_block_lifetime_png"] = plot_cdf_series( advanced_dir / "cdf_theoretical_block_lifetime_ms.png", [("block_lifetime_ms", cdf_rows["cdf_theoretical_block_lifetime_ms.csv"])], title="CDF of Theoretical Cache Block Lifetime", xlabel="Milliseconds", ) 
paths["cdf_tool_gap_png"] = plot_cdf_series( advanced_dir / "cdf_tool_round_gap_from_prev_ready_ms.png", [("tool_round_gap_from_prev_ready_ms", cdf_rows["cdf_tool_round_gap_from_prev_ready_ms.csv"])], title="CDF of Tool Round Inter-API Gap", xlabel="Milliseconds", ) paths["tools_catalog_png"] = plot_bar_chart( advanced_dir / "tools_catalog_top_declared.png", tool_stats["catalog_rows"], label_key="tool_name", value_key="declared_count", title="Top Declared Tools", xlabel="Declared Count", ylabel="Tool", ) input_length_comparison_summary, input_length_comparison_summary_path = write_input_length_comparison_from_rows( request_metric_rows, advanced_dir, ) paths["input_length_comparison_summary"] = input_length_comparison_summary_path ( input_length_bucket_cache_reuse_summary, input_length_bucket_cache_reuse_summary_path, input_length_bucket_cache_reuse_csv_path, input_length_bucket_cache_reuse_plot_path, ) = write_cache_reuse_by_input_length_bucket_from_rows( request_metric_rows, advanced_dir, bucket_defs=build_input_length_bucket_defs(input_length_bucket_thresholds), ) paths["input_length_bucket_cache_reuse_summary"] = input_length_bucket_cache_reuse_summary_path paths["input_length_bucket_cache_reuse_csv"] = input_length_bucket_cache_reuse_csv_path paths["input_length_bucket_cache_reuse_plot"] = input_length_bucket_cache_reuse_plot_path agentic_summary, agentic_summary_path = write_agentic_outputs_from_rows(request_metric_rows, advanced_dir) paths["agentic_patterns_summary"] = agentic_summary_path transition_summary, transition_summary_path, transition_edges_path = write_transition_outputs_from_existing( source_path, advanced_dir / "request_metrics.csv", advanced_dir, ) paths["transition_patterns_summary"] = transition_summary_path paths["session_transition_edges"] = transition_edges_path retokenized_transition_summary, retokenized_transition_summary_path, retokenized_transition_edges_path = ( write_retokenized_transition_outputs_from_existing( source_path, advanced_dir / "request_metrics.csv", advanced_dir, ) ) paths["retokenized_transition_summary"] = retokenized_transition_summary_path paths["session_transition_retokenized_edges"] = retokenized_transition_edges_path ( context_change_summary, context_change_summary_path, context_change_casebook_path, context_change_representative_pairs_path, ) = ( write_context_change_deep_dive_from_existing( source_path, advanced_dir / "request_metrics.csv", advanced_dir, ) ) paths["context_change_summary"] = context_change_summary_path paths["context_change_casebook"] = context_change_casebook_path paths["context_change_representative_pairs"] = context_change_representative_pairs_path tools_summary = { "top_declared_tools": tool_stats["catalog_rows"][:20], "tool_round_edge_count": sum(row["is_tool_round"] for row in tool_stats["session_edges"]), "longest_tool_rounds": sorted( [row for row in tool_stats["session_edges"] if row["is_tool_round"]], key=lambda row: row["gap_from_prev_ready_ms"], reverse=True, )[:20], } tools_summary_path = advanced_dir / "tools_summary.json" with open(tools_summary_path, "w", encoding="utf-8") as handle: json.dump(tools_summary, handle, ensure_ascii=False, indent=2) paths["tools_summary"] = tools_summary_path summary = { "segment_mode": segment_mode, "tokenizer_path": theoretical["resolved_tokenizer_path"], "block_size": block_size, "request_count": len(records), "tool_round_edge_count": tools_summary["tool_round_edge_count"], "top_declared_tools": tools_summary["top_declared_tools"], "input_length_comparison_summary": 
input_length_comparison_summary, "input_length_bucket_cache_reuse_summary": input_length_bucket_cache_reuse_summary, "agentic_patterns_summary": agentic_summary, "transition_patterns_summary": transition_summary, "retokenized_transition_summary": retokenized_transition_summary, "context_change_summary": context_change_summary, "cdf_files": sorted(path.name for path in advanced_dir.glob("cdf*.csv")), } summary_path = advanced_dir / DETAILS_SUMMARY_FILENAME with open(summary_path, "w", encoding="utf-8") as handle: json.dump(summary, handle, ensure_ascii=False, indent=2) paths["details_summary"] = summary_path progress_path = advanced_dir / "progress.json" progress_path.write_text( json.dumps( { "mode": "in_memory_study", "processed_requests": len(records), "total_requests": len(records), "fraction_done": 1.0, }, ensure_ascii=False, indent=2, ), encoding="utf-8", ) paths["progress"] = progress_path if show_progress: current_rss_mb = process.memory_info().rss / (1024 * 1024) peak_rss_mb = max(peak_rss_mb, current_rss_mb) tqdm.write( f"Stage 4/4 done: rss_mb={current_rss_mb:.0f} " f"est_peak_mb={_estimate_peak_rss_mb(current_rss_mb, peak_rss_mb, 1.0):.0f}" ) return paths, summary, tools_summary, theoretical def build_agentic_markdown_section(agentic_summary): if not agentic_summary: return "" lines = [ "## Agentic Workload Patterns", "- This section focuses on coding-agent specific behavior rather than generic chat metrics.", f"- Session turn stats: {json.dumps(agentic_summary['session_turn_stats'], ensure_ascii=False)}", f"- Session inter-request gap stats from previous ready time (ms): {json.dumps(agentic_summary['pair_gap_ready_ms_stats'], ensure_ascii=False)}", f"- Session inter-request gap stats from previous end time (ms): {json.dumps(agentic_summary['pair_gap_end_ms_stats'], ensure_ascii=False)}", f"- Append-like proxy: `{agentic_summary['append_like_proxy_definition']}`", f"- Theoretical source scope: {json.dumps(agentic_summary['theoretical_source_scope'], ensure_ascii=False)}", "", "Session turn buckets:", "| bucket | session_count | session_fraction | request_count | request_fraction |", "| --- | --- | --- | --- | --- |", ] for row in agentic_summary["session_turn_bucket_rows"]: lines.append( f"| {row['bucket']} | {row['session_count']} | {row['session_fraction']:.4f} | " f"{row['request_count']} | {row['request_fraction']:.4f} |" ) lines.extend( [ "", "Request-level agentic fractions:", "| metric | fraction |", "| --- | --- |", ] ) for row in agentic_summary["request_level_fraction_rows"]: lines.append(f"| {row['metric']} | {row['fraction']:.4f} |") lines.extend( [ "", "Pair-level agentic fractions:", "| metric | fraction |", "| --- | --- |", ] ) for row in agentic_summary["pair_level_fraction_rows"]: lines.append(f"| {row['metric']} | {row['fraction']:.4f} |") return "\n".join(lines) def build_input_length_comparison_markdown_section(input_length_comparison_summary): if not input_length_comparison_summary: return "" lines = [ "## Input Length Comparison", "- This section compares two input-length definitions: provider-reported `usage.input_tokens` vs analyzer-retokenized `canonical_prompt` units.", f"- Request count: {input_length_comparison_summary['request_count']}", f"- Exact-match fraction: {input_length_comparison_summary['same_fraction']:.4f}", f"- Retokenized > provider fraction: {input_length_comparison_summary['retokenized_gt_provider_fraction']:.4f}", f"- Retokenized < provider fraction: {input_length_comparison_summary['retokenized_lt_provider_fraction']:.4f}", f"- 
Provider input token stats: {json.dumps(input_length_comparison_summary['provider_input_tokens_stats'], ensure_ascii=False)}", f"- Retokenized prompt token stats: {json.dumps(input_length_comparison_summary['retokenized_prompt_tokens_stats'], ensure_ascii=False)}", f"- Delta stats (`retokenized - provider`): {json.dumps(input_length_comparison_summary['delta_tokens_stats'], ensure_ascii=False)}", f"- Ratio stats (`retokenized / provider`): {json.dumps(input_length_comparison_summary['ratio_stats'], ensure_ascii=False)}", f"- Relative delta stats vs provider: {json.dumps(input_length_comparison_summary['relative_delta_vs_provider_stats'], ensure_ascii=False)}", ] return "\n".join(lines) def build_input_length_bucket_cache_reuse_markdown_section(input_length_bucket_cache_reuse_summary): if not input_length_bucket_cache_reuse_summary: return "" bucket_defs = input_length_bucket_cache_reuse_summary.get("bucket_definition", {}).get("buckets", []) bucket_spec = "; ".join( ( f"{row['input_tokens_min_inclusive']} <= L < {row['input_tokens_max_exclusive']}" if row.get("input_tokens_max_exclusive") is not None else f"{row['input_tokens_min_inclusive']} <= L" ) for row in bucket_defs ) lines = [ "## Cache Reuse by Provider Input-Length Bucket", f"- Bucket ranges: `{bucket_spec}`" if bucket_spec else "- Bucket ranges: _n/a_", "- Bucket assignment uses provider `usage.input_tokens`; theoretical reuse still uses analyzer-retokenized prompt prefix hits.", "- `weighted_theoretical_cache_hit_ratio` is the global infinite-cache upper bound.", "- `weighted_bucketed_theoretical_cache_hit_ratio` is the upper bound after splitting cache by input-length bucket.", "", "| bucket | request_count | request_fraction | weighted_actual_cache_hit_ratio | weighted_theoretical_cache_hit_ratio | weighted_bucketed_theoretical_cache_hit_ratio | weighted_bucket_boundary_loss_ratio | actual_reused_request_fraction | theoretical_reused_request_fraction | bucketed_theoretical_reused_request_fraction |", "| --- | --- | --- | --- | --- | --- | --- | --- | --- | --- |", ] for row in input_length_bucket_cache_reuse_summary["bucket_rows"]: lines.append( f"| {row['bucket']} | {row['request_count']} | {row['request_fraction']:.4f} | " f"{row['weighted_actual_cache_hit_ratio']:.4f} | {row['weighted_theoretical_cache_hit_ratio']:.4f} | " f"{row['weighted_bucketed_theoretical_cache_hit_ratio']:.4f} | {row['weighted_bucket_boundary_loss_ratio']:.4f} | " f"{row['actual_reused_request_fraction']:.4f} | {row['theoretical_reused_request_fraction']:.4f} | " f"{row['bucketed_theoretical_reused_request_fraction']:.4f} |" ) return "\n".join(lines) def build_session_bucket_boundary_markdown_section(session_bucket_boundary_miss_summary): if not session_bucket_boundary_miss_summary: return "" lines = [ "## Session Cross-Bucket Miss", "- This section uses the `parent_chat_id -> chat_id` chain from the release trace.", "- `cross_bucket_shared_prefix_unit_fraction` measures how much parent-child reusable prefix is lost when parent and child fall into different input-length buckets.", f"- Cross-bucket edge fraction: {session_bucket_boundary_miss_summary['cross_bucket_edge_fraction']:.4f}", ( "- Cross-bucket shared-prefix miss fraction: " f"{session_bucket_boundary_miss_summary['cross_bucket_shared_prefix_unit_fraction']:.4f}" ), "", "| bucket | edge_count | edge_fraction | reusable_edge_count | cross_bucket_edge_count | cross_bucket_edge_fraction | shared_prefix_units_sum | cross_bucket_shared_prefix_units_sum | cross_bucket_shared_prefix_unit_fraction 
|", "| --- | --- | --- | --- | --- | --- | --- | --- | --- |", ] for row in session_bucket_boundary_miss_summary.get("bucket_rows", []): lines.append( f"| {row['bucket']} | {row['edge_count']} | {row['edge_fraction']:.4f} | " f"{row['reusable_edge_count']} | {row['cross_bucket_edge_count']} | " f"{row['cross_bucket_edge_fraction']:.4f} | {row['shared_prefix_units_sum']} | " f"{row['cross_bucket_shared_prefix_units_sum']} | {row['cross_bucket_shared_prefix_unit_fraction']:.4f} |" ) return "\n".join(lines) def build_alive_block_timeline_markdown_section(alive_block_timeline_summary): if not alive_block_timeline_summary: return "" return "\n".join( [ "## Live KV-Cache Blocks Over Time", "- Each block span starts at first appearance and ends at its last reuse.", f"- Peak live blocks: {alive_block_timeline_summary['peak_alive_blocks']}", f"- Timeline events: {alive_block_timeline_summary['event_count']}", ] ) def build_study_markdown( base_report_markdown, output_dir, advanced_summary, tools_summary, theoretical, theoretical_summary=None, input_length_comparison_summary=None, input_length_bucket_cache_reuse_summary=None, session_bucket_boundary_miss_summary=None, alive_block_timeline_summary=None, agentic_summary=None, transition_summary=None, retokenized_transition_summary=None, context_change_summary=None, ): artifact_dir = DETAILS_DIR_NAME if theoretical_summary is None: theoretical_summary = { "request_hit_ratio_stats": series_stats( [row["theoretical_prefix_hit_ratio"] for row in theoretical["request_rows"]] ), "reuse_gap_stats": series_stats([row["reuse_gap_ms"] for row in theoretical.get("reuse_gap_rows", [])]), "block_lifetime_stats": series_stats( [row["lifetime_ms"] for row in theoretical.get("block_rows", []) if row["reuse_count"] > 0] ), } request_hit_stats = theoretical_summary["request_hit_ratio_stats"] reuse_gap_stats = theoretical_summary["reuse_gap_stats"] block_lifetime_stats = theoretical_summary["block_lifetime_stats"] lines = [ base_report_markdown.rstrip(), "", "## Study Outputs", f"- Output root: `{output_dir}`", f"- Segment mode: `{advanced_summary['segment_mode']}`", f"- Tokenizer path: `{advanced_summary['tokenizer_path']}`" if advanced_summary["tokenizer_path"] else "- Tokenizer path: _n/a_", f"- Theoretical cache block size: {advanced_summary['block_size']}", f"- Request count: {advanced_summary['request_count']}", f"- Tool round edges: {advanced_summary['tool_round_edge_count']}", "", "Generated figures:", f"- ![CDF of Input and Output Length]({artifact_dir}/cdf_lengths.png)", f"- ![CDF of Actual vs Theoretical Cache Hit Ratio]({artifact_dir}/cdf_cache_hit_ratio.png)", f"- ![CDF of Theoretical Cache Reuse Gap]({artifact_dir}/cdf_theoretical_reuse_gap_ms.png)", f"- ![CDF of Theoretical Cache Block Lifetime]({artifact_dir}/cdf_theoretical_block_lifetime_ms.png)", f"- ![Live KV-Cache Blocks Over Time]({artifact_dir}/theoretical_alive_block_timeline.png)", f"- ![CDF of Tool Round Inter-API Gap]({artifact_dir}/cdf_tool_round_gap_from_prev_ready_ms.png)", f"- ![CDF of Provider vs Retokenized Input Length]({artifact_dir}/cdf_input_length_provider_vs_retokenized.png)", f"- ![CDF of Retokenized Minus Provider Input Length]({artifact_dir}/cdf_input_length_delta_tokens.png)", f"- ![CDF of Retokenized Over Provider Input Length]({artifact_dir}/cdf_input_length_ratio.png)", f"- ![Weighted Cache Hit Ratio by Input-Length Bucket]({artifact_dir}/input_length_bucket_cache_reuse.png)", f"- ![Session Bucket Boundary Miss by Child 
Bucket]({artifact_dir}/session_bucket_boundary_miss.png)", f"- ![Top Declared Tools]({artifact_dir}/tools_catalog_top_declared.png)", f"- ![Session Turn Buckets]({artifact_dir}/agentic_session_turn_buckets.png)", f"- ![CDF of Session Inter-Request Gap]({artifact_dir}/cdf_session_inter_request_gap_ms.png)", f"- ![CDF of Session Inter-Request Gap Ready to Ready Zoom 80]({artifact_dir}/cdf_session_inter_request_gap_ready_ms_zoom80.png)", f"- ![CDF of Session Inter-Request Gap End to Ready Zoom 80]({artifact_dir}/cdf_session_inter_request_gap_end_ms_zoom80.png)", f"- ![CDF of Session Inter-Request Gap Ready to Ready Zoom 90]({artifact_dir}/cdf_session_inter_request_gap_ready_ms_zoom90.png)", f"- ![CDF of Session Inter-Request Gap End to Ready Zoom 90]({artifact_dir}/cdf_session_inter_request_gap_end_ms_zoom90.png)", f"- ![Request-Level Agentic Fractions]({artifact_dir}/agentic_request_level_fractions.png)", f"- ![Pair-Level Agentic Fractions]({artifact_dir}/agentic_pair_level_fractions.png)", f"- ![Theoretical Prefix Reuse Scope]({artifact_dir}/agentic_theoretical_source_scope.png)", f"- ![Session Transition Trigger Groups]({artifact_dir}/transition_trigger_groups.png)", f"- ![Context Change by Trigger Group]({artifact_dir}/transition_context_change_by_trigger.png)", f"- ![KV-Cache Source Scope by Transition]({artifact_dir}/transition_source_scope.png)", f"- ![CDF of Signed Context Delta by Trigger Group]({artifact_dir}/cdf_transition_delta_fraction_by_trigger.png)", f"- ![CDF of Signed Context Delta by Trigger Group Zoom]({artifact_dir}/cdf_transition_delta_fraction_by_trigger_zoom.png)", f"- ![CDF of Absolute Context Delta by Trigger Group]({artifact_dir}/cdf_transition_abs_delta_fraction_by_trigger.png)", f"- ![CDF of Uncached Prompt Fraction by Trigger Group]({artifact_dir}/cdf_transition_uncached_fraction_by_trigger.png)", f"- ![CDF of KV-Cache Source Gap by Scope]({artifact_dir}/cdf_transition_source_gap_ms_by_scope.png)", f"- ![CDF of Retokenized Signed Context Delta by Trigger Group]({artifact_dir}/cdf_transition_retokenized_delta_fraction_by_trigger.png)", f"- ![CDF of Retokenized Signed Context Delta by Trigger Group Zoom]({artifact_dir}/cdf_transition_retokenized_delta_fraction_by_trigger_zoom.png)", f"- ![CDF of Retokenized Absolute Context Delta by Trigger Group]({artifact_dir}/cdf_transition_retokenized_abs_delta_fraction_by_trigger.png)", "", "## Theoretical Cache", f"- Request-level theoretical hit ratio stats: {json.dumps(request_hit_stats, ensure_ascii=False)}", f"- Reuse gap stats (ms): {json.dumps(reuse_gap_stats, ensure_ascii=False)}", f"- Block lifetime stats (ms): {json.dumps(block_lifetime_stats, ensure_ascii=False)}", f"- Detailed data: `{artifact_dir}/theoretical_request_cache.csv`, `{artifact_dir}/theoretical_block_lifetimes.csv`", "", "## Tool Timing", "- Main timing metric is inter-API gap: previous request ready time -> next request ready time.", f"- Raw edge table: `{artifact_dir}/tool_round_edges.csv`", f"- Tool catalog: `{artifact_dir}/tools_catalog.csv`", "", "Top declared tools:", "| tool_name | declared_count | request_fraction | tool_round_count | avg_tool_round_gap_ms |", "| --- | --- | --- | --- | --- |", ] for row in tools_summary["top_declared_tools"][:10]: lines.append( f"| {row['tool_name']} | {row['declared_count']} | {row['request_fraction']:.4f} | " f"{row['tool_round_count']} | {row['avg_tool_round_gap_ms']:.2f} |" ) lines.extend( [ "", "Longest tool rounds:", "| session_id | prev_request_id | next_request_id | gap_from_prev_ready_ms | 
next_declared_tool_count |", "| --- | --- | --- | --- | --- |", ] ) for row in tools_summary["longest_tool_rounds"][:10]: lines.append( f"| {row['session_id']} | {row['prev_request_id']} | {row['next_request_id']} | " f"{row['gap_from_prev_ready_ms']} | {row['next_declared_tool_count']} |" ) input_length_section = build_input_length_comparison_markdown_section(input_length_comparison_summary) if input_length_section: lines.extend(["", input_length_section]) input_length_bucket_section = build_input_length_bucket_cache_reuse_markdown_section( input_length_bucket_cache_reuse_summary ) if input_length_bucket_section: lines.extend(["", input_length_bucket_section]) session_bucket_boundary_section = build_session_bucket_boundary_markdown_section( session_bucket_boundary_miss_summary ) if session_bucket_boundary_section: lines.extend(["", session_bucket_boundary_section]) alive_block_timeline_section = build_alive_block_timeline_markdown_section(alive_block_timeline_summary) if alive_block_timeline_section: lines.extend(["", alive_block_timeline_section]) agentic_section = build_agentic_markdown_section(agentic_summary) if agentic_section: lines.extend(["", agentic_section]) transition_section = build_transition_markdown_section(transition_summary) if transition_section: lines.extend(["", transition_section]) retokenized_transition_section = build_retokenized_transition_markdown_section( retokenized_transition_summary, provider_transition_summary=transition_summary, ) if retokenized_transition_section: lines.extend(["", retokenized_transition_section]) context_change_section = build_context_change_markdown_section(context_change_summary) if context_change_section: lines.extend(["", context_change_section]) lines.extend( [ "", "## Data Files", f"- `{artifact_dir}/request_metrics.csv` combines base request metrics with theoretical cache metrics.", f"- `{artifact_dir}/cdf_input_tokens.csv` and `{artifact_dir}/cdf_output_tokens.csv` contain the length CDFs.", f"- `{artifact_dir}/cdf_retokenized_prompt_tokens.csv`, `{artifact_dir}/cdf_input_length_delta_tokens.csv`, `{artifact_dir}/cdf_input_length_ratio_retokenized_over_provider.csv`, and `{artifact_dir}/cdf_input_length_relative_delta_vs_provider.csv` compare provider vs retokenized input length.", f"- `{artifact_dir}/input_length_bucket_cache_reuse.csv` and `{artifact_dir}/input_length_bucket_cache_reuse_summary.json` summarize actual/theoretical cache reuse by provider input-length bucket.", f"- `{artifact_dir}/cdf_actual_cache_hit_ratio.csv` and `{artifact_dir}/cdf_theoretical_cache_hit_ratio.csv` contain actual/theoretical cache-hit CDFs.", f"- `{artifact_dir}/cdf_theoretical_reuse_gap_ms.csv` contains the reuse-gap CDF.", f"- `{artifact_dir}/cdf_theoretical_block_lifetime_ms.csv` contains the block lifecycle CDF from first appearance to last reuse.", f"- `{artifact_dir}/theoretical_alive_block_timeline.csv` contains the time-series of live KV-cache blocks.", f"- `{artifact_dir}/session_bucket_boundary_miss.csv` and `{artifact_dir}/session_bucket_boundary_miss_summary.json` quantify parent-child cache misses caused by cross-bucket routing.", f"- `{artifact_dir}/cdf_tool_round_gap_from_prev_ready_ms.csv` contains the tool-round inter-API gap CDF.", f"- `{artifact_dir}/agentic_session_turn_buckets.csv` contains session-turn bucket shares for sessions and requests.", f"- `{artifact_dir}/agentic_request_level_fractions.csv` contains request-level agentic workload fractions.", f"- `{artifact_dir}/agentic_pair_level_fractions.csv` contains pair-level 
agentic workload fractions.", f"- `{artifact_dir}/agentic_theoretical_source_scope.csv` contains same-session vs cross-session theoretical prefix-reuse scope.", f"- `{artifact_dir}/cdf_session_inter_request_gap_ready_ms.csv` and `{artifact_dir}/cdf_session_inter_request_gap_end_ms.csv` contain session transition gap CDFs.", f"- `{artifact_dir}/session_transition_edges.csv` contains per-session `prev_request -> next_request` trigger, context delta, and KV-cache source annotations.", f"- `{artifact_dir}/transition_trigger_groups.csv` contains trigger-group proportions and context-change summaries.", f"- `{artifact_dir}/transition_source_scope.csv` contains KV-cache source scope proportions and reuse-gap summaries.", f"- `{artifact_dir}/cdf_transition_delta_fraction_of_current.csv` and `{artifact_dir}/cdf_transition_abs_delta_fraction_of_current.csv` contain overall signed/absolute context-delta CDFs.", f"- `{artifact_dir}/cdf_transition_delta_fraction_by_trigger.csv`, `{artifact_dir}/cdf_transition_abs_delta_fraction_by_trigger.csv`, and `{artifact_dir}/cdf_transition_uncached_fraction_by_trigger.csv` contain trigger-group CDFs.", f"- `{artifact_dir}/cdf_transition_source_gap_ms.csv` and `{artifact_dir}/cdf_transition_source_gap_ms_by_scope.csv` contain overall/grouped source-gap CDFs.", f"- `{artifact_dir}/session_transition_retokenized_edges.csv`, `{artifact_dir}/transition_retokenized_trigger_groups.csv`, and `{artifact_dir}/transition_retokenized_summary.json` contain the same session-transition analysis recomputed with retokenized prompt length.", f"- `{artifact_dir}/cdf_transition_retokenized_delta_fraction_of_current.csv`, `{artifact_dir}/cdf_transition_retokenized_abs_delta_fraction_of_current.csv`, `{artifact_dir}/cdf_transition_retokenized_delta_fraction_by_trigger.csv`, and `{artifact_dir}/cdf_transition_retokenized_abs_delta_fraction_by_trigger.csv` contain retokenized transition-length CDFs.", f"- `{artifact_dir}/context_change_mechanism_summary.csv` and `{artifact_dir}/context_change_summary.json` summarize why context shrinks happen between consecutive requests.", f"- `{artifact_dir}/context_change_casebook.md` contains representative per-mechanism cases with message-structure summaries from the source trace (`*-raw.jsonl` or legacy `normalized.jsonl`).", ] ) return "\n".join(lines) + "\n" def run_study( records, output_dir, normalized_format="jsonl", source_path=None, block_size=256, segment_mode="tokenizer", tokenizer_path=None, model_family="auto", model_meta_dir=None, input_length_bucket_thresholds=None, tokenizer_batch_size=64, show_progress=False, ): if show_progress: tqdm.write("Stage 0/4: base outputs") source_path = Path(source_path) if source_path else None normalized_path = None if source_path is None: normalized_path = write_normalized(records, output_dir, output_format=normalized_format) source_path = normalized_path features = compute_features(records) features_path = write_features(features, output_dir) summary_path, report_path = write_report(records, features, output_dir) from .resume_advanced import run_advanced_from_existing release_path = None if source_path is not None and source_path.name.endswith("-raw.jsonl"): candidate = source_path.with_name(source_path.name[:-len("-raw.jsonl")] + ".jsonl") if path_looks_like_release_trace(candidate): release_path = candidate if release_path is not None: # Release the large in-memory trace objects before advanced analysis. 
del records del features gc.collect() advanced_paths = run_advanced_from_existing( source_path, release_path, features_path, output_dir, input_length_bucket_thresholds=input_length_bucket_thresholds, show_progress=show_progress, ) else: advanced_paths, advanced_summary, tools_summary, theoretical = write_study_outputs( records, features, output_dir, source_path=source_path, block_size=block_size, segment_mode=segment_mode, tokenizer_path=tokenizer_path, input_length_bucket_thresholds=input_length_bucket_thresholds, show_progress=show_progress, ) report_path.write_text( build_study_markdown( report_path.read_text(encoding="utf-8"), output_dir, advanced_summary, tools_summary, theoretical, input_length_comparison_summary=advanced_summary.get("input_length_comparison_summary"), input_length_bucket_cache_reuse_summary=advanced_summary.get("input_length_bucket_cache_reuse_summary"), agentic_summary=advanced_summary.get("agentic_patterns_summary"), transition_summary=advanced_summary.get("transition_patterns_summary"), retokenized_transition_summary=advanced_summary.get("retokenized_transition_summary"), context_change_summary=advanced_summary.get("context_change_summary"), ), encoding="utf-8", ) paths = { "features": features_path, "summary": summary_path, "report": report_path, **advanced_paths, } if normalized_path is not None: paths["normalized"] = normalized_path return paths
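

# Usage sketch (illustrative, not part of the pipeline): a minimal way this
# module's `run_study` entry point might be driven, plus a runnable smoke
# check of the timeline markdown helper. The `load_trace_records` loader named
# in the commented-out driver is hypothetical; substitute the package's real
# parsing entry point. Only `run_study` and
# `build_alive_block_timeline_markdown_section` are taken from this module.
if __name__ == "__main__":  # pragma: no cover - manual smoke check only
    # records = load_trace_records(Path("traces/example-raw.jsonl"))  # hypothetical loader
    # paths = run_study(
    #     records,
    #     Path("study-output"),
    #     block_size=256,
    #     segment_mode="tokenizer",
    #     show_progress=True,
    # )
    # print("Report written to", paths["report"])

    # Runnable check with illustrative values: renders the live-block section.
    demo_timeline_summary = {"peak_alive_blocks": 12, "event_count": 340}
    print(build_alive_block_timeline_markdown_section(demo_timeline_summary))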