import csv
import gc
import json
import os
import subprocess
from collections import Counter, defaultdict
from pathlib import Path

import matplotlib
import psutil

matplotlib.use("Agg")

import matplotlib.pyplot as plt
from tqdm.auto import tqdm

from trace_model_meta import resolve_tokenizer_path

from .features import compute_features, feature_to_row
from .helpers import percentile, safe_div, safe_float, safe_int, series_stats
from .layout import DETAILS_DIR_NAME, DETAILS_SUMMARY_FILENAME, preferred_details_dir
from .parser import path_looks_like_release_trace
from .report import ensure_output_dir, write_features, write_normalized, write_report


def _estimate_peak_rss_mb(current_rss_mb, peak_rss_mb, fraction_done):
    baseline = max(current_rss_mb, peak_rss_mb)
    headroom = 1.0 + 0.25 * max(0.0, 1.0 - fraction_done)
    return baseline * headroom
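
# Illustration (hypothetical numbers): with current_rss_mb=900, peak_rss_mb=1000,
# and fraction_done=0.2, headroom = 1.0 + 0.25 * 0.8 = 1.2, so the estimate is
# max(900, 1000) * 1.2 = 1200 MB; the headroom shrinks to zero as the run finishes.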


def _progress_postfix(process, peak_rss_mb, fraction_done, **extra):
    current_rss_mb = process.memory_info().rss / (1024 * 1024)
    peak_rss_mb = max(peak_rss_mb, current_rss_mb)
    postfix = {
        "rss_mb": f"{current_rss_mb:.0f}",
        "est_peak_mb": f"{_estimate_peak_rss_mb(current_rss_mb, peak_rss_mb, fraction_done):.0f}",
    }
    postfix.update(extra)
    return postfix, peak_rss_mb


def sort_records_for_time(records):
    return sorted(
        records,
        key=lambda record: (
            record.meta.request_ready_time_ms or 0,
            record.meta.line_number,
        ),
    )


def resolve_study_tokenizer_path(tokenizer_path=None, *, model_family="auto", model_meta_dir=None, records=None):
    return resolve_tokenizer_path(
        tokenizer_path,
        model_family=model_family,
        model_meta_dir=model_meta_dir,
        records=records,
    )


def load_segmenter(segment_mode="tokenizer", tokenizer_path=None, *, model_family="auto", model_meta_dir=None, records=None):
    if segment_mode == "bytes":
        return (lambda text: list(text.encode("utf-8"))), ""
    if segment_mode != "tokenizer":
        raise ValueError(f"Unsupported segment mode: {segment_mode}")
    from tokenizers import Tokenizer
    from transformers import AutoTokenizer, PreTrainedTokenizerFast

    resolved_tokenizer_path = resolve_study_tokenizer_path(
        tokenizer_path,
        model_family=model_family,
        model_meta_dir=model_meta_dir,
        records=records,
    )
    path_obj = Path(resolved_tokenizer_path)
    tokenizer_file = path_obj / "tokenizer.json" if path_obj.is_dir() else path_obj
    if tokenizer_file.exists():
        try:
            raw_tokenizer = Tokenizer.from_file(str(tokenizer_file))
            return (lambda text: raw_tokenizer.encode(text).ids), resolved_tokenizer_path
        except Exception:
            pass
    try:
        tokenizer = AutoTokenizer.from_pretrained(
            resolved_tokenizer_path,
            trust_remote_code=True,
            local_files_only=path_obj.exists(),
            use_fast=True,
        )
    except Exception:
        tokenizer = PreTrainedTokenizerFast(
            tokenizer_file=str(tokenizer_file),
        )
    return (lambda text: tokenizer.encode(text, add_special_tokens=False)), resolved_tokenizer_path
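
# Usage sketch (the tokenizer path is illustrative, not part of this repo):
#   segment, resolved_path = load_segmenter("tokenizer", "/models/my-tokenizer")
#   prompt_unit_count = len(segment("hello world"))
# With segment_mode="bytes" the segmenter counts raw UTF-8 bytes instead and the
# returned tokenizer path is the empty string.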


def build_cdf(values):
    cleaned = sorted(value for value in values if value is not None)
    if not cleaned:
        return []
    total = len(cleaned)
    rows = []
    for index, value in enumerate(cleaned, start=1):
        rows.append({"value": value, "cdf": index / total})
    return rows
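
# Worked example: build_cdf([3, 1, None, 2]) drops the None, sorts the rest, and
# returns [{"value": 1, "cdf": 1/3}, {"value": 2, "cdf": 2/3}, {"value": 3, "cdf": 1.0}].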


def write_csv(path, rows):
    if not rows:
        with open(path, "w", encoding="utf-8", newline="") as handle:
            handle.write("")
        return path
    fieldnames = list(rows[0].keys())
    with open(path, "w", encoding="utf-8", newline="") as handle:
        writer = csv.DictWriter(handle, fieldnames=fieldnames)
        writer.writeheader()
        writer.writerows(rows)
    return path


def plot_cdf_series(path, series, title, xlabel, ylabel="CDF"):
    plt.figure(figsize=(8, 5))
    for label, rows in series:
        if not rows:
            continue
        xs = [row["value"] for row in rows]
        ys = [row["cdf"] for row in rows]
        plt.step(xs, ys, where="post", label=label)
    plt.title(title)
    plt.xlabel(xlabel)
    plt.ylabel(ylabel)
    plt.grid(True, alpha=0.3)
    if len(series) > 1:
        plt.legend()
    plt.tight_layout()
    plt.savefig(path, dpi=600)
    plt.close()
    return path


def plot_cdf_series_with_zoom_windows(
    path,
    series,
    title,
    xlabel,
    zoom_windows,
    ylabel="CDF",
    start_y_at_value=None,
):
    nonempty_series = [(label, rows) for label, rows in series if rows]
    if not nonempty_series:
        return path

    all_values = sorted(row["value"] for _, rows in nonempty_series for row in rows)
    panel_count = 1 + len(zoom_windows)
    fig, axes = plt.subplots(1, panel_count, figsize=(6 * panel_count, 5), sharey=True)
    if panel_count == 1:
        axes = [axes]

    def _trim_rows(rows):
        if start_y_at_value is None:
            return rows, None
        trimmed = rows
        baseline_cdf = None
        last_match_index = -1
        for index, row in enumerate(rows):
            if row["value"] <= start_y_at_value:
                last_match_index = index
            else:
                break
        if last_match_index >= 0:
            baseline_cdf = rows[last_match_index]["cdf"]
            trimmed = rows[last_match_index:]
        return trimmed, baseline_cdf

    def _plot(ax, panel_title, xlim=None):
        baseline_candidates = []
        for label, rows in nonempty_series:
            trimmed_rows, baseline_cdf = _trim_rows(rows)
            xs = [row["value"] for row in trimmed_rows]
            ys = [row["cdf"] for row in trimmed_rows]
            ax.step(xs, ys, where="post", label=label)
            if baseline_cdf is not None:
                baseline_candidates.append(baseline_cdf)
        ax.set_title(panel_title)
        ax.set_xlabel(xlabel)
        ax.set_ylabel(ylabel)
        ax.grid(True, alpha=0.3)
        if xlim is not None:
            lo, hi = xlim
            if lo == hi:
                pad = max(abs(lo) * 0.05, 1e-6)
                lo -= pad
                hi += pad
            ax.set_xlim(lo, hi)
        if baseline_candidates:
            ymin = min(baseline_candidates)
            if ymin >= 1.0:
                ymin = 0.999
            if ymin > 0.0:
                ax.set_ylim(ymin, 1.0)

    _plot(axes[0], "Full Range")
    for axis, (lo_pct, hi_pct, label) in zip(axes[1:], zoom_windows):
        lo = percentile(all_values, lo_pct)
        hi = percentile(all_values, hi_pct)
        _plot(axis, label, xlim=(lo, hi))

    handles, labels = axes[0].get_legend_handles_labels()
    if len(nonempty_series) > 1 and handles:
        fig.legend(handles, labels, loc="upper center", ncol=min(len(labels), 6))
    fig.suptitle(title)
    fig.tight_layout(rect=(0, 0, 1, 0.93))
    fig.savefig(path, dpi=600)
    plt.close(fig)
    return path


def plot_bar_chart(path, rows, label_key, value_key, title, xlabel, ylabel, top_n=20):
    limited = rows[:top_n]
    labels = [row[label_key] for row in limited]
    values = [row[value_key] for row in limited]
    plt.figure(figsize=(10, 6))
    plt.barh(labels[::-1], values[::-1])
    plt.title(title)
    plt.xlabel(xlabel)
    plt.ylabel(ylabel)
    plt.tight_layout()
    plt.savefig(path, dpi=600)
    plt.close()
    return path


def plot_grouped_bar_chart(path, rows, label_key, series_keys, title, xlabel, ylabel):
    if not rows:
        return path
    labels = [row[label_key] for row in rows]
    x = range(len(labels))
    width = 0.8 / max(len(series_keys), 1)
    plt.figure(figsize=(10, 6))
    for idx, (key, label) in enumerate(series_keys):
        offsets = [value + (idx - (len(series_keys) - 1) / 2) * width for value in x]
        values = [row[key] for row in rows]
        plt.bar(offsets, values, width=width, label=label)
    plt.title(title)
    plt.xlabel(xlabel)
    plt.ylabel(ylabel)
    plt.xticks(list(x), labels, rotation=20)
    if len(series_keys) > 1:
        plt.legend()
    plt.tight_layout()
    plt.savefig(path, dpi=600)
    plt.close()
    return path


def plot_fraction_bar_chart(path, rows, label_key, value_key, title, xlabel="Fraction", ylabel="Metric"):
    if not rows:
        return path
    labels = [row[label_key] for row in rows]
    values = [row[value_key] for row in rows]
    plt.figure(figsize=(10, 6))
    plt.barh(labels[::-1], values[::-1])
    plt.xlim(0.0, 1.0)
    plt.title(title)
    plt.xlabel(xlabel)
    plt.ylabel(ylabel)
    plt.tight_layout()
    plt.savefig(path, dpi=600)
    plt.close()
    return path


def plot_time_series(path, rows, x_key, y_key, title, xlabel, ylabel):
    if not rows:
        return path
    xs = [row[x_key] for row in rows]
    ys = [row[y_key] for row in rows]
    plt.figure(figsize=(10, 5.5))
    plt.step(xs, ys, where="post")
    plt.title(title)
    plt.xlabel(xlabel)
    plt.ylabel(ylabel)
    plt.grid(True, alpha=0.3)
    plt.tight_layout()
    plt.savefig(path, dpi=600)
    plt.close()
    return path


def build_grouped_cdf_rows(series_by_group, group_key):
    rows = []
    for group, values in series_by_group.items():
        for cdf_row in build_cdf(values):
            rows.append(
                {
                    group_key: group,
                    "value": cdf_row["value"],
                    "cdf": cdf_row["cdf"],
                }
            )
    return rows


def _normalize_request_metric_row(row):
    pattern_labels = row.get("pattern_labels", "")
    if isinstance(pattern_labels, str):
        pattern_labels = {label for label in pattern_labels.split(";") if label}
    else:
        pattern_labels = set(pattern_labels or [])
    return {
        "request_id": row.get("request_id", ""),
        "session_id": row.get("session_id", ""),
        "declared_tool_count": safe_int(row.get("declared_tool_count")),
        "tool_msg_count": safe_int(row.get("tool_msg_count")),
        "input_tokens": safe_int(row.get("input_tokens")),
        "uncached_prompt_tokens": safe_int(row.get("uncached_prompt_tokens")),
        "cache_hit_ratio": safe_float(row.get("cache_hit_ratio")),
        "request_ready_time_ms": safe_int(row.get("request_ready_time_ms")),
        "request_end_time_ms": safe_int(row.get("request_end_time_ms")),
        "theoretical_source_request_id": row.get("theoretical_source_request_id", ""),
        "pattern_labels": pattern_labels,
    }


def _compute_agentic_patterns(request_metric_rows):
    rows = [_normalize_request_metric_row(row) for row in request_metric_rows]
    sessions = defaultdict(list)
    request_to_session = {}
    for row in rows:
        sessions[row["session_id"]].append(row)
        request_to_session[row["request_id"]] = row["session_id"]

    for session_rows in sessions.values():
        session_rows.sort(key=lambda row: (row["request_ready_time_ms"], row["request_id"]))

    session_sizes = sorted(len(session_rows) for session_rows in sessions.values())
    total_requests = len(rows)
    total_sessions = len(session_sizes)

    bucket_defs = [
        ("1-2", 1, 2),
        ("3-9", 3, 9),
        ("10-19", 10, 19),
        ("20-49", 20, 49),
        ("50-99", 50, 99),
        ("100-149", 100, 149),
        ("150+", 150, None),
    ]
    session_turn_bucket_rows = []
    for label, low, high in bucket_defs:
        matched = [size for size in session_sizes if size >= low and (high is None or size <= high)]
        request_count = sum(matched)
        session_turn_bucket_rows.append(
            {
                "bucket": label,
                "session_count": len(matched),
                "session_fraction": safe_div(len(matched), total_sessions),
                "request_count": request_count,
                "request_fraction": safe_div(request_count, total_requests),
            }
        )

    request_fraction_rows = [
        {
            "metric": "request_has_tool_msg",
            "fraction": safe_div(sum(row["tool_msg_count"] > 0 for row in rows), total_requests),
        },
        {
            "metric": "request_declares_tools",
            "fraction": safe_div(sum(row["declared_tool_count"] > 0 for row in rows), total_requests),
        },
        {
            "metric": "request_tool_heavy",
            "fraction": safe_div(sum("tool-heavy" in row["pattern_labels"] for row in rows), total_requests),
        },
        {
            "metric": "request_tool_burst",
            "fraction": safe_div(sum("tool-burst" in row["pattern_labels"] for row in rows), total_requests),
        },
        {
            "metric": "request_cache_efficient",
            "fraction": safe_div(sum("cache-efficient" in row["pattern_labels"] for row in rows), total_requests),
        },
    ]

    pair_gap_ready_values = []
    pair_gap_end_values = []
    pair_count = 0
    prev_has_tool = 0
    next_has_tool = 0
    both_have_tool = 0
    next_cache_95 = 0
    next_cache_99 = 0
    append_like = 0
    append_like_after_tool = 0
    append_like_after_tool_pairs = 0
    same_or_longer_input = 0
    short_gap_pairs = 0
    append_like_within_30s = 0

    for session_rows in sessions.values():
        for previous, current in zip(session_rows, session_rows[1:]):
            pair_count += 1
            gap_ready = max(current["request_ready_time_ms"] - previous["request_ready_time_ms"], 0)
            gap_end = max(current["request_ready_time_ms"] - previous["request_end_time_ms"], 0)
            pair_gap_ready_values.append(gap_ready)
            pair_gap_end_values.append(gap_end)

            if current["input_tokens"] >= previous["input_tokens"]:
                same_or_longer_input += 1
            if previous["tool_msg_count"] > 0:
                prev_has_tool += 1
            if current["tool_msg_count"] > 0:
                next_has_tool += 1
            if previous["tool_msg_count"] > 0 and current["tool_msg_count"] > 0:
                both_have_tool += 1
            if current["cache_hit_ratio"] >= 0.95:
                next_cache_95 += 1
            if current["cache_hit_ratio"] >= 0.99:
                next_cache_99 += 1

            is_append_like = (
                current["input_tokens"] >= previous["input_tokens"]
                and current["cache_hit_ratio"] >= 0.95
                and current["uncached_prompt_tokens"] <= 4096
            )
            if is_append_like:
                append_like += 1
            if previous["tool_msg_count"] > 0:
                append_like_after_tool_pairs += 1
                if is_append_like:
                    append_like_after_tool += 1
            if gap_ready <= 30000:
                short_gap_pairs += 1
                if is_append_like:
                    append_like_within_30s += 1

    pair_fraction_rows = [
        {"metric": "pair_prev_has_tool", "fraction": safe_div(prev_has_tool, pair_count)},
        {"metric": "pair_next_has_tool", "fraction": safe_div(next_has_tool, pair_count)},
        {"metric": "pair_both_have_tool", "fraction": safe_div(both_have_tool, pair_count)},
        {"metric": "pair_next_cache_hit_ge_0_95", "fraction": safe_div(next_cache_95, pair_count)},
        {"metric": "pair_next_cache_hit_ge_0_99", "fraction": safe_div(next_cache_99, pair_count)},
        {"metric": "pair_same_or_longer_input", "fraction": safe_div(same_or_longer_input, pair_count)},
        {"metric": "pair_append_like_proxy", "fraction": safe_div(append_like, pair_count)},
        {
            "metric": "pair_append_like_after_tool",
            "fraction": safe_div(append_like_after_tool, append_like_after_tool_pairs),
        },
        {
            "metric": "pair_append_like_within_30s",
            "fraction": safe_div(append_like_within_30s, short_gap_pairs),
        },
    ]

    source_known = 0
    source_same_session = 0
    source_cross_session = 0
    for row in rows:
        source_request_id = row["theoretical_source_request_id"]
        if not source_request_id:
            continue
        source_known += 1
        if request_to_session.get(source_request_id) == row["session_id"]:
            source_same_session += 1
        else:
            source_cross_session += 1

    source_scope_rows = [
        {"scope": "same_session", "fraction": safe_div(source_same_session, source_known)},
        {"scope": "cross_session", "fraction": safe_div(source_cross_session, source_known)},
    ]

    gap_cdf_rows = {
        "cdf_session_inter_request_gap_ready_ms.csv": build_cdf(pair_gap_ready_values),
        "cdf_session_inter_request_gap_end_ms.csv": build_cdf(pair_gap_end_values),
    }

    summary = {
        "request_count": total_requests,
        "session_count": total_sessions,
        "session_turn_stats": {
            "min": min(session_sizes) if session_sizes else 0,
            "max": max(session_sizes) if session_sizes else 0,
            "p50": percentile(session_sizes, 0.5) if session_sizes else 0.0,
            "p90": percentile(session_sizes, 0.9) if session_sizes else 0.0,
            "p99": percentile(session_sizes, 0.99) if session_sizes else 0.0,
        },
        "session_turn_bucket_rows": session_turn_bucket_rows,
        "request_level_fraction_rows": request_fraction_rows,
        "pair_level_fraction_rows": pair_fraction_rows,
        "pair_gap_ready_ms_stats": series_stats(pair_gap_ready_values),
        "pair_gap_end_ms_stats": series_stats(pair_gap_end_values),
        "theoretical_source_scope": {
            "known_fraction": safe_div(source_known, total_requests),
            "same_session_fraction_of_known": safe_div(source_same_session, source_known),
            "cross_session_fraction_of_known": safe_div(source_cross_session, source_known),
        },
        "append_like_proxy_definition": (
            "next_input_tokens >= prev_input_tokens AND "
            "next_cache_hit_ratio >= 0.95 AND next_uncached_prompt_tokens <= 4096"
        ),
    }
    return {
        "summary": summary,
        "session_turn_bucket_rows": session_turn_bucket_rows,
        "request_fraction_rows": request_fraction_rows,
        "pair_fraction_rows": pair_fraction_rows,
        "source_scope_rows": source_scope_rows,
        "gap_cdf_rows": gap_cdf_rows,
    }
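
# The input rows may be in-memory dicts or csv.DictReader rows; every field is
# re-normalized by _normalize_request_metric_row, so string-typed values such as
# pattern_labels="tool-heavy;tool-burst" are accepted as-is.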


def write_agentic_outputs_from_rows(request_metric_rows, advanced_dir):
    advanced_dir = ensure_output_dir(advanced_dir)
    agentic = _compute_agentic_patterns(request_metric_rows)

    write_csv(advanced_dir / "agentic_session_turn_buckets.csv", agentic["session_turn_bucket_rows"])
    write_csv(advanced_dir / "agentic_request_level_fractions.csv", agentic["request_fraction_rows"])
    write_csv(advanced_dir / "agentic_pair_level_fractions.csv", agentic["pair_fraction_rows"])
    write_csv(advanced_dir / "agentic_theoretical_source_scope.csv", agentic["source_scope_rows"])
    for filename, rows in agentic["gap_cdf_rows"].items():
        write_csv(advanced_dir / filename, rows)

    plot_grouped_bar_chart(
        advanced_dir / "agentic_session_turn_buckets.png",
        agentic["session_turn_bucket_rows"],
        label_key="bucket",
        series_keys=[
            ("session_fraction", "session_fraction"),
            ("request_fraction", "request_fraction"),
        ],
        title="Session Turn Buckets: Session Share vs Request Share",
        xlabel="Session turn bucket",
        ylabel="Fraction",
    )
    plot_cdf_series(
        advanced_dir / "cdf_session_inter_request_gap_ms.png",
        [
            ("gap_from_prev_ready_ms", agentic["gap_cdf_rows"]["cdf_session_inter_request_gap_ready_ms.csv"]),
            ("gap_from_prev_end_ms", agentic["gap_cdf_rows"]["cdf_session_inter_request_gap_end_ms.csv"]),
        ],
        title="CDF of Session Inter-Request Gap",
        xlabel="Milliseconds",
    )
    plot_cdf_series_with_zoom_windows(
        advanced_dir / "cdf_session_inter_request_gap_ready_ms_zoom80.png",
        [
            ("gap_from_prev_ready_ms", agentic["gap_cdf_rows"]["cdf_session_inter_request_gap_ready_ms.csv"]),
        ],
        title="CDF of Session Inter-Request Gap: Ready to Ready",
        xlabel="Milliseconds",
        zoom_windows=[
            (0.10, 0.90, "Central 80% (p10-p90)"),
        ],
        start_y_at_value=0.0,
    )
    plot_cdf_series_with_zoom_windows(
        advanced_dir / "cdf_session_inter_request_gap_end_ms_zoom80.png",
        [
            ("gap_from_prev_end_ms", agentic["gap_cdf_rows"]["cdf_session_inter_request_gap_end_ms.csv"]),
        ],
        title="CDF of Session Inter-Request Gap: End to Ready",
        xlabel="Milliseconds",
        zoom_windows=[
            (0.10, 0.90, "Central 80% (p10-p90)"),
        ],
        start_y_at_value=0.0,
    )
    plot_cdf_series_with_zoom_windows(
        advanced_dir / "cdf_session_inter_request_gap_ready_ms_zoom90.png",
        [
            ("gap_from_prev_ready_ms", agentic["gap_cdf_rows"]["cdf_session_inter_request_gap_ready_ms.csv"]),
        ],
        title="CDF of Session Inter-Request Gap: Ready to Ready",
        xlabel="Milliseconds",
        zoom_windows=[
            (0.05, 0.95, "Central 90% (p05-p95)"),
        ],
        start_y_at_value=0.0,
    )
    plot_cdf_series_with_zoom_windows(
        advanced_dir / "cdf_session_inter_request_gap_end_ms_zoom90.png",
        [
            ("gap_from_prev_end_ms", agentic["gap_cdf_rows"]["cdf_session_inter_request_gap_end_ms.csv"]),
        ],
        title="CDF of Session Inter-Request Gap: End to Ready",
        xlabel="Milliseconds",
        zoom_windows=[
            (0.05, 0.95, "Central 90% (p05-p95)"),
        ],
        start_y_at_value=0.0,
    )
    plot_fraction_bar_chart(
        advanced_dir / "agentic_request_level_fractions.png",
        agentic["request_fraction_rows"],
        label_key="metric",
        value_key="fraction",
        title="Request-Level Agentic Fractions",
    )
    plot_fraction_bar_chart(
        advanced_dir / "agentic_pair_level_fractions.png",
        agentic["pair_fraction_rows"],
        label_key="metric",
        value_key="fraction",
        title="Pair-Level Agentic Fractions",
    )
    plot_fraction_bar_chart(
        advanced_dir / "agentic_theoretical_source_scope.png",
        agentic["source_scope_rows"],
        label_key="scope",
        value_key="fraction",
        title="Theoretical Prefix Reuse Scope",
    )

    summary_path = advanced_dir / "agentic_patterns_summary.json"
    with open(summary_path, "w", encoding="utf-8") as handle:
        json.dump(agentic["summary"], handle, ensure_ascii=False, indent=2)
    return agentic["summary"], summary_path
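
# Usage sketch (the output directory is illustrative):
#   summary, summary_path = write_agentic_outputs_from_rows(rows, Path("out/advanced"))
# This writes the CSV/PNG artifacts listed above plus agentic_patterns_summary.json
# and returns the in-memory summary together with the JSON path.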


def write_agentic_outputs_from_request_metrics_path(request_metrics_path, advanced_dir):
    with open(request_metrics_path, "r", encoding="utf-8") as handle:
        rows = []
        for row in csv.DictReader(handle):
            rows.append(
                {
                    "request_id": row.get("request_id", ""),
                    "session_id": row.get("session_id", ""),
                    "declared_tool_count": row.get("declared_tool_count", 0),
                    "tool_msg_count": row.get("tool_msg_count", 0),
                    "input_tokens": row.get("input_tokens", 0),
                    "uncached_prompt_tokens": row.get("uncached_prompt_tokens", 0),
                    "cache_hit_ratio": row.get("cache_hit_ratio", 0.0),
                    "request_ready_time_ms": row.get("request_ready_time_ms", 0),
                    "request_end_time_ms": row.get("request_end_time_ms", 0),
                    "theoretical_source_request_id": row.get("theoretical_source_request_id", ""),
                    "pattern_labels": row.get("pattern_labels", ""),
                }
            )
    return write_agentic_outputs_from_rows(rows, advanced_dir)


def _normalize_request_metric_input_length_row(row):
    return {
        "request_id": row.get("request_id", ""),
        "input_tokens": safe_int(row.get("input_tokens")),
        "theoretical_prompt_unit_length": safe_int(row.get("theoretical_prompt_unit_length")),
    }


def _build_input_length_comparison(request_metric_rows):
    rows = [_normalize_request_metric_input_length_row(row) for row in request_metric_rows]
    provider_values = []
    retokenized_values = []
    delta_values = []
    ratio_values = []
    relative_delta_values = []
    same_count = 0
    retokenized_gt_count = 0
    retokenized_lt_count = 0
    provider_zero_count = 0
    retokenized_zero_count = 0

    for row in rows:
        provider = row["input_tokens"]
        retokenized = row["theoretical_prompt_unit_length"]
        provider_values.append(provider)
        retokenized_values.append(retokenized)
        delta_values.append(retokenized - provider)
        if provider > 0:
            ratio_values.append(retokenized / provider)
            relative_delta_values.append((retokenized - provider) / provider)
        if provider == retokenized:
            same_count += 1
        elif retokenized > provider:
            retokenized_gt_count += 1
        else:
            retokenized_lt_count += 1
        if provider == 0:
            provider_zero_count += 1
        if retokenized == 0:
            retokenized_zero_count += 1

    summary = {
        "request_count": len(rows),
        "same_count": same_count,
        "same_fraction": safe_div(same_count, len(rows)),
        "retokenized_gt_provider_count": retokenized_gt_count,
        "retokenized_gt_provider_fraction": safe_div(retokenized_gt_count, len(rows)),
        "retokenized_lt_provider_count": retokenized_lt_count,
        "retokenized_lt_provider_fraction": safe_div(retokenized_lt_count, len(rows)),
        "provider_zero_count": provider_zero_count,
        "retokenized_zero_count": retokenized_zero_count,
        "provider_input_tokens_stats": series_stats(provider_values),
        "retokenized_prompt_tokens_stats": series_stats(retokenized_values),
        "delta_tokens_stats": series_stats(delta_values),
        "ratio_stats": series_stats(ratio_values),
        "relative_delta_vs_provider_stats": series_stats(relative_delta_values),
    }
    cdf_rows = {
        "cdf_retokenized_prompt_tokens.csv": build_cdf(retokenized_values),
        "cdf_input_length_delta_tokens.csv": build_cdf(delta_values),
        "cdf_input_length_ratio_retokenized_over_provider.csv": build_cdf(ratio_values),
        "cdf_input_length_relative_delta_vs_provider.csv": build_cdf(relative_delta_values),
    }
    provider_cdf_rows = build_cdf(provider_values)
    return summary, cdf_rows, provider_cdf_rows


def write_input_length_comparison_from_rows(request_metric_rows, advanced_dir):
    advanced_dir = ensure_output_dir(advanced_dir)
    summary, cdf_rows, provider_cdf_rows = _build_input_length_comparison(request_metric_rows)
    for filename, rows in cdf_rows.items():
        write_csv(advanced_dir / filename, rows)

    plot_cdf_series(
        advanced_dir / "cdf_input_length_provider_vs_retokenized.png",
        [
            ("provider_input_tokens", provider_cdf_rows),
            ("retokenized_prompt_tokens", cdf_rows["cdf_retokenized_prompt_tokens.csv"]),
        ],
        title="CDF of Provider vs Retokenized Input Length",
        xlabel="Tokens",
    )
    plot_cdf_series(
        advanced_dir / "cdf_input_length_delta_tokens.png",
        [("retokenized_minus_provider", cdf_rows["cdf_input_length_delta_tokens.csv"])],
        title="CDF of Retokenized - Provider Input Length",
        xlabel="Tokens",
    )
    plot_cdf_series(
        advanced_dir / "cdf_input_length_ratio.png",
        [("retokenized_over_provider", cdf_rows["cdf_input_length_ratio_retokenized_over_provider.csv"])],
        title="CDF of Retokenized / Provider Input Length",
        xlabel="Ratio",
    )

    summary_path = advanced_dir / "input_length_comparison_summary.json"
    with open(summary_path, "w", encoding="utf-8") as handle:
        json.dump(summary, handle, ensure_ascii=False, indent=2)
    return summary, summary_path


def write_input_length_comparison_from_request_metrics_path(request_metrics_path, advanced_dir):
    with open(request_metrics_path, "r", encoding="utf-8") as handle:
        rows = list(csv.DictReader(handle))
    return write_input_length_comparison_from_rows(rows, advanced_dir)


DEFAULT_INPUT_LENGTH_BUCKET_THRESHOLDS = [
    32 * 1024,
    85 * 1024,
    128 * 1024,
]


def parse_input_length_bucket_thresholds(spec):
    if spec is None:
        return list(DEFAULT_INPUT_LENGTH_BUCKET_THRESHOLDS)
    thresholds = []
    for chunk in str(spec).split(";"):
        text = chunk.strip()
        if not text:
            continue
        normalized = text.lower()
        multiplier = 1
        if normalized.endswith("ki"):
            multiplier = 1024
            normalized = normalized[:-2]
        elif normalized.endswith("mi"):
            multiplier = 1024 * 1024
            normalized = normalized[:-2]
        try:
            value = int(normalized) * multiplier
        except ValueError as exc:
            raise ValueError(
                f"Invalid input-length bucket threshold `{text}`. "
                "Use semicolon-separated token counts such as `32768;87040;131072` or `32Ki;85Ki;128Ki`."
            ) from exc
        if value <= 0:
            raise ValueError("Input-length bucket thresholds must be positive integers.")
        thresholds.append(value)
    if not thresholds:
        raise ValueError("At least one input-length bucket threshold is required.")
    return sorted(set(thresholds))
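
# Example: parse_input_length_bucket_thresholds("32Ki;85Ki;128Ki") and
# parse_input_length_bucket_thresholds("32768;87040;131072") both return
# [32768, 87040, 131072]; duplicates are dropped and the result is sorted.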


def _format_bucket_boundary(value):
    if value == 0:
        return "0"
    if value % (1024 * 1024) == 0:
        return f"{value // (1024 * 1024)}Mi"
    if value % 1024 == 0:
        return f"{value // 1024}Ki"
    return str(value)
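
# Examples: _format_bucket_boundary(87040) -> "85Ki",
# _format_bucket_boundary(1024 * 1024) -> "1Mi", _format_bucket_boundary(1000) -> "1000".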


def build_input_length_bucket_defs(thresholds=None):
    parsed_thresholds = (
        list(DEFAULT_INPUT_LENGTH_BUCKET_THRESHOLDS)
        if thresholds is None
        else sorted(set(int(value) for value in thresholds))
    )
    if not parsed_thresholds:
        raise ValueError("At least one input-length bucket threshold is required.")
    if any(value <= 0 for value in parsed_thresholds):
        raise ValueError("Input-length bucket thresholds must be positive integers.")
    if parsed_thresholds == DEFAULT_INPUT_LENGTH_BUCKET_THRESHOLDS:
        return [
            ("0-32Ki", 0, 32 * 1024),
            ("32-85Ki", 32 * 1024, 85 * 1024),
            ("85-128Ki", 85 * 1024, 128 * 1024),
            ("128Ki+", 128 * 1024, None),
        ]
    bucket_defs = []
    lower_bound = 0
    for upper_bound in parsed_thresholds:
        bucket_defs.append(
            (
                f"{_format_bucket_boundary(lower_bound)}-{_format_bucket_boundary(upper_bound)}",
                lower_bound,
                upper_bound,
            )
        )
        lower_bound = upper_bound
    bucket_defs.append((f"{_format_bucket_boundary(lower_bound)}+", lower_bound, None))
    return bucket_defs


def _rows_in_input_bucket(request_metric_rows, lower_bound, upper_bound):
    return [
        row
        for row in request_metric_rows
        if row["input_tokens"] >= lower_bound and (upper_bound is None or row["input_tokens"] < upper_bound)
    ]


def assign_input_length_bucket(input_tokens, bucket_defs=None):
    bucket_defs = bucket_defs or build_input_length_bucket_defs()
    for bucket_label, lower_bound, upper_bound in bucket_defs:
        if input_tokens >= lower_bound and (upper_bound is None or input_tokens < upper_bound):
            return bucket_label
    return bucket_defs[-1][0]
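
# Example with the default buckets: assign_input_length_bucket(40_000) -> "32-85Ki"
# (32768 <= 40000 < 87040), while assign_input_length_bucket(200_000) -> "128Ki+".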


def summarize_cache_reuse_by_input_length_bucket(request_metric_rows, bucket_defs=None):
    bucket_defs = bucket_defs or build_input_length_bucket_defs()
    total_requests = len(request_metric_rows)
    bucket_rows = []
    for bucket_label, lower_bound, upper_bound in bucket_defs:
        matched_rows = _rows_in_input_bucket(request_metric_rows, lower_bound, upper_bound)
        input_token_values = [row["input_tokens"] for row in matched_rows]
        retokenized_values = [row["theoretical_prompt_unit_length"] for row in matched_rows]
        actual_ratio_values = [row["cache_hit_ratio"] for row in matched_rows]
        theoretical_ratio_values = [row["theoretical_prefix_hit_ratio"] for row in matched_rows]
        bucketed_theoretical_ratio_values = [
            row.get("bucketed_theoretical_prefix_hit_ratio", row["theoretical_prefix_hit_ratio"])
            for row in matched_rows
        ]
        input_token_stats = series_stats(input_token_values)
        retokenized_stats = series_stats(retokenized_values)
        actual_ratio_stats = series_stats(actual_ratio_values)
        theoretical_ratio_stats = series_stats(theoretical_ratio_values)
        bucketed_theoretical_ratio_stats = series_stats(bucketed_theoretical_ratio_values)
        input_token_sum = sum(input_token_values)
        retokenized_sum = sum(retokenized_values)
        cached_token_sum = sum(row["cached_tokens"] for row in matched_rows)
        theoretical_hit_sum = sum(row["theoretical_prefix_hit_units"] for row in matched_rows)
        bucketed_theoretical_hit_sum = sum(
            row.get("bucketed_theoretical_prefix_hit_units", row["theoretical_prefix_hit_units"])
            for row in matched_rows
        )
        bucket_rows.append(
            {
                "bucket": bucket_label,
                "input_tokens_min_inclusive": lower_bound,
                "input_tokens_max_exclusive": upper_bound if upper_bound is not None else "",
                "request_count": len(matched_rows),
                "request_fraction": safe_div(len(matched_rows), total_requests),
                "provider_input_tokens_mean": input_token_stats["mean"],
                "provider_input_tokens_median": input_token_stats["median"],
                "provider_input_tokens_p90": input_token_stats["p90"],
                "retokenized_prompt_tokens_mean": retokenized_stats["mean"],
                "retokenized_prompt_tokens_median": retokenized_stats["median"],
                "retokenized_prompt_tokens_p90": retokenized_stats["p90"],
                "actual_cache_hit_ratio_mean": actual_ratio_stats["mean"],
                "actual_cache_hit_ratio_median": actual_ratio_stats["median"],
                "actual_cache_hit_ratio_p90": actual_ratio_stats["p90"],
                "theoretical_cache_hit_ratio_mean": theoretical_ratio_stats["mean"],
                "theoretical_cache_hit_ratio_median": theoretical_ratio_stats["median"],
                "theoretical_cache_hit_ratio_p90": theoretical_ratio_stats["p90"],
                "bucketed_theoretical_cache_hit_ratio_mean": bucketed_theoretical_ratio_stats["mean"],
                "bucketed_theoretical_cache_hit_ratio_median": bucketed_theoretical_ratio_stats["median"],
                "bucketed_theoretical_cache_hit_ratio_p90": bucketed_theoretical_ratio_stats["p90"],
                "weighted_actual_cache_hit_ratio": safe_div(cached_token_sum, input_token_sum),
                "weighted_theoretical_cache_hit_ratio": safe_div(theoretical_hit_sum, retokenized_sum),
                "weighted_bucketed_theoretical_cache_hit_ratio": safe_div(
                    bucketed_theoretical_hit_sum, retokenized_sum
                ),
                "weighted_bucket_boundary_loss_ratio": safe_div(
                    max(theoretical_hit_sum - bucketed_theoretical_hit_sum, 0),
                    retokenized_sum,
                ),
                "actual_reused_request_fraction": safe_div(
                    sum(row["cached_tokens"] > 0 for row in matched_rows),
                    len(matched_rows),
                ),
                "theoretical_reused_request_fraction": safe_div(
                    sum(row["theoretical_prefix_hit_units"] > 0 for row in matched_rows),
                    len(matched_rows),
                ),
                "bucketed_theoretical_reused_request_fraction": safe_div(
                    sum(
                        row.get("bucketed_theoretical_prefix_hit_units", row["theoretical_prefix_hit_units"]) > 0
                        for row in matched_rows
                    ),
                    len(matched_rows),
                ),
                "actual_cached_tokens_sum": cached_token_sum,
                "theoretical_hit_units_sum": theoretical_hit_sum,
                "bucketed_theoretical_hit_units_sum": bucketed_theoretical_hit_sum,
            }
        )

    summary = {
        "bucket_definition": {
            "unit": "tokens",
            "k_definition": 1024,
            "buckets": [
                {
                    "bucket": bucket_label,
                    "input_tokens_min_inclusive": lower_bound,
                    "input_tokens_max_exclusive": upper_bound,
                }
                for bucket_label, lower_bound, upper_bound in bucket_defs
            ],
        },
        "request_count": total_requests,
        "bucket_rows": bucket_rows,
    }
    return summary, bucket_rows


def write_cache_reuse_by_input_length_bucket_from_rows(request_metric_rows, advanced_dir, bucket_defs=None):
    advanced_dir = ensure_output_dir(advanced_dir)
    summary, bucket_rows = summarize_cache_reuse_by_input_length_bucket(
        request_metric_rows,
        bucket_defs=bucket_defs,
    )
    csv_path = write_csv(advanced_dir / "input_length_bucket_cache_reuse.csv", bucket_rows)
    plot_path = plot_grouped_bar_chart(
        advanced_dir / "input_length_bucket_cache_reuse.png",
        bucket_rows,
        label_key="bucket",
        series_keys=[
            ("weighted_actual_cache_hit_ratio", "actual_weighted_hit_ratio"),
            ("weighted_theoretical_cache_hit_ratio", "theoretical_weighted_hit_ratio"),
            ("weighted_bucketed_theoretical_cache_hit_ratio", "bucketed_theoretical_weighted_hit_ratio"),
        ],
        title="Weighted Cache Hit Ratio by Provider Input-Length Bucket",
        xlabel="Input-length bucket",
        ylabel="Weighted hit ratio",
    )
    summary_path = advanced_dir / "input_length_bucket_cache_reuse_summary.json"
    with open(summary_path, "w", encoding="utf-8") as handle:
        json.dump(summary, handle, ensure_ascii=False, indent=2)
    return summary, summary_path, csv_path, plot_path


def write_cache_reuse_by_input_length_bucket_from_request_metrics_path(
    request_metrics_path,
    advanced_dir,
    bucket_defs=None,
):
    with open(request_metrics_path, "r", encoding="utf-8") as handle:
        rows = []
        for row in csv.DictReader(handle):
            rows.append(
                {
                    "input_tokens": safe_int(row.get("input_tokens")),
                    "cached_tokens": safe_int(row.get("cached_tokens")),
                    "cache_hit_ratio": safe_float(row.get("cache_hit_ratio")),
                    "theoretical_prompt_unit_length": safe_int(row.get("theoretical_prompt_unit_length")),
                    "theoretical_prefix_hit_units": safe_int(row.get("theoretical_prefix_hit_units")),
                    "theoretical_prefix_hit_ratio": safe_float(row.get("theoretical_prefix_hit_ratio")),
                    "bucketed_theoretical_prefix_hit_units": safe_int(
                        row.get("bucketed_theoretical_prefix_hit_units", row.get("theoretical_prefix_hit_units"))
                    ),
                    "bucketed_theoretical_prefix_hit_ratio": safe_float(
                        row.get("bucketed_theoretical_prefix_hit_ratio", row.get("theoretical_prefix_hit_ratio"))
                    ),
                }
            )
    return write_cache_reuse_by_input_length_bucket_from_rows(rows, advanced_dir, bucket_defs=bucket_defs)


def summarize_session_bucket_boundary_miss(parent_child_rows, bucket_defs=None):
    bucket_defs = bucket_defs or build_input_length_bucket_defs()
    normalized_rows = []
    for row in parent_child_rows:
        child_input_tokens = safe_int(row.get("child_input_tokens"))
        child_bucket = row.get("child_bucket") or assign_input_length_bucket(child_input_tokens, bucket_defs)
        normalized_rows.append(
            {
                "session_id": row.get("session_id", ""),
                "parent_request_id": row.get("parent_request_id", ""),
                "child_request_id": row.get("child_request_id", ""),
                "parent_bucket": row.get("parent_bucket", ""),
                "child_bucket": child_bucket,
                "shared_prefix_units": safe_int(row.get("shared_prefix_units")),
                "is_cross_bucket": safe_int(row.get("is_cross_bucket")),
            }
        )

    total_edge_count = len(normalized_rows)
    total_reusable_edge_count = sum(row["shared_prefix_units"] > 0 for row in normalized_rows)
    total_cross_bucket_edge_count = sum(row["is_cross_bucket"] for row in normalized_rows)
    total_shared_prefix_units = sum(row["shared_prefix_units"] for row in normalized_rows)
    total_cross_bucket_shared_prefix_units = sum(
        row["shared_prefix_units"] for row in normalized_rows if row["is_cross_bucket"]
    )

    bucket_rows = []
    for bucket_label, _, _ in bucket_defs:
        matched_rows = [row for row in normalized_rows if row["child_bucket"] == bucket_label]
        shared_prefix_units = sum(row["shared_prefix_units"] for row in matched_rows)
        cross_bucket_shared_prefix_units = sum(
            row["shared_prefix_units"] for row in matched_rows if row["is_cross_bucket"]
        )
        reusable_edge_count = sum(row["shared_prefix_units"] > 0 for row in matched_rows)
        cross_bucket_edge_count = sum(row["is_cross_bucket"] for row in matched_rows)
        bucket_rows.append(
            {
                "bucket": bucket_label,
                "edge_count": len(matched_rows),
                "edge_fraction": safe_div(len(matched_rows), total_edge_count),
                "reusable_edge_count": reusable_edge_count,
                "cross_bucket_edge_count": cross_bucket_edge_count,
                "cross_bucket_edge_fraction": safe_div(cross_bucket_edge_count, len(matched_rows)),
                "shared_prefix_units_sum": shared_prefix_units,
                "cross_bucket_shared_prefix_units_sum": cross_bucket_shared_prefix_units,
                "cross_bucket_shared_prefix_unit_fraction": safe_div(
                    cross_bucket_shared_prefix_units, shared_prefix_units
                ),
            }
        )

    summary = {
        "edge_count": total_edge_count,
        "reusable_edge_count": total_reusable_edge_count,
        "cross_bucket_edge_count": total_cross_bucket_edge_count,
        "cross_bucket_edge_fraction": safe_div(total_cross_bucket_edge_count, total_edge_count),
        "shared_prefix_units_sum": total_shared_prefix_units,
        "cross_bucket_shared_prefix_units_sum": total_cross_bucket_shared_prefix_units,
        "cross_bucket_shared_prefix_unit_fraction": safe_div(
            total_cross_bucket_shared_prefix_units, total_shared_prefix_units
        ),
        "bucket_rows": bucket_rows,
    }
    return summary, bucket_rows


def write_session_bucket_boundary_miss_from_rows(parent_child_rows, advanced_dir, bucket_defs=None):
    advanced_dir = ensure_output_dir(advanced_dir)
    summary, bucket_rows = summarize_session_bucket_boundary_miss(parent_child_rows, bucket_defs=bucket_defs)
    csv_path = write_csv(advanced_dir / "session_bucket_boundary_miss.csv", bucket_rows)
    plot_path = plot_grouped_bar_chart(
        advanced_dir / "session_bucket_boundary_miss.png",
        bucket_rows,
        label_key="bucket",
        series_keys=[
            ("cross_bucket_edge_fraction", "cross_bucket_edge_fraction"),
            ("cross_bucket_shared_prefix_unit_fraction", "cross_bucket_shared_prefix_unit_fraction"),
        ],
        title="Session Bucket Boundary Miss by Child Input-Length Bucket",
        xlabel="Child input-length bucket",
        ylabel="Fraction",
    )
    summary_path = advanced_dir / "session_bucket_boundary_miss_summary.json"
    with open(summary_path, "w", encoding="utf-8") as handle:
        json.dump(summary, handle, ensure_ascii=False, indent=2)
    return summary, summary_path, csv_path, plot_path


def build_alive_block_timeline(block_rows):
    events = Counter()
    for row in block_rows:
        start_ms = safe_int(row.get("first_seen_ms"))
        end_ms = safe_int(row.get("span_end_ms", row.get("last_reuse_ms", row.get("first_seen_ms"))))
        if start_ms <= 0 and end_ms <= 0:
            continue
        if end_ms < start_ms:
            end_ms = start_ms
        events[start_ms] += 1
        events[end_ms + 1] -= 1

    alive_rows = []
    alive_count = 0
    peak_alive_blocks = 0
    for timestamp_ms in sorted(events):
        alive_count += events[timestamp_ms]
        peak_alive_blocks = max(peak_alive_blocks, alive_count)
        alive_rows.append(
            {
                "timestamp_ms": timestamp_ms,
                "delta_alive_blocks": events[timestamp_ms],
                "alive_block_count": alive_count,
            }
        )

    summary = {
        "event_count": len(alive_rows),
        "peak_alive_blocks": peak_alive_blocks,
        "first_timestamp_ms": alive_rows[0]["timestamp_ms"] if alive_rows else 0,
        "last_timestamp_ms": alive_rows[-1]["timestamp_ms"] if alive_rows else 0,
    }
    return summary, alive_rows
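
# Sweep-line illustration (hypothetical rows): blocks alive over [0, 10] ms and
# [5, 20] ms yield +1 events at t=0 and t=5 and -1 events at t=11 and t=21 (the
# end timestamp is inclusive, hence end_ms + 1), for a peak of 2 alive blocks.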


def _normalize_source_payload(row):
    meta = row.get("meta")
    meta = meta if isinstance(meta, dict) else {}
    usage = row.get("usage")
    usage = usage if isinstance(usage, dict) else {}
    messages = row.get("messages")
    if not isinstance(messages, list):
        messages = row.get("message_events", [])
    role_sequence = row.get("role_sequence", [])
    if not role_sequence and isinstance(messages, list):
        role_sequence = [str(message.get("role", "")) for message in messages if isinstance(message, dict)]
    declared_tools = row.get("declared_tools", [])
    return {
        "meta": meta,
        "usage": usage,
        "canonical_prompt": str(row.get("canonical_prompt", "")),
        "declared_tools": [tool for tool in declared_tools if isinstance(tool, dict)],
        "messages": [message for message in messages if isinstance(message, dict)],
        "role_sequence": [str(role) for role in role_sequence],
    }


def _iter_source_message_minimal(source_path):
    with open(source_path, "r", encoding="utf-8") as handle:
        for line in handle:
            payload = _normalize_source_payload(json.loads(line))
            meta = payload["meta"]
            yield {
                "request_id": meta["request_id"],
                "session_id": meta["session_id"],
                "request_ready_time_ms": safe_int(meta.get("request_ready_time_ms")),
                "request_end_time_ms": safe_int(meta.get("request_end_time_ms")),
                "messages": payload["messages"],
                "role_sequence": payload["role_sequence"],
            }


def _message_signature(message):
    return message.get("role", "")


def _common_prefix_message_count(previous_messages, current_messages):
    count = 0
    for previous, current in zip(previous_messages, current_messages):
        if _message_signature(previous) != _message_signature(current):
            break
        count += 1
    return count
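
# Note: the signature is only the message role, so this measures how many leading
# messages agree by role, not by content; e.g. roles [system, user, assistant] vs
# [system, user, user] share a prefix of length 2.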


def _classify_transition(previous_messages, current_messages):
    common_prefix_count = _common_prefix_message_count(previous_messages, current_messages)
    appended_messages = current_messages[common_prefix_count:]
    appended_roles = [message.get("role", "unknown") for message in appended_messages]
    first_new_role = appended_roles[0] if appended_roles else ""

    if not appended_messages:
        if len(current_messages) < len(previous_messages):
            return {
                "common_prefix_message_count": common_prefix_count,
                "appended_message_count": 0,
                "appended_roles": "",
                "first_new_role": "",
                "trigger_group": "compaction",
                "trigger_detail": "context_shrunk_without_append",
                "appended_text_len": 0,
            }
        return {
            "common_prefix_message_count": common_prefix_count,
            "appended_message_count": 0,
            "appended_roles": "",
            "first_new_role": "",
            "trigger_group": "no_change",
            "trigger_detail": "no_new_messages",
            "appended_text_len": 0,
        }

    appended_text_len = sum(safe_int(message.get("text_len")) for message in appended_messages)
    appended_roles_head = ";".join(appended_roles[:8])
    if len(appended_roles) > 8:
        appended_roles_head += ";..."
    trigger_group = "unknown"
    trigger_detail = "unknown"

    if first_new_role == "user":
        trigger_group = "user"
        trigger_detail = "user_first"
    elif first_new_role == "tool":
        trigger_group = "tool"
        trigger_detail = "tool_first"
    elif first_new_role == "assistant":
        roles_after_first = appended_roles[1:]
        if "tool" in roles_after_first and (
            "user" not in roles_after_first or roles_after_first.index("tool") < roles_after_first.index("user")
        ):
            trigger_group = "tool"
            trigger_detail = "assistant_then_tool"
        elif "user" in roles_after_first:
            trigger_group = "user"
            trigger_detail = "assistant_then_user"
        else:
            trigger_group = "assistant"
            trigger_detail = "assistant_first"
    elif first_new_role == "system":
        trigger_group = "system"
        trigger_detail = "system_first"
    else:
        trigger_group = first_new_role or "unknown"
        trigger_detail = f"{trigger_group}_first" if first_new_role else "unknown"

    return {
        "common_prefix_message_count": common_prefix_count,
        "appended_message_count": len(appended_messages),
        "appended_roles": appended_roles_head,
        "first_new_role": first_new_role,
        "trigger_group": trigger_group,
        "trigger_detail": trigger_detail,
        "appended_text_len": appended_text_len,
    }


def build_transition_markdown_section(transition_summary):
    if not transition_summary:
        return ""
    lines = [
        "## Session Transition Analysis",
        "- This section analyzes each `prev_request -> next_request` transition inside a session.",
        f"- Transition count: {transition_summary['transition_count']}",
        f"- Negative signed-delta transitions: {transition_summary['negative_delta_count']} ({transition_summary['negative_delta_fraction']:.4f})",
        f"- Trigger proportions: {json.dumps(transition_summary['trigger_group_stats'], ensure_ascii=False)}",
        f"- Source scope proportions: {json.dumps(transition_summary['source_scope_stats'], ensure_ascii=False)}",
        f"- Signed context delta ratio stats vs current context: {json.dumps(transition_summary['delta_fraction_current_stats'], ensure_ascii=False)}",
        f"- Absolute context delta ratio stats vs current context: {json.dumps(transition_summary['abs_delta_fraction_current_stats'], ensure_ascii=False)}",
        f"- Uncached prompt fraction stats vs current context: {json.dumps(transition_summary['uncached_fraction_current_stats'], ensure_ascii=False)}",
        f"- Source gap stats (ms): {json.dumps(transition_summary['source_gap_ms_stats'], ensure_ascii=False)}",
        "",
        "Trigger groups:",
        "| trigger_group | count | fraction | negative_delta_fraction | p50_abs_delta_fraction_of_current | p90_abs_delta_fraction_of_current | p50_uncached_fraction_of_current | p90_uncached_fraction_of_current |",
        "| --- | --- | --- | --- | --- | --- | --- | --- |",
    ]
    for row in transition_summary["trigger_group_rows"]:
        lines.append(
            f"| {row['trigger_group']} | {row['count']} | {row['fraction']:.4f} | "
            f"{row['negative_delta_fraction']:.4f} | "
            f"{row['p50_abs_delta_fraction_of_current']:.4f} | {row['p90_abs_delta_fraction_of_current']:.4f} | "
            f"{row['p50_uncached_fraction_of_current']:.4f} | {row['p90_uncached_fraction_of_current']:.4f} |"
        )

    lines.extend(
        [
            "",
            "KV-cache source scope:",
            "| source_scope | count | fraction | negative_delta_fraction | p50_source_gap_ms | p90_source_gap_ms |",
            "| --- | --- | --- | --- | --- | --- |",
        ]
    )
    for row in transition_summary["source_scope_rows"]:
        lines.append(
            f"| {row['source_scope']} | {row['count']} | {row['fraction']:.4f} | "
            f"{row['negative_delta_fraction']:.4f} | "
            f"{row['p50_source_gap_ms']:.1f} | {row['p90_source_gap_ms']:.1f} |"
        )
    return "\n".join(lines)


def build_retokenized_transition_markdown_section(retokenized_transition_summary, provider_transition_summary=None):
    if not retokenized_transition_summary:
        return ""
    lines = [
        "## Retokenized Transition Length Analysis",
        "- This section recomputes session transition length change using analyzer-retokenized `canonical_prompt` length (`theoretical_prompt_unit_length`) instead of provider `usage.input_tokens`.",
        f"- Transition count: {retokenized_transition_summary['transition_count']}",
        f"- Negative signed-delta transitions: {retokenized_transition_summary['negative_delta_count']} ({retokenized_transition_summary['negative_delta_fraction']:.4f})",
        f"- Signed context delta ratio stats vs current retokenized context: {json.dumps(retokenized_transition_summary['delta_fraction_current_stats'], ensure_ascii=False)}",
        f"- Absolute context delta ratio stats vs current retokenized context: {json.dumps(retokenized_transition_summary['abs_delta_fraction_current_stats'], ensure_ascii=False)}",
        f"- Delta token stats (`next_retokenized - prev_retokenized`): {json.dumps(retokenized_transition_summary['delta_tokens_stats'], ensure_ascii=False)}",
        f"- Absolute delta token stats: {json.dumps(retokenized_transition_summary['abs_delta_tokens_stats'], ensure_ascii=False)}",
    ]
    if provider_transition_summary:
        lines.extend(
            [
                f"- Provider-length negative signed-delta fraction: {provider_transition_summary['negative_delta_fraction']:.4f}",
                f"- Retokenized-length negative signed-delta fraction: {retokenized_transition_summary['negative_delta_fraction']:.4f}",
            ]
        )

    lines.extend(
        [
            "",
            "Trigger groups:",
            "| trigger_group | count | fraction | negative_delta_fraction | p10_signed_delta_fraction_of_current | p50_signed_delta_fraction_of_current | p90_signed_delta_fraction_of_current | p95_signed_delta_fraction_of_current | p50_abs_delta_fraction_of_current | p90_abs_delta_fraction_of_current |",
            "| --- | --- | --- | --- | --- | --- | --- | --- | --- | --- |",
        ]
    )
    for row in retokenized_transition_summary["trigger_group_rows"]:
        lines.append(
            f"| {row['trigger_group']} | {row['count']} | {row['fraction']:.4f} | {row['negative_delta_fraction']:.4f} | "
            f"{row['p10_signed_delta_fraction_of_current']:.4f} | {row['p50_signed_delta_fraction_of_current']:.4f} | "
            f"{row['p90_signed_delta_fraction_of_current']:.4f} | {row['p95_signed_delta_fraction_of_current']:.4f} | "
            f"{row['p50_abs_delta_fraction_of_current']:.4f} | {row['p90_abs_delta_fraction_of_current']:.4f} |"
        )
    return "\n".join(lines)


def _load_request_metric_lookup(request_metrics_path):
    lookup = {}
    with open(request_metrics_path, "r", encoding="utf-8") as handle:
        for row in csv.DictReader(handle):
            lookup[row.get("request_id", "")] = {
                "request_id": row.get("request_id", ""),
                "session_id": row.get("session_id", ""),
                "assistant_msg_count": safe_int(row.get("assistant_msg_count")),
                "tool_msg_count": safe_int(row.get("tool_msg_count")),
                "user_msg_count": safe_int(row.get("user_msg_count")),
                "system_msg_count": safe_int(row.get("system_msg_count")),
                "declared_tool_count": safe_int(row.get("declared_tool_count")),
                "input_tokens": safe_int(row.get("input_tokens")),
                "theoretical_prompt_unit_length": safe_int(row.get("theoretical_prompt_unit_length")),
                "cache_hit_ratio": safe_float(row.get("cache_hit_ratio")),
                "uncached_prompt_tokens": safe_int(row.get("uncached_prompt_tokens")),
            }
    return lookup


def _load_retokenized_transition_lookup(path):
    lookup = {}
    with open(path, "r", encoding="utf-8") as handle:
        for row in csv.DictReader(handle):
            lookup[(row.get("prev_request_id", ""), row.get("next_request_id", ""))] = {
                "delta_retokenized_prompt_tokens": safe_int(row.get("delta_retokenized_prompt_tokens")),
                "delta_retokenized_fraction_of_current": safe_float(
                    row.get("delta_retokenized_fraction_of_current")
                ),
                "prev_retokenized_prompt_tokens": safe_int(row.get("prev_retokenized_prompt_tokens")),
                "next_retokenized_prompt_tokens": safe_int(row.get("next_retokenized_prompt_tokens")),
            }
    return lookup


def _classify_context_change_mechanism(row):
    trigger_group = row["trigger_group"]
    prev_count = row["prev_message_count"]
    next_count = row["next_message_count"]
    common_prefix = row["common_prefix_message_count"]

    if trigger_group == "compaction":
        return "compaction"

    if trigger_group == "no_change" and row["appended_message_count"] == 0:
        return "no_change_role_stable"

    hard_reset_next_threshold = max(4, int(prev_count * 0.20))
    hard_reset_prefix_threshold = max(2, int(prev_count * 0.10))
    if next_count <= hard_reset_next_threshold and common_prefix <= hard_reset_prefix_threshold:
        return f"{trigger_group}_hard_reset"
    if next_count < prev_count:
        return f"{trigger_group}_history_trimmed"
    return f"{trigger_group}_context_rebuilt_shorter"


def _message_slice_summary(messages, offset=0, top_n=5):
    role_counts = Counter()
    role_text_lens = Counter()
    total_text_len = 0
    items = []
    for index, message in enumerate(messages):
        role = message.get("role", "unknown")
        text_len = safe_int(message.get("text_len"))
        role_counts[role] += 1
        role_text_lens[role] += text_len
        total_text_len += text_len
        items.append(
            {
                "index": offset + index,
                "role": role,
                "text_len": text_len,
                "content_type": message.get("content_type", ""),
                "item_count": safe_int(message.get("item_count")),
                "has_cache_control": bool(message.get("has_cache_control")),
            }
        )
    items.sort(key=lambda item: item["text_len"], reverse=True)
    return {
        "count": len(messages),
        "total_text_len": total_text_len,
        "role_counts": dict(role_counts),
        "role_text_lens": dict(role_text_lens),
        "top_messages": items[:top_n],
    }


def _format_counter(counter_dict):
    if not counter_dict:
        return "-"
    items = sorted(counter_dict.items(), key=lambda item: (-item[1], item[0]))
    return ", ".join(f"{key}={value}" for key, value in items)


def _format_top_messages(items):
    if not items:
        return "-"
    parts = []
    for item in items:
        cache_tag = " cache" if item["has_cache_control"] else ""
        parts.append(
            f"#{item['index']}:{item['role']} len={item['text_len']} "
            f"type={item['content_type']} items={item['item_count']}{cache_tag}"
        )
    return " | ".join(parts)


def _load_source_payloads(source_path, request_ids):
    source_path = Path(source_path)
    wanted_request_ids = {request_id for request_id in request_ids if request_id}
    payloads = {}
    if not wanted_request_ids:
        return payloads

    try:
        cmd = ["rg", "-F", "--no-heading"]
        for request_id in sorted(wanted_request_ids):
            cmd.extend(["-e", request_id])
        cmd.append(str(source_path))
        result = subprocess.run(
            cmd,
            capture_output=True,
            text=True,
            encoding="utf-8",
            check=False,
        )
        if result.returncode in {0, 1}:
            for line in result.stdout.splitlines():
                payload = _normalize_source_payload(json.loads(line))
                request_id = payload["meta"].get("request_id", "")
                if request_id in wanted_request_ids:
                    payloads[request_id] = payload
    except Exception:
        pass

    if len(payloads) >= len(wanted_request_ids):
        return payloads

    with open(source_path, "r", encoding="utf-8") as handle:
        for line in handle:
            payload = _normalize_source_payload(json.loads(line))
            request_id = payload["meta"].get("request_id", "")
            if request_id in wanted_request_ids:
                payloads[request_id] = payload
                if len(payloads) >= len(wanted_request_ids):
                    break
    return payloads
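
# Note: the ripgrep (rg) fast path above is best-effort. Exit codes 0 and 1 mean
# "matches found" and "no matches"; any other failure (rg not installed, I/O error)
# simply leaves payloads incomplete and triggers the sequential JSONL fallback scan.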


def write_representative_pair_raw_messages(
    source_path,
    representative_pairs_path,
    output_path,
):
    source_path = Path(source_path)
    representative_pairs_path = Path(representative_pairs_path)
    output_path = Path(output_path)

    with open(representative_pairs_path, "r", encoding="utf-8") as handle:
        representative_pairs = json.load(handle)

    pairs = representative_pairs.get("pairs", [])
    request_to_pair_refs = defaultdict(list)
    for pair in pairs:
        prev_request_id = pair.get("prev_request_id", "")
        next_request_id = pair.get("next_request_id", "")
        if prev_request_id:
            request_to_pair_refs[prev_request_id].append(("prev", pair))
        if next_request_id:
            request_to_pair_refs[next_request_id].append(("next", pair))

    request_payloads = _load_source_payloads(source_path, request_to_pair_refs.keys())

    output_path.parent.mkdir(parents=True, exist_ok=True)
    with open(output_path, "w", encoding="utf-8") as handle:
        for pair in pairs:
            prev_payload = request_payloads.get(pair.get("prev_request_id", ""), {})
            next_payload = request_payloads.get(pair.get("next_request_id", ""), {})
            row = {
                "category": pair.get("category", ""),
                "session_id": pair.get("session_id", ""),
                "trigger_group": pair.get("trigger_group", ""),
                "trigger_detail": pair.get("trigger_detail", ""),
                "source_scope": pair.get("source_scope", ""),
                "source_gap_ms": pair.get("source_gap_ms", 0),
                "gap_from_prev_ready_ms": pair.get("gap_from_prev_ready_ms", 0),
                "gap_from_prev_end_ms": pair.get("gap_from_prev_end_ms", 0),
                "prev_request_id": pair.get("prev_request_id", ""),
                "next_request_id": pair.get("next_request_id", ""),
                "provider": {
                    "prev_input_tokens": pair.get("prev_input_tokens", 0),
                    "next_input_tokens": pair.get("next_input_tokens", 0),
                    "delta_input_tokens": pair.get("delta_input_tokens", 0),
                    "delta_input_fraction_of_current": pair.get("delta_input_fraction_of_current", 0.0),
                },
                "retokenized": {
                    "prev_prompt_tokens": pair.get("prev_retokenized_prompt_tokens", 0),
                    "next_prompt_tokens": pair.get("next_retokenized_prompt_tokens", 0),
                    "delta_prompt_tokens": pair.get("delta_retokenized_prompt_tokens", 0),
                    "delta_fraction_of_current": pair.get("delta_retokenized_fraction_of_current", 0.0),
                },
                "prev": {
                    "meta": prev_payload.get("meta", {}),
                    "usage": prev_payload.get("usage", {}),
                    "role_sequence": prev_payload.get("role_sequence", []),
                    "messages": prev_payload.get("messages", []),
                    "declared_tools": prev_payload.get("declared_tools", []),
                },
                "next": {
                    "meta": next_payload.get("meta", {}),
                    "usage": next_payload.get("usage", {}),
                    "role_sequence": next_payload.get("role_sequence", []),
                    "messages": next_payload.get("messages", []),
                    "declared_tools": next_payload.get("declared_tools", []),
                },
            }
            handle.write(json.dumps(row, ensure_ascii=False) + "\n")
    return output_path
|
|
|
|
|
|
def build_context_change_markdown_section(context_change_summary):
    if not context_change_summary:
        return ""
    lines = [
        "## Context Change Deep Dive",
        "- This section focuses on why `input_tokens` can shrink between two consecutive requests in the same session.",
        "- We join provider-length transitions with retokenized transitions to separate true prompt shrink from provider-only accounting / serialization shrink.",
        "- Important caveat: `common_prefix_message_count` is based on message-role alignment, not exact message-content equality. The representative cases below therefore explain structural change at the message-summary level (`role`, `text_len`, `item_count`).",
        f"- Negative provider-length transitions: {context_change_summary['negative_provider_transition_count']} "
        f"({context_change_summary['negative_provider_fraction_of_all_transitions']:.4f} of all transitions)",
        f"- Sign agreement on those negative provider transitions: {json.dumps(context_change_summary['agreement_stats'], ensure_ascii=False)}",
        f"- Representative cases: `{DETAILS_DIR_NAME}/context_change_casebook.md`",
        "",
        "Generated figures:",
        f"- `{DETAILS_DIR_NAME}/context_change_mechanism_counts.png`",
        f"- `{DETAILS_DIR_NAME}/context_change_sign_agreement.png`",
        "",
        "Mechanism summary:",
        "| category | count | fraction_of_negative_provider | both_shrink_fraction | provider_only_shrink_fraction | p50_provider_delta_fraction_of_current | p50_retokenized_delta_fraction_of_current | p50_prev_message_count | p50_next_message_count | p50_common_prefix_message_count | p50_prev_tool_msg_count | p50_next_tool_msg_count |",
        "| --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- |",
    ]
    for row in context_change_summary["category_rows"]:
        lines.append(
            f"| {row['category']} | {row['count']} | {row['fraction_of_negative_provider']:.4f} | "
            f"{row['both_shrink_fraction']:.4f} | {row['provider_only_shrink_fraction']:.4f} | "
            f"{row['p50_provider_delta_fraction_of_current']:.4f} | "
            f"{row['p50_retokenized_delta_fraction_of_current']:.4f} | "
            f"{row['p50_prev_message_count']:.1f} | {row['p50_next_message_count']:.1f} | "
            f"{row['p50_common_prefix_message_count']:.1f} | "
            f"{row['p50_prev_tool_msg_count']:.1f} | {row['p50_next_tool_msg_count']:.1f} |"
        )
    return "\n".join(lines)


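# Worked example for the two agreement buckets used below (numbers are
# illustrative, not taken from any trace):
#   provider edge: prev_input_tokens=12000 -> next_input_tokens=9000 gives
#   delta_input_fraction_of_current = (9000 - 12000) / 9000 ~= -0.33, so the
#   transition is in scope. If the matching retokenized edge also shrinks
#   (delta_retokenized_fraction_of_current < 0) the pair counts as
#   "both_shrink" (the serialized prompt really got shorter); otherwise it is
#   "provider_only_shrink" (provider-side accounting or serialization changed
#   while the retokenized prompt did not).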
def write_context_change_deep_dive_from_existing(source_path, request_metrics_path, advanced_dir):
    advanced_dir = ensure_output_dir(Path(advanced_dir))
    provider_edges_path = advanced_dir / "session_transition_edges.csv"
    retokenized_edges_path = advanced_dir / "session_transition_retokenized_edges.csv"
    request_lookup = _load_request_metric_lookup(request_metrics_path)
    retokenized_lookup = _load_retokenized_transition_lookup(retokenized_edges_path)

    negative_rows = []
    category_values = defaultdict(
        lambda: {
            "provider_delta_fraction_of_current": [],
            "retokenized_delta_fraction_of_current": [],
            "prev_message_count": [],
            "next_message_count": [],
            "common_prefix_message_count": [],
            "prev_tool_msg_count": [],
            "next_tool_msg_count": [],
            "both_shrink_count": 0,
            "provider_only_shrink_count": 0,
        }
    )
    agreement_counter = Counter()
    transition_count = 0

    with open(provider_edges_path, "r", encoding="utf-8") as handle:
        for row in csv.DictReader(handle):
            transition_count += 1
            provider_delta_fraction = safe_float(row.get("delta_input_fraction_of_current"))
            if provider_delta_fraction >= 0:
                continue

            prev_request_id = row.get("prev_request_id", "")
            next_request_id = row.get("next_request_id", "")
            retokenized = retokenized_lookup.get((prev_request_id, next_request_id), {})
            retokenized_delta_fraction = safe_float(retokenized.get("delta_retokenized_fraction_of_current"))
            retokenized_negative = retokenized_delta_fraction < 0
            category = _classify_context_change_mechanism(
                {
                    "trigger_group": row.get("trigger_group", ""),
                    "prev_message_count": safe_int(row.get("prev_message_count")),
                    "next_message_count": safe_int(row.get("next_message_count")),
                    "common_prefix_message_count": safe_int(row.get("common_prefix_message_count")),
                    "appended_message_count": safe_int(row.get("appended_message_count")),
                }
            )

            prev_metrics = request_lookup.get(prev_request_id, {})
            next_metrics = request_lookup.get(next_request_id, {})
            joined = {
                "session_id": row.get("session_id", ""),
                "prev_request_id": prev_request_id,
                "next_request_id": next_request_id,
                "trigger_group": row.get("trigger_group", ""),
                "trigger_detail": row.get("trigger_detail", ""),
                "source_scope": row.get("source_scope", ""),
                "source_gap_ms": safe_int(row.get("source_gap_ms")),
                "gap_from_prev_ready_ms": safe_int(row.get("gap_from_prev_ready_ms")),
                "gap_from_prev_end_ms": safe_int(row.get("gap_from_prev_end_ms")),
                "prev_input_tokens": safe_int(row.get("prev_input_tokens")),
                "next_input_tokens": safe_int(row.get("next_input_tokens")),
                "delta_input_tokens": safe_int(row.get("delta_input_tokens")),
                "delta_input_fraction_of_current": provider_delta_fraction,
                "next_cache_hit_ratio": safe_float(row.get("next_cache_hit_ratio")),
                "uncached_fraction_of_current": safe_float(row.get("uncached_fraction_of_current")),
                "prev_message_count": safe_int(row.get("prev_message_count")),
                "next_message_count": safe_int(row.get("next_message_count")),
                "common_prefix_message_count": safe_int(row.get("common_prefix_message_count")),
                "appended_message_count": safe_int(row.get("appended_message_count")),
                "appended_roles": row.get("appended_roles", ""),
                "category": category,
                "prev_tool_msg_count": prev_metrics.get("tool_msg_count", 0),
                "next_tool_msg_count": next_metrics.get("tool_msg_count", 0),
                "prev_assistant_msg_count": prev_metrics.get("assistant_msg_count", 0),
                "next_assistant_msg_count": next_metrics.get("assistant_msg_count", 0),
                "prev_user_msg_count": prev_metrics.get("user_msg_count", 0),
                "next_user_msg_count": next_metrics.get("user_msg_count", 0),
                "prev_system_msg_count": prev_metrics.get("system_msg_count", 0),
                "next_system_msg_count": next_metrics.get("system_msg_count", 0),
                "prev_retokenized_prompt_tokens": retokenized.get("prev_retokenized_prompt_tokens", 0),
                "next_retokenized_prompt_tokens": retokenized.get("next_retokenized_prompt_tokens", 0),
                "delta_retokenized_prompt_tokens": retokenized.get("delta_retokenized_prompt_tokens", 0),
                "delta_retokenized_fraction_of_current": retokenized_delta_fraction,
                "retokenized_negative": 1 if retokenized_negative else 0,
            }
            negative_rows.append(joined)

            bucket = "both_shrink" if retokenized_negative else "provider_only_shrink"
            agreement_counter[bucket] += 1

            values = category_values[category]
            values["provider_delta_fraction_of_current"].append(provider_delta_fraction)
            values["retokenized_delta_fraction_of_current"].append(retokenized_delta_fraction)
            values["prev_message_count"].append(joined["prev_message_count"])
            values["next_message_count"].append(joined["next_message_count"])
            values["common_prefix_message_count"].append(joined["common_prefix_message_count"])
            values["prev_tool_msg_count"].append(joined["prev_tool_msg_count"])
            values["next_tool_msg_count"].append(joined["next_tool_msg_count"])
            if retokenized_negative:
                values["both_shrink_count"] += 1
            else:
                values["provider_only_shrink_count"] += 1

    category_rows = []
    for category, values in sorted(
        category_values.items(),
        key=lambda item: len(item[1]["provider_delta_fraction_of_current"]),
        reverse=True,
    ):
        count = len(values["provider_delta_fraction_of_current"])
        category_rows.append(
            {
                "category": category,
                "count": count,
                "fraction_of_negative_provider": safe_div(count, len(negative_rows)),
                "both_shrink_fraction": safe_div(values["both_shrink_count"], count),
                "provider_only_shrink_fraction": safe_div(values["provider_only_shrink_count"], count),
                "p50_provider_delta_fraction_of_current": percentile(
                    values["provider_delta_fraction_of_current"], 0.50
                ),
                "p50_retokenized_delta_fraction_of_current": percentile(
                    values["retokenized_delta_fraction_of_current"], 0.50
                ),
                "p50_prev_message_count": percentile(values["prev_message_count"], 0.50),
                "p50_next_message_count": percentile(values["next_message_count"], 0.50),
                "p50_common_prefix_message_count": percentile(
                    values["common_prefix_message_count"], 0.50
                ),
                "p50_prev_tool_msg_count": percentile(values["prev_tool_msg_count"], 0.50),
                "p50_next_tool_msg_count": percentile(values["next_tool_msg_count"], 0.50),
            }
        )

    write_csv(advanced_dir / "context_change_mechanism_summary.csv", category_rows)
    plot_bar_chart(
        advanced_dir / "context_change_mechanism_counts.png",
        category_rows,
        label_key="category",
        value_key="count",
        title="Negative Provider-Length Transition Mechanisms",
        xlabel="Transition count",
        ylabel="Mechanism",
        top_n=min(12, len(category_rows)),
    )
    plot_grouped_bar_chart(
        advanced_dir / "context_change_sign_agreement.png",
        category_rows[: min(12, len(category_rows))],
        label_key="category",
        series_keys=[
            ("both_shrink_fraction", "both_shrink_fraction"),
            ("provider_only_shrink_fraction", "provider_only_shrink_fraction"),
        ],
        title="Provider vs Retokenized Shrink Agreement by Mechanism",
        xlabel="Mechanism",
        ylabel="Fraction of negative provider transitions",
    )

    chosen_rows = {}
    for category in [row["category"] for row in category_rows]:
        matched = [row for row in negative_rows if row["category"] == category]
        if not matched:
            continue
        median_delta = percentile(
            [row["delta_input_fraction_of_current"] for row in matched],
            0.50,
        )
        chosen_rows[category] = min(
            matched,
            key=lambda row: abs(row["delta_input_fraction_of_current"] - median_delta),
        )

    wanted_request_ids = set()
    for row in chosen_rows.values():
        wanted_request_ids.add(row["prev_request_id"])
        wanted_request_ids.add(row["next_request_id"])

    request_messages = {
        request_id: payload.get("messages", [])
        for request_id, payload in _load_source_payloads(source_path, wanted_request_ids).items()
    }

    casebook_lines = [
        "# Context Change Deep Dive",
        "",
        "This file explains why provider-length context shrinks happen between consecutive requests inside the same session.",
        "",
        "- Scope: only transitions with `delta_input_fraction_of_current < 0` under provider `usage.input_tokens`.",
        "- Comparison: each transition is joined with retokenized prompt length to separate structural shrink from provider-only shrink.",
        "- Caveat: the prefix comparison is role-based, not content-hash based, so the case explanations are structural summaries rather than exact diff hunks.",
        "",
        "## Mechanism Summary",
        "| category | count | fraction_of_negative_provider | both_shrink_fraction | provider_only_shrink_fraction | p50_provider_delta_fraction_of_current | p50_retokenized_delta_fraction_of_current | p50_prev_message_count | p50_next_message_count | p50_common_prefix_message_count |",
        "| --- | --- | --- | --- | --- | --- | --- | --- | --- | --- |",
    ]
    for row in category_rows:
        casebook_lines.append(
            f"| {row['category']} | {row['count']} | {row['fraction_of_negative_provider']:.4f} | "
            f"{row['both_shrink_fraction']:.4f} | {row['provider_only_shrink_fraction']:.4f} | "
            f"{row['p50_provider_delta_fraction_of_current']:.4f} | "
            f"{row['p50_retokenized_delta_fraction_of_current']:.4f} | "
            f"{row['p50_prev_message_count']:.1f} | {row['p50_next_message_count']:.1f} | "
            f"{row['p50_common_prefix_message_count']:.1f} |"
        )

    for row in category_rows:
        category = row["category"]
        chosen = chosen_rows.get(category)
        if not chosen:
            continue
        prev_messages = request_messages.get(chosen["prev_request_id"], [])
        next_messages = request_messages.get(chosen["next_request_id"], [])
        common_prefix = min(chosen["common_prefix_message_count"], len(prev_messages), len(next_messages))
        prev_prefix_summary = _message_slice_summary(prev_messages[:common_prefix], 0)
        removed_summary = _message_slice_summary(prev_messages[common_prefix:], common_prefix)
        added_summary = _message_slice_summary(next_messages[common_prefix:], common_prefix)
        prev_full_summary = _message_slice_summary(prev_messages, 0)
        next_full_summary = _message_slice_summary(next_messages, 0)

        casebook_lines.extend(
            [
                "",
                f"## {category}",
                f"- Pair: `{chosen['prev_request_id']}` -> `{chosen['next_request_id']}` in session `{chosen['session_id']}`",
                f"- Trigger: `{chosen['trigger_group']}` / `{chosen['trigger_detail']}`; source scope: `{chosen['source_scope']}`; source gap: `{chosen['source_gap_ms']} ms`",
                f"- Provider tokens: `{chosen['prev_input_tokens']}` -> `{chosen['next_input_tokens']}` (`{chosen['delta_input_tokens']}`), signed delta/current=`{chosen['delta_input_fraction_of_current']:.4f}`",
                f"- Retokenized tokens: `{chosen['prev_retokenized_prompt_tokens']}` -> `{chosen['next_retokenized_prompt_tokens']}` (`{chosen['delta_retokenized_prompt_tokens']}`), signed delta/current=`{chosen['delta_retokenized_fraction_of_current']:.4f}`",
                f"- Message counts: `{chosen['prev_message_count']}` -> `{chosen['next_message_count']}`, role-prefix-aligned common prefix=`{chosen['common_prefix_message_count']}`, appended count=`{chosen['appended_message_count']}`",
                f"- Cache on next request: hit=`{chosen['next_cache_hit_ratio']:.4f}`, uncached/current=`{chosen['uncached_fraction_of_current']:.4f}`, gap ready->ready=`{chosen['gap_from_prev_ready_ms']} ms`, end->ready=`{chosen['gap_from_prev_end_ms']} ms`",
                f"- Prev role counts: `{_format_counter(prev_full_summary['role_counts'])}`; next role counts: `{_format_counter(next_full_summary['role_counts'])}`",
                f"- Prev role text lens: `{_format_counter(prev_full_summary['role_text_lens'])}`; next role text lens: `{_format_counter(next_full_summary['role_text_lens'])}`",
                f"- Prefix kept: `{prev_prefix_summary['count']}` msgs, total_text_len=`{prev_prefix_summary['total_text_len']}`, roles=`{_format_counter(prev_prefix_summary['role_counts'])}`",
                f"- Removed tail from previous prompt: `{removed_summary['count']}` msgs, total_text_len=`{removed_summary['total_text_len']}`, roles=`{_format_counter(removed_summary['role_counts'])}`",
                f"- Added tail in next prompt: `{added_summary['count']}` msgs, total_text_len=`{added_summary['total_text_len']}`, roles=`{_format_counter(added_summary['role_counts'])}`",
                f"- Largest removed messages: `{_format_top_messages(removed_summary['top_messages'])}`",
                f"- Largest added messages: `{_format_top_messages(added_summary['top_messages'])}`",
            ]
        )

    casebook_path = advanced_dir / "context_change_casebook.md"
    casebook_path.write_text("\n".join(casebook_lines) + "\n", encoding="utf-8")
    representative_pairs = {
        "pair_count": len(chosen_rows),
        "pairs": [
            {
                "category": category,
                **chosen_rows[category],
            }
            for category in [row["category"] for row in category_rows]
            if category in chosen_rows
        ],
    }
    representative_pairs_path = advanced_dir / "context_change_representative_pairs.json"
    representative_pairs_path.write_text(
        json.dumps(representative_pairs, ensure_ascii=False, indent=2),
        encoding="utf-8",
    )

    summary = {
        "negative_provider_transition_count": len(negative_rows),
        "negative_provider_fraction_of_all_transitions": safe_div(len(negative_rows), transition_count),
        "agreement_stats": {
            "both_shrink_fraction": safe_div(agreement_counter["both_shrink"], len(negative_rows)),
            "provider_only_shrink_fraction": safe_div(
                agreement_counter["provider_only_shrink"], len(negative_rows)
            ),
        },
        "category_rows": category_rows,
        "casebook_path": str(casebook_path),
        "representative_pairs_path": str(representative_pairs_path),
    }
    summary_path = advanced_dir / "context_change_summary.json"
    with open(summary_path, "w", encoding="utf-8") as handle:
        json.dump(summary, handle, ensure_ascii=False, indent=2)
    return summary, summary_path, casebook_path, representative_pairs_path


def write_transition_outputs_from_existing(source_path, request_metrics_path, advanced_dir):
    advanced_dir = ensure_output_dir(advanced_dir)
    transition_edges_path = advanced_dir / "session_transition_edges.csv"

    request_lookup = {}
    session_last = {}
    trigger_group_counter = Counter()
    source_scope_counter = Counter()
    negative_delta_count = 0
    negative_delta_by_trigger = Counter()
    negative_delta_by_source_scope = Counter()
    trigger_group_values = defaultdict(
        lambda: {
            "delta_fraction_of_current": [],
            "abs_delta_fraction_of_current": [],
            "uncached_fraction_of_current": [],
        }
    )
    source_scope_gaps = defaultdict(list)
    delta_fraction_current_values = []
    abs_delta_fraction_current_values = []
    uncached_fraction_current_values = []
    source_gap_ms_values = []

    with open(request_metrics_path, "r", encoding="utf-8") as metrics_handle, open(
        transition_edges_path, "w", encoding="utf-8", newline=""
    ) as output_handle:
        metrics_reader = csv.DictReader(metrics_handle)
        normalized_iter = _iter_source_message_minimal(source_path)
        writer = None

        for normalized_row, metric_row in zip(normalized_iter, metrics_reader):
            if normalized_row["request_id"] != metric_row.get("request_id", ""):
                raise ValueError(
                    f"request order mismatch between source trace and request metrics: "
                    f"{normalized_row['request_id']} != {metric_row.get('request_id', '')}"
                )

            current = {
                "request_id": metric_row["request_id"],
                "session_id": metric_row["session_id"],
                "request_ready_time_ms": safe_int(metric_row.get("request_ready_time_ms")),
                "request_end_time_ms": safe_int(metric_row.get("request_end_time_ms")),
                "input_tokens": safe_int(metric_row.get("input_tokens")),
                "uncached_prompt_tokens": safe_int(metric_row.get("uncached_prompt_tokens")),
                "cache_hit_ratio": safe_float(metric_row.get("cache_hit_ratio")),
                "theoretical_source_request_id": metric_row.get("theoretical_source_request_id", ""),
                "messages": normalized_row["messages"],
            }

            request_lookup[current["request_id"]] = {
                "session_id": current["session_id"],
                "request_ready_time_ms": current["request_ready_time_ms"],
            }

            previous = session_last.get(current["session_id"])
            if previous is not None:
                transition = _classify_transition(previous["messages"], current["messages"])
                delta_input_tokens = current["input_tokens"] - previous["input_tokens"]
                abs_delta_fraction_of_current = safe_div(abs(delta_input_tokens), current["input_tokens"])
                delta_fraction_of_prev = safe_div(delta_input_tokens, previous["input_tokens"])
                delta_fraction_of_current = safe_div(delta_input_tokens, current["input_tokens"])
                uncached_fraction_of_current = safe_div(current["uncached_prompt_tokens"], current["input_tokens"])

                source_request_id = current["theoretical_source_request_id"]
                source_info = request_lookup.get(source_request_id)
                source_gap_ms = 0
                if not source_request_id:
                    source_scope = "none"
                elif source_request_id == previous["request_id"]:
                    source_scope = "prev_round"
                    source_gap_ms = max(current["request_ready_time_ms"] - previous["request_ready_time_ms"], 0)
                elif source_info is None:
                    source_scope = "unknown"
                elif source_info["session_id"] == current["session_id"]:
                    source_scope = "same_session_earlier"
                    source_gap_ms = max(current["request_ready_time_ms"] - source_info["request_ready_time_ms"], 0)
                else:
                    source_scope = "cross_session"
                    source_gap_ms = max(current["request_ready_time_ms"] - source_info["request_ready_time_ms"], 0)

                row = {
                    "session_id": current["session_id"],
                    "prev_request_id": previous["request_id"],
                    "next_request_id": current["request_id"],
                    "prev_request_ready_time_ms": previous["request_ready_time_ms"],
                    "prev_request_end_time_ms": previous["request_end_time_ms"],
                    "next_request_ready_time_ms": current["request_ready_time_ms"],
                    "gap_from_prev_ready_ms": max(current["request_ready_time_ms"] - previous["request_ready_time_ms"], 0),
                    "gap_from_prev_end_ms": max(current["request_ready_time_ms"] - previous["request_end_time_ms"], 0),
                    "prev_input_tokens": previous["input_tokens"],
                    "next_input_tokens": current["input_tokens"],
                    "delta_input_tokens": delta_input_tokens,
                    "delta_input_fraction_of_prev": delta_fraction_of_prev,
                    "delta_input_fraction_of_current": delta_fraction_of_current,
                    "abs_delta_input_fraction_of_current": abs_delta_fraction_of_current,
                    "next_uncached_prompt_tokens": current["uncached_prompt_tokens"],
                    "uncached_fraction_of_current": uncached_fraction_of_current,
                    "next_cache_hit_ratio": current["cache_hit_ratio"],
                    "prev_message_count": len(previous["messages"]),
                    "next_message_count": len(current["messages"]),
                    **transition,
                    "theoretical_source_request_id": source_request_id,
                    "source_scope": source_scope,
                    "source_gap_ms": source_gap_ms,
                }

                if writer is None:
                    writer = csv.DictWriter(output_handle, fieldnames=list(row.keys()))
                    writer.writeheader()
                writer.writerow(row)

                trigger_group_counter[row["trigger_group"]] += 1
                source_scope_counter[source_scope] += 1
                if delta_fraction_of_current < 0:
                    negative_delta_count += 1
                    negative_delta_by_trigger[row["trigger_group"]] += 1
                    negative_delta_by_source_scope[source_scope] += 1
                trigger_group_values[row["trigger_group"]]["delta_fraction_of_current"].append(delta_fraction_of_current)
                trigger_group_values[row["trigger_group"]]["abs_delta_fraction_of_current"].append(abs_delta_fraction_of_current)
                trigger_group_values[row["trigger_group"]]["uncached_fraction_of_current"].append(uncached_fraction_of_current)
                source_scope_gaps[source_scope].append(source_gap_ms)
                delta_fraction_current_values.append(delta_fraction_of_current)
                abs_delta_fraction_current_values.append(abs_delta_fraction_of_current)
                uncached_fraction_current_values.append(uncached_fraction_of_current)
                if source_scope != "none":
                    source_gap_ms_values.append(source_gap_ms)

            session_last[current["session_id"]] = current

    transition_count = sum(trigger_group_counter.values())
    trigger_group_rows = []
    for trigger_group, count in trigger_group_counter.most_common():
        abs_delta_values = trigger_group_values[trigger_group]["abs_delta_fraction_of_current"]
        uncached_values = trigger_group_values[trigger_group]["uncached_fraction_of_current"]
        trigger_group_rows.append(
            {
                "trigger_group": trigger_group,
                "count": count,
                "fraction": safe_div(count, transition_count),
                "negative_delta_count": negative_delta_by_trigger[trigger_group],
                "negative_delta_fraction": safe_div(negative_delta_by_trigger[trigger_group], count),
                "p50_abs_delta_fraction_of_current": percentile(abs_delta_values, 0.5) if abs_delta_values else 0.0,
                "p90_abs_delta_fraction_of_current": percentile(abs_delta_values, 0.9) if abs_delta_values else 0.0,
                "p50_uncached_fraction_of_current": percentile(uncached_values, 0.5) if uncached_values else 0.0,
                "p90_uncached_fraction_of_current": percentile(uncached_values, 0.9) if uncached_values else 0.0,
            }
        )
    source_scope_rows = []
    for source_scope, count in source_scope_counter.most_common():
        gap_values = [value for value in source_scope_gaps[source_scope] if value is not None]
        source_scope_rows.append(
            {
                "source_scope": source_scope,
                "count": count,
                "fraction": safe_div(count, transition_count),
                "negative_delta_count": negative_delta_by_source_scope[source_scope],
                "negative_delta_fraction": safe_div(negative_delta_by_source_scope[source_scope], count),
                "p50_source_gap_ms": percentile(gap_values, 0.5) if gap_values else 0.0,
                "p90_source_gap_ms": percentile(gap_values, 0.9) if gap_values else 0.0,
            }
        )

    write_csv(advanced_dir / "transition_trigger_groups.csv", trigger_group_rows)
    write_csv(advanced_dir / "transition_source_scope.csv", source_scope_rows)
    write_csv(
        advanced_dir / "cdf_transition_delta_fraction_of_current.csv",
        build_cdf(delta_fraction_current_values),
    )
    write_csv(
        advanced_dir / "cdf_transition_source_gap_ms.csv",
        build_cdf(source_gap_ms_values),
    )
    write_csv(
        advanced_dir / "cdf_transition_abs_delta_fraction_of_current.csv",
        build_cdf(abs_delta_fraction_current_values),
    )
    write_csv(
        advanced_dir / "cdf_transition_delta_fraction_by_trigger.csv",
        build_grouped_cdf_rows(
            {
                row["trigger_group"]: trigger_group_values[row["trigger_group"]]["delta_fraction_of_current"]
                for row in trigger_group_rows
            },
            "trigger_group",
        ),
    )
    write_csv(
        advanced_dir / "cdf_transition_abs_delta_fraction_by_trigger.csv",
        build_grouped_cdf_rows(
            {
                row["trigger_group"]: trigger_group_values[row["trigger_group"]]["abs_delta_fraction_of_current"]
                for row in trigger_group_rows
            },
            "trigger_group",
        ),
    )
    write_csv(
        advanced_dir / "cdf_transition_uncached_fraction_by_trigger.csv",
        build_grouped_cdf_rows(
            {
                row["trigger_group"]: trigger_group_values[row["trigger_group"]]["uncached_fraction_of_current"]
                for row in trigger_group_rows
            },
            "trigger_group",
        ),
    )
    write_csv(
        advanced_dir / "cdf_transition_source_gap_ms_by_scope.csv",
        build_grouped_cdf_rows(
            {
                row["source_scope"]: source_scope_gaps[row["source_scope"]]
                for row in source_scope_rows
                if row["source_scope"] not in {"none"} and source_scope_gaps[row["source_scope"]]
            },
            "source_scope",
        ),
    )

    plot_bar_chart(
        advanced_dir / "transition_trigger_groups.png",
        trigger_group_rows,
        label_key="trigger_group",
        value_key="count",
        title="Session Transition Trigger Groups",
        xlabel="Transition count",
        ylabel="Trigger",
        top_n=min(12, len(trigger_group_rows)),
    )
    plot_grouped_bar_chart(
        advanced_dir / "transition_context_change_by_trigger.png",
        trigger_group_rows[: min(8, len(trigger_group_rows))],
        label_key="trigger_group",
        series_keys=[
            ("p50_abs_delta_fraction_of_current", "p50_abs_delta/current"),
            ("p90_abs_delta_fraction_of_current", "p90_abs_delta/current"),
            ("p50_uncached_fraction_of_current", "p50_uncached/current"),
        ],
        title="Context Change by Trigger Group",
        xlabel="Trigger group",
        ylabel="Fraction",
    )
    plot_fraction_bar_chart(
        advanced_dir / "transition_source_scope.png",
        source_scope_rows,
        label_key="source_scope",
        value_key="fraction",
        title="KV-Cache Source Scope by Transition",
    )
    plot_cdf_series(
        advanced_dir / "cdf_transition_delta_fraction_by_trigger.png",
        [
            (
                row["trigger_group"],
                build_cdf(trigger_group_values[row["trigger_group"]]["delta_fraction_of_current"]),
            )
            for row in trigger_group_rows
            if trigger_group_values[row["trigger_group"]]["delta_fraction_of_current"]
        ],
        title="CDF of Signed Context Delta by Trigger Group",
        xlabel="(next_input_tokens - prev_input_tokens) / next_input_tokens",
    )
    plot_cdf_series_with_zoom_windows(
        advanced_dir / "cdf_transition_delta_fraction_by_trigger_zoom.png",
        [
            (
                row["trigger_group"],
                build_cdf(trigger_group_values[row["trigger_group"]]["delta_fraction_of_current"]),
            )
            for row in trigger_group_rows
            if trigger_group_values[row["trigger_group"]]["delta_fraction_of_current"]
        ],
        title="CDF of Signed Context Delta by Trigger Group with Central 80% / 90% Zoom",
        xlabel="(next_input_tokens - prev_input_tokens) / next_input_tokens",
        zoom_windows=[
            (0.10, 0.90, "Central 80% (p10-p90)"),
            (0.05, 0.95, "Central 90% (p05-p95)"),
        ],
    )
    plot_cdf_series(
        advanced_dir / "cdf_transition_abs_delta_fraction_by_trigger.png",
        [
            (
                row["trigger_group"],
                build_cdf(trigger_group_values[row["trigger_group"]]["abs_delta_fraction_of_current"]),
            )
            for row in trigger_group_rows
            if trigger_group_values[row["trigger_group"]]["abs_delta_fraction_of_current"]
        ],
        title="CDF of Absolute Context Delta by Trigger Group",
        xlabel="abs(next_input_tokens - prev_input_tokens) / next_input_tokens",
    )
    plot_cdf_series(
        advanced_dir / "cdf_transition_uncached_fraction_by_trigger.png",
        [
            (
                row["trigger_group"],
                build_cdf(trigger_group_values[row["trigger_group"]]["uncached_fraction_of_current"]),
            )
            for row in trigger_group_rows
            if trigger_group_values[row["trigger_group"]]["uncached_fraction_of_current"]
        ],
        title="CDF of Uncached Prompt Fraction by Trigger Group",
        xlabel="uncached_prompt_tokens / next_input_tokens",
    )
    plot_cdf_series(
        advanced_dir / "cdf_transition_source_gap_ms_by_scope.png",
        [
            (row["source_scope"], build_cdf(source_scope_gaps[row["source_scope"]]))
            for row in source_scope_rows
            if row["source_scope"] not in {"none"} and source_scope_gaps[row["source_scope"]]
        ],
        title="CDF of KV-Cache Source Gap by Scope",
        xlabel="Milliseconds",
    )

    summary = {
        "transition_count": transition_count,
        "negative_delta_count": negative_delta_count,
        "negative_delta_fraction": safe_div(negative_delta_count, transition_count),
        "trigger_group_stats": {row["trigger_group"]: row["fraction"] for row in trigger_group_rows},
        "source_scope_stats": {row["source_scope"]: row["fraction"] for row in source_scope_rows},
        "delta_fraction_current_stats": series_stats(delta_fraction_current_values),
        "abs_delta_fraction_current_stats": series_stats(abs_delta_fraction_current_values),
        "uncached_fraction_current_stats": series_stats(uncached_fraction_current_values),
        "source_gap_ms_stats": series_stats(source_gap_ms_values),
        "trigger_group_rows": trigger_group_rows,
        "source_scope_rows": source_scope_rows,
    }
    summary_path = advanced_dir / "transition_patterns_summary.json"
    with open(summary_path, "w", encoding="utf-8") as handle:
        json.dump(summary, handle, ensure_ascii=False, indent=2)
    return summary, summary_path, transition_edges_path


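# Mirror of write_transition_outputs_from_existing, but measured in
# retokenized prompt tokens (the `theoretical_prompt_unit_length` column)
# instead of provider-reported `input_tokens`, so deltas reflect what actually
# changed in the serialized prompt rather than provider-side accounting.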
def write_retokenized_transition_outputs_from_existing(source_path, request_metrics_path, advanced_dir):
    advanced_dir = ensure_output_dir(advanced_dir)
    transition_edges_path = advanced_dir / "session_transition_retokenized_edges.csv"

    session_last = {}
    trigger_group_counter = Counter()
    negative_delta_count = 0
    negative_delta_by_trigger = Counter()
    trigger_group_values = defaultdict(
        lambda: {
            "delta_fraction_of_current": [],
            "abs_delta_fraction_of_current": [],
        }
    )
    delta_fraction_current_values = []
    abs_delta_fraction_current_values = []
    delta_tokens_values = []
    abs_delta_tokens_values = []

    with open(request_metrics_path, "r", encoding="utf-8") as metrics_handle, open(
        transition_edges_path, "w", encoding="utf-8", newline=""
    ) as output_handle:
        metrics_reader = csv.DictReader(metrics_handle)
        normalized_iter = _iter_source_message_minimal(source_path)
        writer = None

        for normalized_row, metric_row in zip(normalized_iter, metrics_reader):
            if normalized_row["request_id"] != metric_row.get("request_id", ""):
                raise ValueError(
                    f"request order mismatch between source trace and request metrics: "
                    f"{normalized_row['request_id']} != {metric_row.get('request_id', '')}"
                )

            current = {
                "request_id": metric_row["request_id"],
                "session_id": metric_row["session_id"],
                "request_ready_time_ms": safe_int(metric_row.get("request_ready_time_ms")),
                "request_end_time_ms": safe_int(metric_row.get("request_end_time_ms")),
                "retokenized_prompt_tokens": safe_int(metric_row.get("theoretical_prompt_unit_length")),
                "messages": normalized_row["messages"],
            }

            previous = session_last.get(current["session_id"])
            if previous is not None:
                transition = _classify_transition(previous["messages"], current["messages"])
                delta_tokens = current["retokenized_prompt_tokens"] - previous["retokenized_prompt_tokens"]
                delta_fraction_of_prev = safe_div(delta_tokens, previous["retokenized_prompt_tokens"])
                delta_fraction_of_current = safe_div(delta_tokens, current["retokenized_prompt_tokens"])
                abs_delta_fraction_of_current = safe_div(abs(delta_tokens), current["retokenized_prompt_tokens"])

                row = {
                    "session_id": current["session_id"],
                    "prev_request_id": previous["request_id"],
                    "next_request_id": current["request_id"],
                    "prev_request_ready_time_ms": previous["request_ready_time_ms"],
                    "prev_request_end_time_ms": previous["request_end_time_ms"],
                    "next_request_ready_time_ms": current["request_ready_time_ms"],
                    "gap_from_prev_ready_ms": max(current["request_ready_time_ms"] - previous["request_ready_time_ms"], 0),
                    "gap_from_prev_end_ms": max(current["request_ready_time_ms"] - previous["request_end_time_ms"], 0),
                    "prev_retokenized_prompt_tokens": previous["retokenized_prompt_tokens"],
                    "next_retokenized_prompt_tokens": current["retokenized_prompt_tokens"],
                    "delta_retokenized_prompt_tokens": delta_tokens,
                    "delta_retokenized_fraction_of_prev": delta_fraction_of_prev,
                    "delta_retokenized_fraction_of_current": delta_fraction_of_current,
                    "abs_delta_retokenized_fraction_of_current": abs_delta_fraction_of_current,
                    "prev_message_count": len(previous["messages"]),
                    "next_message_count": len(current["messages"]),
                    **transition,
                }

                if writer is None:
                    writer = csv.DictWriter(output_handle, fieldnames=list(row.keys()))
                    writer.writeheader()
                writer.writerow(row)

                trigger_group_counter[row["trigger_group"]] += 1
                if delta_fraction_of_current < 0:
                    negative_delta_count += 1
                    negative_delta_by_trigger[row["trigger_group"]] += 1
                trigger_group_values[row["trigger_group"]]["delta_fraction_of_current"].append(delta_fraction_of_current)
                trigger_group_values[row["trigger_group"]]["abs_delta_fraction_of_current"].append(abs_delta_fraction_of_current)
                delta_fraction_current_values.append(delta_fraction_of_current)
                abs_delta_fraction_current_values.append(abs_delta_fraction_of_current)
                delta_tokens_values.append(delta_tokens)
                abs_delta_tokens_values.append(abs(delta_tokens))

            session_last[current["session_id"]] = current

    transition_count = sum(trigger_group_counter.values())
    trigger_group_rows = []
    for trigger_group, count in trigger_group_counter.most_common():
        signed_values = trigger_group_values[trigger_group]["delta_fraction_of_current"]
        abs_values = trigger_group_values[trigger_group]["abs_delta_fraction_of_current"]
        trigger_group_rows.append(
            {
                "trigger_group": trigger_group,
                "count": count,
                "fraction": safe_div(count, transition_count),
                "negative_delta_count": negative_delta_by_trigger[trigger_group],
                "negative_delta_fraction": safe_div(negative_delta_by_trigger[trigger_group], count),
                "p10_signed_delta_fraction_of_current": percentile(signed_values, 0.10) if signed_values else 0.0,
                "p50_signed_delta_fraction_of_current": percentile(signed_values, 0.50) if signed_values else 0.0,
                "p90_signed_delta_fraction_of_current": percentile(signed_values, 0.90) if signed_values else 0.0,
                "p95_signed_delta_fraction_of_current": percentile(signed_values, 0.95) if signed_values else 0.0,
                "p50_abs_delta_fraction_of_current": percentile(abs_values, 0.50) if abs_values else 0.0,
                "p90_abs_delta_fraction_of_current": percentile(abs_values, 0.90) if abs_values else 0.0,
            }
        )

    write_csv(advanced_dir / "transition_retokenized_trigger_groups.csv", trigger_group_rows)
    write_csv(
        advanced_dir / "cdf_transition_retokenized_delta_fraction_of_current.csv",
        build_cdf(delta_fraction_current_values),
    )
    write_csv(
        advanced_dir / "cdf_transition_retokenized_abs_delta_fraction_of_current.csv",
        build_cdf(abs_delta_fraction_current_values),
    )
    write_csv(
        advanced_dir / "cdf_transition_retokenized_delta_fraction_by_trigger.csv",
        build_grouped_cdf_rows(
            {
                row["trigger_group"]: trigger_group_values[row["trigger_group"]]["delta_fraction_of_current"]
                for row in trigger_group_rows
            },
            "trigger_group",
        ),
    )
    write_csv(
        advanced_dir / "cdf_transition_retokenized_abs_delta_fraction_by_trigger.csv",
        build_grouped_cdf_rows(
            {
                row["trigger_group"]: trigger_group_values[row["trigger_group"]]["abs_delta_fraction_of_current"]
                for row in trigger_group_rows
            },
            "trigger_group",
        ),
    )

    plot_cdf_series(
        advanced_dir / "cdf_transition_retokenized_delta_fraction_by_trigger.png",
        [
            (
                row["trigger_group"],
                build_cdf(trigger_group_values[row["trigger_group"]]["delta_fraction_of_current"]),
            )
            for row in trigger_group_rows
            if trigger_group_values[row["trigger_group"]]["delta_fraction_of_current"]
        ],
        title="CDF of Retokenized Signed Context Delta by Trigger Group",
        xlabel="(next_retokenized_prompt_tokens - prev_retokenized_prompt_tokens) / next_retokenized_prompt_tokens",
    )
    plot_cdf_series_with_zoom_windows(
        advanced_dir / "cdf_transition_retokenized_delta_fraction_by_trigger_zoom.png",
        [
            (
                row["trigger_group"],
                build_cdf(trigger_group_values[row["trigger_group"]]["delta_fraction_of_current"]),
            )
            for row in trigger_group_rows
            if trigger_group_values[row["trigger_group"]]["delta_fraction_of_current"]
        ],
        title="CDF of Retokenized Signed Context Delta by Trigger Group with Central 80% / 90% Zoom",
        xlabel="(next_retokenized_prompt_tokens - prev_retokenized_prompt_tokens) / next_retokenized_prompt_tokens",
        zoom_windows=[
            (0.10, 0.90, "Central 80% (p10-p90)"),
            (0.05, 0.95, "Central 90% (p05-p95)"),
        ],
    )
    plot_cdf_series(
        advanced_dir / "cdf_transition_retokenized_abs_delta_fraction_by_trigger.png",
        [
            (
                row["trigger_group"],
                build_cdf(trigger_group_values[row["trigger_group"]]["abs_delta_fraction_of_current"]),
            )
            for row in trigger_group_rows
            if trigger_group_values[row["trigger_group"]]["abs_delta_fraction_of_current"]
        ],
        title="CDF of Retokenized Absolute Context Delta by Trigger Group",
        xlabel="abs(next_retokenized_prompt_tokens - prev_retokenized_prompt_tokens) / next_retokenized_prompt_tokens",
    )

    summary = {
        "transition_count": transition_count,
        "negative_delta_count": negative_delta_count,
        "negative_delta_fraction": safe_div(negative_delta_count, transition_count),
        "delta_fraction_current_stats": series_stats(delta_fraction_current_values),
        "abs_delta_fraction_current_stats": series_stats(abs_delta_fraction_current_values),
        "delta_tokens_stats": series_stats(delta_tokens_values),
        "abs_delta_tokens_stats": series_stats(abs_delta_tokens_values),
        "trigger_group_rows": trigger_group_rows,
    }
    summary_path = advanced_dir / "transition_retokenized_summary.json"
    with open(summary_path, "w", encoding="utf-8") as handle:
        json.dump(summary, handle, ensure_ascii=False, indent=2)
    return summary, summary_path, transition_edges_path


def chunk_units(units, block_size):
    return [tuple(units[index:index + block_size]) for index in range(0, len(units), block_size)]


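# chunk_units splits a token-id list into fixed-size blocks, allowing a short
# tail block, e.g. chunk_units([1, 2, 3, 4, 5], 2) -> [(1, 2), (3, 4), (5,)].
# compute_theoretical_cache then hashes each block chained with its
# predecessor's hash (hash((prev_hash, *block))), so a block's identity
# encodes the entire preceding prefix: two prompts can share a cached block
# only if they agree on everything up to and including that block, which is
# the block-level prefix-caching model this replay simulates.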
def compute_theoretical_cache(
    records,
    block_size=256,
    segment_mode="tokenizer",
    tokenizer_path=None,
    model_family="auto",
    model_meta_dir=None,
    show_progress=False,
):
    segment, resolved_tokenizer_path = load_segmenter(
        segment_mode=segment_mode,
        tokenizer_path=tokenizer_path,
        model_family=model_family,
        model_meta_dir=model_meta_dir,
        records=records,
    )
    sorted_records = sort_records_for_time(records)
    cache = {}
    request_rows = []
    reuse_gap_rows = []
    process = psutil.Process(os.getpid()) if show_progress else None
    peak_rss_mb = 0.0
    iterator = sorted_records
    progress = None
    if show_progress:
        progress = tqdm(
            total=len(sorted_records),
            desc="Theoretical KV reuse",
            unit="req",
            dynamic_ncols=True,
        )

    for record in iterator:
        units = segment(record.canonical_prompt)
        blocks = chunk_units(units, block_size)
        prev_hash = None
        prefix_match_blocks = 0
        prefix_active = True
        longest_source_request_id = ""
        for block_index, block in enumerate(blocks):
            block_hash = hash((prev_hash, *block))
            meta = cache.get(block_hash)
            if meta and prefix_active:
                prefix_match_blocks += 1
                longest_source_request_id = meta["last_request_id"]
                reuse_gap_rows.append(
                    {
                        "request_id": record.meta.request_id,
                        "session_id": record.meta.session_id,
                        "block_index": block_index,
                        "source_request_id": meta["last_request_id"],
                        "reuse_gap_ms": max(record.meta.request_ready_time_ms - meta["last_seen_ms"], 0),
                        "age_from_first_seen_ms": max(record.meta.request_ready_time_ms - meta["first_seen_ms"], 0),
                    }
                )
                meta["last_seen_ms"] = record.meta.request_ready_time_ms
                meta["last_reuse_ms"] = record.meta.request_ready_time_ms
                meta["last_request_id"] = record.meta.request_id
                meta["last_reuse_request_id"] = record.meta.request_id
                meta["reuse_count"] += 1
            else:
                prefix_active = False
                if not meta:
                    cache[block_hash] = {
                        "hash": block_hash,
                        "first_seen_ms": record.meta.request_ready_time_ms,
                        "last_seen_ms": record.meta.request_ready_time_ms,
                        "last_reuse_ms": 0,
                        "first_request_id": record.meta.request_id,
                        "last_request_id": record.meta.request_id,
                        "last_reuse_request_id": "",
                        "reuse_count": 0,
                        "block_index": block_index,
                    }
                else:
                    meta["last_seen_ms"] = record.meta.request_ready_time_ms
                    meta["last_request_id"] = record.meta.request_id
            prev_hash = block_hash

        total_units = len(units)
        theoretical_hit_units = min(prefix_match_blocks * block_size, total_units)
        request_rows.append(
            {
                "request_id": record.meta.request_id,
                "session_id": record.meta.session_id,
                "request_ready_time_ms": record.meta.request_ready_time_ms,
                "segment_mode": segment_mode,
                "tokenizer_path": resolved_tokenizer_path,
                "block_size": block_size,
                "prompt_unit_length": total_units,
                "prompt_block_count": len(blocks),
                "theoretical_prefix_hit_blocks": prefix_match_blocks,
                "theoretical_prefix_hit_units": theoretical_hit_units,
                "theoretical_prefix_hit_ratio": safe_div(theoretical_hit_units, total_units),
                "theoretical_source_request_id": longest_source_request_id,
            }
        )
        if progress is not None:
            progress.update(1)
            postfix, peak_rss_mb = _progress_postfix(
                process,
                peak_rss_mb,
                progress.n / progress.total if progress.total else 0.0,
                cache_blocks=len(cache),
                reuse_edges=len(reuse_gap_rows),
            )
            progress.set_postfix(postfix)

    if progress is not None:
        progress.close()

    block_rows = []
    for meta in cache.values():
        lifecycle_end_ms = meta["last_reuse_ms"] if meta["reuse_count"] > 0 else meta["first_seen_ms"]
        lifetime_ms = max(lifecycle_end_ms - meta["first_seen_ms"], 0)
        span_end_ms = lifecycle_end_ms
        span_ms = max(span_end_ms - meta["first_seen_ms"], 0)
        block_rows.append(
            {
                "hash": meta["hash"],
                "first_request_id": meta["first_request_id"],
                "last_request_id": meta["last_request_id"],
                "first_seen_ms": meta["first_seen_ms"],
                "last_seen_ms": meta["last_seen_ms"],
                "last_reuse_ms": meta["last_reuse_ms"],
                "last_reuse_request_id": meta["last_reuse_request_id"],
                "reuse_count": meta["reuse_count"],
                "lifetime_ms": lifetime_ms,
                "span_end_ms": span_end_ms,
                "span_ms": span_ms,
                "reused": 1 if meta["reuse_count"] > 0 else 0,
            }
        )

    return {
        "request_rows": request_rows,
        "reuse_gap_rows": reuse_gap_rows,
        "block_rows": block_rows,
        "resolved_tokenizer_path": resolved_tokenizer_path,
    }


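# Pairs consecutive same-session requests into edges; an edge is a "tool
# round" when the next request carries tool messages. The ready->ready gap on
# such an edge covers the previous model turn plus client-side tool execution,
# so per-tool gap averages are best read as an upper bound on tool latency
# rather than a direct measurement.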
def build_tool_timing(records, features):
    features_by_request = {feature.request_id: feature for feature in features}
    catalog = Counter()
    session_edges = []
    per_tool_edge_counter = Counter()
    per_tool_gap_sum = Counter()

    for record in records:
        for tool in record.declared_tools:
            if tool.name:
                catalog[tool.name] += 1

    records_by_session = defaultdict(list)
    for record in records:
        records_by_session[record.meta.session_id].append(record)

    for session_id, session_records in records_by_session.items():
        ordered = sort_records_for_time(session_records)
        for previous, current in zip(ordered, ordered[1:]):
            current_feature = features_by_request[current.meta.request_id]
            previous_feature = features_by_request[previous.meta.request_id]
            gap_from_prev_ready_ms = max(current.meta.request_ready_time_ms - previous.meta.request_ready_time_ms, 0)
            gap_from_prev_end_ms = max(current.meta.request_ready_time_ms - previous.meta.request_end_time_ms, 0)
            row = {
                "session_id": session_id,
                "prev_request_id": previous.meta.request_id,
                "next_request_id": current.meta.request_id,
                "prev_request_ready_time_ms": previous.meta.request_ready_time_ms,
                "prev_request_end_time_ms": previous.meta.request_end_time_ms,
                "next_request_ready_time_ms": current.meta.request_ready_time_ms,
                "gap_from_prev_ready_ms": gap_from_prev_ready_ms,
                "gap_from_prev_end_ms": gap_from_prev_end_ms,
                "next_tool_msg_count": current_feature.tool_msg_count,
                "next_assistant_msg_count": current_feature.assistant_msg_count,
                "next_declared_tool_count": current_feature.declared_tool_count,
                "next_declared_tool_names": ";".join(tool.name for tool in current.declared_tools if tool.name),
                "prev_declared_tool_names": ";".join(tool.name for tool in previous.declared_tools if tool.name),
                "is_tool_round": 1 if current_feature.tool_msg_count > 0 else 0,
            }
            session_edges.append(row)
            if row["is_tool_round"]:
                for tool in current.declared_tools:
                    if tool.name:
                        per_tool_edge_counter[tool.name] += 1
                        per_tool_gap_sum[tool.name] += gap_from_prev_ready_ms

    catalog_rows = []
    total_requests = len(records)
    for tool_name, count in catalog.most_common():
        catalog_rows.append(
            {
                "tool_name": tool_name,
                "declared_count": count,
                "request_fraction": safe_div(count, total_requests),
                "tool_round_count": per_tool_edge_counter.get(tool_name, 0),
                "avg_tool_round_gap_ms": safe_div(
                    per_tool_gap_sum.get(tool_name, 0),
                    per_tool_edge_counter.get(tool_name, 0),
                ),
            }
        )

    return {
        "catalog_rows": catalog_rows,
        "session_edges": session_edges,
    }


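# Rows are assembled by zipping `records` with `features` positionally and
# joining the theoretical-cache columns by request_id, so callers must pass
# records and features in the same order.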
def augment_request_metrics(records, features, theoretical_rows):
    theoretical_by_request = {row["request_id"]: row for row in theoretical_rows}
    rows = []
    for record, feature in zip(records, features):
        row = feature_to_row(feature)
        theory = theoretical_by_request.get(record.meta.request_id, {})
        row.update(
            {
                "line_number": record.meta.line_number,
                "request_ready_time_ms": record.meta.request_ready_time_ms,
                "request_end_time_ms": record.meta.request_end_time_ms,
                "declared_tool_names": ";".join(tool.name for tool in record.declared_tools if tool.name),
                "canonical_prompt_chars": len(record.canonical_prompt),
                "theoretical_prompt_unit_length": theory.get("prompt_unit_length", 0),
                "theoretical_prefix_hit_blocks": theory.get("theoretical_prefix_hit_blocks", 0),
                "theoretical_prefix_hit_units": theory.get("theoretical_prefix_hit_units", 0),
                "theoretical_prefix_hit_ratio": theory.get("theoretical_prefix_hit_ratio", 0.0),
                "theoretical_source_request_id": theory.get("theoretical_source_request_id", ""),
            }
        )
        rows.append(row)
    return rows


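# Orchestrates the full study in four stages (matching the tqdm stage
# messages): 1) theoretical KV-cache replay, 2) tool timing, 3) merging
# per-request metrics, 4) writing CSVs, plots, and the transition /
# context-change passes that re-read the source trace.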
def write_study_outputs(
    records,
    features,
    output_dir,
    source_path,
    block_size=256,
    segment_mode="tokenizer",
    tokenizer_path=None,
    input_length_bucket_thresholds=None,
    show_progress=False,
):
    output_dir = ensure_output_dir(output_dir)
    advanced_dir = ensure_output_dir(preferred_details_dir(output_dir))
    process = psutil.Process(os.getpid()) if show_progress else None
    peak_rss_mb = 0.0

    if show_progress:
        tqdm.write("Stage 1/4: theoretical cache analysis")

    theoretical = compute_theoretical_cache(
        records,
        block_size=block_size,
        segment_mode=segment_mode,
        tokenizer_path=tokenizer_path,
        show_progress=show_progress,
    )
    if show_progress:
        current_rss_mb = process.memory_info().rss / (1024 * 1024)
        peak_rss_mb = max(peak_rss_mb, current_rss_mb)
        tqdm.write(
            f"Stage 1/4 done: rss_mb={current_rss_mb:.0f} "
            f"est_peak_mb={_estimate_peak_rss_mb(current_rss_mb, peak_rss_mb, 1.0):.0f}"
        )
        tqdm.write("Stage 2/4: tool timing")
    tool_stats = build_tool_timing(records, features)
    if show_progress:
        current_rss_mb = process.memory_info().rss / (1024 * 1024)
        peak_rss_mb = max(peak_rss_mb, current_rss_mb)
        tqdm.write(
            f"Stage 2/4 done: rss_mb={current_rss_mb:.0f} "
            f"est_peak_mb={_estimate_peak_rss_mb(current_rss_mb, peak_rss_mb, 1.0):.0f}"
        )
        tqdm.write("Stage 3/4: merge request metrics")
    request_metric_rows = augment_request_metrics(records, features, theoretical["request_rows"])
    if show_progress:
        current_rss_mb = process.memory_info().rss / (1024 * 1024)
        peak_rss_mb = max(peak_rss_mb, current_rss_mb)
        tqdm.write(
            f"Stage 3/4 done: rss_mb={current_rss_mb:.0f} "
            f"est_peak_mb={_estimate_peak_rss_mb(current_rss_mb, peak_rss_mb, 1.0):.0f}"
        )
        tqdm.write("Stage 4/4: writing outputs and plots")

    paths = {}
    paths["request_metrics"] = write_csv(advanced_dir / "request_metrics.csv", request_metric_rows)
    paths["theoretical_request_cache"] = write_csv(
        advanced_dir / "theoretical_request_cache.csv",
        theoretical["request_rows"],
    )
    paths["theoretical_reuse_gaps"] = write_csv(
        advanced_dir / "theoretical_reuse_gaps.csv",
        theoretical["reuse_gap_rows"],
    )
    paths["theoretical_block_lifetimes"] = write_csv(
        advanced_dir / "theoretical_block_lifetimes.csv",
        theoretical["block_rows"],
    )
    paths["tools_catalog"] = write_csv(advanced_dir / "tools_catalog.csv", tool_stats["catalog_rows"])
    paths["tool_round_edges"] = write_csv(advanced_dir / "tool_round_edges.csv", tool_stats["session_edges"])

    cdf_specs = {
        "cdf_input_tokens.csv": [row["input_tokens"] for row in request_metric_rows],
        "cdf_output_tokens.csv": [row["output_tokens"] for row in request_metric_rows],
        "cdf_actual_cache_hit_ratio.csv": [row["cache_hit_ratio"] for row in request_metric_rows],
        "cdf_theoretical_cache_hit_ratio.csv": [
            row["theoretical_prefix_hit_ratio"] for row in request_metric_rows
        ],
        "cdf_theoretical_reuse_gap_ms.csv": [
            row["reuse_gap_ms"] for row in theoretical["reuse_gap_rows"]
        ],
        "cdf_theoretical_block_lifetime_ms.csv": [
            row["lifetime_ms"] for row in theoretical["block_rows"] if row["reuse_count"] > 0
        ],
        "cdf_tool_round_gap_from_prev_ready_ms.csv": [
            row["gap_from_prev_ready_ms"] for row in tool_stats["session_edges"] if row["is_tool_round"]
        ],
    }

    cdf_rows = {}
    for filename, values in cdf_specs.items():
        rows = build_cdf(values)
        cdf_rows[filename] = rows
        paths[filename] = write_csv(advanced_dir / filename, rows)

    paths["cdf_lengths_png"] = plot_cdf_series(
        advanced_dir / "cdf_lengths.png",
        [
            ("input_tokens", cdf_rows["cdf_input_tokens.csv"]),
            ("output_tokens", cdf_rows["cdf_output_tokens.csv"]),
        ],
        title="CDF of Input / Output Length",
        xlabel="Tokens",
    )
    paths["cdf_cache_png"] = plot_cdf_series(
        advanced_dir / "cdf_cache_hit_ratio.png",
        [
            ("actual_cache_hit_ratio", cdf_rows["cdf_actual_cache_hit_ratio.csv"]),
            ("theoretical_cache_hit_ratio", cdf_rows["cdf_theoretical_cache_hit_ratio.csv"]),
        ],
        title="CDF of Actual vs Theoretical Cache Hit Ratio",
        xlabel="Hit Ratio",
    )
    paths["cdf_reuse_gap_png"] = plot_cdf_series(
        advanced_dir / "cdf_theoretical_reuse_gap_ms.png",
        [("reuse_gap_ms", cdf_rows["cdf_theoretical_reuse_gap_ms.csv"])],
        title="CDF of Theoretical Cache Reuse Gap",
        xlabel="Milliseconds",
    )
    paths["cdf_block_lifetime_png"] = plot_cdf_series(
        advanced_dir / "cdf_theoretical_block_lifetime_ms.png",
        [("block_lifetime_ms", cdf_rows["cdf_theoretical_block_lifetime_ms.csv"])],
        title="CDF of Theoretical Cache Block Lifetime",
        xlabel="Milliseconds",
    )
    paths["cdf_tool_gap_png"] = plot_cdf_series(
        advanced_dir / "cdf_tool_round_gap_from_prev_ready_ms.png",
        [("tool_round_gap_from_prev_ready_ms", cdf_rows["cdf_tool_round_gap_from_prev_ready_ms.csv"])],
        title="CDF of Tool Round Inter-API Gap",
        xlabel="Milliseconds",
    )
    paths["tools_catalog_png"] = plot_bar_chart(
        advanced_dir / "tools_catalog_top_declared.png",
        tool_stats["catalog_rows"],
        label_key="tool_name",
        value_key="declared_count",
        title="Top Declared Tools",
        xlabel="Declared Count",
        ylabel="Tool",
    )

    input_length_comparison_summary, input_length_comparison_summary_path = write_input_length_comparison_from_rows(
        request_metric_rows,
        advanced_dir,
    )
    paths["input_length_comparison_summary"] = input_length_comparison_summary_path
    (
        input_length_bucket_cache_reuse_summary,
        input_length_bucket_cache_reuse_summary_path,
        input_length_bucket_cache_reuse_csv_path,
        input_length_bucket_cache_reuse_plot_path,
    ) = write_cache_reuse_by_input_length_bucket_from_rows(
        request_metric_rows,
        advanced_dir,
        bucket_defs=build_input_length_bucket_defs(input_length_bucket_thresholds),
    )
    paths["input_length_bucket_cache_reuse_summary"] = input_length_bucket_cache_reuse_summary_path
    paths["input_length_bucket_cache_reuse_csv"] = input_length_bucket_cache_reuse_csv_path
    paths["input_length_bucket_cache_reuse_plot"] = input_length_bucket_cache_reuse_plot_path
    agentic_summary, agentic_summary_path = write_agentic_outputs_from_rows(request_metric_rows, advanced_dir)
    paths["agentic_patterns_summary"] = agentic_summary_path
    transition_summary, transition_summary_path, transition_edges_path = write_transition_outputs_from_existing(
        source_path,
        advanced_dir / "request_metrics.csv",
        advanced_dir,
    )
    paths["transition_patterns_summary"] = transition_summary_path
    paths["session_transition_edges"] = transition_edges_path
    retokenized_transition_summary, retokenized_transition_summary_path, retokenized_transition_edges_path = (
        write_retokenized_transition_outputs_from_existing(
            source_path,
            advanced_dir / "request_metrics.csv",
            advanced_dir,
        )
    )
    paths["retokenized_transition_summary"] = retokenized_transition_summary_path
    paths["session_transition_retokenized_edges"] = retokenized_transition_edges_path
    (
        context_change_summary,
        context_change_summary_path,
        context_change_casebook_path,
        context_change_representative_pairs_path,
    ) = write_context_change_deep_dive_from_existing(
        source_path,
        advanced_dir / "request_metrics.csv",
        advanced_dir,
    )
    paths["context_change_summary"] = context_change_summary_path
    paths["context_change_casebook"] = context_change_casebook_path
    paths["context_change_representative_pairs"] = context_change_representative_pairs_path

    tools_summary = {
        "top_declared_tools": tool_stats["catalog_rows"][:20],
        "tool_round_edge_count": sum(row["is_tool_round"] for row in tool_stats["session_edges"]),
        "longest_tool_rounds": sorted(
            [row for row in tool_stats["session_edges"] if row["is_tool_round"]],
            key=lambda row: row["gap_from_prev_ready_ms"],
            reverse=True,
        )[:20],
    }
    tools_summary_path = advanced_dir / "tools_summary.json"
    with open(tools_summary_path, "w", encoding="utf-8") as handle:
        json.dump(tools_summary, handle, ensure_ascii=False, indent=2)
    paths["tools_summary"] = tools_summary_path

    summary = {
        "segment_mode": segment_mode,
        "tokenizer_path": theoretical["resolved_tokenizer_path"],
        "block_size": block_size,
        "request_count": len(records),
        "tool_round_edge_count": tools_summary["tool_round_edge_count"],
        "top_declared_tools": tools_summary["top_declared_tools"],
        "input_length_comparison_summary": input_length_comparison_summary,
        "input_length_bucket_cache_reuse_summary": input_length_bucket_cache_reuse_summary,
        "agentic_patterns_summary": agentic_summary,
        "transition_patterns_summary": transition_summary,
        "retokenized_transition_summary": retokenized_transition_summary,
        "context_change_summary": context_change_summary,
        "cdf_files": sorted(path.name for path in advanced_dir.glob("cdf*.csv")),
    }
    summary_path = advanced_dir / DETAILS_SUMMARY_FILENAME
    with open(summary_path, "w", encoding="utf-8") as handle:
        json.dump(summary, handle, ensure_ascii=False, indent=2)
    paths["details_summary"] = summary_path
    progress_path = advanced_dir / "progress.json"
|
|
progress_path.write_text(
|
|
json.dumps(
|
|
{
|
|
"mode": "in_memory_study",
|
|
"processed_requests": len(records),
|
|
"total_requests": len(records),
|
|
"fraction_done": 1.0,
|
|
},
|
|
ensure_ascii=False,
|
|
indent=2,
|
|
),
|
|
encoding="utf-8",
|
|
)
|
|
paths["progress"] = progress_path
|
|
if show_progress:
|
|
current_rss_mb = process.memory_info().rss / (1024 * 1024)
|
|
peak_rss_mb = max(peak_rss_mb, current_rss_mb)
|
|
tqdm.write(
|
|
f"Stage 4/4 done: rss_mb={current_rss_mb:.0f} "
|
|
f"est_peak_mb={_estimate_peak_rss_mb(current_rss_mb, peak_rss_mb, 1.0):.0f}"
|
|
)
|
|
return paths, summary, tools_summary, theoretical
|
|
|
|
|
|
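# A minimal sketch (not called by the pipeline) of the CDF-artifact pattern used
# above: build_cdf() turns a raw value list into {"value", "cdf"} rows that
# write_csv() persists directly. The sample latencies below are hypothetical.
def _example_cdf_artifact(out_dir):
    sample_latencies_ms = [120, 45, 45, 300, None, 80]  # None entries are dropped
    rows = build_cdf(sample_latencies_ms)
    # rows[0] == {"value": 45, "cdf": 0.2}; the last row always has cdf == 1.0.
    return write_csv(Path(out_dir) / "cdf_example_latency_ms.csv", rows)

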
def build_agentic_markdown_section(agentic_summary):
    if not agentic_summary:
        return ""
    lines = [
        "## Agentic Workload Patterns",
        "- This section focuses on coding-agent-specific behavior rather than generic chat metrics.",
        f"- Session turn stats: {json.dumps(agentic_summary['session_turn_stats'], ensure_ascii=False)}",
        f"- Session inter-request gap stats from previous ready time (ms): {json.dumps(agentic_summary['pair_gap_ready_ms_stats'], ensure_ascii=False)}",
        f"- Session inter-request gap stats from previous end time (ms): {json.dumps(agentic_summary['pair_gap_end_ms_stats'], ensure_ascii=False)}",
        f"- Append-like proxy: `{agentic_summary['append_like_proxy_definition']}`",
        f"- Theoretical source scope: {json.dumps(agentic_summary['theoretical_source_scope'], ensure_ascii=False)}",
        "",
        "Session turn buckets:",
        "| bucket | session_count | session_fraction | request_count | request_fraction |",
        "| --- | --- | --- | --- | --- |",
    ]
    for row in agentic_summary["session_turn_bucket_rows"]:
        lines.append(
            f"| {row['bucket']} | {row['session_count']} | {row['session_fraction']:.4f} | "
            f"{row['request_count']} | {row['request_fraction']:.4f} |"
        )

    lines.extend(
        [
            "",
            "Request-level agentic fractions:",
            "| metric | fraction |",
            "| --- | --- |",
        ]
    )
    for row in agentic_summary["request_level_fraction_rows"]:
        lines.append(f"| {row['metric']} | {row['fraction']:.4f} |")

    lines.extend(
        [
            "",
            "Pair-level agentic fractions:",
            "| metric | fraction |",
            "| --- | --- |",
        ]
    )
    for row in agentic_summary["pair_level_fraction_rows"]:
        lines.append(f"| {row['metric']} | {row['fraction']:.4f} |")
    return "\n".join(lines)


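# Illustrative sketch: the markdown builders in this module consume plain dicts,
# so a section can be rendered in isolation. Every value below is hypothetical;
# this helper exists only to show the expected summary shape.
def _example_agentic_section():
    summary = {
        "session_turn_stats": {"count": 2, "mean": 3.0},
        "pair_gap_ready_ms_stats": {"count": 4, "p50": 1200},
        "pair_gap_end_ms_stats": {"count": 4, "p50": 900},
        "append_like_proxy_definition": "next_prompt startswith prev_prompt",
        "theoretical_source_scope": {"same_session_fraction": 0.9},
        "session_turn_bucket_rows": [
            {"bucket": "1-4", "session_count": 2, "session_fraction": 1.0,
             "request_count": 6, "request_fraction": 1.0},
        ],
        "request_level_fraction_rows": [{"metric": "has_tools", "fraction": 0.5}],
        "pair_level_fraction_rows": [{"metric": "append_like", "fraction": 0.75}],
    }
    return build_agentic_markdown_section(summary)

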
def build_input_length_comparison_markdown_section(input_length_comparison_summary):
    if not input_length_comparison_summary:
        return ""
    lines = [
        "## Input Length Comparison",
        "- This section compares two input-length definitions: provider-reported `usage.input_tokens` vs analyzer-retokenized `canonical_prompt` units.",
        f"- Request count: {input_length_comparison_summary['request_count']}",
        f"- Exact-match fraction: {input_length_comparison_summary['same_fraction']:.4f}",
        f"- Retokenized > provider fraction: {input_length_comparison_summary['retokenized_gt_provider_fraction']:.4f}",
        f"- Retokenized < provider fraction: {input_length_comparison_summary['retokenized_lt_provider_fraction']:.4f}",
        f"- Provider input token stats: {json.dumps(input_length_comparison_summary['provider_input_tokens_stats'], ensure_ascii=False)}",
        f"- Retokenized prompt token stats: {json.dumps(input_length_comparison_summary['retokenized_prompt_tokens_stats'], ensure_ascii=False)}",
        f"- Delta stats (`retokenized - provider`): {json.dumps(input_length_comparison_summary['delta_tokens_stats'], ensure_ascii=False)}",
        f"- Ratio stats (`retokenized / provider`): {json.dumps(input_length_comparison_summary['ratio_stats'], ensure_ascii=False)}",
        f"- Relative delta stats vs provider: {json.dumps(input_length_comparison_summary['relative_delta_vs_provider_stats'], ensure_ascii=False)}",
    ]
    return "\n".join(lines)


def build_input_length_bucket_cache_reuse_markdown_section(input_length_bucket_cache_reuse_summary):
    if not input_length_bucket_cache_reuse_summary:
        return ""
    bucket_defs = input_length_bucket_cache_reuse_summary.get("bucket_definition", {}).get("buckets", [])
    bucket_spec = "; ".join(
        (
            f"{row['input_tokens_min_inclusive']} <= L < {row['input_tokens_max_exclusive']}"
            if row.get("input_tokens_max_exclusive") is not None
            else f"{row['input_tokens_min_inclusive']} <= L"
        )
        for row in bucket_defs
    )
    lines = [
        "## Cache Reuse by Provider Input-Length Bucket",
        f"- Bucket ranges: `{bucket_spec}`" if bucket_spec else "- Bucket ranges: _n/a_",
        "- Bucket assignment uses provider `usage.input_tokens`; theoretical reuse still uses analyzer-retokenized prompt prefix hits.",
        "- `weighted_theoretical_cache_hit_ratio` is the global infinite-cache upper bound.",
        "- `weighted_bucketed_theoretical_cache_hit_ratio` is the upper bound after splitting the cache by input-length bucket.",
        "",
        "| bucket | request_count | request_fraction | weighted_actual_cache_hit_ratio | weighted_theoretical_cache_hit_ratio | weighted_bucketed_theoretical_cache_hit_ratio | weighted_bucket_boundary_loss_ratio | actual_reused_request_fraction | theoretical_reused_request_fraction | bucketed_theoretical_reused_request_fraction |",
        "| --- | --- | --- | --- | --- | --- | --- | --- | --- | --- |",
    ]
    for row in input_length_bucket_cache_reuse_summary["bucket_rows"]:
        lines.append(
            f"| {row['bucket']} | {row['request_count']} | {row['request_fraction']:.4f} | "
            f"{row['weighted_actual_cache_hit_ratio']:.4f} | {row['weighted_theoretical_cache_hit_ratio']:.4f} | "
            f"{row['weighted_bucketed_theoretical_cache_hit_ratio']:.4f} | {row['weighted_bucket_boundary_loss_ratio']:.4f} | "
            f"{row['actual_reused_request_fraction']:.4f} | {row['theoretical_reused_request_fraction']:.4f} | "
            f"{row['bucketed_theoretical_reused_request_fraction']:.4f} |"
        )
    return "\n".join(lines)


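# Illustrative sketch of the bucket-table rendering above, driven by a hand-built
# summary dict. All thresholds and ratios are hypothetical; a None upper bound
# renders as an unbounded "min <= L" range.
def _example_bucket_reuse_section():
    summary = {
        "bucket_definition": {
            "buckets": [
                {"input_tokens_min_inclusive": 0, "input_tokens_max_exclusive": 4096},
                {"input_tokens_min_inclusive": 4096, "input_tokens_max_exclusive": None},
            ]
        },
        "bucket_rows": [
            {
                "bucket": "0-4095",
                "request_count": 10,
                "request_fraction": 0.8,
                "weighted_actual_cache_hit_ratio": 0.41,
                "weighted_theoretical_cache_hit_ratio": 0.73,
                "weighted_bucketed_theoretical_cache_hit_ratio": 0.65,
                "weighted_bucket_boundary_loss_ratio": 0.08,
                "actual_reused_request_fraction": 0.6,
                "theoretical_reused_request_fraction": 0.9,
                "bucketed_theoretical_reused_request_fraction": 0.85,
            },
        ],
    }
    # Bucket ranges render as "0 <= L < 4096; 4096 <= L".
    return build_input_length_bucket_cache_reuse_markdown_section(summary)

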
def build_session_bucket_boundary_markdown_section(session_bucket_boundary_miss_summary):
    if not session_bucket_boundary_miss_summary:
        return ""
    lines = [
        "## Session Cross-Bucket Miss",
        "- This section uses the `parent_chat_id -> chat_id` chain from the release trace.",
        "- `cross_bucket_shared_prefix_unit_fraction` measures how much parent-child reusable prefix is lost when parent and child fall into different input-length buckets.",
        f"- Cross-bucket edge fraction: {session_bucket_boundary_miss_summary['cross_bucket_edge_fraction']:.4f}",
        (
            "- Cross-bucket shared-prefix miss fraction: "
            f"{session_bucket_boundary_miss_summary['cross_bucket_shared_prefix_unit_fraction']:.4f}"
        ),
        "",
        "| bucket | edge_count | edge_fraction | reusable_edge_count | cross_bucket_edge_count | cross_bucket_edge_fraction | shared_prefix_units_sum | cross_bucket_shared_prefix_units_sum | cross_bucket_shared_prefix_unit_fraction |",
        "| --- | --- | --- | --- | --- | --- | --- | --- | --- |",
    ]
    for row in session_bucket_boundary_miss_summary.get("bucket_rows", []):
        lines.append(
            f"| {row['bucket']} | {row['edge_count']} | {row['edge_fraction']:.4f} | "
            f"{row['reusable_edge_count']} | {row['cross_bucket_edge_count']} | "
            f"{row['cross_bucket_edge_fraction']:.4f} | {row['shared_prefix_units_sum']} | "
            f"{row['cross_bucket_shared_prefix_units_sum']} | {row['cross_bucket_shared_prefix_unit_fraction']:.4f} |"
        )
    return "\n".join(lines)


def build_alive_block_timeline_markdown_section(alive_block_timeline_summary):
    if not alive_block_timeline_summary:
        return ""
    return "\n".join(
        [
            "## Live KV-Cache Blocks Over Time",
            "- Each block span starts at the block's first appearance and ends at its last reuse.",
            f"- Peak live blocks: {alive_block_timeline_summary['peak_alive_blocks']}",
            f"- Timeline events: {alive_block_timeline_summary['event_count']}",
        ]
    )


def build_study_markdown(
    base_report_markdown,
    output_dir,
    advanced_summary,
    tools_summary,
    theoretical,
    theoretical_summary=None,
    input_length_comparison_summary=None,
    input_length_bucket_cache_reuse_summary=None,
    session_bucket_boundary_miss_summary=None,
    alive_block_timeline_summary=None,
    agentic_summary=None,
    transition_summary=None,
    retokenized_transition_summary=None,
    context_change_summary=None,
):
    artifact_dir = DETAILS_DIR_NAME
    if theoretical_summary is None:
        theoretical_summary = {
            "request_hit_ratio_stats": series_stats(
                [row["theoretical_prefix_hit_ratio"] for row in theoretical["request_rows"]]
            ),
            "reuse_gap_stats": series_stats([row["reuse_gap_ms"] for row in theoretical.get("reuse_gap_rows", [])]),
            "block_lifetime_stats": series_stats(
                [row["lifetime_ms"] for row in theoretical.get("block_rows", []) if row["reuse_count"] > 0]
            ),
        }
    request_hit_stats = theoretical_summary["request_hit_ratio_stats"]
    reuse_gap_stats = theoretical_summary["reuse_gap_stats"]
    block_lifetime_stats = theoretical_summary["block_lifetime_stats"]
    lines = [
        base_report_markdown.rstrip(),
        "",
        "## Study Outputs",
        f"- Output root: `{output_dir}`",
        f"- Segment mode: `{advanced_summary['segment_mode']}`",
        f"- Tokenizer path: `{advanced_summary['tokenizer_path']}`" if advanced_summary["tokenizer_path"] else "- Tokenizer path: _n/a_",
        f"- Theoretical cache block size: {advanced_summary['block_size']}",
        f"- Request count: {advanced_summary['request_count']}",
        f"- Tool round edges: {advanced_summary['tool_round_edge_count']}",
        "",
        "Generated figures:",
f"- ",
|
|
f"- ",
|
|
f"- ",
|
|
f"- ",
|
|
f"- ",
|
|
f"- ",
|
|
f"- ",
|
|
f"- ",
|
|
f"- ",
|
|
f"- ",
|
|
f"- ",
|
|
f"- ",
|
|
f"- ",
|
|
f"- ",
|
|
f"- ",
|
|
f"- ",
|
|
f"- ",
|
|
f"- ",
|
|
f"- ",
|
|
f"- ",
|
|
f"- ",
|
|
f"- ",
|
|
f"- ",
|
|
f"- ",
|
|
f"- ",
|
|
f"- ",
|
|
f"- ",
|
|
f"- ",
|
|
f"- ",
|
|
f"- ",
|
|
f"- ",
|
|
f"- ",
|
|
"",
|
|
"## Theoretical Cache",
|
|
f"- Request-level theoretical hit ratio stats: {json.dumps(request_hit_stats, ensure_ascii=False)}",
|
|
f"- Reuse gap stats (ms): {json.dumps(reuse_gap_stats, ensure_ascii=False)}",
|
|
f"- Block lifetime stats (ms): {json.dumps(block_lifetime_stats, ensure_ascii=False)}",
|
|
f"- Detailed data: `{artifact_dir}/theoretical_request_cache.csv`, `{artifact_dir}/theoretical_block_lifetimes.csv`",
|
|
"",
|
|
"## Tool Timing",
|
|
"- Main timing metric is inter-API gap: previous request ready time -> next request ready time.",
|
|
f"- Raw edge table: `{artifact_dir}/tool_round_edges.csv`",
|
|
f"- Tool catalog: `{artifact_dir}/tools_catalog.csv`",
|
|
"",
|
|
"Top declared tools:",
|
|
"| tool_name | declared_count | request_fraction | tool_round_count | avg_tool_round_gap_ms |",
|
|
"| --- | --- | --- | --- | --- |",
|
|
]
|
|
for row in tools_summary["top_declared_tools"][:10]:
|
|
lines.append(
|
|
f"| {row['tool_name']} | {row['declared_count']} | {row['request_fraction']:.4f} | "
|
|
f"{row['tool_round_count']} | {row['avg_tool_round_gap_ms']:.2f} |"
|
|
)
|
|
|
|
lines.extend(
|
|
[
|
|
"",
|
|
"Longest tool rounds:",
|
|
"| session_id | prev_request_id | next_request_id | gap_from_prev_ready_ms | next_declared_tool_count |",
|
|
"| --- | --- | --- | --- | --- |",
|
|
]
|
|
)
|
|
for row in tools_summary["longest_tool_rounds"][:10]:
|
|
lines.append(
|
|
f"| {row['session_id']} | {row['prev_request_id']} | {row['next_request_id']} | "
|
|
f"{row['gap_from_prev_ready_ms']} | {row['next_declared_tool_count']} |"
|
|
)
|
|
|
|
input_length_section = build_input_length_comparison_markdown_section(input_length_comparison_summary)
|
|
if input_length_section:
|
|
lines.extend(["", input_length_section])
|
|
input_length_bucket_section = build_input_length_bucket_cache_reuse_markdown_section(
|
|
input_length_bucket_cache_reuse_summary
|
|
)
|
|
if input_length_bucket_section:
|
|
lines.extend(["", input_length_bucket_section])
|
|
session_bucket_boundary_section = build_session_bucket_boundary_markdown_section(
|
|
session_bucket_boundary_miss_summary
|
|
)
|
|
if session_bucket_boundary_section:
|
|
lines.extend(["", session_bucket_boundary_section])
|
|
alive_block_timeline_section = build_alive_block_timeline_markdown_section(alive_block_timeline_summary)
|
|
if alive_block_timeline_section:
|
|
lines.extend(["", alive_block_timeline_section])
|
|
agentic_section = build_agentic_markdown_section(agentic_summary)
|
|
if agentic_section:
|
|
lines.extend(["", agentic_section])
|
|
transition_section = build_transition_markdown_section(transition_summary)
|
|
if transition_section:
|
|
lines.extend(["", transition_section])
|
|
retokenized_transition_section = build_retokenized_transition_markdown_section(
|
|
retokenized_transition_summary,
|
|
provider_transition_summary=transition_summary,
|
|
)
|
|
if retokenized_transition_section:
|
|
lines.extend(["", retokenized_transition_section])
|
|
context_change_section = build_context_change_markdown_section(context_change_summary)
|
|
if context_change_section:
|
|
lines.extend(["", context_change_section])
|
|
|
|
lines.extend(
|
|
[
|
|
"",
|
|
"## Data Files",
|
|
f"- `{artifact_dir}/request_metrics.csv` combines base request metrics with theoretical cache metrics.",
|
|
f"- `{artifact_dir}/cdf_input_tokens.csv` and `{artifact_dir}/cdf_output_tokens.csv` contain the length CDFs.",
|
|
f"- `{artifact_dir}/cdf_retokenized_prompt_tokens.csv`, `{artifact_dir}/cdf_input_length_delta_tokens.csv`, `{artifact_dir}/cdf_input_length_ratio_retokenized_over_provider.csv`, and `{artifact_dir}/cdf_input_length_relative_delta_vs_provider.csv` compare provider vs retokenized input length.",
|
|
f"- `{artifact_dir}/input_length_bucket_cache_reuse.csv` and `{artifact_dir}/input_length_bucket_cache_reuse_summary.json` summarize actual/theoretical cache reuse by provider input-length bucket.",
|
|
f"- `{artifact_dir}/cdf_actual_cache_hit_ratio.csv` and `{artifact_dir}/cdf_theoretical_cache_hit_ratio.csv` contain actual/theoretical cache-hit CDFs.",
|
|
f"- `{artifact_dir}/cdf_theoretical_reuse_gap_ms.csv` contains the reuse-gap CDF.",
|
|
f"- `{artifact_dir}/cdf_theoretical_block_lifetime_ms.csv` contains the block lifecycle CDF from first appearance to last reuse.",
|
|
f"- `{artifact_dir}/theoretical_alive_block_timeline.csv` contains the time-series of live KV-cache blocks.",
|
|
f"- `{artifact_dir}/session_bucket_boundary_miss.csv` and `{artifact_dir}/session_bucket_boundary_miss_summary.json` quantify parent-child cache misses caused by cross-bucket routing.",
|
|
f"- `{artifact_dir}/cdf_tool_round_gap_from_prev_ready_ms.csv` contains the tool-round inter-API gap CDF.",
|
|
f"- `{artifact_dir}/agentic_session_turn_buckets.csv` contains session-turn bucket shares for sessions and requests.",
|
|
f"- `{artifact_dir}/agentic_request_level_fractions.csv` contains request-level agentic workload fractions.",
|
|
f"- `{artifact_dir}/agentic_pair_level_fractions.csv` contains pair-level agentic workload fractions.",
|
|
f"- `{artifact_dir}/agentic_theoretical_source_scope.csv` contains same-session vs cross-session theoretical prefix-reuse scope.",
|
|
f"- `{artifact_dir}/cdf_session_inter_request_gap_ready_ms.csv` and `{artifact_dir}/cdf_session_inter_request_gap_end_ms.csv` contain session transition gap CDFs.",
|
|
f"- `{artifact_dir}/session_transition_edges.csv` contains per-session `prev_request -> next_request` trigger, context delta, and KV-cache source annotations.",
|
|
f"- `{artifact_dir}/transition_trigger_groups.csv` contains trigger-group proportions and context-change summaries.",
|
|
f"- `{artifact_dir}/transition_source_scope.csv` contains KV-cache source scope proportions and reuse-gap summaries.",
|
|
f"- `{artifact_dir}/cdf_transition_delta_fraction_of_current.csv` and `{artifact_dir}/cdf_transition_abs_delta_fraction_of_current.csv` contain overall signed/absolute context-delta CDFs.",
|
|
f"- `{artifact_dir}/cdf_transition_delta_fraction_by_trigger.csv`, `{artifact_dir}/cdf_transition_abs_delta_fraction_by_trigger.csv`, and `{artifact_dir}/cdf_transition_uncached_fraction_by_trigger.csv` contain trigger-group CDFs.",
|
|
f"- `{artifact_dir}/cdf_transition_source_gap_ms.csv` and `{artifact_dir}/cdf_transition_source_gap_ms_by_scope.csv` contain overall/grouped source-gap CDFs.",
|
|
f"- `{artifact_dir}/session_transition_retokenized_edges.csv`, `{artifact_dir}/transition_retokenized_trigger_groups.csv`, and `{artifact_dir}/transition_retokenized_summary.json` contain the same session-transition analysis recomputed with retokenized prompt length.",
|
|
f"- `{artifact_dir}/cdf_transition_retokenized_delta_fraction_of_current.csv`, `{artifact_dir}/cdf_transition_retokenized_abs_delta_fraction_of_current.csv`, `{artifact_dir}/cdf_transition_retokenized_delta_fraction_by_trigger.csv`, and `{artifact_dir}/cdf_transition_retokenized_abs_delta_fraction_by_trigger.csv` contain retokenized transition-length CDFs.",
|
|
f"- `{artifact_dir}/context_change_mechanism_summary.csv` and `{artifact_dir}/context_change_summary.json` summarize why context shrinks happen between consecutive requests.",
|
|
f"- `{artifact_dir}/context_change_casebook.md` contains representative per-mechanism cases with message-structure summaries from the source trace (`*-raw.jsonl` or legacy `normalized.jsonl`).",
|
|
]
|
|
)
|
|
return "\n".join(lines) + "\n"
|
|
|
|
|
|
def run_study(
    records,
    output_dir,
    normalized_format="jsonl",
    source_path=None,
    block_size=256,
    segment_mode="tokenizer",
    tokenizer_path=None,
    model_family="auto",
    model_meta_dir=None,
    input_length_bucket_thresholds=None,
    tokenizer_batch_size=64,
    show_progress=False,
):
    if show_progress:
        tqdm.write("Stage 0/4: base outputs")
    source_path = Path(source_path) if source_path else None
    normalized_path = None
    if source_path is None:
        normalized_path = write_normalized(records, output_dir, output_format=normalized_format)
        source_path = normalized_path
    features = compute_features(records)
    features_path = write_features(features, output_dir)
    summary_path, report_path = write_report(records, features, output_dir)

    from .resume_advanced import run_advanced_from_existing

    release_path = None
    if source_path is not None and source_path.name.endswith("-raw.jsonl"):
        candidate = source_path.with_name(source_path.name[:-len("-raw.jsonl")] + ".jsonl")
        if path_looks_like_release_trace(candidate):
            release_path = candidate

    if release_path is not None:
        # Release the large in-memory trace objects before advanced analysis.
        del records
        del features
        gc.collect()
        advanced_paths = run_advanced_from_existing(
            source_path,
            release_path,
            features_path,
            output_dir,
            input_length_bucket_thresholds=input_length_bucket_thresholds,
            show_progress=show_progress,
        )
    else:
        advanced_paths, advanced_summary, tools_summary, theoretical = write_study_outputs(
            records,
            features,
            output_dir,
            source_path=source_path,
            block_size=block_size,
            segment_mode=segment_mode,
            tokenizer_path=tokenizer_path,
            input_length_bucket_thresholds=input_length_bucket_thresholds,
            show_progress=show_progress,
        )
        # Only this branch produces the in-memory summaries the extended study
        # markdown needs; rebuilding the report on the resume branch above would
        # reference names that were never bound.
        report_path.write_text(
            build_study_markdown(
                report_path.read_text(encoding="utf-8"),
                output_dir,
                advanced_summary,
                tools_summary,
                theoretical,
                input_length_comparison_summary=advanced_summary.get("input_length_comparison_summary"),
                input_length_bucket_cache_reuse_summary=advanced_summary.get("input_length_bucket_cache_reuse_summary"),
                agentic_summary=advanced_summary.get("agentic_patterns_summary"),
                transition_summary=advanced_summary.get("transition_patterns_summary"),
                retokenized_transition_summary=advanced_summary.get("retokenized_transition_summary"),
                context_change_summary=advanced_summary.get("context_change_summary"),
            ),
            encoding="utf-8",
        )
    paths = {
        "features": features_path,
        "summary": summary_path,
        "report": report_path,
        **advanced_paths,
    }
    if normalized_path is not None:
        paths["normalized"] = normalized_path
    return paths


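# Usage sketch. The loader name and paths below are hypothetical; records are
# normally produced by the trace parser before this module is invoked:
#
#     records = load_trace_records(Path("traces/demo-raw.jsonl"))  # assumed helper
#     paths = run_study(records, Path("out/demo_study"), show_progress=True)
#     print(paths["report"])  # extended study markdown report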