Initial commit

2026-04-21 15:44:47 +00:00
commit bce3fe1395
40 changed files with 1758724 additions and 0 deletions

trace_analyzer/__init__.py Normal file

@@ -0,0 +1,10 @@
"""Trace analysis toolkit for coding-agent request logs."""
def main(argv=None):
from .cli import main as cli_main
return cli_main(argv)
__all__ = ["main"]

trace_analyzer/__main__.py Normal file

@@ -0,0 +1,5 @@
from .cli import main

if __name__ == "__main__":
    raise SystemExit(main())
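# Usage sketch (hypothetical trace path): `python -m trace_analyzer analyze
# traces/foo-raw.jsonl` routes through this entry point exactly like calling
# trace_analyzer.main(["analyze", "traces/foo-raw.jsonl"]) directly.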

trace_analyzer/cli.py Normal file

@@ -0,0 +1,477 @@
import argparse
import json
from pathlib import Path
from tqdm.auto import tqdm
from .figures import render_figures
from .features import compute_features
from .layout import details_outputs_exist
from .parser import default_output_dir, infer_analysis_dataset_name, load_records, path_looks_like_release_trace
from .preparation import stream_prepare
from .report import write_features, write_normalized, write_report
from .reporting import write_reports
from .resume_advanced import collect_existing_detail_paths, run_advanced_from_existing
from .study import parse_input_length_bucket_thresholds, run_study
def build_parser():
parser = argparse.ArgumentParser(description="Analyze coding-agent trace patterns.")
subparsers = parser.add_subparsers(dest="command", required=True)
analyze_parser = subparsers.add_parser(
"analyze",
help="Run the full analysis workflow from one formatter-generated *-raw.jsonl trace.",
)
analyze_parser.add_argument("input", help="Path to the formatter-generated *-raw.jsonl trace.")
analyze_parser.add_argument(
"--release-input",
default=None,
help="Path to the formatter-generated release .jsonl with hash_ids. Defaults to the sibling file without the `-raw` suffix.",
)
analyze_parser.add_argument(
"--dataset-name",
default=None,
help="Dataset name used for output paths and figure titles. Defaults to the formatted trace stem.",
)
analyze_parser.add_argument(
"--output-dir",
default=None,
help="Explicit analysis output directory. Defaults to outputs/analysis/<dataset>/",
)
analyze_parser.add_argument("--output-root", default="outputs/analysis")
analyze_parser.add_argument(
"--figure-dir",
default=None,
help="Explicit figure directory. Defaults to <output-dir>/figures/.",
)
analyze_parser.add_argument(
"--block-size",
type=int,
default=256,
help="Block size for theoretical cache analysis.",
)
analyze_parser.add_argument(
"--segment-mode",
default="tokenizer",
choices=["bytes", "tokenizer"],
help="How to segment prompts for theoretical cache analysis.",
)
analyze_parser.add_argument(
"--tokenizer-path",
default=None,
help="Local path or model id for tokenizer mode. Defaults to the local resolved tokenizer path.",
)
analyze_parser.add_argument(
"--tokenizer-batch-size",
type=int,
default=64,
help="Batch size used by tokenizer-based theoretical cache analysis.",
)
analyze_parser.add_argument(
"--model-family",
default="auto",
help="Model family for tokenizer/chat-template metadata. Defaults to auto-detect.",
)
analyze_parser.add_argument(
"--model-meta-dir",
default=None,
help="Override the base directory that contains model_meta/<provider>/<model>/.",
)
analyze_parser.add_argument(
"--input-length-buckets",
default=None,
help="Semicolon-separated input-length bucket thresholds in tokens, such as `32768;87040;131072` or `32Ki;85Ki;128Ki`.",
)
parse_parser = subparsers.add_parser("parse", help="Normalize a formatter-generated *-raw.jsonl trace.")
_add_common_args(parse_parser)
parse_parser.add_argument(
"--format",
default="jsonl",
choices=["jsonl", "csv", "parquet"],
help="Normalized output format.",
)
features_parser = subparsers.add_parser("features", help="Extract request-level features.")
_add_common_args(features_parser)
report_parser = subparsers.add_parser("report", help="Generate markdown and json summary reports.")
_add_common_args(report_parser)
report_parser.add_argument(
"--normalized-format",
default="jsonl",
choices=["jsonl", "csv", "parquet"],
help="Also emit normalized records in this format.",
)
study_parser = subparsers.add_parser(
"study",
help="Generate data tables and CDF plots for lengths, cache reuse, and tool timing.",
)
_add_common_args(study_parser)
study_parser.add_argument(
"--normalized-format",
default="jsonl",
choices=["jsonl", "csv", "parquet"],
help="Normalized output format.",
)
study_parser.add_argument(
"--block-size",
type=int,
default=256,
help="Block size for theoretical cache analysis.",
)
study_parser.add_argument(
"--segment-mode",
default="tokenizer",
choices=["bytes", "tokenizer"],
help="How to segment prompts for theoretical cache analysis.",
)
study_parser.add_argument(
"--tokenizer-path",
default=None,
help="Local path or model id for tokenizer mode. Defaults to the local resolved tokenizer path.",
)
study_parser.add_argument(
"--tokenizer-batch-size",
type=int,
default=64,
help="Batch size used by tokenizer-based theoretical cache analysis.",
)
study_parser.add_argument(
"--model-family",
default="auto",
help="Model family for tokenizer/chat-template metadata. Defaults to auto-detect.",
)
study_parser.add_argument(
"--model-meta-dir",
default=None,
help="Override the base directory that contains model_meta/<provider>/<model>/.",
)
study_parser.add_argument(
"--input-length-buckets",
default=None,
help="Semicolon-separated input-length bucket thresholds in tokens, such as `32768;87040;131072` or `32Ki;85Ki;128Ki`.",
)
resume_parser = subparsers.add_parser(
"resume-details",
aliases=["resume-advanced"],
help="Reuse existing source trace (*-raw.jsonl or legacy normalized.jsonl) + features.csv and compute only detailed analysis outputs.",
)
resume_parser.add_argument("input", help="Path to formatter-generated *-raw.jsonl")
resume_parser.add_argument("features", help="Path to existing features.csv")
resume_parser.add_argument(
"--release-input",
default=None,
help="Path to the formatter-generated release .jsonl with hash_ids. Defaults to the sibling file without the `-raw` suffix.",
)
resume_parser.add_argument(
"--output-dir",
required=True,
help="Existing output directory to receive detailed analysis outputs.",
)
resume_parser.add_argument(
"--block-size",
type=int,
default=256,
help="Block size for theoretical cache analysis.",
)
resume_parser.add_argument(
"--segment-mode",
default="tokenizer",
choices=["bytes", "tokenizer"],
help="How to segment prompts for theoretical cache analysis.",
)
resume_parser.add_argument(
"--tokenizer-path",
default=None,
help="Local path or model id for tokenizer mode. Defaults to the local resolved tokenizer path.",
)
resume_parser.add_argument(
"--tokenizer-batch-size",
type=int,
default=64,
help="Batch size used by tokenizer-based theoretical cache analysis.",
)
resume_parser.add_argument(
"--model-family",
default="auto",
help="Model family for tokenizer/chat-template metadata. Defaults to auto-detect.",
)
resume_parser.add_argument(
"--model-meta-dir",
default=None,
help="Override the base directory that contains model_meta/<provider>/<model>/.",
)
resume_parser.add_argument(
"--limit",
type=int,
default=None,
help="Only process the first N source/features rows. Useful for throughput benchmarking.",
)
resume_parser.add_argument(
"--input-length-buckets",
default=None,
help="Semicolon-separated input-length bucket thresholds in tokens, such as `32768;87040;131072` or `32Ki;85Ki;128Ki`.",
)
return parser
def _add_common_args(parser):
parser.add_argument("input", help="Path to the formatter-generated *-raw.jsonl trace.")
parser.add_argument("--limit", type=int, default=None, help="Limit number of input lines.")
parser.add_argument(
"--output-dir",
default=None,
help="Output directory. Defaults to outputs/analysis/<input_stem>/",
)
def resolve_output_dir(input_path, output_dir):
return Path(output_dir) if output_dir else default_output_dir(input_path)
def _normalize_dataset_name(name: str) -> str:
text = str(name)
return text[:-4] if text.endswith("-raw") else text
def _resolve_analysis_output_dir(args):
dataset_name = args.dataset_name or _normalize_dataset_name(infer_analysis_dataset_name(args.input))
output_dir = Path(args.output_dir) if args.output_dir else Path(args.output_root) / dataset_name
figure_dir = Path(args.figure_dir) if args.figure_dir else output_dir / "figures"
return dataset_name, output_dir, figure_dir
def _resolve_release_input_path(raw_input: str, release_input: str | None) -> Path:
if release_input:
return Path(release_input)
raw_path = Path(raw_input)
name = raw_path.name
if name.endswith("-raw.jsonl"):
candidate = raw_path.with_name(name[:-len("-raw.jsonl")] + ".jsonl")
else:
raise ValueError("Expected a formatter-generated *-raw.jsonl input, or pass --release-input explicitly.")
return candidate
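# Illustrative mapping (hypothetical paths): with release_input unset,
# "traces/foo-raw.jsonl" resolves to the sibling "traces/foo.jsonl"; an explicit
# release_input short-circuits the derivation and is returned as-is.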
def _resolve_existing_release_input_path(raw_input: str, release_input: str | None) -> Path | None:
candidate = _resolve_release_input_path(raw_input, release_input)
if path_looks_like_release_trace(candidate):
return candidate
return None
def _existing_base_outputs(output_dir):
features = output_dir / "features.csv"
report = output_dir / "report.md"
if features.exists():
return {
"features": features,
"report": report if report.exists() else None,
}
return None
def _existing_detail_outputs(output_dir):
if not details_outputs_exist(output_dir):
return None
return collect_existing_detail_paths(output_dir)
def _stage_message(progress, step: int, total_steps: int, message: str) -> None:
tqdm.write(f"Stage {step}/{total_steps}: {message}")
progress.update(1)
progress.set_postfix(current=message)
def main(argv=None):
parser = build_parser()
args = parser.parse_args(argv)
if args.command == "analyze":
dataset_name, output_dir, figure_dir = _resolve_analysis_output_dir(args)
input_length_bucket_thresholds = parse_input_length_bucket_thresholds(args.input_length_buckets)
release_input_path = _resolve_existing_release_input_path(args.input, args.release_input)
if release_input_path is None:
raise FileNotFoundError(
f"Release trace not found for raw trace {args.input}. "
"Run `python -m trace_formatter build-release <raw-trace>` first, or pass --release-input."
)
total_steps = 4
progress = tqdm(
total=total_steps,
desc="Analyze trace",
unit="stage",
dynamic_ncols=True,
)
try:
prepare_result = None
reusable_base = _existing_base_outputs(output_dir)
if reusable_base:
_stage_message(progress, 1, total_steps, "reuse existing features.csv")
prepare_result = {
"features_path": str(reusable_base["features"]),
"reused": True,
}
else:
_stage_message(progress, 1, total_steps, "prepare features.csv")
prepare_result = stream_prepare(args.input, output_dir, show_progress=True)
reusable_details = _existing_detail_outputs(output_dir)
if reusable_details:
_stage_message(progress, 2, total_steps, "reuse existing details/")
advanced_paths = reusable_details
else:
_stage_message(
progress,
2,
total_steps,
"detailed analysis: request metrics, tool/session stats, kvcache stats",
)
advanced_paths = run_advanced_from_existing(
args.input,
release_input_path,
prepare_result["features_path"],
output_dir,
input_length_bucket_thresholds=input_length_bucket_thresholds,
show_progress=True,
)
_stage_message(progress, 3, total_steps, "reporting: summary.json, report.md, analysis_snapshot.json")
report_result = write_reports(
features_path=prepare_result["features_path"],
output_dir=output_dir,
pipeline_summary={
"dataset_name": dataset_name,
"formatted_path": str(Path(args.input)),
"release_path": str(release_input_path),
**{key: str(value) for key, value in advanced_paths.items()},
},
)
_stage_message(
progress,
4,
total_steps,
"figures: 13 approved request/session/tool/kvcache plots",
)
figure_result = render_figures(
analysis_dir=output_dir,
fig_dir=figure_dir,
dataset_title=dataset_name,
show_progress=True,
)
finally:
progress.close()
print(
json.dumps(
{
"dataset_name": dataset_name,
"formatted_path": str(Path(args.input)),
"output_dir": str(output_dir),
"prepare": prepare_result,
"details": {key: str(value) for key, value in advanced_paths.items()},
"report": report_result,
"figures": figure_result,
"release_path": str(release_input_path),
},
ensure_ascii=False,
indent=2,
)
)
return 0
if args.command in {"resume-details", "resume-advanced"}:
input_length_bucket_thresholds = parse_input_length_bucket_thresholds(args.input_length_buckets)
release_input_path = _resolve_existing_release_input_path(args.input, args.release_input)
if release_input_path is None:
raise FileNotFoundError(
f"Release trace not found for raw trace {args.input}. "
"Run `python -m trace_formatter build-release <raw-trace>` first, or pass --release-input."
)
paths = run_advanced_from_existing(
args.input,
release_input_path,
args.features,
args.output_dir,
input_length_bucket_thresholds=input_length_bucket_thresholds,
show_progress=True,
limit=args.limit,
)
for path in paths.values():
print(path)
return 0
output_dir = resolve_output_dir(args.input, args.output_dir)
if args.command == "study" and args.limit is None:
input_length_bucket_thresholds = parse_input_length_bucket_thresholds(args.input_length_buckets)
reusable = _existing_base_outputs(output_dir)
if reusable:
            try:
                release_input_path = _resolve_existing_release_input_path(args.input, None)
            except ValueError:
                # Legacy normalized inputs have no derivable sibling release trace.
                release_input_path = None
if release_input_path is not None:
paths = _existing_detail_outputs(output_dir)
if paths is None:
paths = run_advanced_from_existing(
args.input,
release_input_path,
reusable["features"],
output_dir,
input_length_bucket_thresholds=input_length_bucket_thresholds,
show_progress=True,
)
for path in paths.values():
print(path)
return 0
show_progress = args.command == "study"
records = load_records(
args.input,
limit=args.limit,
show_progress=show_progress,
progress_desc="Load trace",
)
if args.command == "parse":
path = write_normalized(records, output_dir, output_format=args.format)
print(path)
return 0
features = compute_features(records)
if args.command == "features":
path = write_features(features, output_dir)
print(path)
return 0
if args.command == "study":
input_length_bucket_thresholds = parse_input_length_bucket_thresholds(args.input_length_buckets)
paths = run_study(
records,
output_dir,
normalized_format=args.normalized_format,
source_path=args.input,
block_size=args.block_size,
segment_mode=args.segment_mode,
tokenizer_path=args.tokenizer_path,
model_family=args.model_family,
model_meta_dir=args.model_meta_dir,
input_length_bucket_thresholds=input_length_bucket_thresholds,
show_progress=show_progress,
tokenizer_batch_size=args.tokenizer_batch_size,
)
for path in paths.values():
print(path)
return 0
normalized_path = write_normalized(records, output_dir, output_format=args.normalized_format)
features_path = write_features(features, output_dir)
summary_path, report_path = write_report(records, features, output_dir)
print(normalized_path)
print(features_path)
print(summary_path)
print(report_path)
return 0
if __name__ == "__main__":
raise SystemExit(main())
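# Example resume invocation (hypothetical paths), reusing an existing features.csv
# to recompute only the detailed analysis outputs:
#   python -m trace_analyzer resume-details traces/foo-raw.jsonl \
#       outputs/analysis/foo/features.csv --output-dir outputs/analysis/foo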

trace_analyzer/features.py Normal file

@@ -0,0 +1,117 @@
from collections import Counter
from dataclasses import asdict
from .helpers import percentile, safe_div
from .models import TraceFeatures
LONG_CONTEXT_THRESHOLD = 32000
HIGH_CACHE_THRESHOLD = 0.8
TOOL_BURST_THRESHOLD = 4
TOOL_LOOP_THRESHOLD = 3
def _transition_count(roles, left, right):
return sum(1 for current, nxt in zip(roles, roles[1:]) if current == left and nxt == right)
def _tool_bursts(roles):
bursts = []
current = 0
for role in roles:
if role == "tool":
current += 1
elif current:
bursts.append(current)
current = 0
if current:
bursts.append(current)
return bursts
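# Worked example: for roles ["assistant", "tool", "tool", "assistant", "tool"],
# _tool_bursts returns [2, 1], while _transition_count(roles, "tool", "tool")
# counts the single adjacent tool->tool pair and returns 1.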
def compute_features(records):
features = []
for record in records:
role_counts = Counter(record.role_sequence)
bursts = _tool_bursts(record.role_sequence)
input_tokens = record.usage.input_tokens
output_tokens = record.usage.output_tokens
cached_tokens = record.usage.cached_tokens
latency_ms = record.meta.total_cost_time_ms
cache_hit_ratio = safe_div(cached_tokens, input_tokens)
tool_to_tool_count = _transition_count(record.role_sequence, "tool", "tool")
feature = TraceFeatures(
request_id=record.meta.request_id,
session_id=record.meta.session_id,
model=record.meta.request_model,
status_code=record.meta.status_code,
time=record.meta.time,
message_count=len(record.messages),
conversation_depth=len(record.messages),
declared_tool_count=len(record.declared_tools),
assistant_msg_count=role_counts.get("assistant", 0),
tool_msg_count=role_counts.get("tool", 0),
user_msg_count=role_counts.get("user", 0),
system_msg_count=role_counts.get("system", 0),
assistant_to_tool_count=_transition_count(record.role_sequence, "assistant", "tool"),
tool_to_assistant_count=_transition_count(record.role_sequence, "tool", "assistant"),
tool_to_tool_count=tool_to_tool_count,
assistant_to_user_count=_transition_count(record.role_sequence, "assistant", "user"),
user_to_assistant_count=_transition_count(record.role_sequence, "user", "assistant"),
max_consecutive_tool_msgs=max(bursts) if bursts else 0,
avg_tool_burst_len=safe_div(sum(bursts), len(bursts)) if bursts else 0.0,
has_tool_loop=1 if tool_to_tool_count > 0 else 0,
input_tokens=input_tokens,
output_tokens=output_tokens,
total_tokens=record.usage.total_tokens,
reasoning_tokens=record.usage.reasoning_tokens,
cached_tokens=cached_tokens,
cache_hit_ratio=cache_hit_ratio,
uncached_prompt_tokens=max(input_tokens - cached_tokens, 0),
output_input_ratio=safe_div(output_tokens, input_tokens),
latency_ms=latency_ms,
ms_per_input_token=safe_div(latency_ms, input_tokens),
ms_per_output_token=safe_div(latency_ms, output_tokens),
long_context=1 if input_tokens >= LONG_CONTEXT_THRESHOLD else 0,
high_cache=1 if cache_hit_ratio >= HIGH_CACHE_THRESHOLD else 0,
tool_burst_alert=1 if (max(bursts) if bursts else 0) >= TOOL_BURST_THRESHOLD else 0,
tool_loop_alert=1 if tool_to_tool_count >= TOOL_LOOP_THRESHOLD else 0,
)
feature.pattern_labels = base_pattern_labels(feature)
features.append(feature)
apply_batch_thresholds(features)
return features
def base_pattern_labels(feature):
labels = []
if feature.tool_msg_count == 0 and feature.declared_tool_count == 0:
labels.append("single-shot")
if feature.tool_msg_count > 0 and feature.tool_msg_count >= feature.assistant_msg_count:
labels.append("tool-heavy")
if feature.max_consecutive_tool_msgs >= TOOL_BURST_THRESHOLD:
labels.append("tool-burst")
if feature.cache_hit_ratio >= HIGH_CACHE_THRESHOLD:
labels.append("cache-efficient")
if feature.cache_hit_ratio <= 0.1:
labels.append("cache-cold")
return labels
def apply_batch_thresholds(features):
if not features:
return
latency_p90 = percentile([feature.latency_ms for feature in features], 0.9)
for feature in features:
feature.slow_request = 1 if feature.latency_ms >= latency_p90 else 0
if feature.slow_request and feature.high_cache:
feature.pattern_labels.append("slow-despite-cache")
if feature.input_tokens >= LONG_CONTEXT_THRESHOLD and feature.cache_hit_ratio <= 0.1:
feature.pattern_labels.append("long-context-no-cache")
feature.pattern_labels = sorted(set(feature.pattern_labels))
def feature_to_row(feature):
row = asdict(feature)
row["pattern_labels"] = ";".join(feature.pattern_labels)
return row

trace_analyzer/figures.py Normal file

@@ -0,0 +1,809 @@
from __future__ import annotations
import csv
import json
from collections import Counter, defaultdict
from pathlib import Path
import matplotlib
import numpy as np
from tqdm.auto import tqdm
matplotlib.use("Agg")
import matplotlib.pyplot as plt
from matplotlib.ticker import MaxNLocator, MultipleLocator
from trace_analyzer.helpers import percentile, safe_float, safe_int
from trace_analyzer.layout import resolve_details_dir
PALETTE = {
"blue": "#2B6CB0",
"orange": "#DD6B20",
"green": "#2F855A",
"red": "#C53030",
"purple": "#6B46C1",
"gray": "#4A5568",
"teal": "#0F766E",
"gold": "#B7791F",
"pink": "#D53F8C",
"grid": "#CBD5E0",
}
FIGURE_STEMS = [
"01_input_output_length_cdf",
"02_session_turns_cdf",
"03_request_length_by_turn",
"04_request_trigger_role_pie",
"05_tool_call_output_length_cdf",
"06_tool_call_latency_cdf",
"07_consecutive_tool_call_count_cdf",
"08_tool_call_added_context_cdf",
"09_kvcache_block_reuse_time_cdf",
"10_kvcache_block_lifecycle_cdf",
"11_alive_kvcache_blocks_timeline",
"12_bucket_kvcache_reuse_ratio",
"13_session_cross_bucket_kvcache_miss",
]
def _ensure_dir(path: Path) -> None:
path.mkdir(parents=True, exist_ok=True)
def _clear_dir_files(path: Path) -> None:
path.mkdir(parents=True, exist_ok=True)
for child in path.iterdir():
if child.is_file():
child.unlink()
def _apply_style() -> None:
plt.rcParams.update(
{
"figure.figsize": (8.0, 4.8),
"figure.dpi": 600,
"savefig.dpi": 600,
"font.family": "DejaVu Serif",
"font.size": 11,
"axes.titlesize": 13,
"axes.labelsize": 12,
"axes.linewidth": 0.9,
"xtick.labelsize": 10,
"ytick.labelsize": 10,
"legend.fontsize": 10,
"legend.frameon": False,
}
)
def _finalize_axes(ax: plt.Axes, *, grid_axis: str = "y") -> None:
ax.spines["top"].set_visible(False)
ax.spines["right"].set_visible(False)
ax.grid(axis=grid_axis, color=PALETTE["grid"], alpha=0.5, linewidth=0.8)
ax.tick_params(axis="both", which="major", length=4, width=0.8)
def _save(fig: plt.Figure, fig_dir: Path, stem: str) -> None:
fig.savefig(fig_dir / f"{stem}.png", bbox_inches="tight")
plt.close(fig)
def _read_json(path: Path) -> dict:
return json.loads(path.read_text(encoding="utf-8"))
def _read_csv_rows(path: Path) -> list[dict]:
with path.open("r", encoding="utf-8") as handle:
return list(csv.DictReader(handle))
def _load_request_metrics(path: Path) -> list[dict]:
rows = []
with path.open("r", encoding="utf-8") as handle:
for row in csv.DictReader(handle):
rows.append(
{
"request_id": row.get("request_id", ""),
"session_id": row.get("session_id", ""),
"turn": safe_int(row.get("turn")),
"trigger_group": row.get("trigger_group", "") or "unknown",
"input_tokens": safe_int(row.get("input_tokens")),
"output_tokens": safe_int(row.get("output_tokens")),
"request_ready_time_ms": safe_int(row.get("request_ready_time_ms")),
"request_end_time_ms": safe_int(row.get("request_end_time_ms")),
"input_length_bucket": row.get("input_length_bucket", ""),
"theoretical_prompt_unit_length": safe_int(row.get("theoretical_prompt_unit_length")),
"theoretical_prefix_hit_blocks": safe_int(row.get("theoretical_prefix_hit_blocks")),
"bucketed_theoretical_prefix_hit_blocks": safe_int(
row.get("bucketed_theoretical_prefix_hit_blocks")
),
}
)
return rows
def _sort_request_rows(rows: list[dict]) -> list[dict]:
return sorted(
rows,
key=lambda row: (
row["request_ready_time_ms"],
row["turn"],
row["request_id"],
),
)
def _build_session_sequences(request_rows: list[dict]) -> dict[str, list[dict]]:
sessions = defaultdict(list)
for row in request_rows:
sessions[row["session_id"]].append(row)
for session_rows in sessions.values():
session_rows.sort(
key=lambda row: (
row["request_ready_time_ms"],
row["turn"],
row["request_id"],
)
)
return sessions
def _build_tool_round_edges(session_rows_by_id: dict[str, list[dict]]) -> list[dict]:
edges = []
for session_id, session_rows in session_rows_by_id.items():
for previous, current in zip(session_rows, session_rows[1:]):
if current["trigger_group"] != "tool":
continue
edges.append(
{
"session_id": session_id,
"prev_request_id": previous["request_id"],
"next_request_id": current["request_id"],
"tool_call_output_tokens": previous["output_tokens"],
"tool_call_latency_ms": max(
current["request_ready_time_ms"] - previous["request_end_time_ms"],
0,
),
"added_context_tokens": max(
current["input_tokens"] - previous["output_tokens"],
0,
),
}
)
return edges
def _ecdf(values: list[float]) -> tuple[np.ndarray, np.ndarray]:
arr = np.asarray([value for value in values if value is not None], dtype=float)
arr = np.sort(arr)
if arr.size == 0:
return arr, arr
xs, counts = np.unique(arr, return_counts=True)
ys = np.cumsum(counts, dtype=float) / arr.size
return xs, ys
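# Example: _ecdf([3, 1, 3]) yields xs = [1.0, 3.0] and ys = [1/3, 1.0], a
# right-continuous step CDF over the unique sorted values.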
def _ecdf_from_weighted_rows(rows: list[dict], *, value_key: str, count_key: str) -> tuple[np.ndarray, np.ndarray]:
weighted = sorted(
(
safe_float(row[value_key]),
safe_int(row[count_key]),
)
for row in rows
if safe_int(row.get(count_key)) > 0
)
total = sum(count for _, count in weighted)
if total <= 0:
return np.asarray([]), np.asarray([])
xs = np.asarray([value for value, _ in weighted], dtype=float)
ys = np.asarray(np.cumsum([count for _, count in weighted], dtype=float) / total, dtype=float)
return xs, ys
def _stats(values: list[float], labels: tuple[str, ...]) -> dict[str, float]:
cleaned = [value for value in values if value is not None]
if not cleaned:
return {label: 0.0 for label in labels}
mapping = {"mean": float(np.mean(cleaned))}
for label in labels:
if label == "mean":
continue
mapping[label] = percentile(cleaned, int(label[1:]) / 100)
return mapping
def _weighted_stats(rows: list[dict], *, value_key: str, count_key: str, labels: tuple[str, ...]) -> dict[str, float]:
weighted = sorted(
(
safe_float(row[value_key]),
safe_int(row[count_key]),
)
for row in rows
if safe_int(row.get(count_key)) > 0
)
total = sum(count for _, count in weighted)
if total <= 0:
return {label: 0.0 for label in labels}
result = {}
weighted_sum = sum(value * count for value, count in weighted)
result["mean"] = weighted_sum / total
for label in labels:
if label == "mean":
continue
target = int(label[1:]) / 100 * total
seen = 0
value_at_target = weighted[-1][0]
for value, count in weighted:
seen += count
if seen >= target:
value_at_target = value
break
result[label] = value_at_target
return result
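# Worked example: rows [{"v": 10, "n": 3}, {"v": 20, "n": 1}] with value_key="v"
# and count_key="n" give mean = (10*3 + 20*1) / 4 = 12.5, and p50 = 10 because the
# cumulative count reaches the 0.5 * 4 = 2 target within the first value.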
def _format_stat_text(title: str, stats: dict[str, float], labels: tuple[str, ...]) -> str:
parts = [title]
for label in labels:
value = stats.get(label, 0.0)
if abs(value - round(value)) < 1e-6:
parts.append(f"{label}={int(round(value))}")
else:
parts.append(f"{label}={value:.2f}")
return " ".join(parts)
def _add_footer(fig: plt.Figure, lines: list[str]) -> None:
fig.subplots_adjust(bottom=0.24)
y = 0.06
for line in lines:
fig.text(0.5, y, line, ha="center", va="bottom", fontsize=9.5)
y -= 0.035
def _plot_two_series_cdf_with_zoom(
fig_dir: Path,
*,
stem: str,
title: str,
xlabel: str,
first_label: str,
first_values: list[float],
first_color: str,
second_label: str,
second_values: list[float],
second_color: str,
zoom_quantile: float,
stats_labels: tuple[str, ...],
) -> None:
first_xs, first_ys = _ecdf(first_values)
second_xs, second_ys = _ecdf(second_values)
zoom_max = max(
percentile(first_values, zoom_quantile) if first_values else 0.0,
percentile(second_values, zoom_quantile) if second_values else 0.0,
)
fig, axes = plt.subplots(1, 2, figsize=(12.4, 4.8))
for ax, subtitle in zip(axes, ["Full Range", f"Zoom: <= p{int(zoom_quantile * 100)}"]):
ax.step(first_xs, first_ys, where="post", linewidth=2.2, color=first_color, label=first_label)
ax.step(second_xs, second_ys, where="post", linewidth=2.2, color=second_color, label=second_label)
ax.set_title(subtitle)
ax.set_xlabel(xlabel)
ax.set_ylabel("CDF")
_finalize_axes(ax)
axes[1].set_xlim(0, zoom_max if zoom_max > 0 else 1)
axes[0].legend(loc="lower right")
fig.suptitle(title, y=0.98)
_add_footer(
fig,
[
_format_stat_text(first_label, _stats(first_values, stats_labels), stats_labels),
_format_stat_text(second_label, _stats(second_values, stats_labels), stats_labels),
],
)
_save(fig, fig_dir, stem)
def _plot_single_cdf(
fig_dir: Path,
*,
stem: str,
title: str,
xlabel: str,
label: str,
values: list[float] | None = None,
weighted_rows: list[dict] | None = None,
weighted_value_key: str | None = None,
weighted_count_key: str | None = None,
color: str = PALETTE["blue"],
zoom_quantile: float | None = None,
stats_labels: tuple[str, ...] = ("mean", "p50", "p90", "p95", "p99"),
) -> None:
values = values or []
weighted_rows = weighted_rows or []
if weighted_rows:
xs, ys = _ecdf_from_weighted_rows(
weighted_rows,
value_key=weighted_value_key,
count_key=weighted_count_key,
)
stats = _weighted_stats(
weighted_rows,
value_key=weighted_value_key,
count_key=weighted_count_key,
labels=stats_labels,
)
zoom_max = stats.get(f"p{int(zoom_quantile * 100)}", 0.0) if zoom_quantile is not None else 0.0
else:
xs, ys = _ecdf(values)
stats = _stats(values, stats_labels)
zoom_max = percentile(values, zoom_quantile) if zoom_quantile is not None and values else 0.0
panel_count = 2 if zoom_quantile is not None else 1
fig, axes = plt.subplots(1, panel_count, figsize=(12.4, 4.8) if panel_count == 2 else (8.2, 4.8))
if panel_count == 1:
axes = [axes]
axes[0].step(xs, ys, where="post", linewidth=2.2, color=color)
axes[0].set_title("Full Range")
axes[0].set_xlabel(xlabel)
axes[0].set_ylabel("CDF")
_finalize_axes(axes[0])
if panel_count == 2:
axes[1].step(xs, ys, where="post", linewidth=2.2, color=color)
axes[1].set_title(f"Zoom: <= p{int(zoom_quantile * 100)}")
axes[1].set_xlabel(xlabel)
axes[1].set_ylabel("CDF")
axes[1].set_xlim(0, zoom_max if zoom_max > 0 else 1)
_finalize_axes(axes[1])
fig.suptitle(title, y=0.98)
_add_footer(fig, [_format_stat_text(label, stats, stats_labels)])
_save(fig, fig_dir, stem)
def _plot_session_turns_cdf(fig_dir: Path, request_rows: list[dict]) -> None:
session_sizes = Counter(row["session_id"] for row in request_rows)
values = list(session_sizes.values())
xs, ys = _ecdf(values)
max_turn = max(values) if values else 1
zoom_max = max(int(np.ceil(max_turn * 0.10)), 1)
fig, axes = plt.subplots(1, 2, figsize=(12.4, 4.8))
for ax, subtitle in zip(axes, ["Full Range", f"Zoom: <= {zoom_max} turns (first 10% of max turn)"]):
ax.step(xs, ys, where="post", linewidth=2.2, color=PALETTE["green"])
ax.set_title(subtitle)
ax.set_xlabel("Turns per session")
ax.set_ylabel("CDF")
_finalize_axes(ax)
axes[1].set_xlim(0.5, zoom_max + 0.5)
fig.suptitle("Session Turns CDF", y=0.98)
_add_footer(
fig,
[
_format_stat_text(
"Session turns",
_stats(values, ("mean", "p50", "p90", "p95", "p99")),
("mean", "p50", "p90", "p95", "p99"),
)
],
)
_save(fig, fig_dir, "02_session_turns_cdf")
def _plot_request_length_by_turn(fig_dir: Path, request_rows: list[dict]) -> None:
values_by_turn = defaultdict(list)
for row in request_rows:
if row["turn"] > 0:
values_by_turn[row["turn"]].append(row["input_tokens"])
turns = sorted(values_by_turn)
mean_values = [float(np.mean(values_by_turn[turn])) for turn in turns]
p50_values = [percentile(values_by_turn[turn], 0.50) for turn in turns]
p99_values = [percentile(values_by_turn[turn], 0.99) for turn in turns]
fig, ax = plt.subplots(figsize=(8.6, 4.8))
ax.plot(turns, mean_values, color=PALETTE["blue"], linewidth=2.0, label="mean")
ax.plot(turns, p50_values, color=PALETTE["orange"], linewidth=2.0, label="p50")
ax.plot(turns, p99_values, color=PALETTE["red"], linewidth=2.0, label="p99")
ax.set_title("Request Input Length by Turn")
ax.set_xlabel("Turn")
ax.set_ylabel("Input tokens")
ax.legend(loc="upper left")
ax.xaxis.set_major_locator(MaxNLocator(nbins=12, integer=True))
plt.setp(ax.get_xticklabels(), rotation=20, ha="right")
_finalize_axes(ax)
fig.tight_layout()
_save(fig, fig_dir, "03_request_length_by_turn")
def _plot_trigger_role_pie(fig_dir: Path, request_rows: list[dict]) -> None:
label_order = ["user", "tool", "assistant"]
color_by_label = {
"user": PALETTE["orange"],
"tool": PALETTE["green"],
"assistant": PALETTE["blue"],
}
counts = Counter(row["trigger_group"] for row in request_rows)
labels = [label for label in label_order if counts[label] > 0]
values = [counts[label] for label in labels]
colors = [color_by_label[label] for label in labels]
def _autopct(pct):
total = sum(values)
count = int(round(pct * total / 100.0))
return f"{pct:.1f}%\n({count})"
fig, ax = plt.subplots(figsize=(9.0, 5.8))
wedges, _texts, autotexts = ax.pie(
values,
autopct=_autopct,
startangle=90,
colors=colors,
wedgeprops={"linewidth": 0.8, "edgecolor": "white"},
textprops={"fontsize": 9},
)
for autotext in autotexts:
autotext.set_fontsize(8.5)
ax.legend(
wedges,
[f"{label} ({counts[label]:,})" for label in labels],
title="Trigger source",
loc="center left",
bbox_to_anchor=(1.02, 0.5),
)
ax.set_title("Request Trigger Role Proportion")
fig.tight_layout()
_save(fig, fig_dir, "04_request_trigger_role_pie")
def _plot_session_gap_cdf(fig_dir: Path, session_rows_by_id: dict[str, list[dict]]) -> None:
ready_gaps = []
end_ready_gaps = []
for session_rows in session_rows_by_id.values():
for previous, current in zip(session_rows, session_rows[1:]):
ready_gaps.append(max(current["request_ready_time_ms"] - previous["request_ready_time_ms"], 0))
end_ready_gaps.append(max(current["request_ready_time_ms"] - previous["request_end_time_ms"], 0))
_plot_two_series_cdf_with_zoom(
fig_dir,
stem="session_inter_request_gap_cdf",
title="Session Inter-Request Gap CDF",
xlabel="Milliseconds",
first_label="ready->ready",
first_values=ready_gaps,
first_color=PALETTE["purple"],
second_label="end->ready",
second_values=end_ready_gaps,
second_color=PALETTE["gray"],
zoom_quantile=0.90,
stats_labels=("mean", "p50", "p90", "p95", "p99"),
)
def _plot_consecutive_tool_calls_cdf(fig_dir: Path, session_rows_by_id: dict[str, list[dict]]) -> None:
values = []
for session_rows in session_rows_by_id.values():
for index, row in enumerate(session_rows):
if row["trigger_group"] != "user":
continue
count = 0
next_index = index + 1
while next_index < len(session_rows) and session_rows[next_index]["trigger_group"] == "tool":
count += 1
next_index += 1
values.append(count)
_plot_single_cdf(
fig_dir,
stem="07_consecutive_tool_call_count_cdf",
title="Consecutive Tool Calls After One User Input",
xlabel="Consecutive tool-triggered rounds",
label="Consecutive tool calls",
values=values,
color=PALETTE["green"],
)
def _plot_alive_kvcache_timeline(fig_dir: Path, timeline_rows: list[dict]) -> None:
fig, ax = plt.subplots(figsize=(10.2, 4.8))
if timeline_rows:
base_ts = safe_int(timeline_rows[0]["timestamp_ms"])
else:
base_ts = 0
xs = [
max(safe_int(row["timestamp_ms"]) - base_ts, 0) / 60000.0
for row in timeline_rows
]
ys = [safe_int(row["alive_block_count"]) for row in timeline_rows]
ax.step(xs, ys, where="post", color=PALETTE["purple"], linewidth=1.8)
ax.set_title("Alive KV-Cache Blocks Over Time")
ax.set_xlabel("Elapsed time (minutes)")
ax.set_ylabel("Alive block count")
ax.xaxis.set_major_locator(MultipleLocator(10))
plt.setp(ax.get_xticklabels(), rotation=20, ha="right")
_finalize_axes(ax)
fig.tight_layout()
_save(fig, fig_dir, "11_alive_kvcache_blocks_timeline")
def _plot_bucket_reuse_ratio(fig_dir: Path, request_rows: list[dict]) -> None:
by_bucket = defaultdict(lambda: {"prompt_blocks": 0, "reused_blocks": 0})
total_prompt_blocks = 0
total_reused_blocks = 0
for row in request_rows:
bucket = row["input_length_bucket"] or "unknown"
prompt_blocks = row["theoretical_prompt_unit_length"]
reused_blocks = row["bucketed_theoretical_prefix_hit_blocks"]
by_bucket[bucket]["prompt_blocks"] += prompt_blocks
by_bucket[bucket]["reused_blocks"] += reused_blocks
total_prompt_blocks += prompt_blocks
total_reused_blocks += row["theoretical_prefix_hit_blocks"]
labels = list(by_bucket)
ratios = [
(by_bucket[label]["reused_blocks"] / by_bucket[label]["prompt_blocks"])
if by_bucket[label]["prompt_blocks"]
else 0.0
for label in labels
]
reused_counts = [by_bucket[label]["reused_blocks"] for label in labels]
labels.append("Overall")
ratios.append((total_reused_blocks / total_prompt_blocks) if total_prompt_blocks else 0.0)
reused_counts.append(total_reused_blocks)
fig, ax = plt.subplots(figsize=(9.2, 4.8))
bars = ax.bar(
labels,
ratios,
color=[PALETTE["blue"], PALETTE["orange"], PALETTE["green"], PALETTE["purple"], PALETTE["teal"]][: len(labels)],
width=0.68,
edgecolor="white",
linewidth=0.8,
)
for bar, ratio, reused_count in zip(bars, ratios, reused_counts):
ax.text(
bar.get_x() + bar.get_width() / 2,
ratio + max(ratios + [0.0]) * 0.03 + 1e-9,
f"{ratio:.2%}\nreused={reused_count:,}",
ha="center",
va="bottom",
fontsize=8.8,
)
ax.set_title("Bucketed KV-Cache Reuse Ratio vs Global Reuse Ratio")
ax.set_xlabel("Input-length bucket")
ax.set_ylabel("Reuse ratio")
ax.set_ylim(0, max(ratios + [0.0]) * 1.25 + 1e-9)
_finalize_axes(ax)
fig.tight_layout()
_save(fig, fig_dir, "12_bucket_kvcache_reuse_ratio")
def _plot_session_cross_bucket_miss(fig_dir: Path, rows: list[dict]) -> None:
labels = [row["bucket"] for row in rows]
miss_ratios = [safe_float(row["cross_bucket_edge_fraction"]) for row in rows]
loss_ratios = [safe_float(row["reduced_reused_blocks_ratio"]) for row in rows]
miss_blocks = [safe_int(row["cross_bucket_shared_prefix_units_sum"]) for row in rows]
x = np.arange(len(labels))
width = 0.36
fig, ax = plt.subplots(figsize=(9.2, 4.8))
left = ax.bar(x - width / 2, miss_ratios, width=width, color=PALETTE["red"], label="cross-bucket miss ratio")
right = ax.bar(
x + width / 2,
loss_ratios,
width=width,
color=PALETTE["gold"],
label="reduced reused blocks / bucket reuse",
)
y_pad = max(miss_ratios + loss_ratios + [0.0]) * 0.03 + 1e-9
for bar, value, count in zip(left, miss_ratios, miss_blocks):
ax.text(
bar.get_x() + bar.get_width() / 2,
value + y_pad,
f"{value:.2%}\nmiss={count:,}",
ha="center",
va="bottom",
fontsize=8.8,
)
for bar, value in zip(right, loss_ratios):
ax.text(
bar.get_x() + bar.get_width() / 2,
value + y_pad,
f"{value:.2%}",
ha="center",
va="bottom",
fontsize=8.8,
)
ax.set_xticks(x)
ax.set_xticklabels(labels)
ax.set_title("Session Cross-Bucket KV-Cache Miss and Reuse Loss")
ax.set_xlabel("Child bucket")
ax.set_ylabel("Ratio")
ax.legend(loc="upper left")
ax.set_ylim(0, max(miss_ratios + loss_ratios + [0.0]) * 1.25 + 1e-9)
_finalize_axes(ax)
fig.tight_layout()
_save(fig, fig_dir, "13_session_cross_bucket_kvcache_miss")
def _write_manifest(fig_dir: Path, manifest: dict) -> None:
(fig_dir / "manifest.json").write_text(json.dumps(manifest, ensure_ascii=False, indent=2), encoding="utf-8")
def _write_readme(fig_dir: Path, dataset_title: str) -> None:
lines = [
f"# {dataset_title}",
"",
"This directory contains the PNG figures rendered from `details/` data.",
"",
"Figures:",
]
for stem in FIGURE_STEMS:
lines.append(f"- `{stem}.png`")
lines.append("- `session_inter_request_gap_cdf.png`")
(fig_dir / "README.md").write_text("\n".join(lines) + "\n", encoding="utf-8")
def render_figures(
*,
analysis_dir: str | Path,
fig_dir: str | Path,
dataset_title: str,
show_progress: bool = False,
) -> dict:
analysis_root = Path(analysis_dir)
fig_root = Path(fig_dir)
details_root = resolve_details_dir(analysis_root)
_clear_dir_files(fig_root)
_apply_style()
request_rows = _load_request_metrics(details_root / "request_metrics.csv")
request_rows = _sort_request_rows(request_rows)
session_rows_by_id = _build_session_sequences(request_rows)
tool_round_edges = _build_tool_round_edges(session_rows_by_id)
reuse_gap_rows = _read_csv_rows(details_root / "theoretical_block_reuse_gaps.csv")
block_lifetime_rows = _read_csv_rows(details_root / "theoretical_block_lifetimes.csv")
timeline_rows = _read_csv_rows(details_root / "theoretical_alive_block_timeline.csv")
session_bucket_rows = _read_csv_rows(details_root / "session_bucket_boundary_miss.csv")
details_summary = _read_json(details_root / "details_summary.json")
progress = tqdm(
total=len(FIGURE_STEMS) + 1,
desc="Render figures",
unit="artifact",
dynamic_ncols=True,
disable=not show_progress,
)
if show_progress:
progress.set_postfix(current="01_input_output_length_cdf")
_plot_two_series_cdf_with_zoom(
fig_root,
stem="01_input_output_length_cdf",
title="Input / Output Length CDF",
xlabel="Tokens",
first_label="Input",
first_values=[row["input_tokens"] for row in request_rows],
first_color=PALETTE["blue"],
second_label="Output",
second_values=[row["output_tokens"] for row in request_rows],
second_color=PALETTE["orange"],
zoom_quantile=0.80,
stats_labels=("mean", "p50", "p80", "p90", "p95", "p99"),
)
if show_progress:
progress.update(1)
progress.set_postfix(current="02_session_turns_cdf")
_plot_session_turns_cdf(fig_root, request_rows)
if show_progress:
progress.update(1)
progress.set_postfix(current="03_request_length_by_turn")
_plot_request_length_by_turn(fig_root, request_rows)
if show_progress:
progress.update(1)
progress.set_postfix(current="04_request_trigger_role_pie")
_plot_trigger_role_pie(fig_root, request_rows)
if show_progress:
progress.update(1)
progress.set_postfix(current="05_tool_call_output_length_cdf")
_plot_single_cdf(
fig_root,
stem="05_tool_call_output_length_cdf",
title="Tool Call Output Length CDF",
xlabel="Output tokens",
label="Tool-call output length",
values=[row["tool_call_output_tokens"] for row in tool_round_edges],
color=PALETTE["teal"],
zoom_quantile=0.90,
)
if show_progress:
progress.update(1)
progress.set_postfix(current="06_tool_call_latency_cdf")
_plot_single_cdf(
fig_root,
stem="06_tool_call_latency_cdf",
title="Tool Call Latency CDF",
xlabel="Milliseconds",
label="Tool-call latency",
values=[row["tool_call_latency_ms"] for row in tool_round_edges],
color=PALETTE["red"],
zoom_quantile=0.90,
)
if show_progress:
progress.update(1)
progress.set_postfix(current="07_consecutive_tool_call_count_cdf")
_plot_consecutive_tool_calls_cdf(fig_root, session_rows_by_id)
if show_progress:
progress.update(1)
progress.set_postfix(current="08_tool_call_added_context_cdf")
_plot_single_cdf(
fig_root,
stem="08_tool_call_added_context_cdf",
title="Added Context After Tool Call CDF",
xlabel="Added context tokens",
label="Added context",
values=[row["added_context_tokens"] for row in tool_round_edges],
color=PALETTE["purple"],
)
if show_progress:
progress.update(1)
progress.set_postfix(current="09_kvcache_block_reuse_time_cdf")
_plot_single_cdf(
fig_root,
stem="09_kvcache_block_reuse_time_cdf",
title="KV-Cache Block Reuse Time CDF",
xlabel="Milliseconds",
label="Reuse time",
weighted_rows=reuse_gap_rows,
weighted_value_key="reuse_gap_ms",
weighted_count_key="count",
color=PALETTE["gold"],
zoom_quantile=0.90,
)
if show_progress:
progress.update(1)
progress.set_postfix(current="10_kvcache_block_lifecycle_cdf")
_plot_single_cdf(
fig_root,
stem="10_kvcache_block_lifecycle_cdf",
title="KV-Cache Block Lifecycle CDF",
xlabel="Milliseconds",
label="Block lifecycle",
values=[safe_int(row["lifetime_ms"]) for row in block_lifetime_rows],
color=PALETTE["gray"],
)
if show_progress:
progress.update(1)
progress.set_postfix(current="11_alive_kvcache_blocks_timeline")
_plot_alive_kvcache_timeline(fig_root, timeline_rows)
if show_progress:
progress.update(1)
progress.set_postfix(current="12_bucket_kvcache_reuse_ratio")
_plot_bucket_reuse_ratio(fig_root, request_rows)
if show_progress:
progress.update(1)
progress.set_postfix(current="13_session_cross_bucket_kvcache_miss")
_plot_session_cross_bucket_miss(fig_root, session_bucket_rows)
_plot_session_gap_cdf(fig_root, session_rows_by_id)
if show_progress:
progress.update(1)
progress.set_postfix(current="manifest.json + README.md")
manifest = {
"dataset_title": dataset_title,
"figure_count": len(FIGURE_STEMS),
"analysis_dir": str(analysis_root),
"request_count": details_summary.get("request_count", 0),
"global_reuse_ratio": details_summary.get("global_reuse_ratio", 0.0),
"figures": [f"{stem}.png" for stem in FIGURE_STEMS],
"extra_figures": ["session_inter_request_gap_cdf.png"],
}
_write_manifest(fig_root, manifest)
_write_readme(fig_root, dataset_title)
if show_progress:
progress.update(1)
progress.close()
return {
"fig_dir": str(fig_root),
"manifest_path": str(fig_root / "manifest.json"),
"readme_path": str(fig_root / "README.md"),
}
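# Usage sketch (hypothetical paths): render_figures(analysis_dir="outputs/analysis/foo",
# fig_dir="outputs/analysis/foo/figures", dataset_title="foo") reads the details/
# CSVs loaded above and writes the FIGURE_STEMS PNGs, the extra session gap CDF,
# manifest.json, and README.md into the figure directory.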

trace_analyzer/helpers.py Normal file

@@ -0,0 +1,78 @@
import json
from statistics import mean, median
def parse_jsonish(value):
"""Parse nested JSON strings until a non-string value is reached."""
current = value
while isinstance(current, str):
text = current.strip()
if not text:
return current
try:
current = json.loads(text)
except json.JSONDecodeError:
return current
return current
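# Example: parse_jsonish('"[1, 2]"') first decodes the outer string to "[1, 2]",
# then decodes again to the list [1, 2]; non-JSON strings are returned unchanged.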
def safe_int(value, default=0):
if value is None or value == "":
return default
try:
return int(value)
except (TypeError, ValueError):
return default
def safe_float(value, default=0.0):
if value is None or value == "":
return default
try:
return float(value)
except (TypeError, ValueError):
return default
def percentile(values, pct):
if not values:
return 0.0
ordered = sorted(values)
if len(ordered) == 1:
return float(ordered[0])
rank = pct * (len(ordered) - 1)
low = int(rank)
high = min(low + 1, len(ordered) - 1)
fraction = rank - low
return ordered[low] + (ordered[high] - ordered[low]) * fraction
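# Worked example: percentile([10, 20, 30, 40], 0.5) computes rank = 0.5 * 3 = 1.5
# and interpolates halfway between 20 and 30, returning 25.0.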
def series_stats(values):
cleaned = [v for v in values if v is not None]
if not cleaned:
return {
"count": 0,
"min": 0,
"max": 0,
"mean": 0.0,
"median": 0.0,
"p90": 0.0,
}
return {
"count": len(cleaned),
"min": min(cleaned),
"max": max(cleaned),
"mean": mean(cleaned),
"median": median(cleaned),
"p90": percentile(cleaned, 0.9),
}
def safe_div(numerator, denominator):
if not denominator:
return 0.0
return numerator / denominator
def compact_json(data):
return json.dumps(data, ensure_ascii=False, separators=(",", ":"))

trace_analyzer/layout.py Normal file

@@ -0,0 +1,76 @@
from __future__ import annotations
from pathlib import Path
import json
DETAILS_DIR_NAME = "details"
LEGACY_DETAILS_DIR_NAME = "advanced"
DETAILS_SUMMARY_FILENAME = "details_summary.json"
LEGACY_DETAILS_SUMMARY_FILENAME = "advanced_summary.json"
def preferred_details_dir(output_dir: str | Path) -> Path:
return Path(output_dir) / DETAILS_DIR_NAME
def legacy_details_dir(output_dir: str | Path) -> Path:
return Path(output_dir) / LEGACY_DETAILS_DIR_NAME
def resolve_existing_details_dir(output_dir: str | Path) -> Path | None:
preferred = preferred_details_dir(output_dir)
if _details_dir_has_outputs(preferred):
return preferred
legacy = legacy_details_dir(output_dir)
if _details_dir_has_outputs(legacy):
return legacy
if preferred.exists():
return preferred
if legacy.exists():
return legacy
return None
def resolve_details_dir(output_dir: str | Path) -> Path:
existing = resolve_existing_details_dir(output_dir)
if existing is not None:
return existing
return preferred_details_dir(output_dir)
def resolve_details_summary_path(output_dir: str | Path) -> Path | None:
for details_dir in [preferred_details_dir(output_dir), legacy_details_dir(output_dir)]:
for filename in [DETAILS_SUMMARY_FILENAME, LEGACY_DETAILS_SUMMARY_FILENAME]:
path = details_dir / filename
if path.exists():
return path
return None
def details_outputs_exist(output_dir: str | Path) -> bool:
return _details_dir_has_outputs(preferred_details_dir(output_dir)) or _details_dir_has_outputs(
legacy_details_dir(output_dir)
)
def _details_dir_has_outputs(details_dir: Path) -> bool:
if not details_dir.exists():
return False
required_files = [
details_dir / "request_metrics.csv",
details_dir / "theoretical_block_reuse_gaps.csv",
details_dir / "theoretical_block_lifetimes.csv",
details_dir / "theoretical_alive_block_timeline.csv",
details_dir / "session_bucket_boundary_miss.csv",
]
if not all(path.exists() for path in required_files):
return False
summary_path = details_dir / DETAILS_SUMMARY_FILENAME
if not summary_path.exists():
return False
try:
payload = json.loads(summary_path.read_text(encoding="utf-8"))
except Exception:
return False
return int(payload.get("schema_version", 0) or 0) >= 3
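# Resolution order, for reference: resolve_details_dir prefers a complete
# <output_dir>/details, then a complete legacy <output_dir>/advanced, then
# whichever of the two merely exists, and finally falls back to the preferred path.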

trace_analyzer/models.py Normal file

@@ -0,0 +1,94 @@
from dataclasses import dataclass, field
@dataclass
class MessageEvent:
role: str
content_type: str
text_len: int
has_cache_control: bool = False
item_count: int = 0
@dataclass
class ToolSpec:
name: str
tool_type: str
@dataclass
class UsageStats:
input_tokens: int = 0
output_tokens: int = 0
total_tokens: int = 0
reasoning_tokens: int = 0
cached_tokens: int = 0
@dataclass
class RequestMeta:
provider: str
line_number: int
request_id: str
session_id: str
request_model: str
time: str
status_code: str
status_name: str
request_ready_time_ms: int
request_end_time_ms: int
total_cost_time_ms: int
backend_first_request_time_ms: int = 0
backend_first_response_time_ms: int = 0
@dataclass
class TraceRecord:
meta: RequestMeta
canonical_prompt: str = ""
messages: list[MessageEvent] = field(default_factory=list)
role_sequence: list[str] = field(default_factory=list)
declared_tools: list[ToolSpec] = field(default_factory=list)
usage: UsageStats = field(default_factory=UsageStats)
raw_messages: list[dict] = field(default_factory=list)
@dataclass
class TraceFeatures:
request_id: str
session_id: str
model: str
status_code: str
time: str
message_count: int
conversation_depth: int
declared_tool_count: int
assistant_msg_count: int
tool_msg_count: int
user_msg_count: int
system_msg_count: int
assistant_to_tool_count: int
tool_to_assistant_count: int
tool_to_tool_count: int
assistant_to_user_count: int
user_to_assistant_count: int
max_consecutive_tool_msgs: int
avg_tool_burst_len: float
has_tool_loop: int
input_tokens: int
output_tokens: int
total_tokens: int
reasoning_tokens: int
cached_tokens: int
cache_hit_ratio: float
uncached_prompt_tokens: int
output_input_ratio: float
latency_ms: int
ms_per_input_token: float
ms_per_output_token: float
long_context: int
high_cache: int
tool_burst_alert: int
tool_loop_alert: int
slow_request: int = 0
pattern_labels: list[str] = field(default_factory=list)

trace_analyzer/parser.py Normal file

@@ -0,0 +1,230 @@
import json
import os
from dataclasses import asdict
from pathlib import Path
import psutil
from tqdm.auto import tqdm
from .helpers import safe_int
from .models import MessageEvent, RequestMeta, ToolSpec, TraceRecord, UsageStats
class FormattedAliTraceAdapter:
name = "formatted"
def detect(self, raw):
if not isinstance(raw.get("meta"), dict):
return False
required_keys = {"canonical_prompt", "usage", "message_events", "declared_tools", "role_sequence"}
if not required_keys.issubset(raw.keys()):
return False
schema_version = str(raw.get("schema_version", "")).strip()
return bool(schema_version) or "request_id" in raw["meta"]
def parse_line(self, raw, line_number=0):
meta_payload = raw.get("meta", {}) if isinstance(raw.get("meta", {}), dict) else {}
usage_payload = raw.get("usage", {}) if isinstance(raw.get("usage", {}), dict) else {}
message_events_payload = raw.get("message_events", [])
declared_tools_payload = raw.get("declared_tools", [])
usage = UsageStats(
input_tokens=safe_int(usage_payload.get("input_tokens")),
output_tokens=safe_int(usage_payload.get("output_tokens")),
total_tokens=safe_int(usage_payload.get("total_tokens")),
reasoning_tokens=safe_int(usage_payload.get("reasoning_tokens")),
cached_tokens=safe_int(usage_payload.get("cached_tokens")),
)
messages = [
MessageEvent(
role=str(message.get("role", "unknown")),
content_type=str(message.get("content_type", "unknown")),
text_len=safe_int(message.get("text_len")),
has_cache_control=bool(message.get("has_cache_control")),
item_count=safe_int(message.get("item_count")),
)
for message in message_events_payload
if isinstance(message, dict)
]
declared_tools = [
ToolSpec(
name=str(tool.get("name", "")),
tool_type=str(tool.get("tool_type", "function")),
)
for tool in declared_tools_payload
if isinstance(tool, dict)
]
inferred_family = str(meta_payload.get("model_family", "")).strip()
inferred_provider = str(meta_payload.get("provider", "")).strip()
if not inferred_provider:
inferred_provider = inferred_family or self.name
meta = RequestMeta(
provider=inferred_provider,
line_number=line_number,
request_id=str(meta_payload.get("request_id", "")),
session_id=str(meta_payload.get("session_id", "")),
request_model=str(meta_payload.get("request_model", "")),
time=str(meta_payload.get("time", "")),
status_code=str(meta_payload.get("status_code", "")),
status_name=str(meta_payload.get("status_name", "")),
request_ready_time_ms=safe_int(meta_payload.get("request_ready_time_ms")),
request_end_time_ms=safe_int(meta_payload.get("request_end_time_ms")),
total_cost_time_ms=safe_int(meta_payload.get("total_cost_time_ms")),
backend_first_request_time_ms=safe_int(meta_payload.get("backend_first_request_time_ms")),
backend_first_response_time_ms=safe_int(meta_payload.get("backend_first_response_time_ms")),
)
return TraceRecord(
meta=meta,
canonical_prompt=str(raw.get("canonical_prompt", "")),
messages=messages,
role_sequence=[
str(role)
for role in raw.get("role_sequence", [message.role for message in messages])
],
declared_tools=declared_tools,
usage=usage,
raw_messages=[
message
for message in raw.get("raw_messages", [])
if isinstance(message, dict)
],
)
def _looks_like_release_trace(raw):
expected_keys = {"chat_id", "parent_chat_id", "timestamp", "input_length", "output_length", "turn", "hash_ids"}
return expected_keys.issubset(raw.keys())
def path_looks_like_release_trace(path):
path = Path(path)
if not path.exists():
return False
try:
with path.open("r", encoding="utf-8") as handle:
for line in handle:
line = line.strip()
if not line:
continue
return _looks_like_release_trace(json.loads(line))
except Exception:
return False
return False
def get_adapter(raw):
adapter = FormattedAliTraceAdapter()
if adapter.detect(raw):
return adapter
if _looks_like_release_trace(raw):
raise ValueError("trace_analyzer currently analyzes formatter-generated *-raw.jsonl, not release hash-id traces.")
raise ValueError("trace_analyzer only accepts formatter-generated *-raw.jsonl inputs.")
def _estimate_peak_rss_mb(current_rss_mb, peak_rss_mb, fraction_done):
baseline = max(current_rss_mb, peak_rss_mb)
headroom = 1.0 + 0.25 * max(0.0, 1.0 - fraction_done)
return baseline * headroom
def load_records(path, limit=None, show_progress=False, progress_desc="Load trace"):
records = []
path = str(path)
progress = None
process = psutil.Process(os.getpid()) if show_progress else None
peak_rss_mb = 0.0
total_bytes = os.path.getsize(path) if show_progress else 0
if show_progress:
progress = tqdm(
total=total_bytes,
desc=progress_desc,
unit="B",
unit_scale=True,
dynamic_ncols=True,
)
with open(path, "r", encoding="utf-8") as handle:
for line_number, line in enumerate(handle, start=1):
if limit is not None and len(records) >= limit:
break
raw_line = line
line = line.strip()
if not line:
if progress is not None:
progress.update(len(raw_line.encode("utf-8")))
continue
raw = json.loads(line)
adapter = get_adapter(raw)
try:
record = adapter.parse_line(raw, line_number=line_number)
except Exception as exc:
if progress is not None:
progress.close()
raise ValueError(f"Failed to parse line {line_number} in {path}: {exc}") from exc
records.append(record)
if progress is not None:
progress.update(len(raw_line.encode("utf-8")))
current_rss_mb = process.memory_info().rss / (1024 * 1024)
peak_rss_mb = max(peak_rss_mb, current_rss_mb)
fraction_done = progress.n / progress.total if progress.total else 0.0
progress.set_postfix(
records=len(records),
rss_mb=f"{current_rss_mb:.0f}",
est_peak_mb=f"{_estimate_peak_rss_mb(current_rss_mb, peak_rss_mb, fraction_done):.0f}",
)
if progress is not None:
progress.close()
return records
def flatten_record(record):
return {
"provider": record.meta.provider,
"line_number": record.meta.line_number,
"request_id": record.meta.request_id,
"session_id": record.meta.session_id,
"request_model": record.meta.request_model,
"time": record.meta.time,
"status_code": record.meta.status_code,
"status_name": record.meta.status_name,
"request_ready_time_ms": record.meta.request_ready_time_ms,
"request_end_time_ms": record.meta.request_end_time_ms,
"total_cost_time_ms": record.meta.total_cost_time_ms,
"backend_first_request_time_ms": record.meta.backend_first_request_time_ms,
"backend_first_response_time_ms": record.meta.backend_first_response_time_ms,
"message_count": len(record.messages),
"role_sequence": ";".join(record.role_sequence),
"declared_tool_count": len(record.declared_tools),
"declared_tool_names": ";".join(tool.name for tool in record.declared_tools if tool.name),
"input_tokens": record.usage.input_tokens,
"output_tokens": record.usage.output_tokens,
"total_tokens": record.usage.total_tokens,
"reasoning_tokens": record.usage.reasoning_tokens,
"cached_tokens": record.usage.cached_tokens,
}
def record_to_dict(record):
return asdict(record)
def infer_analysis_dataset_name(input_path):
resolved = Path(input_path)
stem = resolved.stem
if stem.endswith("-raw"):
stem = stem[:-4]
parent_name = resolved.parent.name
model_slug = ""
if parent_name.startswith("trace-") and parent_name.endswith("-formatted"):
model_slug = parent_name[len("trace-") : -len("-formatted")]
if model_slug and not stem.startswith(f"{model_slug}-"):
return f"{model_slug}-{stem}"
return stem
def default_output_dir(input_path):
return Path("outputs") / "analysis" / infer_analysis_dataset_name(input_path)

trace_analyzer/preparation.py Normal file

@@ -0,0 +1,221 @@
from __future__ import annotations
import csv
import json
import os
from pathlib import Path
from trace_analyzer.helpers import percentile
from trace_analyzer.parser import get_adapter
from tqdm.auto import tqdm
def stream_prepare(input_path: str | Path, output_dir: str | Path, *, show_progress: bool = False) -> dict:
input_file = Path(input_path)
output_root = Path(output_dir)
output_root.mkdir(parents=True, exist_ok=True)
features_path = output_root / "features.csv"
total_bytes = os.path.getsize(input_file) if show_progress and input_file.exists() else 0
progress = tqdm(
total=total_bytes,
desc="Prepare features",
unit="B",
unit_scale=True,
dynamic_ncols=True,
disable=not show_progress,
)
try:
with input_file.open("r", encoding="utf-8") as input_handle, features_path.open(
"w", encoding="utf-8", newline=""
) as features_handle:
writer: csv.DictWriter | None = None
kept_rows = 0
for line_number, line in enumerate(input_handle, start=1):
stripped = line.strip()
if not stripped:
if show_progress:
progress.update(len(line.encode("utf-8")))
continue
raw = json.loads(stripped)
adapter = get_adapter(raw)
record = adapter.parse_line(raw, line_number=line_number)
role_sequence = record.role_sequence
role_pairs = list(zip(role_sequence, role_sequence[1:]))
tool_bursts = _tool_bursts(role_sequence)
max_tool_burst = max(tool_bursts) if tool_bursts else 0
avg_tool_burst = _safe_div(sum(tool_bursts), len(tool_bursts)) if tool_bursts else 0.0
tool_to_tool_count = sum(1 for current, nxt in role_pairs if current == "tool" and nxt == "tool")
tool_msg_count = sum(message.role == "tool" for message in record.messages)
assistant_msg_count = sum(message.role == "assistant" for message in record.messages)
cache_hit_ratio = _safe_div(record.usage.cached_tokens, record.usage.input_tokens)
feature_row = {
"request_id": record.meta.request_id,
"session_id": record.meta.session_id,
"model": record.meta.request_model,
"status_code": record.meta.status_code,
"time": record.meta.time,
"message_count": len(record.messages),
"conversation_depth": len(record.messages),
"declared_tool_count": len(record.declared_tools),
"assistant_msg_count": assistant_msg_count,
"tool_msg_count": tool_msg_count,
"user_msg_count": sum(message.role == "user" for message in record.messages),
"system_msg_count": sum(message.role == "system" for message in record.messages),
"assistant_to_tool_count": sum(
1
for current, nxt in role_pairs
if current == "assistant" and nxt == "tool"
),
"tool_to_assistant_count": sum(
1
for current, nxt in role_pairs
if current == "tool" and nxt == "assistant"
),
"tool_to_tool_count": tool_to_tool_count,
"assistant_to_user_count": sum(
1
for current, nxt in role_pairs
if current == "assistant" and nxt == "user"
),
"user_to_assistant_count": sum(
1
for current, nxt in role_pairs
if current == "user" and nxt == "assistant"
),
"max_consecutive_tool_msgs": max_tool_burst,
"avg_tool_burst_len": avg_tool_burst,
"has_tool_loop": 1 if tool_to_tool_count > 0 else 0,
"input_tokens": record.usage.input_tokens,
"output_tokens": record.usage.output_tokens,
"total_tokens": record.usage.total_tokens,
"reasoning_tokens": record.usage.reasoning_tokens,
"cached_tokens": record.usage.cached_tokens,
"cache_hit_ratio": cache_hit_ratio,
"uncached_prompt_tokens": max(record.usage.input_tokens - record.usage.cached_tokens, 0),
"output_input_ratio": _safe_div(record.usage.output_tokens, record.usage.input_tokens),
"latency_ms": record.meta.total_cost_time_ms,
"ms_per_input_token": _safe_div(record.meta.total_cost_time_ms, record.usage.input_tokens),
"ms_per_output_token": _safe_div(record.meta.total_cost_time_ms, record.usage.output_tokens),
"long_context": 1 if record.usage.input_tokens >= 32000 else 0,
"high_cache": 1 if cache_hit_ratio >= 0.8 else 0,
"tool_burst_alert": 1 if max_tool_burst >= 4 else 0,
"tool_loop_alert": 1 if tool_to_tool_count >= 3 else 0,
"slow_request": 0,
"pattern_labels": _pattern_labels(
record,
cache_hit_ratio=cache_hit_ratio,
tool_msg_count=tool_msg_count,
assistant_msg_count=assistant_msg_count,
max_tool_burst=max_tool_burst,
),
}
if writer is None:
writer = csv.DictWriter(features_handle, fieldnames=list(feature_row.keys()))
writer.writeheader()
writer.writerow(feature_row)
kept_rows += 1
if show_progress:
progress.update(len(line.encode("utf-8")))
progress.set_postfix(
rows=kept_rows,
features=features_path.name,
)
finally:
if show_progress:
progress.close()
if show_progress:
tqdm.write("Finalize features.csv: apply slow_request p90 latency threshold")
_apply_slow_request_threshold(features_path)
return {
"features_path": str(features_path),
}
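# Illustrative sketch: one streaming pass over a raw trace writes features.csv
# under the output directory; the paths below are hypothetical.
def _example_stream_prepare():
    result = stream_prepare(
        "traces/mymodel-raw.jsonl",
        "outputs/analysis/mymodel",
        show_progress=True,
    )
    print(result["features_path"])  # .../features.csv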
def _safe_div(numerator: float, denominator: float) -> float:
return (numerator / denominator) if denominator else 0.0
def _tool_bursts(role_sequence: list[str]) -> list[int]:
bursts: list[int] = []
current = 0
for role in role_sequence:
if role == "tool":
current += 1
elif current:
bursts.append(current)
current = 0
if current:
bursts.append(current)
return bursts
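# Illustrative sketch: consecutive "tool" roles collapse into burst lengths.
def _example_tool_bursts():
    assert _tool_bursts(["assistant", "tool", "tool", "assistant", "tool"]) == [2, 1]
    assert _tool_bursts(["user", "assistant"]) == []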
def _max_tool_burst(role_sequence: list[str]) -> int:
bursts = _tool_bursts(role_sequence)
return max(bursts) if bursts else 0
def _avg_tool_burst(role_sequence: list[str]) -> float:
bursts = _tool_bursts(role_sequence)
return _safe_div(sum(bursts), len(bursts)) if bursts else 0.0
def _pattern_labels(
record,
*,
cache_hit_ratio: float | None = None,
tool_msg_count: int | None = None,
assistant_msg_count: int | None = None,
max_tool_burst: int | None = None,
) -> str:
labels: list[str] = []
if tool_msg_count is None:
tool_msg_count = sum(message.role == "tool" for message in record.messages)
if assistant_msg_count is None:
assistant_msg_count = sum(message.role == "assistant" for message in record.messages)
if cache_hit_ratio is None:
cache_hit_ratio = _safe_div(record.usage.cached_tokens, record.usage.input_tokens)
if max_tool_burst is None:
max_tool_burst = _max_tool_burst(record.role_sequence)
if tool_msg_count == 0 and len(record.declared_tools) == 0:
labels.append("single-shot")
if tool_msg_count > 0 and tool_msg_count >= assistant_msg_count:
labels.append("tool-heavy")
if max_tool_burst >= 4:
labels.append("tool-burst")
if cache_hit_ratio >= 0.8:
labels.append("cache-efficient")
if cache_hit_ratio <= 0.1:
labels.append("cache-cold")
if record.usage.input_tokens >= 32000 and cache_hit_ratio <= 0.1:
labels.append("long-context-no-cache")
return ";".join(sorted(set(labels)))
def _apply_slow_request_threshold(features_path: Path) -> None:
with features_path.open("r", encoding="utf-8") as handle:
    latencies = [safe_int(row["latency_ms"]) for row in csv.DictReader(handle)]
if not latencies:
return
latencies.sort()
p90_latency = percentile(latencies, 0.9)
temp_path = features_path.with_suffix(features_path.suffix + ".tmp")
with features_path.open("r", encoding="utf-8") as input_handle, temp_path.open(
"w", encoding="utf-8", newline=""
) as output_handle:
reader = csv.DictReader(input_handle)
writer = None
for row in reader:
            slow_request = 1 if safe_int(row["latency_ms"]) >= p90_latency else 0
pattern_labels = {label for label in row.get("pattern_labels", "").split(";") if label}
row["slow_request"] = str(slow_request)
if slow_request and row.get("high_cache") == "1":
pattern_labels.add("slow-despite-cache")
row["pattern_labels"] = ";".join(sorted(pattern_labels))
if writer is None:
writer = csv.DictWriter(output_handle, fieldnames=list(row.keys()))
writer.writeheader()
writer.writerow(row)
temp_path.replace(features_path)

trace_analyzer/report.py Normal file

@@ -0,0 +1,271 @@
import csv
import json
from collections import Counter
from pathlib import Path
from .features import feature_to_row
from .helpers import series_stats
from .parser import flatten_record, record_to_dict
def ensure_output_dir(path):
path.mkdir(parents=True, exist_ok=True)
return path
def write_jsonl(path, rows):
with open(path, "w", encoding="utf-8") as handle:
for row in rows:
handle.write(json.dumps(row, ensure_ascii=False) + "\n")
def write_csv(path, rows):
if not rows:
with open(path, "w", encoding="utf-8", newline="") as handle:
handle.write("")
return
fieldnames = list(rows[0].keys())
with open(path, "w", encoding="utf-8", newline="") as handle:
writer = csv.DictWriter(handle, fieldnames=fieldnames)
writer.writeheader()
writer.writerows(rows)
def write_parquet(path, rows):
try:
import pyarrow as pa
import pyarrow.parquet as pq
except ImportError as exc:
raise RuntimeError("Parquet output requires pyarrow to be installed.") from exc
table = pa.Table.from_pylist(rows)
pq.write_table(table, path)
def write_normalized(records, output_dir, output_format="jsonl"):
output_dir = ensure_output_dir(output_dir)
rows = [record_to_dict(record) for record in records]
if output_format == "jsonl":
path = output_dir / "normalized.jsonl"
write_jsonl(path, rows)
return path
if output_format == "csv":
path = output_dir / "normalized.csv"
write_csv(path, [flatten_record(record) for record in records])
return path
if output_format == "parquet":
path = output_dir / "normalized.parquet"
write_parquet(path, rows)
return path
raise ValueError(f"Unsupported format: {output_format}")
def write_features(features, output_dir):
output_dir = ensure_output_dir(output_dir)
path = output_dir / "features.csv"
write_csv(path, [feature_to_row(feature) for feature in features])
return path
def build_summary(records, features):
model_counts = Counter(feature.model or "unknown" for feature in features)
status_code_counts = Counter(feature.status_code or "unknown" for feature in features)
role_transition_counts = Counter()
for feature in features:
role_transition_counts["assistant->tool"] += feature.assistant_to_tool_count
role_transition_counts["tool->assistant"] += feature.tool_to_assistant_count
role_transition_counts["tool->tool"] += feature.tool_to_tool_count
role_transition_counts["assistant->user"] += feature.assistant_to_user_count
role_transition_counts["user->assistant"] += feature.user_to_assistant_count
latency_stats = series_stats([feature.latency_ms for feature in features])
cache_ratio_stats = series_stats([feature.cache_hit_ratio for feature in features])
cached_token_stats = series_stats([feature.cached_tokens for feature in features])
declared_tool_stats = series_stats([feature.declared_tool_count for feature in features])
burst_stats = series_stats([feature.max_consecutive_tool_msgs for feature in features])
high_burst_requests = sorted(
[
{
"request_id": feature.request_id,
"session_id": feature.session_id,
"max_consecutive_tool_msgs": feature.max_consecutive_tool_msgs,
"tool_to_tool_count": feature.tool_to_tool_count,
}
for feature in features
if feature.tool_burst_alert
],
key=lambda item: (item["max_consecutive_tool_msgs"], item["tool_to_tool_count"]),
reverse=True,
)[:10]
slow_despite_cache = sorted(
[
{
"request_id": feature.request_id,
"session_id": feature.session_id,
"latency_ms": feature.latency_ms,
"cache_hit_ratio": feature.cache_hit_ratio,
}
for feature in features
if "slow-despite-cache" in feature.pattern_labels
],
key=lambda item: item["latency_ms"],
reverse=True,
)[:10]
long_context_no_cache = sorted(
[
{
"request_id": feature.request_id,
"session_id": feature.session_id,
"input_tokens": feature.input_tokens,
"cache_hit_ratio": feature.cache_hit_ratio,
}
for feature in features
if "long-context-no-cache" in feature.pattern_labels
],
key=lambda item: item["input_tokens"],
reverse=True,
)[:10]
cache_buckets = []
for label, low, high in [
("lt_0_2", 0.0, 0.2),
("0_2_to_0_8", 0.2, 0.8),
("ge_0_8", 0.8, 1.01),
]:
bucket = [feature for feature in features if low <= feature.cache_hit_ratio < high]
cache_buckets.append(
{
"bucket": label,
"count": len(bucket),
"avg_latency_ms": series_stats([feature.latency_ms for feature in bucket])["mean"],
"avg_cache_hit_ratio": series_stats([feature.cache_hit_ratio for feature in bucket])["mean"],
}
)
return {
"record_count": len(records),
"success_count": sum(1 for feature in features if feature.status_code in {"1000", "200"}),
"session_count": len({record.meta.session_id for record in records if record.meta.session_id}),
"model_counts": dict(model_counts),
"status_code_counts": dict(status_code_counts),
"thresholds": {
"long_context": 32000,
"high_cache": 0.8,
"tool_burst_alert": 4,
"tool_loop_alert": 3,
"slow_request_p90_latency_ms": latency_stats["p90"],
},
"tool_patterns": {
"role_transitions": dict(role_transition_counts),
"declared_tool_count": declared_tool_stats,
"max_consecutive_tool_msgs": burst_stats,
"tool_burst_alert_count": sum(feature.tool_burst_alert for feature in features),
"tool_loop_alert_count": sum(feature.tool_loop_alert for feature in features),
"high_burst_requests": high_burst_requests,
},
"cache_patterns": {
"cached_tokens": cached_token_stats,
"cache_hit_ratio": cache_ratio_stats,
"latency_ms": latency_stats,
"cache_buckets": cache_buckets,
},
"anomalies": {
"slow_despite_cache": slow_despite_cache,
"long_context_no_cache": long_context_no_cache,
},
}
def _format_top_requests(rows, columns):
if not rows:
return "_none_"
header = "| " + " | ".join(columns) + " |"
divider = "| " + " | ".join(["---"] * len(columns)) + " |"
lines = [header, divider]
for row in rows:
lines.append("| " + " | ".join(_render_value(row.get(column, "")) for column in columns) + " |")
return "\n".join(lines)
def _render_value(value):
if isinstance(value, float):
return f"{value:.4f}".rstrip("0").rstrip(".")
return str(value)
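# Illustrative sketch: rows render as a markdown table, with floats trimmed by
# _render_value.
def _example_format_top_requests():
    rows = [{"request_id": "r1", "cache_hit_ratio": 0.5}]
    table = _format_top_requests(rows, ["request_id", "cache_hit_ratio"])
    assert table == (
        "| request_id | cache_hit_ratio |\n"
        "| --- | --- |\n"
        "| r1 | 0.5 |"
    )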
def _render_mapping(mapping):
if isinstance(mapping, dict):
rendered = {key: _render_mapping(value) for key, value in mapping.items()}
return json.dumps(rendered, ensure_ascii=False)
if isinstance(mapping, list):
return [_render_mapping(value) for value in mapping]
if isinstance(mapping, float):
return float(f"{mapping:.4f}")
return mapping
def build_markdown_report(summary):
lines = [
"# Trace Analysis Report",
"",
"## Data Overview",
f"- Records: {summary['record_count']}",
f"- Success count: {summary['success_count']}",
f"- Session count: {summary['session_count']}",
f"- Models: {_render_mapping(summary['model_counts'])}",
f"- Status codes: {_render_mapping(summary['status_code_counts'])}",
"",
"## Tool Patterns",
f"- Role transitions: {_render_mapping(summary['tool_patterns']['role_transitions'])}",
f"- Declared tool count stats: {_render_mapping(summary['tool_patterns']['declared_tool_count'])}",
f"- Max consecutive tool msg stats: {_render_mapping(summary['tool_patterns']['max_consecutive_tool_msgs'])}",
f"- Tool burst alerts: {summary['tool_patterns']['tool_burst_alert_count']}",
f"- Tool loop alerts: {summary['tool_patterns']['tool_loop_alert_count']}",
"",
"High burst requests:",
_format_top_requests(
summary["tool_patterns"]["high_burst_requests"],
["request_id", "session_id", "max_consecutive_tool_msgs", "tool_to_tool_count"],
),
"",
"## Cache Patterns",
f"- Cached token stats: {_render_mapping(summary['cache_patterns']['cached_tokens'])}",
f"- Cache hit ratio stats: {_render_mapping(summary['cache_patterns']['cache_hit_ratio'])}",
f"- Latency stats: {_render_mapping(summary['cache_patterns']['latency_ms'])}",
"",
"Cache buckets:",
_format_top_requests(
summary["cache_patterns"]["cache_buckets"],
["bucket", "count", "avg_latency_ms", "avg_cache_hit_ratio"],
),
"",
"## Anomalies",
"Slow despite cache:",
_format_top_requests(
summary["anomalies"]["slow_despite_cache"],
["request_id", "session_id", "latency_ms", "cache_hit_ratio"],
),
"",
"Long context no cache:",
_format_top_requests(
summary["anomalies"]["long_context_no_cache"],
["request_id", "session_id", "input_tokens", "cache_hit_ratio"],
),
"",
]
return "\n".join(lines)
def write_report(records, features, output_dir):
output_dir = ensure_output_dir(output_dir)
summary = build_summary(records, features)
summary_path = output_dir / "summary.json"
with open(summary_path, "w", encoding="utf-8") as handle:
json.dump(summary, handle, ensure_ascii=False, indent=2)
report_path = output_dir / "report.md"
with open(report_path, "w", encoding="utf-8") as handle:
handle.write(build_markdown_report(summary))
return summary_path, report_path

trace_analyzer/reporting.py Normal file

@@ -0,0 +1,228 @@
from __future__ import annotations
import csv
import json
from collections import Counter
from pathlib import Path
from trace_analyzer.helpers import safe_float, safe_int, series_stats
from trace_analyzer.layout import resolve_details_summary_path
from trace_analyzer.report import build_markdown_report
_INT_FIELDS = (
    "message_count",
    "conversation_depth",
    "declared_tool_count",
    "assistant_msg_count",
    "tool_msg_count",
    "user_msg_count",
    "system_msg_count",
    "assistant_to_tool_count",
    "tool_to_assistant_count",
    "tool_to_tool_count",
    "assistant_to_user_count",
    "user_to_assistant_count",
    "max_consecutive_tool_msgs",
    "has_tool_loop",
    "input_tokens",
    "output_tokens",
    "total_tokens",
    "reasoning_tokens",
    "cached_tokens",
    "uncached_prompt_tokens",
    "latency_ms",
    "long_context",
    "high_cache",
    "tool_burst_alert",
    "tool_loop_alert",
    "slow_request",
)
_FLOAT_FIELDS = (
    "avg_tool_burst_len",
    "cache_hit_ratio",
    "output_input_ratio",
    "ms_per_input_token",
    "ms_per_output_token",
)
def _iter_feature_rows(features_path: str | Path):
    with Path(features_path).open("r", encoding="utf-8") as handle:
        for row in csv.DictReader(handle):
            for field in _INT_FIELDS:
                row[field] = safe_int(row.get(field))
            for field in _FLOAT_FIELDS:
                row[field] = safe_float(row.get(field))
            row["pattern_labels"] = [label for label in str(row.get("pattern_labels", "")).split(";") if label]
            yield row
def build_summary_from_features(features_path: str | Path) -> dict:
model_counts = Counter()
status_code_counts = Counter()
role_transition_counts = Counter()
session_ids: set[str] = set()
latencies: list[int] = []
cache_ratios: list[float] = []
cached_tokens_list: list[int] = []
declared_tool_counts: list[int] = []
burst_values: list[int] = []
record_count = 0
success_count = 0
high_burst_requests: list[dict] = []
slow_despite_cache: list[dict] = []
long_context_no_cache: list[dict] = []
tool_burst_alert_count = 0
tool_loop_alert_count = 0
cache_bucket_input = {
"lt_0_2": {"latencies": [], "ratios": [], "count": 0},
"0_2_to_0_8": {"latencies": [], "ratios": [], "count": 0},
"ge_0_8": {"latencies": [], "ratios": [], "count": 0},
}
for row in _iter_feature_rows(features_path):
record_count += 1
model_counts[row.get("model") or "unknown"] += 1
status_code_counts[row.get("status_code") or "unknown"] += 1
if row.get("session_id"):
session_ids.add(row["session_id"])
if row.get("status_code") in {"1000", "200"}:
success_count += 1
role_transition_counts["assistant->tool"] += row["assistant_to_tool_count"]
role_transition_counts["tool->assistant"] += row["tool_to_assistant_count"]
role_transition_counts["tool->tool"] += row["tool_to_tool_count"]
role_transition_counts["assistant->user"] += row["assistant_to_user_count"]
role_transition_counts["user->assistant"] += row["user_to_assistant_count"]
latencies.append(row["latency_ms"])
cache_ratios.append(row["cache_hit_ratio"])
cached_tokens_list.append(row["cached_tokens"])
declared_tool_counts.append(row["declared_tool_count"])
burst_values.append(row["max_consecutive_tool_msgs"])
tool_burst_alert_count += row["tool_burst_alert"]
tool_loop_alert_count += row["tool_loop_alert"]
if row["tool_burst_alert"]:
high_burst_requests.append(
{
"request_id": row["request_id"],
"session_id": row["session_id"],
"max_consecutive_tool_msgs": row["max_consecutive_tool_msgs"],
"tool_to_tool_count": row["tool_to_tool_count"],
}
)
high_burst_requests.sort(
key=lambda item: (item["max_consecutive_tool_msgs"], item["tool_to_tool_count"]),
reverse=True,
)
del high_burst_requests[10:]
if "slow-despite-cache" in row["pattern_labels"]:
slow_despite_cache.append(
{
"request_id": row["request_id"],
"session_id": row["session_id"],
"latency_ms": row["latency_ms"],
"cache_hit_ratio": row["cache_hit_ratio"],
}
)
slow_despite_cache.sort(key=lambda item: item["latency_ms"], reverse=True)
del slow_despite_cache[10:]
if "long-context-no-cache" in row["pattern_labels"]:
long_context_no_cache.append(
{
"request_id": row["request_id"],
"session_id": row["session_id"],
"input_tokens": row["input_tokens"],
"cache_hit_ratio": row["cache_hit_ratio"],
}
)
long_context_no_cache.sort(key=lambda item: item["input_tokens"], reverse=True)
del long_context_no_cache[10:]
ratio = row["cache_hit_ratio"]
if ratio < 0.2:
bucket_name = "lt_0_2"
elif ratio < 0.8:
bucket_name = "0_2_to_0_8"
else:
bucket_name = "ge_0_8"
cache_bucket_input[bucket_name]["count"] += 1
cache_bucket_input[bucket_name]["latencies"].append(row["latency_ms"])
cache_bucket_input[bucket_name]["ratios"].append(row["cache_hit_ratio"])
latency_stats = series_stats(latencies)
cache_ratio_stats = series_stats(cache_ratios)
cached_token_stats = series_stats(cached_tokens_list)
declared_tool_stats = series_stats(declared_tool_counts)
burst_stats = series_stats(burst_values)
cache_buckets = []
for label in ["lt_0_2", "0_2_to_0_8", "ge_0_8"]:
bucket = cache_bucket_input[label]
cache_buckets.append(
{
"bucket": label,
"count": bucket["count"],
"avg_latency_ms": series_stats(bucket["latencies"])["mean"],
"avg_cache_hit_ratio": series_stats(bucket["ratios"])["mean"],
}
)
return {
"record_count": record_count,
"success_count": success_count,
"session_count": len(session_ids),
"model_counts": dict(model_counts),
"status_code_counts": dict(status_code_counts),
"thresholds": {
"long_context": 32000,
"high_cache": 0.8,
"tool_burst_alert": 4,
"tool_loop_alert": 3,
"slow_request_p90_latency_ms": latency_stats["p90"],
},
"tool_patterns": {
"role_transitions": dict(role_transition_counts),
"declared_tool_count": declared_tool_stats,
"max_consecutive_tool_msgs": burst_stats,
"tool_burst_alert_count": tool_burst_alert_count,
"tool_loop_alert_count": tool_loop_alert_count,
"high_burst_requests": high_burst_requests,
},
"cache_patterns": {
"cached_tokens": cached_token_stats,
"cache_hit_ratio": cache_ratio_stats,
"latency_ms": latency_stats,
"cache_buckets": cache_buckets,
},
"anomalies": {
"slow_despite_cache": slow_despite_cache,
"long_context_no_cache": long_context_no_cache,
},
}
def write_reports(
*,
features_path: str | Path,
output_dir: str | Path,
pipeline_summary: dict | None = None,
) -> dict:
output_root = Path(output_dir)
output_root.mkdir(parents=True, exist_ok=True)
summary = build_summary_from_features(features_path)
summary_path = output_root / "summary.json"
summary_path.write_text(json.dumps(summary, ensure_ascii=False, indent=2), encoding="utf-8")
report_path = output_root / "report.md"
report_path.write_text(build_markdown_report(summary), encoding="utf-8")
combined = {
"summary": summary,
"pipeline": pipeline_summary or {},
}
details_summary_path = resolve_details_summary_path(output_root)
if details_summary_path is not None:
combined["details_summary"] = json.loads(details_summary_path.read_text(encoding="utf-8"))
combined_path = output_root / "analysis_snapshot.json"
combined_path.write_text(json.dumps(combined, ensure_ascii=False, indent=2), encoding="utf-8")
return {
"summary_path": str(summary_path),
"report_path": str(report_path),
"analysis_snapshot_path": str(combined_path),
}
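# Illustrative sketch: regenerate summary.json, report.md, and
# analysis_snapshot.json from an existing features.csv; paths are hypothetical.
def _example_write_reports():
    paths = write_reports(
        features_path="outputs/analysis/mydataset/features.csv",
        output_dir="outputs/analysis/mydataset",
    )
    print(paths["report_path"])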

trace_analyzer/resume_advanced.py Normal file

@@ -0,0 +1,801 @@
from __future__ import annotations
import csv
import json
import time
from collections import Counter
from itertools import islice
from pathlib import Path
import psutil
from tqdm.auto import tqdm
from .layout import DETAILS_SUMMARY_FILENAME, preferred_details_dir, resolve_details_dir
PROGRESS_FLUSH_INTERVAL_S = 5.0
PROGRESS_REFRESH_INTERVAL_S = 0.5
PROGRESS_REFRESH_INTERVAL_REQ = 256
DEFAULT_INPUT_LENGTH_BUCKET_THRESHOLDS = [32 * 1024, 85 * 1024, 128 * 1024]
# Positional indices into the per-block metadata list kept by InMemoryBlockCache.
FIRST_SEEN_MS = 0
LAST_SEEN_MS = 1
LAST_REUSE_MS = 2
FIRST_REQUEST_ID = 3
LAST_REQUEST_ID = 4
LAST_REUSE_REQUEST_ID = 5
REUSE_COUNT = 6
def _format_bucket_boundary(value: int) -> str:
if value == 0:
return "0"
if value % (1024 * 1024) == 0:
return f"{value // (1024 * 1024)}Mi"
if value % 1024 == 0:
return f"{value // 1024}Ki"
return str(value)
def build_input_length_bucket_defs(thresholds=None):
parsed_thresholds = (
list(DEFAULT_INPUT_LENGTH_BUCKET_THRESHOLDS)
if thresholds is None
else sorted(set(int(value) for value in thresholds))
)
if not parsed_thresholds:
raise ValueError("At least one input-length bucket threshold is required.")
if any(value <= 0 for value in parsed_thresholds):
raise ValueError("Input-length bucket thresholds must be positive integers.")
if parsed_thresholds == DEFAULT_INPUT_LENGTH_BUCKET_THRESHOLDS:
return [
("0-32Ki", 0, 32 * 1024),
("32-85Ki", 32 * 1024, 85 * 1024),
("85-128Ki", 85 * 1024, 128 * 1024),
("128Ki+", 128 * 1024, None),
]
bucket_defs = []
lower_bound = 0
for upper_bound in parsed_thresholds:
bucket_defs.append(
(
f"{_format_bucket_boundary(lower_bound)}-{_format_bucket_boundary(upper_bound)}",
lower_bound,
upper_bound,
)
)
lower_bound = upper_bound
bucket_defs.append((f"{_format_bucket_boundary(lower_bound)}+", lower_bound, None))
return bucket_defs
def assign_input_length_bucket(input_tokens: int, bucket_defs=None) -> str:
bucket_defs = bucket_defs or build_input_length_bucket_defs()
for bucket_label, lower_bound, upper_bound in bucket_defs:
if input_tokens >= lower_bound and (upper_bound is None or input_tokens < upper_bound):
return bucket_label
return bucket_defs[-1][0]
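# Illustrative sketch: custom thresholds produce half-open [lower, upper)
# buckets with Ki/Mi labels plus an unbounded tail bucket.
def _example_input_length_buckets():
    defs = build_input_length_bucket_defs([16 * 1024, 64 * 1024])
    assert defs == [
        ("0-16Ki", 0, 16 * 1024),
        ("16Ki-64Ki", 16 * 1024, 64 * 1024),
        ("64Ki+", 64 * 1024, None),
    ]
    assert assign_input_length_bucket(70000, defs) == "64Ki+"
    # With the default thresholds, 90000 tokens lands in "85-128Ki".
    assert assign_input_length_bucket(90000) == "85-128Ki"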
def write_csv(path: Path, rows: list[dict], fieldnames: list[str] | None = None) -> Path:
path.parent.mkdir(parents=True, exist_ok=True)
if fieldnames is None and rows:
fieldnames = list(rows[0].keys())
fieldnames = fieldnames or []
with path.open("w", encoding="utf-8", newline="") as handle:
writer = csv.DictWriter(handle, fieldnames=fieldnames)
if fieldnames:
writer.writeheader()
if rows:
writer.writerows(rows)
return path
def _estimate_peak_rss_mb(current_rss_mb, peak_rss_mb, fraction_done):
    # Heuristic: allow up to 25% headroom over the observed peak while work
    # remains, decaying linearly to zero as fraction_done approaches 1.
    baseline = max(current_rss_mb, peak_rss_mb)
    headroom = 1.0 + 0.25 * max(0.0, 1.0 - fraction_done)
    return baseline * headroom
def _progress_postfix(process, peak_rss_mb, fraction_done, **extra):
current_rss_mb = process.memory_info().rss / (1024 * 1024)
peak_rss_mb = max(peak_rss_mb, current_rss_mb)
postfix = {
"rss_mb": f"{current_rss_mb:.0f}",
"est_peak_mb": f"{_estimate_peak_rss_mb(current_rss_mb, peak_rss_mb, fraction_done):.0f}",
}
postfix.update(extra)
return postfix, peak_rss_mb
def _format_duration(seconds):
if seconds is None or seconds < 0:
return "?"
if seconds < 60:
return f"{seconds:.0f}s"
if seconds < 3600:
return f"{seconds / 60:.1f}m"
return f"{seconds / 3600:.2f}h"
def _write_progress_state(
path,
*,
total_requests,
processed_requests,
started_at,
current_rss_mb,
peak_rss_mb,
est_peak_mb,
source_path,
features_path,
last_request_id,
block_state_count,
bucket_state_count,
):
elapsed_s = max(time.monotonic() - started_at, 1e-9)
req_per_s = processed_requests / elapsed_s
eta_s = ((total_requests - processed_requests) / req_per_s) if req_per_s > 0 and processed_requests < total_requests else 0.0
payload = {
"source_path": str(source_path),
"features_path": str(features_path),
"total_requests": total_requests,
"processed_requests": processed_requests,
"fraction_done": (processed_requests / total_requests) if total_requests else 1.0,
"elapsed_s": elapsed_s,
"req_per_s": req_per_s,
"eta_s": eta_s,
"eta_human": _format_duration(eta_s),
"rss_mb": current_rss_mb,
"peak_rss_mb": peak_rss_mb,
"est_peak_mb": est_peak_mb,
"block_state_count": block_state_count,
"bucket_state_count": bucket_state_count,
"last_request_id": last_request_id,
"updated_at_epoch_s": time.time(),
}
tmp_path = path.with_suffix(path.suffix + ".tmp")
tmp_path.write_text(json.dumps(payload, ensure_ascii=False, indent=2), encoding="utf-8")
tmp_path.replace(path)
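# Illustrative sketch: progress.json is flushed atomically every few seconds,
# so an external watcher can poll it for throughput and ETA; the path below is
# hypothetical.
def _example_read_progress_state():
    state_path = Path("outputs/analysis/mydataset/details/progress.json")  # hypothetical
    if state_path.exists():
        state = json.loads(state_path.read_text(encoding="utf-8"))
        print(
            f"{state['processed_requests']}/{state['total_requests']} "
            f"({state['req_per_s']:.1f} req/s, ETA {state['eta_human']})"
        )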
def _count_lines(path):
with open(path, "r", encoding="utf-8") as handle:
return sum(1 for _ in handle)
def _count_feature_rows(path):
total_lines = _count_lines(path)
return max(total_lines - 1, 0)
class InMemoryBlockCache:
def __init__(self):
self.state = {}
def get(self, block_id):
return self.state.get(block_id)
def put(self, block_id, meta):
self.state[block_id] = meta
def iter_blocks(self):
for block_id, meta in self.state.items():
yield (
block_id,
meta[FIRST_SEEN_MS],
meta[LAST_SEEN_MS],
meta[LAST_REUSE_MS],
meta[FIRST_REQUEST_ID],
meta[LAST_REQUEST_ID],
meta[LAST_REUSE_REQUEST_ID],
meta[REUSE_COUNT],
)
def __len__(self):
return len(self.state)
def _normalize_source_row(row):
    meta = row.get("meta")
    if not isinstance(meta, dict):
        meta = {}
declared_tools = row.get("declared_tools", [])
raw_messages = row.get("raw_messages", [])
return {
"meta": meta,
"declared_tools": [tool for tool in declared_tools if isinstance(tool, dict)],
"raw_messages": [message for message in raw_messages if isinstance(message, dict)],
}
def _read_source_minimal(path):
with open(path, "r", encoding="utf-8") as handle:
for line in handle:
row = _normalize_source_row(json.loads(line))
meta = row["meta"]
yield {
"request_id": meta["request_id"],
"session_id": meta["session_id"],
"request_ready_time_ms": meta["request_ready_time_ms"],
"request_end_time_ms": meta["request_end_time_ms"],
"declared_tool_names": [
tool["name"] for tool in row.get("declared_tools", []) if tool.get("name")
],
"raw_messages": row["raw_messages"],
}
def _count_child_refs_by_chat_id(path, limit=None):
counts = Counter()
for index, row in enumerate(_iter_release_rows(path), start=1):
if limit is not None and index > limit:
break
parent_chat_id = int(row.get("parent_chat_id", -1) or -1)
if parent_chat_id != -1:
counts[parent_chat_id] += 1
return counts
def _new_block_meta(request_id, ready_ms):
    # Layout matches the index constants above: [first_seen_ms, last_seen_ms,
    # last_reuse_ms, first_request_id, last_request_id, last_reuse_request_id, reuse_count].
    return [ready_ms, ready_ms, 0, request_id, request_id, "", 0]
def _build_alive_block_timeline_from_events(events):
alive_rows = []
alive_count = 0
peak_alive_blocks = 0
for timestamp_ms in sorted(events):
alive_count += events[timestamp_ms]
peak_alive_blocks = max(peak_alive_blocks, alive_count)
alive_rows.append(
{
"timestamp_ms": timestamp_ms,
"delta_alive_blocks": events[timestamp_ms],
"alive_block_count": alive_count,
}
)
return {
"peak_alive_blocks": peak_alive_blocks,
"event_count": len(alive_rows),
}, alive_rows
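# Illustrative sketch: events map timestamps to +/- alive-block deltas; the
# running sum gives the alive count and its peak.
def _example_alive_block_timeline():
    events = Counter({1000: 2, 1501: -1, 2001: -1})
    summary, rows = _build_alive_block_timeline_from_events(events)
    assert summary == {"peak_alive_blocks": 2, "event_count": 3}
    assert rows[0] == {"timestamp_ms": 1000, "delta_alive_blocks": 2, "alive_block_count": 2}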
def _compute_prefix_hits(
global_store,
bucket_store,
*,
hash_ids,
request_id,
ready_ms,
reuse_gap_counts=None,
):
    # Walk blocks left to right: a prefix hit continues only while every block
    # is already present in the store; metadata is refreshed for all blocks.
    global_prefix_active = True
bucket_prefix_active = True
global_prefix_match_blocks = 0
bucket_prefix_match_blocks = 0
global_source_request_id = ""
bucket_source_request_id = ""
for block_id in hash_ids:
global_meta = global_store.get(block_id)
if global_meta is not None and global_prefix_active:
global_prefix_match_blocks += 1
global_source_request_id = global_meta[LAST_REQUEST_ID]
if reuse_gap_counts is not None:
reuse_gap_counts[max(ready_ms - global_meta[LAST_SEEN_MS], 0)] += 1
global_meta[LAST_REUSE_MS] = ready_ms
global_meta[LAST_REUSE_REQUEST_ID] = request_id
global_meta[REUSE_COUNT] += 1
elif global_meta is None:
global_prefix_active = False
global_meta = _new_block_meta(request_id, ready_ms)
else:
global_prefix_active = False
global_meta[LAST_SEEN_MS] = ready_ms
global_meta[LAST_REQUEST_ID] = request_id
global_store.put(block_id, global_meta)
bucket_meta = bucket_store.get(block_id)
if bucket_meta is not None and bucket_prefix_active:
bucket_prefix_match_blocks += 1
bucket_source_request_id = bucket_meta[LAST_REQUEST_ID]
bucket_meta[LAST_REUSE_MS] = ready_ms
bucket_meta[LAST_REUSE_REQUEST_ID] = request_id
bucket_meta[REUSE_COUNT] += 1
elif bucket_meta is None:
bucket_prefix_active = False
bucket_meta = _new_block_meta(request_id, ready_ms)
else:
bucket_prefix_active = False
bucket_meta[LAST_SEEN_MS] = ready_ms
bucket_meta[LAST_REQUEST_ID] = request_id
bucket_store.put(block_id, bucket_meta)
return (
global_prefix_match_blocks,
global_source_request_id,
bucket_prefix_match_blocks,
bucket_source_request_id,
)
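# Illustrative sketch: the second request shares blocks [1, 2] with the first,
# so both stores report a 2-block prefix hit attributed to "r1"; block 9
# breaks the prefix.
def _example_compute_prefix_hits():
    global_store, bucket_store = InMemoryBlockCache(), InMemoryBlockCache()
    _compute_prefix_hits(global_store, bucket_store, hash_ids=[1, 2, 3], request_id="r1", ready_ms=0)
    hits = _compute_prefix_hits(global_store, bucket_store, hash_ids=[1, 2, 9], request_id="r2", ready_ms=50)
    assert hits == (2, "r1", 2, "r1")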
def _iter_release_rows(path):
with open(path, "r", encoding="utf-8") as handle:
for line in handle:
row = json.loads(line)
yield {
"chat_id": int(row.get("chat_id", -1) or -1),
"parent_chat_id": int(row.get("parent_chat_id", -1) or -1),
"timestamp": row.get("timestamp"),
"turn": int(row.get("turn", 0) or 0),
"type": row.get("type", ""),
"input_length": int(row.get("input_length", 0) or 0),
"output_length": int(row.get("output_length", 0) or 0),
"hash_ids": [int(value) for value in row.get("hash_ids", [])],
}
def _message_signature(message: dict) -> str:
    # Role-only signature: prefix comparison tracks conversation shape, not message content.
    return str(message.get("role", ""))
def _common_prefix_message_count(previous_messages, current_messages):
count = 0
for previous, current in zip(previous_messages, current_messages):
if _message_signature(previous) != _message_signature(current):
break
count += 1
return count
def _classify_trigger(previous_messages, current_messages):
common_prefix_count = _common_prefix_message_count(previous_messages, current_messages)
appended_messages = current_messages[common_prefix_count:]
appended_message_count = len(appended_messages)
last_role = str(current_messages[-1].get("role", "unknown")) if current_messages else "unknown"
trigger_group = last_role
trigger_detail = f"last_message_role={last_role}"
return {
"common_prefix_message_count": common_prefix_count,
"appended_message_count": appended_message_count,
"first_new_role": str(appended_messages[0].get("role", "unknown")) if appended_messages else "",
"trigger_group": trigger_group,
"trigger_detail": trigger_detail,
}
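# Illustrative sketch: two messages appended after a three-message shared
# prefix; the trigger group is keyed off the final message's role.
def _example_classify_trigger():
    previous = [{"role": "system"}, {"role": "user"}, {"role": "assistant"}]
    current = previous + [{"role": "tool"}, {"role": "assistant"}]
    trigger = _classify_trigger(previous, current)
    assert trigger["common_prefix_message_count"] == 3
    assert trigger["appended_message_count"] == 2
    assert trigger["first_new_role"] == "tool"
    assert trigger["trigger_group"] == "assistant"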
def _bucket_definition_rows(bucket_defs):
rows = []
for bucket, lower_bound, upper_bound in bucket_defs:
rows.append(
{
"bucket": bucket,
"input_tokens_min_inclusive": lower_bound,
"input_tokens_max_exclusive": upper_bound,
}
)
return rows
def _clear_details_dir(details_dir: Path) -> None:
details_dir.mkdir(parents=True, exist_ok=True)
for path in details_dir.iterdir():
if path.is_file():
path.unlink()
def collect_existing_detail_paths(output_dir):
details_dir = resolve_details_dir(output_dir)
return {
"details_dir": details_dir,
"progress": details_dir / "progress.json",
"request_metrics": details_dir / "request_metrics.csv",
"theoretical_block_reuse_gaps": details_dir / "theoretical_block_reuse_gaps.csv",
"theoretical_block_lifetimes": details_dir / "theoretical_block_lifetimes.csv",
"theoretical_alive_block_timeline": details_dir / "theoretical_alive_block_timeline.csv",
"session_bucket_boundary_miss": details_dir / "session_bucket_boundary_miss.csv",
"details_summary": details_dir / DETAILS_SUMMARY_FILENAME,
}
def run_advanced_from_existing(
source_path,
release_path,
features_path,
output_dir,
input_length_bucket_thresholds=None,
show_progress=True,
limit=None,
):
output_dir = Path(output_dir)
details_dir = preferred_details_dir(output_dir)
_clear_details_dir(details_dir)
source_path = Path(source_path)
release_path = Path(release_path)
features_path = Path(features_path)
total_requests = limit if limit is not None else _count_feature_rows(features_path)
release_request_count = _count_lines(release_path)
if limit is None and release_request_count != total_requests:
raise ValueError(
f"release/features row count mismatch: release={release_request_count} vs features={total_requests}"
)
process = psutil.Process()
peak_rss_mb = 0.0
started_at = time.monotonic()
bucket_defs = build_input_length_bucket_defs(input_length_bucket_thresholds)
child_ref_counts = _count_child_refs_by_chat_id(release_path, limit=limit)
store = InMemoryBlockCache()
bucket_stores = {bucket_label: InMemoryBlockCache() for bucket_label, _, _ in bucket_defs}
progress_state_path = details_dir / "progress.json"
next_progress_flush_at = started_at + PROGRESS_FLUSH_INTERVAL_S
request_metrics_path = details_dir / "request_metrics.csv"
processed_requests = 0
last_request_id = ""
reuse_gap_counts = Counter()
bucket_reused_block_totals = Counter()
total_prompt_blocks = 0
total_global_reused_blocks = 0
session_last = {}
chat_state_for_children = {}
session_bucket_totals = {
bucket_label: {
"edge_count": 0,
"reusable_edge_count": 0,
"cross_bucket_edge_count": 0,
"shared_prefix_units_sum": 0,
"cross_bucket_shared_prefix_units_sum": 0,
}
for bucket_label, _, _ in bucket_defs
}
with request_metrics_path.open("w", encoding="utf-8", newline="") as request_metrics_handle, features_path.open(
"r", encoding="utf-8"
) as features_handle:
feature_reader = csv.DictReader(features_handle)
source_iter = _read_source_minimal(source_path)
release_iter = _iter_release_rows(release_path)
if limit is not None:
feature_reader = islice(feature_reader, limit)
source_iter = islice(source_iter, limit)
release_iter = islice(release_iter, limit)
request_metrics_writer = None
progress = tqdm(
total=total_requests,
desc="Build details",
unit="req",
dynamic_ncols=True,
disable=not show_progress,
)
last_progress_refresh_at = started_at
try:
for source_row, feature_row, release_row in zip(source_iter, feature_reader, release_iter):
request_id = source_row["request_id"]
session_id = source_row["session_id"]
ready_ms = int(source_row["request_ready_time_ms"])
end_ms = int(source_row["request_end_time_ms"])
tool_names = source_row["declared_tool_names"]
raw_messages = source_row["raw_messages"]
hash_ids = release_row["hash_ids"]
release_input_length = int(release_row["input_length"])
release_output_length = int(release_row["output_length"])
feature_input_tokens = int(feature_row["input_tokens"])
feature_output_tokens = int(feature_row["output_tokens"])
if feature_input_tokens != release_input_length:
raise ValueError(
f"release/raw mismatch at request {request_id}: "
f"features.input_tokens={feature_row['input_tokens']} vs release.input_length={release_input_length}"
)
if feature_output_tokens != release_output_length:
raise ValueError(
f"release/raw mismatch at request {request_id}: "
f"features.output_tokens={feature_row['output_tokens']} vs release.output_length={release_output_length}"
)
input_tokens = feature_input_tokens
bucket_label = assign_input_length_bucket(input_tokens, bucket_defs)
bucket_store = bucket_stores[bucket_label]
(
prefix_match_blocks,
global_source_request_id,
bucketed_prefix_match_blocks,
bucketed_source_request_id,
) = _compute_prefix_hits(
store,
bucket_store,
hash_ids=hash_ids,
request_id=request_id,
ready_ms=ready_ms,
reuse_gap_counts=reuse_gap_counts,
)
prompt_block_count = len(hash_ids)
theoretical_prefix_hit_ratio = prefix_match_blocks / prompt_block_count if prompt_block_count else 0.0
bucketed_theoretical_prefix_hit_ratio = (
bucketed_prefix_match_blocks / prompt_block_count if prompt_block_count else 0.0
)
previous_session_state = session_last.get(session_id)
trigger = _classify_trigger(
previous_session_state["raw_messages"] if previous_session_state is not None else [],
raw_messages,
)
feature_row["request_ready_time_ms"] = ready_ms
feature_row["request_end_time_ms"] = end_ms
feature_row["turn"] = release_row["turn"]
feature_row["chat_id"] = release_row["chat_id"]
feature_row["parent_chat_id"] = release_row["parent_chat_id"]
feature_row["trigger_group"] = trigger["trigger_group"]
feature_row["trigger_detail"] = trigger["trigger_detail"]
feature_row["first_new_role"] = trigger["first_new_role"]
feature_row["common_prefix_message_count"] = trigger["common_prefix_message_count"]
feature_row["appended_message_count"] = trigger["appended_message_count"]
feature_row["input_length_bucket"] = bucket_label
feature_row["declared_tool_names"] = ";".join(tool_names)
feature_row["theoretical_prompt_unit_length"] = prompt_block_count
feature_row["theoretical_prefix_hit_blocks"] = prefix_match_blocks
feature_row["theoretical_prefix_hit_ratio"] = theoretical_prefix_hit_ratio
feature_row["theoretical_source_request_id"] = global_source_request_id
feature_row["bucketed_theoretical_prefix_hit_blocks"] = bucketed_prefix_match_blocks
feature_row["bucketed_theoretical_prefix_hit_ratio"] = bucketed_theoretical_prefix_hit_ratio
feature_row["bucketed_theoretical_source_request_id"] = bucketed_source_request_id
feature_row["theoretical_bucket_boundary_loss_blocks"] = max(
prefix_match_blocks - bucketed_prefix_match_blocks,
0,
)
feature_row["theoretical_bucket_boundary_loss_ratio"] = (
feature_row["theoretical_bucket_boundary_loss_blocks"] / prompt_block_count
if prompt_block_count
else 0.0
)
if request_metrics_writer is None:
request_metrics_writer = csv.DictWriter(
request_metrics_handle,
fieldnames=list(feature_row.keys()),
)
request_metrics_writer.writeheader()
request_metrics_writer.writerow(feature_row)
chat_id = release_row["chat_id"]
parent_chat_id = release_row["parent_chat_id"]
if parent_chat_id != -1:
parent_state = chat_state_for_children.get(parent_chat_id)
if parent_state is not None:
shared_prefix_units = 0
for parent_block_id, child_block_id in zip(parent_state["hash_ids"], hash_ids):
if parent_block_id != child_block_id:
break
shared_prefix_units += 1
bucket_totals = session_bucket_totals[bucket_label]
bucket_totals["edge_count"] += 1
if shared_prefix_units > 0:
bucket_totals["reusable_edge_count"] += 1
if parent_state["bucket_label"] != bucket_label:
bucket_totals["cross_bucket_edge_count"] += 1
bucket_totals["cross_bucket_shared_prefix_units_sum"] += shared_prefix_units
bucket_totals["shared_prefix_units_sum"] += shared_prefix_units
remaining_children = child_ref_counts.get(parent_chat_id, 0) - 1
if remaining_children > 0:
child_ref_counts[parent_chat_id] = remaining_children
else:
child_ref_counts.pop(parent_chat_id, None)
chat_state_for_children.pop(parent_chat_id, None)
if chat_id != -1 and child_ref_counts.get(chat_id, 0) > 0:
chat_state_for_children[chat_id] = {
"bucket_label": bucket_label,
"hash_ids": hash_ids,
}
total_prompt_blocks += prompt_block_count
total_global_reused_blocks += prefix_match_blocks
bucket_reused_block_totals[bucket_label] += bucketed_prefix_match_blocks
session_last[session_id] = {
"request_id": request_id,
"request_ready_time_ms": ready_ms,
"request_end_time_ms": end_ms,
"raw_messages": raw_messages,
}
processed_requests += 1
last_request_id = request_id
progress.update(1)
now = time.monotonic()
should_refresh_progress = (
processed_requests == 1
or processed_requests % PROGRESS_REFRESH_INTERVAL_REQ == 0
or now - last_progress_refresh_at >= PROGRESS_REFRESH_INTERVAL_S
or processed_requests == total_requests
)
if should_refresh_progress:
fraction_done = progress.n / progress.total if progress.total else 0.0
elapsed_s = max(now - started_at, 1e-9)
req_per_s = progress.n / elapsed_s
eta_s = ((progress.total - progress.n) / req_per_s) if req_per_s > 0 and progress.total else 0.0
total_bucket_state_count = sum(len(each_store) for each_store in bucket_stores.values())
postfix, peak_rss_mb = _progress_postfix(
process,
peak_rss_mb,
fraction_done,
req_s=f"{req_per_s:.1f}",
eta=_format_duration(eta_s),
blocks=len(store),
bucket_blocks=total_bucket_state_count,
sessions=len(session_last),
)
progress.set_postfix(postfix)
last_progress_refresh_at = now
if processed_requests and now >= next_progress_flush_at:
current_rss_mb = process.memory_info().rss / (1024 * 1024)
peak_rss_mb = max(peak_rss_mb, current_rss_mb)
est_peak_mb = _estimate_peak_rss_mb(
current_rss_mb,
peak_rss_mb,
(processed_requests / total_requests) if total_requests else 1.0,
)
_write_progress_state(
progress_state_path,
total_requests=total_requests,
processed_requests=processed_requests,
started_at=started_at,
current_rss_mb=current_rss_mb,
peak_rss_mb=peak_rss_mb,
est_peak_mb=est_peak_mb,
source_path=f"{source_path} + {release_path}",
features_path=features_path,
last_request_id=last_request_id,
block_state_count=len(store),
                        bucket_state_count=sum(len(each_store) for each_store in bucket_stores.values()),
)
next_progress_flush_at = now + PROGRESS_FLUSH_INTERVAL_S
finally:
progress.close()
theoretical_block_reuse_gaps_path = details_dir / "theoretical_block_reuse_gaps.csv"
write_csv(
theoretical_block_reuse_gaps_path,
[
{"reuse_gap_ms": reuse_gap_ms, "count": count}
for reuse_gap_ms, count in sorted(reuse_gap_counts.items())
],
fieldnames=["reuse_gap_ms", "count"],
)
theoretical_block_lifetimes_path = details_dir / "theoretical_block_lifetimes.csv"
alive_block_events = Counter()
block_lifetime_rows = []
for (
block_hash,
first_seen_ms,
last_seen_ms,
last_reuse_ms,
first_request_id,
last_request_id_for_block,
last_reuse_request_id,
reuse_count,
) in store.iter_blocks():
lifecycle_end_ms = last_reuse_ms if reuse_count > 0 else first_seen_ms
lifetime_ms = max(lifecycle_end_ms - first_seen_ms, 0)
block_lifetime_rows.append(
{
"hash": block_hash,
"first_request_id": first_request_id,
"last_request_id": last_request_id_for_block,
"first_seen_ms": first_seen_ms,
"last_seen_ms": last_seen_ms,
"last_reuse_ms": last_reuse_ms,
"last_reuse_request_id": last_reuse_request_id,
"reuse_count": reuse_count,
"lifetime_ms": lifetime_ms,
"span_end_ms": lifecycle_end_ms,
"reused": 1 if reuse_count > 0 else 0,
}
)
        alive_block_events[first_seen_ms] += 1
        # The +1 keeps the block counted as alive at its final timestamp.
        alive_block_events[lifecycle_end_ms + 1] -= 1
write_csv(theoretical_block_lifetimes_path, block_lifetime_rows)
alive_block_timeline_summary, alive_block_timeline_rows = _build_alive_block_timeline_from_events(alive_block_events)
theoretical_alive_block_timeline_path = details_dir / "theoretical_alive_block_timeline.csv"
write_csv(theoretical_alive_block_timeline_path, alive_block_timeline_rows)
session_bucket_boundary_rows = []
for bucket_label, _, _ in bucket_defs:
bucket_totals = session_bucket_totals[bucket_label]
total_bucket_reused_blocks = bucket_reused_block_totals[bucket_label]
session_bucket_boundary_rows.append(
{
"bucket": bucket_label,
"edge_count": bucket_totals["edge_count"],
"reusable_edge_count": bucket_totals["reusable_edge_count"],
"cross_bucket_edge_count": bucket_totals["cross_bucket_edge_count"],
"cross_bucket_edge_fraction": (
bucket_totals["cross_bucket_edge_count"] / bucket_totals["edge_count"]
if bucket_totals["edge_count"]
else 0.0
),
"shared_prefix_units_sum": bucket_totals["shared_prefix_units_sum"],
"cross_bucket_shared_prefix_units_sum": bucket_totals["cross_bucket_shared_prefix_units_sum"],
"cross_bucket_shared_prefix_unit_fraction": (
bucket_totals["cross_bucket_shared_prefix_units_sum"] / bucket_totals["shared_prefix_units_sum"]
if bucket_totals["shared_prefix_units_sum"]
else 0.0
),
"bucket_total_reused_blocks": total_bucket_reused_blocks,
"reduced_reused_blocks_ratio": (
bucket_totals["cross_bucket_shared_prefix_units_sum"] / total_bucket_reused_blocks
if total_bucket_reused_blocks
else 0.0
),
}
)
session_bucket_boundary_miss_path = details_dir / "session_bucket_boundary_miss.csv"
write_csv(session_bucket_boundary_miss_path, session_bucket_boundary_rows)
details_summary_path = details_dir / DETAILS_SUMMARY_FILENAME
details_summary = {
"schema_version": 3,
"request_count": total_requests,
"figure_count": 13,
"cache_analysis_mode": "release_hash_ids",
"release_path": str(release_path),
"bucket_definition": {"buckets": _bucket_definition_rows(bucket_defs)},
"global_prompt_blocks": total_prompt_blocks,
"global_reused_blocks": total_global_reused_blocks,
"global_reuse_ratio": (total_global_reused_blocks / total_prompt_blocks) if total_prompt_blocks else 0.0,
"alive_block_timeline_summary": alive_block_timeline_summary,
"detail_files": [
"request_metrics.csv",
"theoretical_block_reuse_gaps.csv",
"theoretical_block_lifetimes.csv",
"theoretical_alive_block_timeline.csv",
"session_bucket_boundary_miss.csv",
DETAILS_SUMMARY_FILENAME,
"progress.json",
],
}
details_summary_path.write_text(json.dumps(details_summary, ensure_ascii=False, indent=2), encoding="utf-8")
current_rss_mb = process.memory_info().rss / (1024 * 1024)
peak_rss_mb = max(peak_rss_mb, current_rss_mb)
est_peak_mb = _estimate_peak_rss_mb(current_rss_mb, peak_rss_mb, 1.0)
_write_progress_state(
progress_state_path,
total_requests=total_requests,
processed_requests=processed_requests,
started_at=started_at,
current_rss_mb=current_rss_mb,
peak_rss_mb=peak_rss_mb,
est_peak_mb=est_peak_mb,
source_path=f"{source_path} + {release_path}",
features_path=features_path,
last_request_id=last_request_id,
block_state_count=len(store),
bucket_state_count=sum(len(bucket_store) for bucket_store in bucket_stores.values()),
)
return {
"details_dir": details_dir,
"progress": progress_state_path,
"request_metrics": request_metrics_path,
"theoretical_block_reuse_gaps": theoretical_block_reuse_gaps_path,
"theoretical_block_lifetimes": theoretical_block_lifetimes_path,
"theoretical_alive_block_timeline": theoretical_alive_block_timeline_path,
"session_bucket_boundary_miss": session_bucket_boundary_miss_path,
"details_summary": details_summary_path,
}
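# Illustrative sketch: re-running the advanced details pass from existing
# artifacts; every path below is hypothetical.
def _example_run_advanced_from_existing():
    paths = run_advanced_from_existing(
        source_path="traces/mymodel-raw.jsonl",  # formatter *-raw.jsonl
        release_path="traces/mymodel.jsonl",  # release file with hash_ids
        features_path="outputs/analysis/mymodel/features.csv",
        output_dir="outputs/analysis/mymodel",
        input_length_bucket_thresholds=[32 * 1024, 85 * 1024, 128 * 1024],
        limit=1000,  # sample the first 1000 requests
    )
    print(paths["details_summary"])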

trace_analyzer/study.py Normal file

File diff suppressed because it is too large.