Initial commit
trace_analyzer/__init__.py  (new file, 10 lines)
@@ -0,0 +1,10 @@
"""Trace analysis toolkit for coding-agent request logs."""


def main(argv=None):
    from .cli import main as cli_main

    return cli_main(argv)


__all__ = ["main"]
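A minimal usage sketch (not part of this commit): the package-level `main` simply forwards an argument list to the CLI, so it can be driven programmatically; the trace path below is a hypothetical placeholder.

# Usage sketch (not in this commit); the trace path is a made-up placeholder.
import trace_analyzer

exit_code = trace_analyzer.main(["features", "outputs/demo-raw.jsonl"])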
trace_analyzer/__main__.py  (new file, 5 lines)
@@ -0,0 +1,5 @@
from .cli import main


if __name__ == "__main__":
    raise SystemExit(main())
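The module guard above makes the package runnable with `python -m trace_analyzer`. A hedged sketch of the equivalent call from Python (not part of this commit; the trace path is a placeholder):

# Sketch (not in this commit): run the CLI as a module; the input file is hypothetical.
import subprocess
import sys

subprocess.run(
    [sys.executable, "-m", "trace_analyzer", "analyze", "outputs/demo-raw.jsonl"],
    check=True,
)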
trace_analyzer/cli.py  (new file, 477 lines)
@@ -0,0 +1,477 @@
import argparse
import json
from pathlib import Path

from tqdm.auto import tqdm

from .figures import render_figures
from .features import compute_features
from .layout import details_outputs_exist
from .parser import default_output_dir, infer_analysis_dataset_name, load_records, path_looks_like_release_trace
from .preparation import stream_prepare
from .report import write_features, write_normalized, write_report
from .reporting import write_reports
from .resume_advanced import collect_existing_detail_paths, run_advanced_from_existing
from .study import parse_input_length_bucket_thresholds, run_study


def build_parser():
    parser = argparse.ArgumentParser(description="Analyze coding-agent trace patterns.")
    subparsers = parser.add_subparsers(dest="command", required=True)

    analyze_parser = subparsers.add_parser(
        "analyze",
        help="Run the full analysis workflow from one formatter-generated *-raw.jsonl trace.",
    )
    analyze_parser.add_argument("input", help="Path to the formatter-generated *-raw.jsonl trace.")
    analyze_parser.add_argument(
        "--release-input",
        default=None,
        help="Path to the formatter-generated release .jsonl with hash_ids. Defaults to the sibling file without the `-raw` suffix.",
    )
    analyze_parser.add_argument(
        "--dataset-name",
        default=None,
        help="Dataset name used for output paths and figure titles. Defaults to the formatted trace stem.",
    )
    analyze_parser.add_argument(
        "--output-dir",
        default=None,
        help="Explicit analysis output directory. Defaults to outputs/analysis/<dataset>/",
    )
    analyze_parser.add_argument("--output-root", default="outputs/analysis")
    analyze_parser.add_argument(
        "--figure-dir",
        default=None,
        help="Explicit figure directory. Defaults to <output-dir>/figures/.",
    )
    analyze_parser.add_argument(
        "--block-size",
        type=int,
        default=256,
        help="Block size for theoretical cache analysis.",
    )
    analyze_parser.add_argument(
        "--segment-mode",
        default="tokenizer",
        choices=["bytes", "tokenizer"],
        help="How to segment prompts for theoretical cache analysis.",
    )
    analyze_parser.add_argument(
        "--tokenizer-path",
        default=None,
        help="Local path or model id for tokenizer mode. Defaults to the local resolved tokenizer path.",
    )
    analyze_parser.add_argument(
        "--tokenizer-batch-size",
        type=int,
        default=64,
        help="Batch size used by tokenizer-based theoretical cache analysis.",
    )
    analyze_parser.add_argument(
        "--model-family",
        default="auto",
        help="Model family for tokenizer/chat-template metadata. Defaults to auto-detect.",
    )
    analyze_parser.add_argument(
        "--model-meta-dir",
        default=None,
        help="Override the base directory that contains model_meta/<provider>/<model>/.",
    )
    analyze_parser.add_argument(
        "--input-length-buckets",
        default=None,
        help="Semicolon-separated input-length bucket thresholds in tokens, such as `32768;87040;131072` or `32Ki;85Ki;128Ki`.",
    )

    parse_parser = subparsers.add_parser("parse", help="Normalize a formatter-generated *-raw.jsonl trace.")
    _add_common_args(parse_parser)
    parse_parser.add_argument(
        "--format",
        default="jsonl",
        choices=["jsonl", "csv", "parquet"],
        help="Normalized output format.",
    )

    features_parser = subparsers.add_parser("features", help="Extract request-level features.")
    _add_common_args(features_parser)

    report_parser = subparsers.add_parser("report", help="Generate markdown and json summary reports.")
    _add_common_args(report_parser)
    report_parser.add_argument(
        "--normalized-format",
        default="jsonl",
        choices=["jsonl", "csv", "parquet"],
        help="Also emit normalized records in this format.",
    )

    study_parser = subparsers.add_parser(
        "study",
        help="Generate data tables and CDF plots for lengths, cache reuse, and tool timing.",
    )
    _add_common_args(study_parser)
    study_parser.add_argument(
        "--normalized-format",
        default="jsonl",
        choices=["jsonl", "csv", "parquet"],
        help="Normalized output format.",
    )
    study_parser.add_argument(
        "--block-size",
        type=int,
        default=256,
        help="Block size for theoretical cache analysis.",
    )
    study_parser.add_argument(
        "--segment-mode",
        default="tokenizer",
        choices=["bytes", "tokenizer"],
        help="How to segment prompts for theoretical cache analysis.",
    )
    study_parser.add_argument(
        "--tokenizer-path",
        default=None,
        help="Local path or model id for tokenizer mode. Defaults to the local resolved tokenizer path.",
    )
    study_parser.add_argument(
        "--tokenizer-batch-size",
        type=int,
        default=64,
        help="Batch size used by tokenizer-based theoretical cache analysis.",
    )
    study_parser.add_argument(
        "--model-family",
        default="auto",
        help="Model family for tokenizer/chat-template metadata. Defaults to auto-detect.",
    )
    study_parser.add_argument(
        "--model-meta-dir",
        default=None,
        help="Override the base directory that contains model_meta/<provider>/<model>/.",
    )
    study_parser.add_argument(
        "--input-length-buckets",
        default=None,
        help="Semicolon-separated input-length bucket thresholds in tokens, such as `32768;87040;131072` or `32Ki;85Ki;128Ki`.",
    )

    resume_parser = subparsers.add_parser(
        "resume-details",
        aliases=["resume-advanced"],
        help="Reuse existing source trace (*-raw.jsonl or legacy normalized.jsonl) + features.csv and compute only detailed analysis outputs.",
    )
    resume_parser.add_argument("input", help="Path to formatter-generated *-raw.jsonl")
    resume_parser.add_argument("features", help="Path to existing features.csv")
    resume_parser.add_argument(
        "--release-input",
        default=None,
        help="Path to the formatter-generated release .jsonl with hash_ids. Defaults to the sibling file without the `-raw` suffix.",
    )
    resume_parser.add_argument(
        "--output-dir",
        required=True,
        help="Existing output directory to receive detailed analysis outputs.",
    )
    resume_parser.add_argument(
        "--block-size",
        type=int,
        default=256,
        help="Block size for theoretical cache analysis.",
    )
    resume_parser.add_argument(
        "--segment-mode",
        default="tokenizer",
        choices=["bytes", "tokenizer"],
        help="How to segment prompts for theoretical cache analysis.",
    )
    resume_parser.add_argument(
        "--tokenizer-path",
        default=None,
        help="Local path or model id for tokenizer mode. Defaults to the local resolved tokenizer path.",
    )
    resume_parser.add_argument(
        "--tokenizer-batch-size",
        type=int,
        default=64,
        help="Batch size used by tokenizer-based theoretical cache analysis.",
    )
    resume_parser.add_argument(
        "--model-family",
        default="auto",
        help="Model family for tokenizer/chat-template metadata. Defaults to auto-detect.",
    )
    resume_parser.add_argument(
        "--model-meta-dir",
        default=None,
        help="Override the base directory that contains model_meta/<provider>/<model>/.",
    )
    resume_parser.add_argument(
        "--limit",
        type=int,
        default=None,
        help="Only process the first N source/features rows. Useful for throughput benchmarking.",
    )
    resume_parser.add_argument(
        "--input-length-buckets",
        default=None,
        help="Semicolon-separated input-length bucket thresholds in tokens, such as `32768;87040;131072` or `32Ki;85Ki;128Ki`.",
    )

    return parser


def _add_common_args(parser):
    parser.add_argument("input", help="Path to the formatter-generated *-raw.jsonl trace.")
    parser.add_argument("--limit", type=int, default=None, help="Limit number of input lines.")
    parser.add_argument(
        "--output-dir",
        default=None,
        help="Output directory. Defaults to outputs/analysis/<input_stem>/",
    )


def resolve_output_dir(input_path, output_dir):
    return Path(output_dir) if output_dir else default_output_dir(input_path)


def _normalize_dataset_name(name: str) -> str:
    text = str(name)
    return text[:-4] if text.endswith("-raw") else text


def _resolve_analysis_output_dir(args):
    dataset_name = args.dataset_name or _normalize_dataset_name(infer_analysis_dataset_name(args.input))
    output_dir = Path(args.output_dir) if args.output_dir else Path(args.output_root) / dataset_name
    figure_dir = Path(args.figure_dir) if args.figure_dir else output_dir / "figures"
    return dataset_name, output_dir, figure_dir


def _resolve_release_input_path(raw_input: str, release_input: str | None) -> Path:
    if release_input:
        return Path(release_input)
    raw_path = Path(raw_input)
    name = raw_path.name
    if name.endswith("-raw.jsonl"):
        candidate = raw_path.with_name(name[:-len("-raw.jsonl")] + ".jsonl")
    else:
        raise ValueError("Expected a formatter-generated *-raw.jsonl input, or pass --release-input explicitly.")
    return candidate


def _resolve_existing_release_input_path(raw_input: str, release_input: str | None) -> Path | None:
    candidate = _resolve_release_input_path(raw_input, release_input)
    if path_looks_like_release_trace(candidate):
        return candidate
    return None


def _existing_base_outputs(output_dir):
    features = output_dir / "features.csv"
    report = output_dir / "report.md"
    if features.exists():
        return {
            "features": features,
            "report": report if report.exists() else None,
        }
    return None


def _existing_detail_outputs(output_dir):
    if not details_outputs_exist(output_dir):
        return None
    return collect_existing_detail_paths(output_dir)


def _stage_message(progress, step: int, total_steps: int, message: str) -> None:
    tqdm.write(f"Stage {step}/{total_steps}: {message}")
    progress.update(1)
    progress.set_postfix(current=message)


def main(argv=None):
    parser = build_parser()
    args = parser.parse_args(argv)

    if args.command == "analyze":
        dataset_name, output_dir, figure_dir = _resolve_analysis_output_dir(args)
        input_length_bucket_thresholds = parse_input_length_bucket_thresholds(args.input_length_buckets)
        release_input_path = _resolve_existing_release_input_path(args.input, args.release_input)
        if release_input_path is None:
            raise FileNotFoundError(
                f"Release trace not found for raw trace {args.input}. "
                "Run `python -m trace_formatter build-release <raw-trace>` first, or pass --release-input."
            )
        total_steps = 4
        progress = tqdm(
            total=total_steps,
            desc="Analyze trace",
            unit="stage",
            dynamic_ncols=True,
        )
        try:
            prepare_result = None
            reusable_base = _existing_base_outputs(output_dir)
            if reusable_base:
                _stage_message(progress, 1, total_steps, "reuse existing features.csv")
                prepare_result = {
                    "features_path": str(reusable_base["features"]),
                    "reused": True,
                }
            else:
                _stage_message(progress, 1, total_steps, "prepare features.csv")
                prepare_result = stream_prepare(args.input, output_dir, show_progress=True)
            reusable_details = _existing_detail_outputs(output_dir)
            if reusable_details:
                _stage_message(progress, 2, total_steps, "reuse existing details/")
                advanced_paths = reusable_details
            else:
                _stage_message(
                    progress,
                    2,
                    total_steps,
                    "detailed analysis: request metrics, tool/session stats, kvcache stats",
                )
                advanced_paths = run_advanced_from_existing(
                    args.input,
                    release_input_path,
                    prepare_result["features_path"],
                    output_dir,
                    input_length_bucket_thresholds=input_length_bucket_thresholds,
                    show_progress=True,
                )
            _stage_message(progress, 3, total_steps, "reporting: summary.json, report.md, analysis_snapshot.json")
            report_result = write_reports(
                features_path=prepare_result["features_path"],
                output_dir=output_dir,
                pipeline_summary={
                    "dataset_name": dataset_name,
                    "formatted_path": str(Path(args.input)),
                    "release_path": str(release_input_path),
                    **{key: str(value) for key, value in advanced_paths.items()},
                },
            )
            _stage_message(
                progress,
                4,
                total_steps,
                "figures: 13 approved request/session/tool/kvcache plots",
            )
            figure_result = render_figures(
                analysis_dir=output_dir,
                fig_dir=figure_dir,
                dataset_title=dataset_name,
                show_progress=True,
            )
        finally:
            progress.close()
        print(
            json.dumps(
                {
                    "dataset_name": dataset_name,
                    "formatted_path": str(Path(args.input)),
                    "output_dir": str(output_dir),
                    "prepare": prepare_result,
                    "details": {key: str(value) for key, value in advanced_paths.items()},
                    "report": report_result,
                    "figures": figure_result,
                    "release_path": str(release_input_path),
                },
                ensure_ascii=False,
                indent=2,
            )
        )
        return 0

    if args.command in {"resume-details", "resume-advanced"}:
        input_length_bucket_thresholds = parse_input_length_bucket_thresholds(args.input_length_buckets)
        release_input_path = _resolve_existing_release_input_path(args.input, args.release_input)
        if release_input_path is None:
            raise FileNotFoundError(
                f"Release trace not found for raw trace {args.input}. "
                "Run `python -m trace_formatter build-release <raw-trace>` first, or pass --release-input."
            )
        paths = run_advanced_from_existing(
            args.input,
            release_input_path,
            args.features,
            args.output_dir,
            input_length_bucket_thresholds=input_length_bucket_thresholds,
            show_progress=True,
            limit=args.limit,
        )
        for path in paths.values():
            print(path)
        return 0

    output_dir = resolve_output_dir(args.input, args.output_dir)
    if args.command == "study" and args.limit is None:
        input_length_bucket_thresholds = parse_input_length_bucket_thresholds(args.input_length_buckets)
        reusable = _existing_base_outputs(output_dir)
        if reusable:
            release_input_path = _resolve_existing_release_input_path(args.input, None)
            if release_input_path is not None:
                paths = _existing_detail_outputs(output_dir)
                if paths is None:
                    paths = run_advanced_from_existing(
                        args.input,
                        release_input_path,
                        reusable["features"],
                        output_dir,
                        input_length_bucket_thresholds=input_length_bucket_thresholds,
                        show_progress=True,
                    )
                for path in paths.values():
                    print(path)
                return 0

    show_progress = args.command == "study"
    records = load_records(
        args.input,
        limit=args.limit,
        show_progress=show_progress,
        progress_desc="Load trace",
    )

    if args.command == "parse":
        path = write_normalized(records, output_dir, output_format=args.format)
        print(path)
        return 0

    features = compute_features(records)
    if args.command == "features":
        path = write_features(features, output_dir)
        print(path)
        return 0

    if args.command == "study":
        input_length_bucket_thresholds = parse_input_length_bucket_thresholds(args.input_length_buckets)
        paths = run_study(
            records,
            output_dir,
            normalized_format=args.normalized_format,
            source_path=args.input,
            block_size=args.block_size,
            segment_mode=args.segment_mode,
            tokenizer_path=args.tokenizer_path,
            model_family=args.model_family,
            model_meta_dir=args.model_meta_dir,
            input_length_bucket_thresholds=input_length_bucket_thresholds,
            show_progress=show_progress,
            tokenizer_batch_size=args.tokenizer_batch_size,
        )
        for path in paths.values():
            print(path)
        return 0

    normalized_path = write_normalized(records, output_dir, output_format=args.normalized_format)
    features_path = write_features(features, output_dir)
    summary_path, report_path = write_report(records, features, output_dir)
    print(normalized_path)
    print(features_path)
    print(summary_path)
    print(report_path)
    return 0


if __name__ == "__main__":
    raise SystemExit(main())
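A small sketch (not part of this commit) of the `-raw` naming convention that `_resolve_release_input_path` relies on: a formatter-generated `<name>-raw.jsonl` trace is expected to sit next to a release trace named `<name>.jsonl`, otherwise `--release-input` must be passed explicitly. The paths here are placeholders.

# Sketch (not in this commit): the sibling-path rule used by _resolve_release_input_path.
from pathlib import Path

raw = Path("outputs/demo-raw.jsonl")          # hypothetical raw trace
release = raw.with_name(raw.name[:-len("-raw.jsonl")] + ".jsonl")
assert release == Path("outputs/demo.jsonl")  # otherwise pass --release-input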
trace_analyzer/features.py  (new file, 117 lines)
@@ -0,0 +1,117 @@
from collections import Counter
from dataclasses import asdict

from .helpers import percentile, safe_div
from .models import TraceFeatures

LONG_CONTEXT_THRESHOLD = 32000
HIGH_CACHE_THRESHOLD = 0.8
TOOL_BURST_THRESHOLD = 4
TOOL_LOOP_THRESHOLD = 3


def _transition_count(roles, left, right):
    return sum(1 for current, nxt in zip(roles, roles[1:]) if current == left and nxt == right)


def _tool_bursts(roles):
    bursts = []
    current = 0
    for role in roles:
        if role == "tool":
            current += 1
        elif current:
            bursts.append(current)
            current = 0
    if current:
        bursts.append(current)
    return bursts


def compute_features(records):
    features = []
    for record in records:
        role_counts = Counter(record.role_sequence)
        bursts = _tool_bursts(record.role_sequence)
        input_tokens = record.usage.input_tokens
        output_tokens = record.usage.output_tokens
        cached_tokens = record.usage.cached_tokens
        latency_ms = record.meta.total_cost_time_ms
        cache_hit_ratio = safe_div(cached_tokens, input_tokens)
        tool_to_tool_count = _transition_count(record.role_sequence, "tool", "tool")
        feature = TraceFeatures(
            request_id=record.meta.request_id,
            session_id=record.meta.session_id,
            model=record.meta.request_model,
            status_code=record.meta.status_code,
            time=record.meta.time,
            message_count=len(record.messages),
            conversation_depth=len(record.messages),
            declared_tool_count=len(record.declared_tools),
            assistant_msg_count=role_counts.get("assistant", 0),
            tool_msg_count=role_counts.get("tool", 0),
            user_msg_count=role_counts.get("user", 0),
            system_msg_count=role_counts.get("system", 0),
            assistant_to_tool_count=_transition_count(record.role_sequence, "assistant", "tool"),
            tool_to_assistant_count=_transition_count(record.role_sequence, "tool", "assistant"),
            tool_to_tool_count=tool_to_tool_count,
            assistant_to_user_count=_transition_count(record.role_sequence, "assistant", "user"),
            user_to_assistant_count=_transition_count(record.role_sequence, "user", "assistant"),
            max_consecutive_tool_msgs=max(bursts) if bursts else 0,
            avg_tool_burst_len=safe_div(sum(bursts), len(bursts)) if bursts else 0.0,
            has_tool_loop=1 if tool_to_tool_count > 0 else 0,
            input_tokens=input_tokens,
            output_tokens=output_tokens,
            total_tokens=record.usage.total_tokens,
            reasoning_tokens=record.usage.reasoning_tokens,
            cached_tokens=cached_tokens,
            cache_hit_ratio=cache_hit_ratio,
            uncached_prompt_tokens=max(input_tokens - cached_tokens, 0),
            output_input_ratio=safe_div(output_tokens, input_tokens),
            latency_ms=latency_ms,
            ms_per_input_token=safe_div(latency_ms, input_tokens),
            ms_per_output_token=safe_div(latency_ms, output_tokens),
            long_context=1 if input_tokens >= LONG_CONTEXT_THRESHOLD else 0,
            high_cache=1 if cache_hit_ratio >= HIGH_CACHE_THRESHOLD else 0,
            tool_burst_alert=1 if (max(bursts) if bursts else 0) >= TOOL_BURST_THRESHOLD else 0,
            tool_loop_alert=1 if tool_to_tool_count >= TOOL_LOOP_THRESHOLD else 0,
        )
        feature.pattern_labels = base_pattern_labels(feature)
        features.append(feature)

    apply_batch_thresholds(features)
    return features


def base_pattern_labels(feature):
    labels = []
    if feature.tool_msg_count == 0 and feature.declared_tool_count == 0:
        labels.append("single-shot")
    if feature.tool_msg_count > 0 and feature.tool_msg_count >= feature.assistant_msg_count:
        labels.append("tool-heavy")
    if feature.max_consecutive_tool_msgs >= TOOL_BURST_THRESHOLD:
        labels.append("tool-burst")
    if feature.cache_hit_ratio >= HIGH_CACHE_THRESHOLD:
        labels.append("cache-efficient")
    if feature.cache_hit_ratio <= 0.1:
        labels.append("cache-cold")
    return labels


def apply_batch_thresholds(features):
    if not features:
        return
    latency_p90 = percentile([feature.latency_ms for feature in features], 0.9)
    for feature in features:
        feature.slow_request = 1 if feature.latency_ms >= latency_p90 else 0
        if feature.slow_request and feature.high_cache:
            feature.pattern_labels.append("slow-despite-cache")
        if feature.input_tokens >= LONG_CONTEXT_THRESHOLD and feature.cache_hit_ratio <= 0.1:
            feature.pattern_labels.append("long-context-no-cache")
        feature.pattern_labels = sorted(set(feature.pattern_labels))


def feature_to_row(feature):
    row = asdict(feature)
    row["pattern_labels"] = ";".join(feature.pattern_labels)
    return row
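A minimal sketch (not part of this commit) of how one record flows through `compute_features`. It assumes `TraceFeatures` in `models.py` also declares the remaining fields referenced above (`uncached_prompt_tokens`, `pattern_labels`, `slow_request`, and so on), and every value below is invented for illustration.

# Sketch (not in this commit); all values are made up.
from trace_analyzer.features import compute_features, feature_to_row
from trace_analyzer.models import RequestMeta, TraceRecord, UsageStats

meta = RequestMeta(
    provider="demo", line_number=1, request_id="req-1", session_id="sess-1",
    request_model="demo-model", time="2024-01-01T00:00:00Z", status_code="200",
    status_name="OK", request_ready_time_ms=0, request_end_time_ms=1200,
    total_cost_time_ms=1200,
)
record = TraceRecord(
    meta=meta,
    role_sequence=["system", "user", "assistant", "tool", "tool", "assistant"],
    usage=UsageStats(input_tokens=40000, output_tokens=800, total_tokens=40800,
                     cached_tokens=36000),
)
features = compute_features([record])
# With these numbers the labels come out as:
# "cache-efficient;slow-despite-cache;tool-heavy"
print(feature_to_row(features[0])["pattern_labels"])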
trace_analyzer/figures.py  (new file, 809 lines)
@@ -0,0 +1,809 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import csv
|
||||
import json
|
||||
from collections import Counter, defaultdict
|
||||
from pathlib import Path
|
||||
|
||||
import matplotlib
|
||||
import numpy as np
|
||||
from tqdm.auto import tqdm
|
||||
|
||||
matplotlib.use("Agg")
|
||||
import matplotlib.pyplot as plt
|
||||
from matplotlib.ticker import MaxNLocator, MultipleLocator
|
||||
|
||||
from trace_analyzer.helpers import percentile, safe_float, safe_int
|
||||
from trace_analyzer.layout import resolve_details_dir
|
||||
|
||||
|
||||
PALETTE = {
|
||||
"blue": "#2B6CB0",
|
||||
"orange": "#DD6B20",
|
||||
"green": "#2F855A",
|
||||
"red": "#C53030",
|
||||
"purple": "#6B46C1",
|
||||
"gray": "#4A5568",
|
||||
"teal": "#0F766E",
|
||||
"gold": "#B7791F",
|
||||
"pink": "#D53F8C",
|
||||
"grid": "#CBD5E0",
|
||||
}
|
||||
|
||||
FIGURE_STEMS = [
|
||||
"01_input_output_length_cdf",
|
||||
"02_session_turns_cdf",
|
||||
"03_request_length_by_turn",
|
||||
"04_request_trigger_role_pie",
|
||||
"05_tool_call_output_length_cdf",
|
||||
"06_tool_call_latency_cdf",
|
||||
"07_consecutive_tool_call_count_cdf",
|
||||
"08_tool_call_added_context_cdf",
|
||||
"09_kvcache_block_reuse_time_cdf",
|
||||
"10_kvcache_block_lifecycle_cdf",
|
||||
"11_alive_kvcache_blocks_timeline",
|
||||
"12_bucket_kvcache_reuse_ratio",
|
||||
"13_session_cross_bucket_kvcache_miss",
|
||||
]
|
||||
|
||||
|
||||
def _ensure_dir(path: Path) -> None:
|
||||
path.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
|
||||
def _clear_dir_files(path: Path) -> None:
|
||||
path.mkdir(parents=True, exist_ok=True)
|
||||
for child in path.iterdir():
|
||||
if child.is_file():
|
||||
child.unlink()
|
||||
|
||||
|
||||
def _apply_style() -> None:
|
||||
plt.rcParams.update(
|
||||
{
|
||||
"figure.figsize": (8.0, 4.8),
|
||||
"figure.dpi": 600,
|
||||
"savefig.dpi": 600,
|
||||
"font.family": "DejaVu Serif",
|
||||
"font.size": 11,
|
||||
"axes.titlesize": 13,
|
||||
"axes.labelsize": 12,
|
||||
"axes.linewidth": 0.9,
|
||||
"xtick.labelsize": 10,
|
||||
"ytick.labelsize": 10,
|
||||
"legend.fontsize": 10,
|
||||
"legend.frameon": False,
|
||||
}
|
||||
)
|
||||
|
||||
|
||||
def _finalize_axes(ax: plt.Axes, *, grid_axis: str = "y") -> None:
|
||||
ax.spines["top"].set_visible(False)
|
||||
ax.spines["right"].set_visible(False)
|
||||
ax.grid(axis=grid_axis, color=PALETTE["grid"], alpha=0.5, linewidth=0.8)
|
||||
ax.tick_params(axis="both", which="major", length=4, width=0.8)
|
||||
|
||||
|
||||
def _save(fig: plt.Figure, fig_dir: Path, stem: str) -> None:
|
||||
fig.savefig(fig_dir / f"{stem}.png", bbox_inches="tight")
|
||||
plt.close(fig)
|
||||
|
||||
|
||||
def _read_json(path: Path) -> dict:
|
||||
return json.loads(path.read_text(encoding="utf-8"))
|
||||
|
||||
|
||||
def _read_csv_rows(path: Path) -> list[dict]:
|
||||
with path.open("r", encoding="utf-8") as handle:
|
||||
return list(csv.DictReader(handle))
|
||||
|
||||
|
||||
def _load_request_metrics(path: Path) -> list[dict]:
|
||||
rows = []
|
||||
with path.open("r", encoding="utf-8") as handle:
|
||||
for row in csv.DictReader(handle):
|
||||
rows.append(
|
||||
{
|
||||
"request_id": row.get("request_id", ""),
|
||||
"session_id": row.get("session_id", ""),
|
||||
"turn": safe_int(row.get("turn")),
|
||||
"trigger_group": row.get("trigger_group", "") or "unknown",
|
||||
"input_tokens": safe_int(row.get("input_tokens")),
|
||||
"output_tokens": safe_int(row.get("output_tokens")),
|
||||
"request_ready_time_ms": safe_int(row.get("request_ready_time_ms")),
|
||||
"request_end_time_ms": safe_int(row.get("request_end_time_ms")),
|
||||
"input_length_bucket": row.get("input_length_bucket", ""),
|
||||
"theoretical_prompt_unit_length": safe_int(row.get("theoretical_prompt_unit_length")),
|
||||
"theoretical_prefix_hit_blocks": safe_int(row.get("theoretical_prefix_hit_blocks")),
|
||||
"bucketed_theoretical_prefix_hit_blocks": safe_int(
|
||||
row.get("bucketed_theoretical_prefix_hit_blocks")
|
||||
),
|
||||
}
|
||||
)
|
||||
return rows
|
||||
|
||||
|
||||
def _sort_request_rows(rows: list[dict]) -> list[dict]:
|
||||
return sorted(
|
||||
rows,
|
||||
key=lambda row: (
|
||||
row["request_ready_time_ms"],
|
||||
row["turn"],
|
||||
row["request_id"],
|
||||
),
|
||||
)
|
||||
|
||||
|
||||
def _build_session_sequences(request_rows: list[dict]) -> dict[str, list[dict]]:
|
||||
sessions = defaultdict(list)
|
||||
for row in request_rows:
|
||||
sessions[row["session_id"]].append(row)
|
||||
for session_rows in sessions.values():
|
||||
session_rows.sort(
|
||||
key=lambda row: (
|
||||
row["request_ready_time_ms"],
|
||||
row["turn"],
|
||||
row["request_id"],
|
||||
)
|
||||
)
|
||||
return sessions
|
||||
|
||||
|
||||
def _build_tool_round_edges(session_rows_by_id: dict[str, list[dict]]) -> list[dict]:
|
||||
edges = []
|
||||
for session_id, session_rows in session_rows_by_id.items():
|
||||
for previous, current in zip(session_rows, session_rows[1:]):
|
||||
if current["trigger_group"] != "tool":
|
||||
continue
|
||||
edges.append(
|
||||
{
|
||||
"session_id": session_id,
|
||||
"prev_request_id": previous["request_id"],
|
||||
"next_request_id": current["request_id"],
|
||||
"tool_call_output_tokens": previous["output_tokens"],
|
||||
"tool_call_latency_ms": max(
|
||||
current["request_ready_time_ms"] - previous["request_end_time_ms"],
|
||||
0,
|
||||
),
|
||||
"added_context_tokens": max(
|
||||
current["input_tokens"] - previous["output_tokens"],
|
||||
0,
|
||||
),
|
||||
}
|
||||
)
|
||||
return edges
|
||||
|
||||
|
||||
def _ecdf(values: list[float]) -> tuple[np.ndarray, np.ndarray]:
|
||||
arr = np.asarray([value for value in values if value is not None], dtype=float)
|
||||
arr = np.sort(arr)
|
||||
if arr.size == 0:
|
||||
return arr, arr
|
||||
xs, counts = np.unique(arr, return_counts=True)
|
||||
ys = np.cumsum(counts, dtype=float) / arr.size
|
||||
return xs, ys
|
||||
|
||||
|
||||
def _ecdf_from_weighted_rows(rows: list[dict], *, value_key: str, count_key: str) -> tuple[np.ndarray, np.ndarray]:
|
||||
weighted = sorted(
|
||||
(
|
||||
safe_float(row[value_key]),
|
||||
safe_int(row[count_key]),
|
||||
)
|
||||
for row in rows
|
||||
if safe_int(row.get(count_key)) > 0
|
||||
)
|
||||
total = sum(count for _, count in weighted)
|
||||
if total <= 0:
|
||||
return np.asarray([]), np.asarray([])
|
||||
xs = np.asarray([value for value, _ in weighted], dtype=float)
|
||||
ys = np.asarray(np.cumsum([count for _, count in weighted], dtype=float) / total, dtype=float)
|
||||
return xs, ys
|
||||
|
||||
|
||||
def _stats(values: list[float], labels: tuple[str, ...]) -> dict[str, float]:
|
||||
cleaned = [value for value in values if value is not None]
|
||||
if not cleaned:
|
||||
return {label: 0.0 for label in labels}
|
||||
mapping = {"mean": float(np.mean(cleaned))}
|
||||
for label in labels:
|
||||
if label == "mean":
|
||||
continue
|
||||
mapping[label] = percentile(cleaned, int(label[1:]) / 100)
|
||||
return mapping
|
||||
|
||||
|
||||
def _weighted_stats(rows: list[dict], *, value_key: str, count_key: str, labels: tuple[str, ...]) -> dict[str, float]:
|
||||
weighted = sorted(
|
||||
(
|
||||
safe_float(row[value_key]),
|
||||
safe_int(row[count_key]),
|
||||
)
|
||||
for row in rows
|
||||
if safe_int(row.get(count_key)) > 0
|
||||
)
|
||||
total = sum(count for _, count in weighted)
|
||||
if total <= 0:
|
||||
return {label: 0.0 for label in labels}
|
||||
result = {}
|
||||
weighted_sum = sum(value * count for value, count in weighted)
|
||||
result["mean"] = weighted_sum / total
|
||||
for label in labels:
|
||||
if label == "mean":
|
||||
continue
|
||||
target = int(label[1:]) / 100 * total
|
||||
seen = 0
|
||||
value_at_target = weighted[-1][0]
|
||||
for value, count in weighted:
|
||||
seen += count
|
||||
if seen >= target:
|
||||
value_at_target = value
|
||||
break
|
||||
result[label] = value_at_target
|
||||
return result
|
||||
|
||||
|
||||
def _format_stat_text(title: str, stats: dict[str, float], labels: tuple[str, ...]) -> str:
|
||||
parts = [title]
|
||||
for label in labels:
|
||||
value = stats.get(label, 0.0)
|
||||
if abs(value - round(value)) < 1e-6:
|
||||
parts.append(f"{label}={int(round(value))}")
|
||||
else:
|
||||
parts.append(f"{label}={value:.2f}")
|
||||
return " ".join(parts)
|
||||
|
||||
|
||||
def _add_footer(fig: plt.Figure, lines: list[str]) -> None:
|
||||
fig.subplots_adjust(bottom=0.24)
|
||||
y = 0.06
|
||||
for line in lines:
|
||||
fig.text(0.5, y, line, ha="center", va="bottom", fontsize=9.5)
|
||||
y -= 0.035
|
||||
|
||||
|
||||
def _plot_two_series_cdf_with_zoom(
|
||||
fig_dir: Path,
|
||||
*,
|
||||
stem: str,
|
||||
title: str,
|
||||
xlabel: str,
|
||||
first_label: str,
|
||||
first_values: list[float],
|
||||
first_color: str,
|
||||
second_label: str,
|
||||
second_values: list[float],
|
||||
second_color: str,
|
||||
zoom_quantile: float,
|
||||
stats_labels: tuple[str, ...],
|
||||
) -> None:
|
||||
first_xs, first_ys = _ecdf(first_values)
|
||||
second_xs, second_ys = _ecdf(second_values)
|
||||
zoom_max = max(
|
||||
percentile(first_values, zoom_quantile) if first_values else 0.0,
|
||||
percentile(second_values, zoom_quantile) if second_values else 0.0,
|
||||
)
|
||||
|
||||
fig, axes = plt.subplots(1, 2, figsize=(12.4, 4.8))
|
||||
for ax, subtitle in zip(axes, ["Full Range", f"Zoom: <= p{int(zoom_quantile * 100)}"]):
|
||||
ax.step(first_xs, first_ys, where="post", linewidth=2.2, color=first_color, label=first_label)
|
||||
ax.step(second_xs, second_ys, where="post", linewidth=2.2, color=second_color, label=second_label)
|
||||
ax.set_title(subtitle)
|
||||
ax.set_xlabel(xlabel)
|
||||
ax.set_ylabel("CDF")
|
||||
_finalize_axes(ax)
|
||||
axes[1].set_xlim(0, zoom_max if zoom_max > 0 else 1)
|
||||
axes[0].legend(loc="lower right")
|
||||
fig.suptitle(title, y=0.98)
|
||||
_add_footer(
|
||||
fig,
|
||||
[
|
||||
_format_stat_text(first_label, _stats(first_values, stats_labels), stats_labels),
|
||||
_format_stat_text(second_label, _stats(second_values, stats_labels), stats_labels),
|
||||
],
|
||||
)
|
||||
_save(fig, fig_dir, stem)
|
||||
|
||||
|
||||
def _plot_single_cdf(
|
||||
fig_dir: Path,
|
||||
*,
|
||||
stem: str,
|
||||
title: str,
|
||||
xlabel: str,
|
||||
label: str,
|
||||
values: list[float] | None = None,
|
||||
weighted_rows: list[dict] | None = None,
|
||||
weighted_value_key: str | None = None,
|
||||
weighted_count_key: str | None = None,
|
||||
color: str = PALETTE["blue"],
|
||||
zoom_quantile: float | None = None,
|
||||
stats_labels: tuple[str, ...] = ("mean", "p50", "p90", "p95", "p99"),
|
||||
) -> None:
|
||||
values = values or []
|
||||
weighted_rows = weighted_rows or []
|
||||
if weighted_rows:
|
||||
xs, ys = _ecdf_from_weighted_rows(
|
||||
weighted_rows,
|
||||
value_key=weighted_value_key,
|
||||
count_key=weighted_count_key,
|
||||
)
|
||||
stats = _weighted_stats(
|
||||
weighted_rows,
|
||||
value_key=weighted_value_key,
|
||||
count_key=weighted_count_key,
|
||||
labels=stats_labels,
|
||||
)
|
||||
zoom_max = stats.get(f"p{int(zoom_quantile * 100)}", 0.0) if zoom_quantile is not None else 0.0
|
||||
else:
|
||||
xs, ys = _ecdf(values)
|
||||
stats = _stats(values, stats_labels)
|
||||
zoom_max = percentile(values, zoom_quantile) if zoom_quantile is not None and values else 0.0
|
||||
|
||||
panel_count = 2 if zoom_quantile is not None else 1
|
||||
fig, axes = plt.subplots(1, panel_count, figsize=(12.4, 4.8) if panel_count == 2 else (8.2, 4.8))
|
||||
if panel_count == 1:
|
||||
axes = [axes]
|
||||
axes[0].step(xs, ys, where="post", linewidth=2.2, color=color)
|
||||
axes[0].set_title("Full Range")
|
||||
axes[0].set_xlabel(xlabel)
|
||||
axes[0].set_ylabel("CDF")
|
||||
_finalize_axes(axes[0])
|
||||
if panel_count == 2:
|
||||
axes[1].step(xs, ys, where="post", linewidth=2.2, color=color)
|
||||
axes[1].set_title(f"Zoom: <= p{int(zoom_quantile * 100)}")
|
||||
axes[1].set_xlabel(xlabel)
|
||||
axes[1].set_ylabel("CDF")
|
||||
axes[1].set_xlim(0, zoom_max if zoom_max > 0 else 1)
|
||||
_finalize_axes(axes[1])
|
||||
fig.suptitle(title, y=0.98)
|
||||
_add_footer(fig, [_format_stat_text(label, stats, stats_labels)])
|
||||
_save(fig, fig_dir, stem)
|
||||
|
||||
|
||||
def _plot_session_turns_cdf(fig_dir: Path, request_rows: list[dict]) -> None:
|
||||
session_sizes = Counter(row["session_id"] for row in request_rows)
|
||||
values = list(session_sizes.values())
|
||||
xs, ys = _ecdf(values)
|
||||
max_turn = max(values) if values else 1
|
||||
zoom_max = max(int(np.ceil(max_turn * 0.10)), 1)
|
||||
|
||||
fig, axes = plt.subplots(1, 2, figsize=(12.4, 4.8))
|
||||
for ax, subtitle in zip(axes, ["Full Range", f"Zoom: <= {zoom_max} turns (first 10% of max turn)"]):
|
||||
ax.step(xs, ys, where="post", linewidth=2.2, color=PALETTE["green"])
|
||||
ax.set_title(subtitle)
|
||||
ax.set_xlabel("Turns per session")
|
||||
ax.set_ylabel("CDF")
|
||||
_finalize_axes(ax)
|
||||
axes[1].set_xlim(0.5, zoom_max + 0.5)
|
||||
fig.suptitle("Session Turns CDF", y=0.98)
|
||||
_add_footer(
|
||||
fig,
|
||||
[
|
||||
_format_stat_text(
|
||||
"Session turns",
|
||||
_stats(values, ("mean", "p50", "p90", "p95", "p99")),
|
||||
("mean", "p50", "p90", "p95", "p99"),
|
||||
)
|
||||
],
|
||||
)
|
||||
_save(fig, fig_dir, "02_session_turns_cdf")
|
||||
|
||||
|
||||
def _plot_request_length_by_turn(fig_dir: Path, request_rows: list[dict]) -> None:
|
||||
values_by_turn = defaultdict(list)
|
||||
for row in request_rows:
|
||||
if row["turn"] > 0:
|
||||
values_by_turn[row["turn"]].append(row["input_tokens"])
|
||||
turns = sorted(values_by_turn)
|
||||
mean_values = [float(np.mean(values_by_turn[turn])) for turn in turns]
|
||||
p50_values = [percentile(values_by_turn[turn], 0.50) for turn in turns]
|
||||
p99_values = [percentile(values_by_turn[turn], 0.99) for turn in turns]
|
||||
|
||||
fig, ax = plt.subplots(figsize=(8.6, 4.8))
|
||||
ax.plot(turns, mean_values, color=PALETTE["blue"], linewidth=2.0, label="mean")
|
||||
ax.plot(turns, p50_values, color=PALETTE["orange"], linewidth=2.0, label="p50")
|
||||
ax.plot(turns, p99_values, color=PALETTE["red"], linewidth=2.0, label="p99")
|
||||
ax.set_title("Request Input Length by Turn")
|
||||
ax.set_xlabel("Turn")
|
||||
ax.set_ylabel("Input tokens")
|
||||
ax.legend(loc="upper left")
|
||||
ax.xaxis.set_major_locator(MaxNLocator(nbins=12, integer=True))
|
||||
plt.setp(ax.get_xticklabels(), rotation=20, ha="right")
|
||||
_finalize_axes(ax)
|
||||
fig.tight_layout()
|
||||
_save(fig, fig_dir, "03_request_length_by_turn")
|
||||
|
||||
|
||||
def _plot_trigger_role_pie(fig_dir: Path, request_rows: list[dict]) -> None:
|
||||
label_order = ["user", "tool", "assistant"]
|
||||
color_by_label = {
|
||||
"user": PALETTE["orange"],
|
||||
"tool": PALETTE["green"],
|
||||
"assistant": PALETTE["blue"],
|
||||
}
|
||||
counts = Counter(row["trigger_group"] for row in request_rows)
|
||||
labels = [label for label in label_order if counts[label] > 0]
|
||||
values = [counts[label] for label in labels]
|
||||
colors = [color_by_label[label] for label in labels]
|
||||
|
||||
def _autopct(pct):
|
||||
total = sum(values)
|
||||
count = int(round(pct * total / 100.0))
|
||||
return f"{pct:.1f}%\n({count})"
|
||||
|
||||
fig, ax = plt.subplots(figsize=(9.0, 5.8))
|
||||
wedges, _texts, autotexts = ax.pie(
|
||||
values,
|
||||
autopct=_autopct,
|
||||
startangle=90,
|
||||
colors=colors,
|
||||
wedgeprops={"linewidth": 0.8, "edgecolor": "white"},
|
||||
textprops={"fontsize": 9},
|
||||
)
|
||||
for autotext in autotexts:
|
||||
autotext.set_fontsize(8.5)
|
||||
ax.legend(
|
||||
wedges,
|
||||
[f"{label} ({counts[label]:,})" for label in labels],
|
||||
title="Trigger source",
|
||||
loc="center left",
|
||||
bbox_to_anchor=(1.02, 0.5),
|
||||
)
|
||||
ax.set_title("Request Trigger Role Proportion")
|
||||
fig.tight_layout()
|
||||
_save(fig, fig_dir, "04_request_trigger_role_pie")
|
||||
|
||||
|
||||
def _plot_session_gap_cdf(fig_dir: Path, session_rows_by_id: dict[str, list[dict]]) -> None:
|
||||
ready_gaps = []
|
||||
end_ready_gaps = []
|
||||
for session_rows in session_rows_by_id.values():
|
||||
for previous, current in zip(session_rows, session_rows[1:]):
|
||||
ready_gaps.append(max(current["request_ready_time_ms"] - previous["request_ready_time_ms"], 0))
|
||||
end_ready_gaps.append(max(current["request_ready_time_ms"] - previous["request_end_time_ms"], 0))
|
||||
_plot_two_series_cdf_with_zoom(
|
||||
fig_dir,
|
||||
stem="session_inter_request_gap_cdf",
|
||||
title="Session Inter-Request Gap CDF",
|
||||
xlabel="Milliseconds",
|
||||
first_label="ready->ready",
|
||||
first_values=ready_gaps,
|
||||
first_color=PALETTE["purple"],
|
||||
second_label="end->ready",
|
||||
second_values=end_ready_gaps,
|
||||
second_color=PALETTE["gray"],
|
||||
zoom_quantile=0.90,
|
||||
stats_labels=("mean", "p50", "p90", "p95", "p99"),
|
||||
)
|
||||
|
||||
|
||||
def _plot_consecutive_tool_calls_cdf(fig_dir: Path, session_rows_by_id: dict[str, list[dict]]) -> None:
|
||||
values = []
|
||||
for session_rows in session_rows_by_id.values():
|
||||
for index, row in enumerate(session_rows):
|
||||
if row["trigger_group"] != "user":
|
||||
continue
|
||||
count = 0
|
||||
next_index = index + 1
|
||||
while next_index < len(session_rows) and session_rows[next_index]["trigger_group"] == "tool":
|
||||
count += 1
|
||||
next_index += 1
|
||||
values.append(count)
|
||||
_plot_single_cdf(
|
||||
fig_dir,
|
||||
stem="07_consecutive_tool_call_count_cdf",
|
||||
title="Consecutive Tool Calls After One User Input",
|
||||
xlabel="Consecutive tool-triggered rounds",
|
||||
label="Consecutive tool calls",
|
||||
values=values,
|
||||
color=PALETTE["green"],
|
||||
)
|
||||
|
||||
|
||||
def _plot_alive_kvcache_timeline(fig_dir: Path, timeline_rows: list[dict]) -> None:
|
||||
fig, ax = plt.subplots(figsize=(10.2, 4.8))
|
||||
if timeline_rows:
|
||||
base_ts = safe_int(timeline_rows[0]["timestamp_ms"])
|
||||
else:
|
||||
base_ts = 0
|
||||
xs = [
|
||||
max(safe_int(row["timestamp_ms"]) - base_ts, 0) / 60000.0
|
||||
for row in timeline_rows
|
||||
]
|
||||
ys = [safe_int(row["alive_block_count"]) for row in timeline_rows]
|
||||
ax.step(xs, ys, where="post", color=PALETTE["purple"], linewidth=1.8)
|
||||
ax.set_title("Alive KV-Cache Blocks Over Time")
|
||||
ax.set_xlabel("Elapsed time (minutes)")
|
||||
ax.set_ylabel("Alive block count")
|
||||
ax.xaxis.set_major_locator(MultipleLocator(10))
|
||||
plt.setp(ax.get_xticklabels(), rotation=20, ha="right")
|
||||
_finalize_axes(ax)
|
||||
fig.tight_layout()
|
||||
_save(fig, fig_dir, "11_alive_kvcache_blocks_timeline")
|
||||
|
||||
|
||||
def _plot_bucket_reuse_ratio(fig_dir: Path, request_rows: list[dict]) -> None:
|
||||
by_bucket = defaultdict(lambda: {"prompt_blocks": 0, "reused_blocks": 0})
|
||||
total_prompt_blocks = 0
|
||||
total_reused_blocks = 0
|
||||
for row in request_rows:
|
||||
bucket = row["input_length_bucket"] or "unknown"
|
||||
prompt_blocks = row["theoretical_prompt_unit_length"]
|
||||
reused_blocks = row["bucketed_theoretical_prefix_hit_blocks"]
|
||||
by_bucket[bucket]["prompt_blocks"] += prompt_blocks
|
||||
by_bucket[bucket]["reused_blocks"] += reused_blocks
|
||||
total_prompt_blocks += prompt_blocks
|
||||
total_reused_blocks += row["theoretical_prefix_hit_blocks"]
|
||||
|
||||
labels = list(by_bucket)
|
||||
ratios = [
|
||||
(by_bucket[label]["reused_blocks"] / by_bucket[label]["prompt_blocks"])
|
||||
if by_bucket[label]["prompt_blocks"]
|
||||
else 0.0
|
||||
for label in labels
|
||||
]
|
||||
reused_counts = [by_bucket[label]["reused_blocks"] for label in labels]
|
||||
labels.append("Overall")
|
||||
ratios.append((total_reused_blocks / total_prompt_blocks) if total_prompt_blocks else 0.0)
|
||||
reused_counts.append(total_reused_blocks)
|
||||
|
||||
fig, ax = plt.subplots(figsize=(9.2, 4.8))
|
||||
bars = ax.bar(
|
||||
labels,
|
||||
ratios,
|
||||
color=[PALETTE["blue"], PALETTE["orange"], PALETTE["green"], PALETTE["purple"], PALETTE["teal"]][: len(labels)],
|
||||
width=0.68,
|
||||
edgecolor="white",
|
||||
linewidth=0.8,
|
||||
)
|
||||
for bar, ratio, reused_count in zip(bars, ratios, reused_counts):
|
||||
ax.text(
|
||||
bar.get_x() + bar.get_width() / 2,
|
||||
ratio + max(ratios + [0.0]) * 0.03 + 1e-9,
|
||||
f"{ratio:.2%}\nreused={reused_count:,}",
|
||||
ha="center",
|
||||
va="bottom",
|
||||
fontsize=8.8,
|
||||
)
|
||||
ax.set_title("Bucketed KV-Cache Reuse Ratio vs Global Reuse Ratio")
|
||||
ax.set_xlabel("Input-length bucket")
|
||||
ax.set_ylabel("Reuse ratio")
|
||||
ax.set_ylim(0, max(ratios + [0.0]) * 1.25 + 1e-9)
|
||||
_finalize_axes(ax)
|
||||
fig.tight_layout()
|
||||
_save(fig, fig_dir, "12_bucket_kvcache_reuse_ratio")
|
||||
|
||||
|
||||
def _plot_session_cross_bucket_miss(fig_dir: Path, rows: list[dict]) -> None:
|
||||
labels = [row["bucket"] for row in rows]
|
||||
miss_ratios = [safe_float(row["cross_bucket_edge_fraction"]) for row in rows]
|
||||
loss_ratios = [safe_float(row["reduced_reused_blocks_ratio"]) for row in rows]
|
||||
miss_blocks = [safe_int(row["cross_bucket_shared_prefix_units_sum"]) for row in rows]
|
||||
x = np.arange(len(labels))
|
||||
width = 0.36
|
||||
|
||||
fig, ax = plt.subplots(figsize=(9.2, 4.8))
|
||||
left = ax.bar(x - width / 2, miss_ratios, width=width, color=PALETTE["red"], label="cross-bucket miss ratio")
|
||||
right = ax.bar(
|
||||
x + width / 2,
|
||||
loss_ratios,
|
||||
width=width,
|
||||
color=PALETTE["gold"],
|
||||
label="reduced reused blocks / bucket reuse",
|
||||
)
|
||||
y_pad = max(miss_ratios + loss_ratios + [0.0]) * 0.03 + 1e-9
|
||||
for bar, value, count in zip(left, miss_ratios, miss_blocks):
|
||||
ax.text(
|
||||
bar.get_x() + bar.get_width() / 2,
|
||||
value + y_pad,
|
||||
f"{value:.2%}\nmiss={count:,}",
|
||||
ha="center",
|
||||
va="bottom",
|
||||
fontsize=8.8,
|
||||
)
|
||||
for bar, value in zip(right, loss_ratios):
|
||||
ax.text(
|
||||
bar.get_x() + bar.get_width() / 2,
|
||||
value + y_pad,
|
||||
f"{value:.2%}",
|
||||
ha="center",
|
||||
va="bottom",
|
||||
fontsize=8.8,
|
||||
)
|
||||
ax.set_xticks(x)
|
||||
ax.set_xticklabels(labels)
|
||||
ax.set_title("Session Cross-Bucket KV-Cache Miss and Reuse Loss")
|
||||
ax.set_xlabel("Child bucket")
|
||||
ax.set_ylabel("Ratio")
|
||||
ax.legend(loc="upper left")
|
||||
ax.set_ylim(0, max(miss_ratios + loss_ratios + [0.0]) * 1.25 + 1e-9)
|
||||
_finalize_axes(ax)
|
||||
fig.tight_layout()
|
||||
_save(fig, fig_dir, "13_session_cross_bucket_kvcache_miss")
|
||||
|
||||
|
||||
def _write_manifest(fig_dir: Path, manifest: dict) -> None:
|
||||
(fig_dir / "manifest.json").write_text(json.dumps(manifest, ensure_ascii=False, indent=2), encoding="utf-8")
|
||||
|
||||
|
||||
def _write_readme(fig_dir: Path, dataset_title: str) -> None:
|
||||
lines = [
|
||||
f"# {dataset_title}",
|
||||
"",
|
||||
"This directory contains the PNG figures rendered from `details/` data.",
|
||||
"",
|
||||
"Figures:",
|
||||
]
|
||||
for stem in FIGURE_STEMS:
|
||||
lines.append(f"- `{stem}.png`")
|
||||
lines.append("- `session_inter_request_gap_cdf.png`")
|
||||
(fig_dir / "README.md").write_text("\n".join(lines) + "\n", encoding="utf-8")
|
||||
|
||||
|
||||
def render_figures(
|
||||
*,
|
||||
analysis_dir: str | Path,
|
||||
fig_dir: str | Path,
|
||||
dataset_title: str,
|
||||
show_progress: bool = False,
|
||||
) -> dict:
|
||||
analysis_root = Path(analysis_dir)
|
||||
fig_root = Path(fig_dir)
|
||||
details_root = resolve_details_dir(analysis_root)
|
||||
_clear_dir_files(fig_root)
|
||||
_apply_style()
|
||||
|
||||
request_rows = _load_request_metrics(details_root / "request_metrics.csv")
|
||||
request_rows = _sort_request_rows(request_rows)
|
||||
session_rows_by_id = _build_session_sequences(request_rows)
|
||||
tool_round_edges = _build_tool_round_edges(session_rows_by_id)
|
||||
reuse_gap_rows = _read_csv_rows(details_root / "theoretical_block_reuse_gaps.csv")
|
||||
block_lifetime_rows = _read_csv_rows(details_root / "theoretical_block_lifetimes.csv")
|
||||
timeline_rows = _read_csv_rows(details_root / "theoretical_alive_block_timeline.csv")
|
||||
session_bucket_rows = _read_csv_rows(details_root / "session_bucket_boundary_miss.csv")
|
||||
details_summary = _read_json(details_root / "details_summary.json")
|
||||
|
||||
progress = tqdm(
|
||||
total=len(FIGURE_STEMS) + 1,
|
||||
desc="Render figures",
|
||||
unit="artifact",
|
||||
dynamic_ncols=True,
|
||||
disable=not show_progress,
|
||||
)
|
||||
|
||||
if show_progress:
|
||||
progress.set_postfix(current="01_input_output_length_cdf")
|
||||
_plot_two_series_cdf_with_zoom(
|
||||
fig_root,
|
||||
stem="01_input_output_length_cdf",
|
||||
title="Input / Output Length CDF",
|
||||
xlabel="Tokens",
|
||||
first_label="Input",
|
||||
first_values=[row["input_tokens"] for row in request_rows],
|
||||
first_color=PALETTE["blue"],
|
||||
second_label="Output",
|
||||
second_values=[row["output_tokens"] for row in request_rows],
|
||||
second_color=PALETTE["orange"],
|
||||
zoom_quantile=0.80,
|
||||
stats_labels=("mean", "p50", "p80", "p90", "p95", "p99"),
|
||||
)
|
||||
if show_progress:
|
||||
progress.update(1)
|
||||
progress.set_postfix(current="02_session_turns_cdf")
|
||||
_plot_session_turns_cdf(fig_root, request_rows)
|
||||
if show_progress:
|
||||
progress.update(1)
|
||||
progress.set_postfix(current="03_request_length_by_turn")
|
||||
_plot_request_length_by_turn(fig_root, request_rows)
|
||||
if show_progress:
|
||||
progress.update(1)
|
||||
progress.set_postfix(current="04_request_trigger_role_pie")
|
||||
_plot_trigger_role_pie(fig_root, request_rows)
|
||||
if show_progress:
|
||||
progress.update(1)
|
||||
progress.set_postfix(current="05_tool_call_output_length_cdf")
|
||||
_plot_single_cdf(
|
||||
fig_root,
|
||||
stem="05_tool_call_output_length_cdf",
|
||||
title="Tool Call Output Length CDF",
|
||||
xlabel="Output tokens",
|
||||
label="Tool-call output length",
|
||||
values=[row["tool_call_output_tokens"] for row in tool_round_edges],
|
||||
color=PALETTE["teal"],
|
||||
zoom_quantile=0.90,
|
||||
)
|
||||
if show_progress:
|
||||
progress.update(1)
|
||||
progress.set_postfix(current="06_tool_call_latency_cdf")
|
||||
_plot_single_cdf(
|
||||
fig_root,
|
||||
stem="06_tool_call_latency_cdf",
|
||||
title="Tool Call Latency CDF",
|
||||
xlabel="Milliseconds",
|
||||
label="Tool-call latency",
|
||||
values=[row["tool_call_latency_ms"] for row in tool_round_edges],
|
||||
color=PALETTE["red"],
|
||||
zoom_quantile=0.90,
|
||||
)
|
||||
if show_progress:
|
||||
progress.update(1)
|
||||
progress.set_postfix(current="07_consecutive_tool_call_count_cdf")
|
||||
_plot_consecutive_tool_calls_cdf(fig_root, session_rows_by_id)
|
||||
if show_progress:
|
||||
progress.update(1)
|
||||
progress.set_postfix(current="08_tool_call_added_context_cdf")
|
||||
_plot_single_cdf(
|
||||
fig_root,
|
||||
stem="08_tool_call_added_context_cdf",
|
||||
title="Added Context After Tool Call CDF",
|
||||
xlabel="Added context tokens",
|
||||
label="Added context",
|
||||
values=[row["added_context_tokens"] for row in tool_round_edges],
|
||||
color=PALETTE["purple"],
|
||||
)
|
||||
if show_progress:
|
||||
progress.update(1)
|
||||
progress.set_postfix(current="09_kvcache_block_reuse_time_cdf")
|
||||
_plot_single_cdf(
|
||||
fig_root,
|
||||
stem="09_kvcache_block_reuse_time_cdf",
|
||||
title="KV-Cache Block Reuse Time CDF",
|
||||
xlabel="Milliseconds",
|
||||
label="Reuse time",
|
||||
weighted_rows=reuse_gap_rows,
|
||||
weighted_value_key="reuse_gap_ms",
|
||||
weighted_count_key="count",
|
||||
color=PALETTE["gold"],
|
||||
zoom_quantile=0.90,
|
||||
)
|
||||
if show_progress:
|
||||
progress.update(1)
|
||||
progress.set_postfix(current="10_kvcache_block_lifecycle_cdf")
|
||||
_plot_single_cdf(
|
||||
fig_root,
|
||||
stem="10_kvcache_block_lifecycle_cdf",
|
||||
title="KV-Cache Block Lifecycle CDF",
|
||||
xlabel="Milliseconds",
|
||||
label="Block lifecycle",
|
||||
values=[safe_int(row["lifetime_ms"]) for row in block_lifetime_rows],
|
||||
color=PALETTE["gray"],
|
||||
)
|
||||
if show_progress:
|
||||
progress.update(1)
|
||||
progress.set_postfix(current="11_alive_kvcache_blocks_timeline")
|
||||
_plot_alive_kvcache_timeline(fig_root, timeline_rows)
|
||||
if show_progress:
|
||||
progress.update(1)
|
||||
progress.set_postfix(current="12_bucket_kvcache_reuse_ratio")
|
||||
_plot_bucket_reuse_ratio(fig_root, request_rows)
|
||||
if show_progress:
|
||||
progress.update(1)
|
||||
progress.set_postfix(current="13_session_cross_bucket_kvcache_miss")
|
||||
_plot_session_cross_bucket_miss(fig_root, session_bucket_rows)
|
||||
_plot_session_gap_cdf(fig_root, session_rows_by_id)
|
||||
|
||||
if show_progress:
|
||||
progress.update(1)
|
||||
progress.set_postfix(current="manifest.json + README.md")
|
||||
|
||||
manifest = {
|
||||
"dataset_title": dataset_title,
|
||||
"figure_count": len(FIGURE_STEMS),
|
||||
"analysis_dir": str(analysis_root),
|
||||
"request_count": details_summary.get("request_count", 0),
|
||||
"global_reuse_ratio": details_summary.get("global_reuse_ratio", 0.0),
|
||||
"figures": [f"{stem}.png" for stem in FIGURE_STEMS],
|
||||
"extra_figures": ["session_inter_request_gap_cdf.png"],
|
||||
}
|
||||
_write_manifest(fig_root, manifest)
|
||||
_write_readme(fig_root, dataset_title)
|
||||
if show_progress:
|
||||
progress.update(1)
|
||||
progress.close()
|
||||
|
||||
return {
|
||||
"fig_dir": str(fig_root),
|
||||
"manifest_path": str(fig_root / "manifest.json"),
|
||||
"readme_path": str(fig_root / "README.md"),
|
||||
}
|
||||
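A hedged sketch (not part of this commit) of driving `render_figures` directly; it assumes an analysis directory whose `details/` subfolder already contains the inputs listed above (`request_metrics.csv`, `theoretical_block_reuse_gaps.csv`, `theoretical_block_lifetimes.csv`, `theoretical_alive_block_timeline.csv`, `session_bucket_boundary_miss.csv`, `details_summary.json`), and the paths are placeholders.

# Sketch (not in this commit); directories are placeholders and details/ must already be populated.
from trace_analyzer.figures import render_figures

result = render_figures(
    analysis_dir="outputs/analysis/demo",
    fig_dir="outputs/analysis/demo/figures",
    dataset_title="demo",
    show_progress=True,
)
print(result["manifest_path"])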
trace_analyzer/helpers.py  (new file, 78 lines)
@@ -0,0 +1,78 @@
import json
from statistics import mean, median


def parse_jsonish(value):
    """Parse nested JSON strings until a non-string value is reached."""
    current = value
    while isinstance(current, str):
        text = current.strip()
        if not text:
            return current
        try:
            current = json.loads(text)
        except json.JSONDecodeError:
            return current
    return current


def safe_int(value, default=0):
    if value is None or value == "":
        return default
    try:
        return int(value)
    except (TypeError, ValueError):
        return default


def safe_float(value, default=0.0):
    if value is None or value == "":
        return default
    try:
        return float(value)
    except (TypeError, ValueError):
        return default


def percentile(values, pct):
    if not values:
        return 0.0
    ordered = sorted(values)
    if len(ordered) == 1:
        return float(ordered[0])
    rank = pct * (len(ordered) - 1)
    low = int(rank)
    high = min(low + 1, len(ordered) - 1)
    fraction = rank - low
    return ordered[low] + (ordered[high] - ordered[low]) * fraction


def series_stats(values):
    cleaned = [v for v in values if v is not None]
    if not cleaned:
        return {
            "count": 0,
            "min": 0,
            "max": 0,
            "mean": 0.0,
            "median": 0.0,
            "p90": 0.0,
        }
    return {
        "count": len(cleaned),
        "min": min(cleaned),
        "max": max(cleaned),
        "mean": mean(cleaned),
        "median": median(cleaned),
        "p90": percentile(cleaned, 0.9),
    }


def safe_div(numerator, denominator):
    if not denominator:
        return 0.0
    return numerator / denominator


def compact_json(data):
    return json.dumps(data, ensure_ascii=False, separators=(",", ":"))
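A short worked sketch (not part of this commit) of the linear interpolation used by `percentile` and the None-tolerant summaries returned by `series_stats`:

# Sketch (not in this commit): percentile interpolates between order statistics.
from trace_analyzer.helpers import percentile, safe_div, series_stats

print(percentile([1, 2, 3, 4, 5], 0.9))   # rank 3.6 -> 4 + 0.6 * (5 - 4) = 4.6
print(series_stats([10, 20, None, 40]))   # count=3, mean~23.33, median=20, p90=36.0
print(safe_div(5, 0))                     # 0.0 rather than ZeroDivisionError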
trace_analyzer/layout.py  (new file, 76 lines)
@@ -0,0 +1,76 @@
from __future__ import annotations

from pathlib import Path
import json


DETAILS_DIR_NAME = "details"
LEGACY_DETAILS_DIR_NAME = "advanced"
DETAILS_SUMMARY_FILENAME = "details_summary.json"
LEGACY_DETAILS_SUMMARY_FILENAME = "advanced_summary.json"


def preferred_details_dir(output_dir: str | Path) -> Path:
    return Path(output_dir) / DETAILS_DIR_NAME


def legacy_details_dir(output_dir: str | Path) -> Path:
    return Path(output_dir) / LEGACY_DETAILS_DIR_NAME


def resolve_existing_details_dir(output_dir: str | Path) -> Path | None:
    preferred = preferred_details_dir(output_dir)
    if _details_dir_has_outputs(preferred):
        return preferred
    legacy = legacy_details_dir(output_dir)
    if _details_dir_has_outputs(legacy):
        return legacy
    if preferred.exists():
        return preferred
    if legacy.exists():
        return legacy
    return None


def resolve_details_dir(output_dir: str | Path) -> Path:
    existing = resolve_existing_details_dir(output_dir)
    if existing is not None:
        return existing
    return preferred_details_dir(output_dir)


def resolve_details_summary_path(output_dir: str | Path) -> Path | None:
    for details_dir in [preferred_details_dir(output_dir), legacy_details_dir(output_dir)]:
        for filename in [DETAILS_SUMMARY_FILENAME, LEGACY_DETAILS_SUMMARY_FILENAME]:
            path = details_dir / filename
            if path.exists():
                return path
    return None


def details_outputs_exist(output_dir: str | Path) -> bool:
    return _details_dir_has_outputs(preferred_details_dir(output_dir)) or _details_dir_has_outputs(
        legacy_details_dir(output_dir)
    )


def _details_dir_has_outputs(details_dir: Path) -> bool:
    if not details_dir.exists():
        return False
    required_files = [
        details_dir / "request_metrics.csv",
        details_dir / "theoretical_block_reuse_gaps.csv",
        details_dir / "theoretical_block_lifetimes.csv",
        details_dir / "theoretical_alive_block_timeline.csv",
        details_dir / "session_bucket_boundary_miss.csv",
    ]
    if not all(path.exists() for path in required_files):
        return False
    summary_path = details_dir / DETAILS_SUMMARY_FILENAME
    if not summary_path.exists():
        return False
    try:
        payload = json.loads(summary_path.read_text(encoding="utf-8"))
    except Exception:
        return False
    return int(payload.get("schema_version", 0) or 0) >= 3
94
trace_analyzer/models.py
Normal file
94
trace_analyzer/models.py
Normal file
@@ -0,0 +1,94 @@
|
||||
from dataclasses import dataclass, field
|
||||
|
||||
|
||||
@dataclass
|
||||
class MessageEvent:
|
||||
role: str
|
||||
content_type: str
|
||||
text_len: int
|
||||
has_cache_control: bool = False
|
||||
item_count: int = 0
|
||||
|
||||
|
||||
@dataclass
|
||||
class ToolSpec:
|
||||
name: str
|
||||
tool_type: str
|
||||
|
||||
|
||||
@dataclass
|
||||
class UsageStats:
|
||||
input_tokens: int = 0
|
||||
output_tokens: int = 0
|
||||
total_tokens: int = 0
|
||||
reasoning_tokens: int = 0
|
||||
cached_tokens: int = 0
|
||||
|
||||
|
||||
@dataclass
|
||||
class RequestMeta:
|
||||
provider: str
|
||||
line_number: int
|
||||
request_id: str
|
||||
session_id: str
|
||||
request_model: str
|
||||
time: str
|
||||
status_code: str
|
||||
status_name: str
|
||||
request_ready_time_ms: int
|
||||
request_end_time_ms: int
|
||||
total_cost_time_ms: int
|
||||
backend_first_request_time_ms: int = 0
|
||||
backend_first_response_time_ms: int = 0
|
||||
|
||||
|
||||
@dataclass
|
||||
class TraceRecord:
|
||||
meta: RequestMeta
|
||||
canonical_prompt: str = ""
|
||||
messages: list[MessageEvent] = field(default_factory=list)
|
||||
role_sequence: list[str] = field(default_factory=list)
|
||||
declared_tools: list[ToolSpec] = field(default_factory=list)
|
||||
usage: UsageStats = field(default_factory=UsageStats)
|
||||
raw_messages: list[dict] = field(default_factory=list)
|
||||
|
||||
|
||||
@dataclass
|
||||
class TraceFeatures:
|
||||
request_id: str
|
||||
session_id: str
|
||||
model: str
|
||||
status_code: str
|
||||
time: str
|
||||
message_count: int
|
||||
conversation_depth: int
|
||||
declared_tool_count: int
|
||||
assistant_msg_count: int
|
||||
tool_msg_count: int
|
||||
user_msg_count: int
|
||||
system_msg_count: int
|
||||
assistant_to_tool_count: int
|
||||
tool_to_assistant_count: int
|
||||
tool_to_tool_count: int
|
||||
assistant_to_user_count: int
|
||||
user_to_assistant_count: int
|
||||
max_consecutive_tool_msgs: int
|
||||
avg_tool_burst_len: float
|
||||
has_tool_loop: int
|
||||
input_tokens: int
|
||||
output_tokens: int
|
||||
total_tokens: int
|
||||
reasoning_tokens: int
|
||||
cached_tokens: int
|
||||
cache_hit_ratio: float
|
||||
uncached_prompt_tokens: int
|
||||
output_input_ratio: float
|
||||
latency_ms: int
|
||||
ms_per_input_token: float
|
||||
ms_per_output_token: float
|
||||
long_context: int
|
||||
high_cache: int
|
||||
tool_burst_alert: int
|
||||
tool_loop_alert: int
|
||||
slow_request: int = 0
|
||||
pattern_labels: list[str] = field(default_factory=list)
|
||||
230
trace_analyzer/parser.py
Normal file
230
trace_analyzer/parser.py
Normal file
@@ -0,0 +1,230 @@
|
||||
import json
|
||||
import os
|
||||
from dataclasses import asdict
|
||||
from pathlib import Path
|
||||
|
||||
import psutil
|
||||
from tqdm.auto import tqdm
|
||||
|
||||
from .helpers import safe_int
|
||||
from .models import MessageEvent, RequestMeta, ToolSpec, TraceRecord, UsageStats
|
||||
|
||||
|
||||
class FormattedAliTraceAdapter:
|
||||
name = "formatted"
|
||||
|
||||
def detect(self, raw):
|
||||
if not isinstance(raw.get("meta"), dict):
|
||||
return False
|
||||
required_keys = {"canonical_prompt", "usage", "message_events", "declared_tools", "role_sequence"}
|
||||
if not required_keys.issubset(raw.keys()):
|
||||
return False
|
||||
schema_version = str(raw.get("schema_version", "")).strip()
|
||||
return bool(schema_version) or "request_id" in raw["meta"]
|
||||
|
||||
def parse_line(self, raw, line_number=0):
|
||||
meta_payload = raw.get("meta", {}) if isinstance(raw.get("meta", {}), dict) else {}
|
||||
usage_payload = raw.get("usage", {}) if isinstance(raw.get("usage", {}), dict) else {}
|
||||
message_events_payload = raw.get("message_events", [])
|
||||
declared_tools_payload = raw.get("declared_tools", [])
|
||||
|
||||
usage = UsageStats(
|
||||
input_tokens=safe_int(usage_payload.get("input_tokens")),
|
||||
output_tokens=safe_int(usage_payload.get("output_tokens")),
|
||||
total_tokens=safe_int(usage_payload.get("total_tokens")),
|
||||
reasoning_tokens=safe_int(usage_payload.get("reasoning_tokens")),
|
||||
cached_tokens=safe_int(usage_payload.get("cached_tokens")),
|
||||
)
|
||||
|
||||
messages = [
|
||||
MessageEvent(
|
||||
role=str(message.get("role", "unknown")),
|
||||
content_type=str(message.get("content_type", "unknown")),
|
||||
text_len=safe_int(message.get("text_len")),
|
||||
has_cache_control=bool(message.get("has_cache_control")),
|
||||
item_count=safe_int(message.get("item_count")),
|
||||
)
|
||||
for message in message_events_payload
|
||||
if isinstance(message, dict)
|
||||
]
|
||||
declared_tools = [
|
||||
ToolSpec(
|
||||
name=str(tool.get("name", "")),
|
||||
tool_type=str(tool.get("tool_type", "function")),
|
||||
)
|
||||
for tool in declared_tools_payload
|
||||
if isinstance(tool, dict)
|
||||
]
|
||||
|
||||
inferred_family = str(meta_payload.get("model_family", "")).strip()
|
||||
inferred_provider = str(meta_payload.get("provider", "")).strip()
|
||||
if not inferred_provider:
|
||||
inferred_provider = inferred_family or self.name
|
||||
|
||||
meta = RequestMeta(
|
||||
provider=inferred_provider,
|
||||
line_number=line_number,
|
||||
request_id=str(meta_payload.get("request_id", "")),
|
||||
session_id=str(meta_payload.get("session_id", "")),
|
||||
request_model=str(meta_payload.get("request_model", "")),
|
||||
time=str(meta_payload.get("time", "")),
|
||||
status_code=str(meta_payload.get("status_code", "")),
|
||||
status_name=str(meta_payload.get("status_name", "")),
|
||||
request_ready_time_ms=safe_int(meta_payload.get("request_ready_time_ms")),
|
||||
request_end_time_ms=safe_int(meta_payload.get("request_end_time_ms")),
|
||||
total_cost_time_ms=safe_int(meta_payload.get("total_cost_time_ms")),
|
||||
backend_first_request_time_ms=safe_int(meta_payload.get("backend_first_request_time_ms")),
|
||||
backend_first_response_time_ms=safe_int(meta_payload.get("backend_first_response_time_ms")),
|
||||
)
|
||||
return TraceRecord(
|
||||
meta=meta,
|
||||
canonical_prompt=str(raw.get("canonical_prompt", "")),
|
||||
messages=messages,
|
||||
role_sequence=[
|
||||
str(role)
|
||||
for role in raw.get("role_sequence", [message.role for message in messages])
|
||||
],
|
||||
declared_tools=declared_tools,
|
||||
usage=usage,
|
||||
raw_messages=[
|
||||
message
|
||||
for message in raw.get("raw_messages", [])
|
||||
if isinstance(message, dict)
|
||||
],
|
||||
)
|
||||
|
||||
|
||||
def _looks_like_release_trace(raw):
|
||||
expected_keys = {"chat_id", "parent_chat_id", "timestamp", "input_length", "output_length", "turn", "hash_ids"}
|
||||
return expected_keys.issubset(raw.keys())
|
||||
|
||||
|
||||
def path_looks_like_release_trace(path):
|
||||
path = Path(path)
|
||||
if not path.exists():
|
||||
return False
|
||||
try:
|
||||
with path.open("r", encoding="utf-8") as handle:
|
||||
for line in handle:
|
||||
line = line.strip()
|
||||
if not line:
|
||||
continue
|
||||
return _looks_like_release_trace(json.loads(line))
|
||||
except Exception:
|
||||
return False
|
||||
return False
|
||||
|
||||
|
||||
def get_adapter(raw):
|
||||
adapter = FormattedAliTraceAdapter()
|
||||
if adapter.detect(raw):
|
||||
return adapter
|
||||
if _looks_like_release_trace(raw):
|
||||
raise ValueError("trace_analyzer currently analyzes formatter-generated *-raw.jsonl, not release hash-id traces.")
|
||||
raise ValueError("trace_analyzer only accepts formatter-generated *-raw.jsonl inputs.")
|
||||
|
||||
|
||||
def _estimate_peak_rss_mb(current_rss_mb, peak_rss_mb, fraction_done):
|
||||
baseline = max(current_rss_mb, peak_rss_mb)
|
||||
headroom = 1.0 + 0.25 * max(0.0, 1.0 - fraction_done)
|
||||
return baseline * headroom
|
||||
|
||||
|
||||
def load_records(path, limit=None, show_progress=False, progress_desc="Load trace"):
|
||||
records = []
|
||||
path = str(path)
|
||||
progress = None
|
||||
process = psutil.Process(os.getpid()) if show_progress else None
|
||||
peak_rss_mb = 0.0
|
||||
total_bytes = os.path.getsize(path) if show_progress else 0
|
||||
if show_progress:
|
||||
progress = tqdm(
|
||||
total=total_bytes,
|
||||
desc=progress_desc,
|
||||
unit="B",
|
||||
unit_scale=True,
|
||||
dynamic_ncols=True,
|
||||
)
|
||||
with open(path, "r", encoding="utf-8") as handle:
|
||||
for line_number, line in enumerate(handle, start=1):
|
||||
if limit is not None and len(records) >= limit:
|
||||
break
|
||||
raw_line = line
|
||||
line = line.strip()
|
||||
if not line:
|
||||
if progress is not None:
|
||||
progress.update(len(raw_line.encode("utf-8")))
|
||||
continue
|
||||
raw = json.loads(line)
|
||||
adapter = get_adapter(raw)
|
||||
try:
|
||||
record = adapter.parse_line(raw, line_number=line_number)
|
||||
except Exception as exc:
|
||||
if progress is not None:
|
||||
progress.close()
|
||||
raise ValueError(f"Failed to parse line {line_number} in {path}: {exc}") from exc
|
||||
records.append(record)
|
||||
if progress is not None:
|
||||
progress.update(len(raw_line.encode("utf-8")))
|
||||
current_rss_mb = process.memory_info().rss / (1024 * 1024)
|
||||
peak_rss_mb = max(peak_rss_mb, current_rss_mb)
|
||||
fraction_done = progress.n / progress.total if progress.total else 0.0
|
||||
progress.set_postfix(
|
||||
records=len(records),
|
||||
rss_mb=f"{current_rss_mb:.0f}",
|
||||
est_peak_mb=f"{_estimate_peak_rss_mb(current_rss_mb, peak_rss_mb, fraction_done):.0f}",
|
||||
)
|
||||
if progress is not None:
|
||||
progress.close()
|
||||
return records
|
||||
|
||||
|
||||
def flatten_record(record):
|
||||
return {
|
||||
"provider": record.meta.provider,
|
||||
"line_number": record.meta.line_number,
|
||||
"request_id": record.meta.request_id,
|
||||
"session_id": record.meta.session_id,
|
||||
"request_model": record.meta.request_model,
|
||||
"time": record.meta.time,
|
||||
"status_code": record.meta.status_code,
|
||||
"status_name": record.meta.status_name,
|
||||
"request_ready_time_ms": record.meta.request_ready_time_ms,
|
||||
"request_end_time_ms": record.meta.request_end_time_ms,
|
||||
"total_cost_time_ms": record.meta.total_cost_time_ms,
|
||||
"backend_first_request_time_ms": record.meta.backend_first_request_time_ms,
|
||||
"backend_first_response_time_ms": record.meta.backend_first_response_time_ms,
|
||||
"message_count": len(record.messages),
|
||||
"role_sequence": ";".join(record.role_sequence),
|
||||
"declared_tool_count": len(record.declared_tools),
|
||||
"declared_tool_names": ";".join(tool.name for tool in record.declared_tools if tool.name),
|
||||
"input_tokens": record.usage.input_tokens,
|
||||
"output_tokens": record.usage.output_tokens,
|
||||
"total_tokens": record.usage.total_tokens,
|
||||
"reasoning_tokens": record.usage.reasoning_tokens,
|
||||
"cached_tokens": record.usage.cached_tokens,
|
||||
}
|
||||
|
||||
|
||||
def record_to_dict(record):
|
||||
return asdict(record)
|
||||
|
||||
|
||||
def infer_analysis_dataset_name(input_path):
|
||||
resolved = Path(input_path)
|
||||
stem = resolved.stem
|
||||
if stem.endswith("-raw"):
|
||||
stem = stem[:-4]
|
||||
|
||||
parent_name = resolved.parent.name
|
||||
model_slug = ""
|
||||
if parent_name.startswith("trace-") and parent_name.endswith("-formatted"):
|
||||
model_slug = parent_name[len("trace-") : -len("-formatted")]
|
||||
|
||||
if model_slug and not stem.startswith(f"{model_slug}-"):
|
||||
return f"{model_slug}-{stem}"
|
||||
return stem
|
||||
|
||||
|
||||
def default_output_dir(input_path):
|
||||
return Path("outputs") / "analysis" / infer_analysis_dataset_name(input_path)
|
||||
221
trace_analyzer/preparation.py
Normal file
221
trace_analyzer/preparation.py
Normal file
@@ -0,0 +1,221 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import csv
|
||||
import json
|
||||
import os
|
||||
from pathlib import Path
|
||||
|
||||
from trace_analyzer.helpers import percentile
|
||||
from trace_analyzer.parser import get_adapter
|
||||
from tqdm.auto import tqdm
|
||||
|
||||
|
||||
def stream_prepare(input_path: str | Path, output_dir: str | Path, *, show_progress: bool = False) -> dict:
|
||||
input_file = Path(input_path)
|
||||
output_root = Path(output_dir)
|
||||
output_root.mkdir(parents=True, exist_ok=True)
|
||||
features_path = output_root / "features.csv"
|
||||
total_bytes = os.path.getsize(input_file) if show_progress and input_file.exists() else 0
|
||||
|
||||
progress = tqdm(
|
||||
total=total_bytes,
|
||||
desc="Prepare features",
|
||||
unit="B",
|
||||
unit_scale=True,
|
||||
dynamic_ncols=True,
|
||||
disable=not show_progress,
|
||||
)
|
||||
try:
|
||||
with input_file.open("r", encoding="utf-8") as input_handle, features_path.open(
|
||||
"w", encoding="utf-8", newline=""
|
||||
) as features_handle:
|
||||
writer: csv.DictWriter | None = None
|
||||
kept_rows = 0
|
||||
for line_number, line in enumerate(input_handle, start=1):
|
||||
stripped = line.strip()
|
||||
if not stripped:
|
||||
if show_progress:
|
||||
progress.update(len(line.encode("utf-8")))
|
||||
continue
|
||||
raw = json.loads(stripped)
|
||||
adapter = get_adapter(raw)
|
||||
record = adapter.parse_line(raw, line_number=line_number)
|
||||
role_sequence = record.role_sequence
|
||||
role_pairs = list(zip(role_sequence, role_sequence[1:]))
|
||||
tool_bursts = _tool_bursts(role_sequence)
|
||||
max_tool_burst = max(tool_bursts) if tool_bursts else 0
|
||||
avg_tool_burst = _safe_div(sum(tool_bursts), len(tool_bursts)) if tool_bursts else 0.0
|
||||
tool_to_tool_count = sum(1 for current, nxt in role_pairs if current == "tool" and nxt == "tool")
|
||||
tool_msg_count = sum(message.role == "tool" for message in record.messages)
|
||||
assistant_msg_count = sum(message.role == "assistant" for message in record.messages)
|
||||
cache_hit_ratio = _safe_div(record.usage.cached_tokens, record.usage.input_tokens)
|
||||
|
||||
feature_row = {
|
||||
"request_id": record.meta.request_id,
|
||||
"session_id": record.meta.session_id,
|
||||
"model": record.meta.request_model,
|
||||
"status_code": record.meta.status_code,
|
||||
"time": record.meta.time,
|
||||
"message_count": len(record.messages),
|
||||
"conversation_depth": len(record.messages),
|
||||
"declared_tool_count": len(record.declared_tools),
|
||||
"assistant_msg_count": assistant_msg_count,
|
||||
"tool_msg_count": tool_msg_count,
|
||||
"user_msg_count": sum(message.role == "user" for message in record.messages),
|
||||
"system_msg_count": sum(message.role == "system" for message in record.messages),
|
||||
"assistant_to_tool_count": sum(
|
||||
1
|
||||
for current, nxt in role_pairs
|
||||
if current == "assistant" and nxt == "tool"
|
||||
),
|
||||
"tool_to_assistant_count": sum(
|
||||
1
|
||||
for current, nxt in role_pairs
|
||||
if current == "tool" and nxt == "assistant"
|
||||
),
|
||||
"tool_to_tool_count": tool_to_tool_count,
|
||||
"assistant_to_user_count": sum(
|
||||
1
|
||||
for current, nxt in role_pairs
|
||||
if current == "assistant" and nxt == "user"
|
||||
),
|
||||
"user_to_assistant_count": sum(
|
||||
1
|
||||
for current, nxt in role_pairs
|
||||
if current == "user" and nxt == "assistant"
|
||||
),
|
||||
"max_consecutive_tool_msgs": max_tool_burst,
|
||||
"avg_tool_burst_len": avg_tool_burst,
|
||||
"has_tool_loop": 1 if tool_to_tool_count > 0 else 0,
|
||||
"input_tokens": record.usage.input_tokens,
|
||||
"output_tokens": record.usage.output_tokens,
|
||||
"total_tokens": record.usage.total_tokens,
|
||||
"reasoning_tokens": record.usage.reasoning_tokens,
|
||||
"cached_tokens": record.usage.cached_tokens,
|
||||
"cache_hit_ratio": cache_hit_ratio,
|
||||
"uncached_prompt_tokens": max(record.usage.input_tokens - record.usage.cached_tokens, 0),
|
||||
"output_input_ratio": _safe_div(record.usage.output_tokens, record.usage.input_tokens),
|
||||
"latency_ms": record.meta.total_cost_time_ms,
|
||||
"ms_per_input_token": _safe_div(record.meta.total_cost_time_ms, record.usage.input_tokens),
|
||||
"ms_per_output_token": _safe_div(record.meta.total_cost_time_ms, record.usage.output_tokens),
|
||||
"long_context": 1 if record.usage.input_tokens >= 32000 else 0,
|
||||
"high_cache": 1 if cache_hit_ratio >= 0.8 else 0,
|
||||
"tool_burst_alert": 1 if max_tool_burst >= 4 else 0,
|
||||
"tool_loop_alert": 1 if tool_to_tool_count >= 3 else 0,
|
||||
"slow_request": 0,
|
||||
"pattern_labels": _pattern_labels(
|
||||
record,
|
||||
cache_hit_ratio=cache_hit_ratio,
|
||||
tool_msg_count=tool_msg_count,
|
||||
assistant_msg_count=assistant_msg_count,
|
||||
max_tool_burst=max_tool_burst,
|
||||
),
|
||||
}
|
||||
if writer is None:
|
||||
writer = csv.DictWriter(features_handle, fieldnames=list(feature_row.keys()))
|
||||
writer.writeheader()
|
||||
writer.writerow(feature_row)
|
||||
kept_rows += 1
|
||||
if show_progress:
|
||||
progress.update(len(line.encode("utf-8")))
|
||||
progress.set_postfix(
|
||||
rows=kept_rows,
|
||||
features=features_path.name,
|
||||
)
|
||||
finally:
|
||||
if show_progress:
|
||||
progress.close()
|
||||
|
||||
if show_progress:
|
||||
tqdm.write("Finalize features.csv: apply slow_request p90 latency threshold")
|
||||
_apply_slow_request_threshold(features_path)
|
||||
return {
|
||||
"features_path": str(features_path),
|
||||
}
|
||||
|
||||
|
||||
def _safe_div(numerator: float, denominator: float) -> float:
|
||||
return (numerator / denominator) if denominator else 0.0
|
||||
|
||||
|
||||
def _tool_bursts(role_sequence: list[str]) -> list[int]:
|
||||
bursts: list[int] = []
|
||||
current = 0
|
||||
for role in role_sequence:
|
||||
if role == "tool":
|
||||
current += 1
|
||||
elif current:
|
||||
bursts.append(current)
|
||||
current = 0
|
||||
if current:
|
||||
bursts.append(current)
|
||||
return bursts
|
||||
|
||||
|
||||
def _max_tool_burst(role_sequence: list[str]) -> int:
|
||||
bursts = _tool_bursts(role_sequence)
|
||||
return max(bursts) if bursts else 0
|
||||
|
||||
|
||||
def _avg_tool_burst(role_sequence: list[str]) -> float:
|
||||
bursts = _tool_bursts(role_sequence)
|
||||
return _safe_div(sum(bursts), len(bursts)) if bursts else 0.0
|
||||
|
||||
|
||||
def _pattern_labels(
|
||||
record,
|
||||
*,
|
||||
cache_hit_ratio: float | None = None,
|
||||
tool_msg_count: int | None = None,
|
||||
assistant_msg_count: int | None = None,
|
||||
max_tool_burst: int | None = None,
|
||||
) -> str:
|
||||
labels: list[str] = []
|
||||
if tool_msg_count is None:
|
||||
tool_msg_count = sum(message.role == "tool" for message in record.messages)
|
||||
if assistant_msg_count is None:
|
||||
assistant_msg_count = sum(message.role == "assistant" for message in record.messages)
|
||||
if cache_hit_ratio is None:
|
||||
cache_hit_ratio = _safe_div(record.usage.cached_tokens, record.usage.input_tokens)
|
||||
if max_tool_burst is None:
|
||||
max_tool_burst = _max_tool_burst(record.role_sequence)
|
||||
if tool_msg_count == 0 and len(record.declared_tools) == 0:
|
||||
labels.append("single-shot")
|
||||
if tool_msg_count > 0 and tool_msg_count >= assistant_msg_count:
|
||||
labels.append("tool-heavy")
|
||||
if max_tool_burst >= 4:
|
||||
labels.append("tool-burst")
|
||||
if cache_hit_ratio >= 0.8:
|
||||
labels.append("cache-efficient")
|
||||
if cache_hit_ratio <= 0.1:
|
||||
labels.append("cache-cold")
|
||||
if record.usage.input_tokens >= 32000 and cache_hit_ratio <= 0.1:
|
||||
labels.append("long-context-no-cache")
|
||||
return ";".join(sorted(set(labels)))
|
||||
|
||||
|
||||
def _apply_slow_request_threshold(features_path: Path) -> None:
|
||||
with features_path.open("r", encoding="utf-8") as handle:
|
||||
latencies = [int(row["latency_ms"]) for row in csv.DictReader(handle)]
|
||||
if not latencies:
|
||||
return
|
||||
latencies.sort()
|
||||
p90_latency = percentile(latencies, 0.9)
|
||||
temp_path = features_path.with_suffix(features_path.suffix + ".tmp")
|
||||
with features_path.open("r", encoding="utf-8") as input_handle, temp_path.open(
|
||||
"w", encoding="utf-8", newline=""
|
||||
) as output_handle:
|
||||
reader = csv.DictReader(input_handle)
|
||||
writer = None
|
||||
for row in reader:
|
||||
slow_request = 1 if int(row["latency_ms"]) >= p90_latency else 0
|
||||
pattern_labels = {label for label in row.get("pattern_labels", "").split(";") if label}
|
||||
row["slow_request"] = str(slow_request)
|
||||
if slow_request and row.get("high_cache") == "1":
|
||||
pattern_labels.add("slow-despite-cache")
|
||||
row["pattern_labels"] = ";".join(sorted(pattern_labels))
|
||||
if writer is None:
|
||||
writer = csv.DictWriter(output_handle, fieldnames=list(row.keys()))
|
||||
writer.writeheader()
|
||||
writer.writerow(row)
|
||||
temp_path.replace(features_path)
|
||||
271
trace_analyzer/report.py
Normal file
271
trace_analyzer/report.py
Normal file
@@ -0,0 +1,271 @@
|
||||
import csv
|
||||
import json
|
||||
from collections import Counter
|
||||
from pathlib import Path
|
||||
|
||||
from .features import feature_to_row
|
||||
from .helpers import series_stats
|
||||
from .parser import flatten_record, record_to_dict
|
||||
|
||||
|
||||
def ensure_output_dir(path):
|
||||
path.mkdir(parents=True, exist_ok=True)
|
||||
return path
|
||||
|
||||
|
||||
def write_jsonl(path, rows):
|
||||
with open(path, "w", encoding="utf-8") as handle:
|
||||
for row in rows:
|
||||
handle.write(json.dumps(row, ensure_ascii=False) + "\n")
|
||||
|
||||
|
||||
def write_csv(path, rows):
|
||||
if not rows:
|
||||
with open(path, "w", encoding="utf-8", newline="") as handle:
|
||||
handle.write("")
|
||||
return
|
||||
fieldnames = list(rows[0].keys())
|
||||
with open(path, "w", encoding="utf-8", newline="") as handle:
|
||||
writer = csv.DictWriter(handle, fieldnames=fieldnames)
|
||||
writer.writeheader()
|
||||
writer.writerows(rows)
|
||||
|
||||
|
||||
def write_parquet(path, rows):
|
||||
try:
|
||||
import pyarrow as pa
|
||||
import pyarrow.parquet as pq
|
||||
except ImportError as exc:
|
||||
raise RuntimeError("Parquet output requires pyarrow to be installed.") from exc
|
||||
table = pa.Table.from_pylist(rows)
|
||||
pq.write_table(table, path)
|
||||
|
||||
|
||||
def write_normalized(records, output_dir, output_format="jsonl"):
|
||||
output_dir = ensure_output_dir(output_dir)
|
||||
rows = [record_to_dict(record) for record in records]
|
||||
if output_format == "jsonl":
|
||||
path = output_dir / "normalized.jsonl"
|
||||
write_jsonl(path, rows)
|
||||
return path
|
||||
if output_format == "csv":
|
||||
path = output_dir / "normalized.csv"
|
||||
write_csv(path, [flatten_record(record) for record in records])
|
||||
return path
|
||||
if output_format == "parquet":
|
||||
path = output_dir / "normalized.parquet"
|
||||
write_parquet(path, rows)
|
||||
return path
|
||||
raise ValueError(f"Unsupported format: {output_format}")
|
||||
|
||||
|
||||
def write_features(features, output_dir):
|
||||
output_dir = ensure_output_dir(output_dir)
|
||||
path = output_dir / "features.csv"
|
||||
write_csv(path, [feature_to_row(feature) for feature in features])
|
||||
return path
|
||||
|
||||
|
||||
def build_summary(records, features):
|
||||
model_counts = Counter(feature.model or "unknown" for feature in features)
|
||||
status_code_counts = Counter(feature.status_code or "unknown" for feature in features)
|
||||
role_transition_counts = Counter()
|
||||
for feature in features:
|
||||
role_transition_counts["assistant->tool"] += feature.assistant_to_tool_count
|
||||
role_transition_counts["tool->assistant"] += feature.tool_to_assistant_count
|
||||
role_transition_counts["tool->tool"] += feature.tool_to_tool_count
|
||||
role_transition_counts["assistant->user"] += feature.assistant_to_user_count
|
||||
role_transition_counts["user->assistant"] += feature.user_to_assistant_count
|
||||
|
||||
latency_stats = series_stats([feature.latency_ms for feature in features])
|
||||
cache_ratio_stats = series_stats([feature.cache_hit_ratio for feature in features])
|
||||
cached_token_stats = series_stats([feature.cached_tokens for feature in features])
|
||||
declared_tool_stats = series_stats([feature.declared_tool_count for feature in features])
|
||||
burst_stats = series_stats([feature.max_consecutive_tool_msgs for feature in features])
|
||||
|
||||
high_burst_requests = sorted(
|
||||
[
|
||||
{
|
||||
"request_id": feature.request_id,
|
||||
"session_id": feature.session_id,
|
||||
"max_consecutive_tool_msgs": feature.max_consecutive_tool_msgs,
|
||||
"tool_to_tool_count": feature.tool_to_tool_count,
|
||||
}
|
||||
for feature in features
|
||||
if feature.tool_burst_alert
|
||||
],
|
||||
key=lambda item: (item["max_consecutive_tool_msgs"], item["tool_to_tool_count"]),
|
||||
reverse=True,
|
||||
)[:10]
|
||||
slow_despite_cache = sorted(
|
||||
[
|
||||
{
|
||||
"request_id": feature.request_id,
|
||||
"session_id": feature.session_id,
|
||||
"latency_ms": feature.latency_ms,
|
||||
"cache_hit_ratio": feature.cache_hit_ratio,
|
||||
}
|
||||
for feature in features
|
||||
if "slow-despite-cache" in feature.pattern_labels
|
||||
],
|
||||
key=lambda item: item["latency_ms"],
|
||||
reverse=True,
|
||||
)[:10]
|
||||
long_context_no_cache = sorted(
|
||||
[
|
||||
{
|
||||
"request_id": feature.request_id,
|
||||
"session_id": feature.session_id,
|
||||
"input_tokens": feature.input_tokens,
|
||||
"cache_hit_ratio": feature.cache_hit_ratio,
|
||||
}
|
||||
for feature in features
|
||||
if "long-context-no-cache" in feature.pattern_labels
|
||||
],
|
||||
key=lambda item: item["input_tokens"],
|
||||
reverse=True,
|
||||
)[:10]
|
||||
|
||||
cache_buckets = []
|
||||
for label, low, high in [
|
||||
("lt_0_2", 0.0, 0.2),
|
||||
("0_2_to_0_8", 0.2, 0.8),
|
||||
("ge_0_8", 0.8, 1.01),
|
||||
]:
|
||||
bucket = [feature for feature in features if low <= feature.cache_hit_ratio < high]
|
||||
cache_buckets.append(
|
||||
{
|
||||
"bucket": label,
|
||||
"count": len(bucket),
|
||||
"avg_latency_ms": series_stats([feature.latency_ms for feature in bucket])["mean"],
|
||||
"avg_cache_hit_ratio": series_stats([feature.cache_hit_ratio for feature in bucket])["mean"],
|
||||
}
|
||||
)
|
||||
|
||||
return {
|
||||
"record_count": len(records),
|
||||
"success_count": sum(1 for feature in features if feature.status_code in {"1000", "200"}),
|
||||
"session_count": len({record.meta.session_id for record in records if record.meta.session_id}),
|
||||
"model_counts": dict(model_counts),
|
||||
"status_code_counts": dict(status_code_counts),
|
||||
"thresholds": {
|
||||
"long_context": 32000,
|
||||
"high_cache": 0.8,
|
||||
"tool_burst_alert": 4,
|
||||
"tool_loop_alert": 3,
|
||||
"slow_request_p90_latency_ms": latency_stats["p90"],
|
||||
},
|
||||
"tool_patterns": {
|
||||
"role_transitions": dict(role_transition_counts),
|
||||
"declared_tool_count": declared_tool_stats,
|
||||
"max_consecutive_tool_msgs": burst_stats,
|
||||
"tool_burst_alert_count": sum(feature.tool_burst_alert for feature in features),
|
||||
"tool_loop_alert_count": sum(feature.tool_loop_alert for feature in features),
|
||||
"high_burst_requests": high_burst_requests,
|
||||
},
|
||||
"cache_patterns": {
|
||||
"cached_tokens": cached_token_stats,
|
||||
"cache_hit_ratio": cache_ratio_stats,
|
||||
"latency_ms": latency_stats,
|
||||
"cache_buckets": cache_buckets,
|
||||
},
|
||||
"anomalies": {
|
||||
"slow_despite_cache": slow_despite_cache,
|
||||
"long_context_no_cache": long_context_no_cache,
|
||||
},
|
||||
}
|
||||
|
||||
|
||||
def _format_top_requests(rows, columns):
|
||||
if not rows:
|
||||
return "_none_"
|
||||
header = "| " + " | ".join(columns) + " |"
|
||||
divider = "| " + " | ".join(["---"] * len(columns)) + " |"
|
||||
lines = [header, divider]
|
||||
for row in rows:
|
||||
lines.append("| " + " | ".join(_render_value(row.get(column, "")) for column in columns) + " |")
|
||||
return "\n".join(lines)
|
||||
|
||||
|
||||
def _render_value(value):
|
||||
if isinstance(value, float):
|
||||
return f"{value:.4f}".rstrip("0").rstrip(".")
|
||||
return str(value)
|
||||
|
||||
|
||||
def _render_mapping(mapping):
|
||||
if isinstance(mapping, dict):
|
||||
rendered = {key: _render_mapping(value) for key, value in mapping.items()}
|
||||
return json.dumps(rendered, ensure_ascii=False)
|
||||
if isinstance(mapping, list):
|
||||
return [_render_mapping(value) for value in mapping]
|
||||
if isinstance(mapping, float):
|
||||
return float(f"{mapping:.4f}")
|
||||
return mapping
|
||||
|
||||
|
||||
def build_markdown_report(summary):
|
||||
lines = [
|
||||
"# Trace Analysis Report",
|
||||
"",
|
||||
"## Data Overview",
|
||||
f"- Records: {summary['record_count']}",
|
||||
f"- Success count: {summary['success_count']}",
|
||||
f"- Session count: {summary['session_count']}",
|
||||
f"- Models: {_render_mapping(summary['model_counts'])}",
|
||||
f"- Status codes: {_render_mapping(summary['status_code_counts'])}",
|
||||
"",
|
||||
"## Tool Patterns",
|
||||
f"- Role transitions: {_render_mapping(summary['tool_patterns']['role_transitions'])}",
|
||||
f"- Declared tool count stats: {_render_mapping(summary['tool_patterns']['declared_tool_count'])}",
|
||||
f"- Max consecutive tool msg stats: {_render_mapping(summary['tool_patterns']['max_consecutive_tool_msgs'])}",
|
||||
f"- Tool burst alerts: {summary['tool_patterns']['tool_burst_alert_count']}",
|
||||
f"- Tool loop alerts: {summary['tool_patterns']['tool_loop_alert_count']}",
|
||||
"",
|
||||
"High burst requests:",
|
||||
_format_top_requests(
|
||||
summary["tool_patterns"]["high_burst_requests"],
|
||||
["request_id", "session_id", "max_consecutive_tool_msgs", "tool_to_tool_count"],
|
||||
),
|
||||
"",
|
||||
"## Cache Patterns",
|
||||
f"- Cached token stats: {_render_mapping(summary['cache_patterns']['cached_tokens'])}",
|
||||
f"- Cache hit ratio stats: {_render_mapping(summary['cache_patterns']['cache_hit_ratio'])}",
|
||||
f"- Latency stats: {_render_mapping(summary['cache_patterns']['latency_ms'])}",
|
||||
"",
|
||||
"Cache buckets:",
|
||||
_format_top_requests(
|
||||
summary["cache_patterns"]["cache_buckets"],
|
||||
["bucket", "count", "avg_latency_ms", "avg_cache_hit_ratio"],
|
||||
),
|
||||
"",
|
||||
"## Anomalies",
|
||||
"Slow despite cache:",
|
||||
_format_top_requests(
|
||||
summary["anomalies"]["slow_despite_cache"],
|
||||
["request_id", "session_id", "latency_ms", "cache_hit_ratio"],
|
||||
),
|
||||
"",
|
||||
"Long context no cache:",
|
||||
_format_top_requests(
|
||||
summary["anomalies"]["long_context_no_cache"],
|
||||
["request_id", "session_id", "input_tokens", "cache_hit_ratio"],
|
||||
),
|
||||
"",
|
||||
]
|
||||
return "\n".join(lines)
|
||||
|
||||
|
||||
def write_report(records, features, output_dir):
|
||||
output_dir = ensure_output_dir(output_dir)
|
||||
summary = build_summary(records, features)
|
||||
|
||||
summary_path = output_dir / "summary.json"
|
||||
with open(summary_path, "w", encoding="utf-8") as handle:
|
||||
json.dump(summary, handle, ensure_ascii=False, indent=2)
|
||||
|
||||
report_path = output_dir / "report.md"
|
||||
with open(report_path, "w", encoding="utf-8") as handle:
|
||||
handle.write(build_markdown_report(summary))
|
||||
|
||||
return summary_path, report_path
|
||||
228
trace_analyzer/reporting.py
Normal file
228
trace_analyzer/reporting.py
Normal file
@@ -0,0 +1,228 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import csv
|
||||
import json
|
||||
from collections import Counter
|
||||
from pathlib import Path
|
||||
|
||||
from trace_analyzer.helpers import safe_float, safe_int, series_stats
|
||||
from trace_analyzer.layout import resolve_details_summary_path
|
||||
from trace_analyzer.report import build_markdown_report
|
||||
|
||||
|
||||
def _iter_feature_rows(features_path: str | Path):
|
||||
with Path(features_path).open("r", encoding="utf-8") as handle:
|
||||
for row in csv.DictReader(handle):
|
||||
row["message_count"] = safe_int(row.get("message_count"))
|
||||
row["conversation_depth"] = safe_int(row.get("conversation_depth"))
|
||||
row["declared_tool_count"] = safe_int(row.get("declared_tool_count"))
|
||||
row["assistant_msg_count"] = safe_int(row.get("assistant_msg_count"))
|
||||
row["tool_msg_count"] = safe_int(row.get("tool_msg_count"))
|
||||
row["user_msg_count"] = safe_int(row.get("user_msg_count"))
|
||||
row["system_msg_count"] = safe_int(row.get("system_msg_count"))
|
||||
row["assistant_to_tool_count"] = safe_int(row.get("assistant_to_tool_count"))
|
||||
row["tool_to_assistant_count"] = safe_int(row.get("tool_to_assistant_count"))
|
||||
row["tool_to_tool_count"] = safe_int(row.get("tool_to_tool_count"))
|
||||
row["assistant_to_user_count"] = safe_int(row.get("assistant_to_user_count"))
|
||||
row["user_to_assistant_count"] = safe_int(row.get("user_to_assistant_count"))
|
||||
row["max_consecutive_tool_msgs"] = safe_int(row.get("max_consecutive_tool_msgs"))
|
||||
row["avg_tool_burst_len"] = safe_float(row.get("avg_tool_burst_len"))
|
||||
row["has_tool_loop"] = safe_int(row.get("has_tool_loop"))
|
||||
row["input_tokens"] = safe_int(row.get("input_tokens"))
|
||||
row["output_tokens"] = safe_int(row.get("output_tokens"))
|
||||
row["total_tokens"] = safe_int(row.get("total_tokens"))
|
||||
row["reasoning_tokens"] = safe_int(row.get("reasoning_tokens"))
|
||||
row["cached_tokens"] = safe_int(row.get("cached_tokens"))
|
||||
row["cache_hit_ratio"] = safe_float(row.get("cache_hit_ratio"))
|
||||
row["uncached_prompt_tokens"] = safe_int(row.get("uncached_prompt_tokens"))
|
||||
row["output_input_ratio"] = safe_float(row.get("output_input_ratio"))
|
||||
row["latency_ms"] = safe_int(row.get("latency_ms"))
|
||||
row["ms_per_input_token"] = safe_float(row.get("ms_per_input_token"))
|
||||
row["ms_per_output_token"] = safe_float(row.get("ms_per_output_token"))
|
||||
row["long_context"] = safe_int(row.get("long_context"))
|
||||
row["high_cache"] = safe_int(row.get("high_cache"))
|
||||
row["tool_burst_alert"] = safe_int(row.get("tool_burst_alert"))
|
||||
row["tool_loop_alert"] = safe_int(row.get("tool_loop_alert"))
|
||||
row["slow_request"] = safe_int(row.get("slow_request"))
|
||||
row["pattern_labels"] = [label for label in str(row.get("pattern_labels", "")).split(";") if label]
|
||||
yield row
|
||||
|
||||
|
||||
def build_summary_from_features(features_path: str | Path) -> dict:
|
||||
model_counts = Counter()
|
||||
status_code_counts = Counter()
|
||||
role_transition_counts = Counter()
|
||||
session_ids: set[str] = set()
|
||||
latencies: list[int] = []
|
||||
cache_ratios: list[float] = []
|
||||
cached_tokens_list: list[int] = []
|
||||
declared_tool_counts: list[int] = []
|
||||
burst_values: list[int] = []
|
||||
record_count = 0
|
||||
success_count = 0
|
||||
high_burst_requests: list[dict] = []
|
||||
slow_despite_cache: list[dict] = []
|
||||
long_context_no_cache: list[dict] = []
|
||||
tool_burst_alert_count = 0
|
||||
tool_loop_alert_count = 0
|
||||
cache_bucket_input = {
|
||||
"lt_0_2": {"latencies": [], "ratios": [], "count": 0},
|
||||
"0_2_to_0_8": {"latencies": [], "ratios": [], "count": 0},
|
||||
"ge_0_8": {"latencies": [], "ratios": [], "count": 0},
|
||||
}
|
||||
|
||||
for row in _iter_feature_rows(features_path):
|
||||
record_count += 1
|
||||
model_counts[row.get("model") or "unknown"] += 1
|
||||
status_code_counts[row.get("status_code") or "unknown"] += 1
|
||||
if row.get("session_id"):
|
||||
session_ids.add(row["session_id"])
|
||||
if row.get("status_code") in {"1000", "200"}:
|
||||
success_count += 1
|
||||
role_transition_counts["assistant->tool"] += row["assistant_to_tool_count"]
|
||||
role_transition_counts["tool->assistant"] += row["tool_to_assistant_count"]
|
||||
role_transition_counts["tool->tool"] += row["tool_to_tool_count"]
|
||||
role_transition_counts["assistant->user"] += row["assistant_to_user_count"]
|
||||
role_transition_counts["user->assistant"] += row["user_to_assistant_count"]
|
||||
latencies.append(row["latency_ms"])
|
||||
cache_ratios.append(row["cache_hit_ratio"])
|
||||
cached_tokens_list.append(row["cached_tokens"])
|
||||
declared_tool_counts.append(row["declared_tool_count"])
|
||||
burst_values.append(row["max_consecutive_tool_msgs"])
|
||||
tool_burst_alert_count += row["tool_burst_alert"]
|
||||
tool_loop_alert_count += row["tool_loop_alert"]
|
||||
|
||||
if row["tool_burst_alert"]:
|
||||
high_burst_requests.append(
|
||||
{
|
||||
"request_id": row["request_id"],
|
||||
"session_id": row["session_id"],
|
||||
"max_consecutive_tool_msgs": row["max_consecutive_tool_msgs"],
|
||||
"tool_to_tool_count": row["tool_to_tool_count"],
|
||||
}
|
||||
)
|
||||
high_burst_requests.sort(
|
||||
key=lambda item: (item["max_consecutive_tool_msgs"], item["tool_to_tool_count"]),
|
||||
reverse=True,
|
||||
)
|
||||
del high_burst_requests[10:]
|
||||
|
||||
if "slow-despite-cache" in row["pattern_labels"]:
|
||||
slow_despite_cache.append(
|
||||
{
|
||||
"request_id": row["request_id"],
|
||||
"session_id": row["session_id"],
|
||||
"latency_ms": row["latency_ms"],
|
||||
"cache_hit_ratio": row["cache_hit_ratio"],
|
||||
}
|
||||
)
|
||||
slow_despite_cache.sort(key=lambda item: item["latency_ms"], reverse=True)
|
||||
del slow_despite_cache[10:]
|
||||
|
||||
if "long-context-no-cache" in row["pattern_labels"]:
|
||||
long_context_no_cache.append(
|
||||
{
|
||||
"request_id": row["request_id"],
|
||||
"session_id": row["session_id"],
|
||||
"input_tokens": row["input_tokens"],
|
||||
"cache_hit_ratio": row["cache_hit_ratio"],
|
||||
}
|
||||
)
|
||||
long_context_no_cache.sort(key=lambda item: item["input_tokens"], reverse=True)
|
||||
del long_context_no_cache[10:]
|
||||
|
||||
ratio = row["cache_hit_ratio"]
|
||||
if ratio < 0.2:
|
||||
bucket_name = "lt_0_2"
|
||||
elif ratio < 0.8:
|
||||
bucket_name = "0_2_to_0_8"
|
||||
else:
|
||||
bucket_name = "ge_0_8"
|
||||
cache_bucket_input[bucket_name]["count"] += 1
|
||||
cache_bucket_input[bucket_name]["latencies"].append(row["latency_ms"])
|
||||
cache_bucket_input[bucket_name]["ratios"].append(row["cache_hit_ratio"])
|
||||
|
||||
latency_stats = series_stats(latencies)
|
||||
cache_ratio_stats = series_stats(cache_ratios)
|
||||
cached_token_stats = series_stats(cached_tokens_list)
|
||||
declared_tool_stats = series_stats(declared_tool_counts)
|
||||
burst_stats = series_stats(burst_values)
|
||||
|
||||
cache_buckets = []
|
||||
for label in ["lt_0_2", "0_2_to_0_8", "ge_0_8"]:
|
||||
bucket = cache_bucket_input[label]
|
||||
cache_buckets.append(
|
||||
{
|
||||
"bucket": label,
|
||||
"count": bucket["count"],
|
||||
"avg_latency_ms": series_stats(bucket["latencies"])["mean"],
|
||||
"avg_cache_hit_ratio": series_stats(bucket["ratios"])["mean"],
|
||||
}
|
||||
)
|
||||
|
||||
return {
|
||||
"record_count": record_count,
|
||||
"success_count": success_count,
|
||||
"session_count": len(session_ids),
|
||||
"model_counts": dict(model_counts),
|
||||
"status_code_counts": dict(status_code_counts),
|
||||
"thresholds": {
|
||||
"long_context": 32000,
|
||||
"high_cache": 0.8,
|
||||
"tool_burst_alert": 4,
|
||||
"tool_loop_alert": 3,
|
||||
"slow_request_p90_latency_ms": latency_stats["p90"],
|
||||
},
|
||||
"tool_patterns": {
|
||||
"role_transitions": dict(role_transition_counts),
|
||||
"declared_tool_count": declared_tool_stats,
|
||||
"max_consecutive_tool_msgs": burst_stats,
|
||||
"tool_burst_alert_count": tool_burst_alert_count,
|
||||
"tool_loop_alert_count": tool_loop_alert_count,
|
||||
"high_burst_requests": high_burst_requests,
|
||||
},
|
||||
"cache_patterns": {
|
||||
"cached_tokens": cached_token_stats,
|
||||
"cache_hit_ratio": cache_ratio_stats,
|
||||
"latency_ms": latency_stats,
|
||||
"cache_buckets": cache_buckets,
|
||||
},
|
||||
"anomalies": {
|
||||
"slow_despite_cache": slow_despite_cache,
|
||||
"long_context_no_cache": long_context_no_cache,
|
||||
},
|
||||
}
|
||||
|
||||
|
||||
def write_reports(
|
||||
*,
|
||||
features_path: str | Path,
|
||||
output_dir: str | Path,
|
||||
pipeline_summary: dict | None = None,
|
||||
) -> dict:
|
||||
output_root = Path(output_dir)
|
||||
output_root.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
summary = build_summary_from_features(features_path)
|
||||
summary_path = output_root / "summary.json"
|
||||
summary_path.write_text(json.dumps(summary, ensure_ascii=False, indent=2), encoding="utf-8")
|
||||
|
||||
report_path = output_root / "report.md"
|
||||
report_path.write_text(build_markdown_report(summary), encoding="utf-8")
|
||||
|
||||
combined = {
|
||||
"summary": summary,
|
||||
"pipeline": pipeline_summary or {},
|
||||
}
|
||||
|
||||
details_summary_path = resolve_details_summary_path(output_root)
|
||||
if details_summary_path is not None:
|
||||
combined["details_summary"] = json.loads(details_summary_path.read_text(encoding="utf-8"))
|
||||
|
||||
combined_path = output_root / "analysis_snapshot.json"
|
||||
combined_path.write_text(json.dumps(combined, ensure_ascii=False, indent=2), encoding="utf-8")
|
||||
return {
|
||||
"summary_path": str(summary_path),
|
||||
"report_path": str(report_path),
|
||||
"analysis_snapshot_path": str(combined_path),
|
||||
}
|
||||
801
trace_analyzer/resume_advanced.py
Normal file
801
trace_analyzer/resume_advanced.py
Normal file
@@ -0,0 +1,801 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import csv
|
||||
import json
|
||||
import time
|
||||
from collections import Counter
|
||||
from itertools import islice
|
||||
from pathlib import Path
|
||||
|
||||
import psutil
|
||||
from tqdm.auto import tqdm
|
||||
|
||||
from .layout import DETAILS_SUMMARY_FILENAME, preferred_details_dir, resolve_details_dir
|
||||
|
||||
|
||||
PROGRESS_FLUSH_INTERVAL_S = 5.0
|
||||
PROGRESS_REFRESH_INTERVAL_S = 0.5
|
||||
PROGRESS_REFRESH_INTERVAL_REQ = 256
|
||||
|
||||
DEFAULT_INPUT_LENGTH_BUCKET_THRESHOLDS = [32 * 1024, 85 * 1024, 128 * 1024]
|
||||
|
||||
FIRST_SEEN_MS = 0
|
||||
LAST_SEEN_MS = 1
|
||||
LAST_REUSE_MS = 2
|
||||
FIRST_REQUEST_ID = 3
|
||||
LAST_REQUEST_ID = 4
|
||||
LAST_REUSE_REQUEST_ID = 5
|
||||
REUSE_COUNT = 6
|
||||
|
||||
|
||||
def _format_bucket_boundary(value: int) -> str:
|
||||
if value == 0:
|
||||
return "0"
|
||||
if value % (1024 * 1024) == 0:
|
||||
return f"{value // (1024 * 1024)}Mi"
|
||||
if value % 1024 == 0:
|
||||
return f"{value // 1024}Ki"
|
||||
return str(value)
|
||||
|
||||
|
||||
def build_input_length_bucket_defs(thresholds=None):
|
||||
parsed_thresholds = (
|
||||
list(DEFAULT_INPUT_LENGTH_BUCKET_THRESHOLDS)
|
||||
if thresholds is None
|
||||
else sorted(set(int(value) for value in thresholds))
|
||||
)
|
||||
if not parsed_thresholds:
|
||||
raise ValueError("At least one input-length bucket threshold is required.")
|
||||
if any(value <= 0 for value in parsed_thresholds):
|
||||
raise ValueError("Input-length bucket thresholds must be positive integers.")
|
||||
if parsed_thresholds == DEFAULT_INPUT_LENGTH_BUCKET_THRESHOLDS:
|
||||
return [
|
||||
("0-32Ki", 0, 32 * 1024),
|
||||
("32-85Ki", 32 * 1024, 85 * 1024),
|
||||
("85-128Ki", 85 * 1024, 128 * 1024),
|
||||
("128Ki+", 128 * 1024, None),
|
||||
]
|
||||
bucket_defs = []
|
||||
lower_bound = 0
|
||||
for upper_bound in parsed_thresholds:
|
||||
bucket_defs.append(
|
||||
(
|
||||
f"{_format_bucket_boundary(lower_bound)}-{_format_bucket_boundary(upper_bound)}",
|
||||
lower_bound,
|
||||
upper_bound,
|
||||
)
|
||||
)
|
||||
lower_bound = upper_bound
|
||||
bucket_defs.append((f"{_format_bucket_boundary(lower_bound)}+", lower_bound, None))
|
||||
return bucket_defs
|
||||
|
||||
|
||||
def assign_input_length_bucket(input_tokens: int, bucket_defs=None) -> str:
|
||||
bucket_defs = bucket_defs or build_input_length_bucket_defs()
|
||||
for bucket_label, lower_bound, upper_bound in bucket_defs:
|
||||
if input_tokens >= lower_bound and (upper_bound is None or input_tokens < upper_bound):
|
||||
return bucket_label
|
||||
return bucket_defs[-1][0]
|
||||
|
||||
|
||||
def write_csv(path: Path, rows: list[dict], fieldnames: list[str] | None = None) -> Path:
|
||||
path.parent.mkdir(parents=True, exist_ok=True)
|
||||
if fieldnames is None and rows:
|
||||
fieldnames = list(rows[0].keys())
|
||||
fieldnames = fieldnames or []
|
||||
with path.open("w", encoding="utf-8", newline="") as handle:
|
||||
writer = csv.DictWriter(handle, fieldnames=fieldnames)
|
||||
if fieldnames:
|
||||
writer.writeheader()
|
||||
if rows:
|
||||
writer.writerows(rows)
|
||||
return path
|
||||
|
||||
|
||||
def _estimate_peak_rss_mb(current_rss_mb, peak_rss_mb, fraction_done):
|
||||
baseline = max(current_rss_mb, peak_rss_mb)
|
||||
headroom = 1.0 + 0.25 * max(0.0, 1.0 - fraction_done)
|
||||
return baseline * headroom
|
||||
|
||||
|
||||
def _progress_postfix(process, peak_rss_mb, fraction_done, **extra):
|
||||
current_rss_mb = process.memory_info().rss / (1024 * 1024)
|
||||
peak_rss_mb = max(peak_rss_mb, current_rss_mb)
|
||||
postfix = {
|
||||
"rss_mb": f"{current_rss_mb:.0f}",
|
||||
"est_peak_mb": f"{_estimate_peak_rss_mb(current_rss_mb, peak_rss_mb, fraction_done):.0f}",
|
||||
}
|
||||
postfix.update(extra)
|
||||
return postfix, peak_rss_mb
|
||||
|
||||
|
||||
def _format_duration(seconds):
|
||||
if seconds is None or seconds < 0:
|
||||
return "?"
|
||||
if seconds < 60:
|
||||
return f"{seconds:.0f}s"
|
||||
if seconds < 3600:
|
||||
return f"{seconds / 60:.1f}m"
|
||||
return f"{seconds / 3600:.2f}h"
|
||||
|
||||
|
||||
def _write_progress_state(
|
||||
path,
|
||||
*,
|
||||
total_requests,
|
||||
processed_requests,
|
||||
started_at,
|
||||
current_rss_mb,
|
||||
peak_rss_mb,
|
||||
est_peak_mb,
|
||||
source_path,
|
||||
features_path,
|
||||
last_request_id,
|
||||
block_state_count,
|
||||
bucket_state_count,
|
||||
):
|
||||
elapsed_s = max(time.monotonic() - started_at, 1e-9)
|
||||
req_per_s = processed_requests / elapsed_s
|
||||
eta_s = ((total_requests - processed_requests) / req_per_s) if req_per_s > 0 and processed_requests < total_requests else 0.0
|
||||
payload = {
|
||||
"source_path": str(source_path),
|
||||
"features_path": str(features_path),
|
||||
"total_requests": total_requests,
|
||||
"processed_requests": processed_requests,
|
||||
"fraction_done": (processed_requests / total_requests) if total_requests else 1.0,
|
||||
"elapsed_s": elapsed_s,
|
||||
"req_per_s": req_per_s,
|
||||
"eta_s": eta_s,
|
||||
"eta_human": _format_duration(eta_s),
|
||||
"rss_mb": current_rss_mb,
|
||||
"peak_rss_mb": peak_rss_mb,
|
||||
"est_peak_mb": est_peak_mb,
|
||||
"block_state_count": block_state_count,
|
||||
"bucket_state_count": bucket_state_count,
|
||||
"last_request_id": last_request_id,
|
||||
"updated_at_epoch_s": time.time(),
|
||||
}
|
||||
tmp_path = path.with_suffix(path.suffix + ".tmp")
|
||||
tmp_path.write_text(json.dumps(payload, ensure_ascii=False, indent=2), encoding="utf-8")
|
||||
tmp_path.replace(path)
|
||||
|
||||
|
||||
def _count_lines(path):
|
||||
with open(path, "r", encoding="utf-8") as handle:
|
||||
return sum(1 for _ in handle)
|
||||
|
||||
|
||||
def _count_feature_rows(path):
|
||||
total_lines = _count_lines(path)
|
||||
return max(total_lines - 1, 0)
|
||||
|
||||
|
||||
class InMemoryBlockCache:
|
||||
def __init__(self):
|
||||
self.state = {}
|
||||
|
||||
def get(self, block_id):
|
||||
return self.state.get(block_id)
|
||||
|
||||
def put(self, block_id, meta):
|
||||
self.state[block_id] = meta
|
||||
|
||||
def iter_blocks(self):
|
||||
for block_id, meta in self.state.items():
|
||||
yield (
|
||||
block_id,
|
||||
meta[FIRST_SEEN_MS],
|
||||
meta[LAST_SEEN_MS],
|
||||
meta[LAST_REUSE_MS],
|
||||
meta[FIRST_REQUEST_ID],
|
||||
meta[LAST_REQUEST_ID],
|
||||
meta[LAST_REUSE_REQUEST_ID],
|
||||
meta[REUSE_COUNT],
|
||||
)
|
||||
|
||||
def __len__(self):
|
||||
return len(self.state)
|
||||
|
||||
|
||||
def _normalize_source_row(row):
|
||||
meta = row.get("meta", {}) if isinstance(row.get("meta", {}), dict) else {}
|
||||
declared_tools = row.get("declared_tools", [])
|
||||
raw_messages = row.get("raw_messages", [])
|
||||
return {
|
||||
"meta": meta,
|
||||
"declared_tools": [tool for tool in declared_tools if isinstance(tool, dict)],
|
||||
"raw_messages": [message for message in raw_messages if isinstance(message, dict)],
|
||||
}
|
||||
|
||||
|
||||
def _read_source_minimal(path):
|
||||
with open(path, "r", encoding="utf-8") as handle:
|
||||
for line in handle:
|
||||
row = _normalize_source_row(json.loads(line))
|
||||
meta = row["meta"]
|
||||
yield {
|
||||
"request_id": meta["request_id"],
|
||||
"session_id": meta["session_id"],
|
||||
"request_ready_time_ms": meta["request_ready_time_ms"],
|
||||
"request_end_time_ms": meta["request_end_time_ms"],
|
||||
"declared_tool_names": [
|
||||
tool["name"] for tool in row.get("declared_tools", []) if tool.get("name")
|
||||
],
|
||||
"raw_messages": row["raw_messages"],
|
||||
}
|
||||
|
||||
|
||||
def _count_child_refs_by_chat_id(path, limit=None):
|
||||
counts = Counter()
|
||||
for index, row in enumerate(_iter_release_rows(path), start=1):
|
||||
if limit is not None and index > limit:
|
||||
break
|
||||
parent_chat_id = int(row.get("parent_chat_id", -1) or -1)
|
||||
if parent_chat_id != -1:
|
||||
counts[parent_chat_id] += 1
|
||||
return counts
|
||||
|
||||
|
||||
def _new_block_meta(request_id, ready_ms):
|
||||
return [ready_ms, ready_ms, 0, request_id, request_id, "", 0]
|
||||
|
||||
|
||||
def _build_alive_block_timeline_from_events(events):
|
||||
alive_rows = []
|
||||
alive_count = 0
|
||||
peak_alive_blocks = 0
|
||||
for timestamp_ms in sorted(events):
|
||||
alive_count += events[timestamp_ms]
|
||||
peak_alive_blocks = max(peak_alive_blocks, alive_count)
|
||||
alive_rows.append(
|
||||
{
|
||||
"timestamp_ms": timestamp_ms,
|
||||
"delta_alive_blocks": events[timestamp_ms],
|
||||
"alive_block_count": alive_count,
|
||||
}
|
||||
)
|
||||
return {
|
||||
"peak_alive_blocks": peak_alive_blocks,
|
||||
"event_count": len(alive_rows),
|
||||
}, alive_rows
|
||||
|
||||
|
||||
def _compute_prefix_hits(
|
||||
global_store,
|
||||
bucket_store,
|
||||
*,
|
||||
hash_ids,
|
||||
request_id,
|
||||
ready_ms,
|
||||
reuse_gap_counts=None,
|
||||
):
|
||||
global_prefix_active = True
|
||||
bucket_prefix_active = True
|
||||
global_prefix_match_blocks = 0
|
||||
bucket_prefix_match_blocks = 0
|
||||
global_source_request_id = ""
|
||||
bucket_source_request_id = ""
|
||||
|
||||
for block_id in hash_ids:
|
||||
global_meta = global_store.get(block_id)
|
||||
if global_meta is not None and global_prefix_active:
|
||||
global_prefix_match_blocks += 1
|
||||
global_source_request_id = global_meta[LAST_REQUEST_ID]
|
||||
if reuse_gap_counts is not None:
|
||||
reuse_gap_counts[max(ready_ms - global_meta[LAST_SEEN_MS], 0)] += 1
|
||||
global_meta[LAST_REUSE_MS] = ready_ms
|
||||
global_meta[LAST_REUSE_REQUEST_ID] = request_id
|
||||
global_meta[REUSE_COUNT] += 1
|
||||
elif global_meta is None:
|
||||
global_prefix_active = False
|
||||
global_meta = _new_block_meta(request_id, ready_ms)
|
||||
else:
|
||||
global_prefix_active = False
|
||||
|
||||
global_meta[LAST_SEEN_MS] = ready_ms
|
||||
global_meta[LAST_REQUEST_ID] = request_id
|
||||
global_store.put(block_id, global_meta)
|
||||
|
||||
bucket_meta = bucket_store.get(block_id)
|
||||
if bucket_meta is not None and bucket_prefix_active:
|
||||
bucket_prefix_match_blocks += 1
|
||||
bucket_source_request_id = bucket_meta[LAST_REQUEST_ID]
|
||||
bucket_meta[LAST_REUSE_MS] = ready_ms
|
||||
bucket_meta[LAST_REUSE_REQUEST_ID] = request_id
|
||||
bucket_meta[REUSE_COUNT] += 1
|
||||
elif bucket_meta is None:
|
||||
bucket_prefix_active = False
|
||||
bucket_meta = _new_block_meta(request_id, ready_ms)
|
||||
else:
|
||||
bucket_prefix_active = False
|
||||
|
||||
bucket_meta[LAST_SEEN_MS] = ready_ms
|
||||
bucket_meta[LAST_REQUEST_ID] = request_id
|
||||
bucket_store.put(block_id, bucket_meta)
|
||||
|
||||
return (
|
||||
global_prefix_match_blocks,
|
||||
global_source_request_id,
|
||||
bucket_prefix_match_blocks,
|
||||
bucket_source_request_id,
|
||||
)
|
||||
|
||||
|
||||
def _iter_release_rows(path):
|
||||
with open(path, "r", encoding="utf-8") as handle:
|
||||
for line in handle:
|
||||
row = json.loads(line)
|
||||
yield {
|
||||
"chat_id": int(row.get("chat_id", -1) or -1),
|
||||
"parent_chat_id": int(row.get("parent_chat_id", -1) or -1),
|
||||
"timestamp": row.get("timestamp"),
|
||||
"turn": int(row.get("turn", 0) or 0),
|
||||
"type": row.get("type", ""),
|
||||
"input_length": int(row.get("input_length", 0) or 0),
|
||||
"output_length": int(row.get("output_length", 0) or 0),
|
||||
"hash_ids": [int(value) for value in row.get("hash_ids", [])],
|
||||
}
|
||||
|
||||
|
||||
def _message_signature(message: dict) -> str:
|
||||
return str(message.get("role", ""))
|
||||
|
||||
|
||||
def _common_prefix_message_count(previous_messages, current_messages):
|
||||
count = 0
|
||||
for previous, current in zip(previous_messages, current_messages):
|
||||
if _message_signature(previous) != _message_signature(current):
|
||||
break
|
||||
count += 1
|
||||
return count
|
||||
|
||||
|
||||
def _classify_trigger(previous_messages, current_messages):
|
||||
common_prefix_count = _common_prefix_message_count(previous_messages, current_messages)
|
||||
appended_messages = current_messages[common_prefix_count:]
|
||||
appended_message_count = len(appended_messages)
|
||||
last_role = str(current_messages[-1].get("role", "unknown")) if current_messages else "unknown"
|
||||
trigger_group = last_role
|
||||
trigger_detail = f"last_message_role={last_role}"
|
||||
|
||||
return {
|
||||
"common_prefix_message_count": common_prefix_count,
|
||||
"appended_message_count": appended_message_count,
|
||||
"first_new_role": str(appended_messages[0].get("role", "unknown")) if appended_messages else "",
|
||||
"trigger_group": trigger_group,
|
||||
"trigger_detail": trigger_detail,
|
||||
}
|
||||
|
||||
|
||||
def _bucket_definition_rows(bucket_defs):
|
||||
rows = []
|
||||
for bucket, lower_bound, upper_bound in bucket_defs:
|
||||
rows.append(
|
||||
{
|
||||
"bucket": bucket,
|
||||
"input_tokens_min_inclusive": lower_bound,
|
||||
"input_tokens_max_exclusive": upper_bound,
|
||||
}
|
||||
)
|
||||
return rows
|
||||
|
||||
|
||||
def _clear_details_dir(details_dir: Path) -> None:
|
||||
details_dir.mkdir(parents=True, exist_ok=True)
|
||||
for path in details_dir.iterdir():
|
||||
if path.is_file():
|
||||
path.unlink()
|
||||
|
||||
|
||||
def collect_existing_detail_paths(output_dir):
|
||||
details_dir = resolve_details_dir(output_dir)
|
||||
return {
|
||||
"details_dir": details_dir,
|
||||
"progress": details_dir / "progress.json",
|
||||
"request_metrics": details_dir / "request_metrics.csv",
|
||||
"theoretical_block_reuse_gaps": details_dir / "theoretical_block_reuse_gaps.csv",
|
||||
"theoretical_block_lifetimes": details_dir / "theoretical_block_lifetimes.csv",
|
||||
"theoretical_alive_block_timeline": details_dir / "theoretical_alive_block_timeline.csv",
|
||||
"session_bucket_boundary_miss": details_dir / "session_bucket_boundary_miss.csv",
|
||||
"details_summary": details_dir / DETAILS_SUMMARY_FILENAME,
|
||||
}
|
||||
|
||||
|
||||
def run_advanced_from_existing(
    source_path,
    release_path,
    features_path,
    output_dir,
    input_length_bucket_thresholds=None,
    show_progress=True,
    limit=None,
):
    output_dir = Path(output_dir)
    details_dir = preferred_details_dir(output_dir)
    _clear_details_dir(details_dir)

    source_path = Path(source_path)
    release_path = Path(release_path)
    features_path = Path(features_path)

    total_requests = limit if limit is not None else _count_feature_rows(features_path)
    release_request_count = _count_lines(release_path)
    if limit is None and release_request_count != total_requests:
        raise ValueError(
            f"release/features row count mismatch: release={release_request_count} vs features={total_requests}"
        )

    process = psutil.Process()
    peak_rss_mb = 0.0
    started_at = time.monotonic()
    bucket_defs = build_input_length_bucket_defs(input_length_bucket_thresholds)
    child_ref_counts = _count_child_refs_by_chat_id(release_path, limit=limit)

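    # One global block cache plus one cache per input-length bucket, so prefix
    # hits can be measured both with and without bucket partitioning (the gap
    # is reported as bucket-boundary loss).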
    store = InMemoryBlockCache()
    bucket_stores = {bucket_label: InMemoryBlockCache() for bucket_label, _, _ in bucket_defs}
    progress_state_path = details_dir / "progress.json"
    next_progress_flush_at = started_at + PROGRESS_FLUSH_INTERVAL_S

    request_metrics_path = details_dir / "request_metrics.csv"
    processed_requests = 0
    last_request_id = ""
    reuse_gap_counts = Counter()
    bucket_reused_block_totals = Counter()
    total_prompt_blocks = 0
    total_global_reused_blocks = 0
    session_last = {}
    chat_state_for_children = {}
    session_bucket_totals = {
        bucket_label: {
            "edge_count": 0,
            "reusable_edge_count": 0,
            "cross_bucket_edge_count": 0,
            "shared_prefix_units_sum": 0,
            "cross_bucket_shared_prefix_units_sum": 0,
        }
        for bucket_label, _, _ in bucket_defs
    }

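    # Walk the features CSV, raw source trace, and release trace in lockstep and
    # write request metrics incrementally, keeping memory roughly bounded by the
    # block caches rather than the trace size.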
    with request_metrics_path.open("w", encoding="utf-8", newline="") as request_metrics_handle, features_path.open(
        "r", encoding="utf-8"
    ) as features_handle:
        feature_reader = csv.DictReader(features_handle)
        source_iter = _read_source_minimal(source_path)
        release_iter = _iter_release_rows(release_path)
        if limit is not None:
            feature_reader = islice(feature_reader, limit)
            source_iter = islice(source_iter, limit)
            release_iter = islice(release_iter, limit)

        request_metrics_writer = None
        progress = tqdm(
            total=total_requests,
            desc="Build details",
            unit="req",
            dynamic_ncols=True,
            disable=not show_progress,
        )
        last_progress_refresh_at = started_at
        try:
            for source_row, feature_row, release_row in zip(source_iter, feature_reader, release_iter):
                request_id = source_row["request_id"]
                session_id = source_row["session_id"]
                ready_ms = int(source_row["request_ready_time_ms"])
                end_ms = int(source_row["request_end_time_ms"])
                tool_names = source_row["declared_tool_names"]
                raw_messages = source_row["raw_messages"]
                hash_ids = release_row["hash_ids"]

                release_input_length = int(release_row["input_length"])
                release_output_length = int(release_row["output_length"])
                feature_input_tokens = int(feature_row["input_tokens"])
                feature_output_tokens = int(feature_row["output_tokens"])
                if feature_input_tokens != release_input_length:
                    raise ValueError(
                        f"release/raw mismatch at request {request_id}: "
                        f"features.input_tokens={feature_row['input_tokens']} vs release.input_length={release_input_length}"
                    )
                if feature_output_tokens != release_output_length:
                    raise ValueError(
                        f"release/raw mismatch at request {request_id}: "
                        f"features.output_tokens={feature_row['output_tokens']} vs release.output_length={release_output_length}"
                    )

                input_tokens = feature_input_tokens
                bucket_label = assign_input_length_bucket(input_tokens, bucket_defs)
                bucket_store = bucket_stores[bucket_label]
                (
                    prefix_match_blocks,
                    global_source_request_id,
                    bucketed_prefix_match_blocks,
                    bucketed_source_request_id,
                ) = _compute_prefix_hits(
                    store,
                    bucket_store,
                    hash_ids=hash_ids,
                    request_id=request_id,
                    ready_ms=ready_ms,
                    reuse_gap_counts=reuse_gap_counts,
                )

                prompt_block_count = len(hash_ids)
                theoretical_prefix_hit_ratio = prefix_match_blocks / prompt_block_count if prompt_block_count else 0.0
                bucketed_theoretical_prefix_hit_ratio = (
                    bucketed_prefix_match_blocks / prompt_block_count if prompt_block_count else 0.0
                )

                previous_session_state = session_last.get(session_id)
                trigger = _classify_trigger(
                    previous_session_state["raw_messages"] if previous_session_state is not None else [],
                    raw_messages,
                )

                feature_row["request_ready_time_ms"] = ready_ms
                feature_row["request_end_time_ms"] = end_ms
                feature_row["turn"] = release_row["turn"]
                feature_row["chat_id"] = release_row["chat_id"]
                feature_row["parent_chat_id"] = release_row["parent_chat_id"]
                feature_row["trigger_group"] = trigger["trigger_group"]
                feature_row["trigger_detail"] = trigger["trigger_detail"]
                feature_row["first_new_role"] = trigger["first_new_role"]
                feature_row["common_prefix_message_count"] = trigger["common_prefix_message_count"]
                feature_row["appended_message_count"] = trigger["appended_message_count"]
                feature_row["input_length_bucket"] = bucket_label
                feature_row["declared_tool_names"] = ";".join(tool_names)
                feature_row["theoretical_prompt_unit_length"] = prompt_block_count
                feature_row["theoretical_prefix_hit_blocks"] = prefix_match_blocks
                feature_row["theoretical_prefix_hit_ratio"] = theoretical_prefix_hit_ratio
                feature_row["theoretical_source_request_id"] = global_source_request_id
                feature_row["bucketed_theoretical_prefix_hit_blocks"] = bucketed_prefix_match_blocks
                feature_row["bucketed_theoretical_prefix_hit_ratio"] = bucketed_theoretical_prefix_hit_ratio
                feature_row["bucketed_theoretical_source_request_id"] = bucketed_source_request_id
                feature_row["theoretical_bucket_boundary_loss_blocks"] = max(
                    prefix_match_blocks - bucketed_prefix_match_blocks,
                    0,
                )
                feature_row["theoretical_bucket_boundary_loss_ratio"] = (
                    feature_row["theoretical_bucket_boundary_loss_blocks"] / prompt_block_count
                    if prompt_block_count
                    else 0.0
                )

                if request_metrics_writer is None:
                    request_metrics_writer = csv.DictWriter(
                        request_metrics_handle,
                        fieldnames=list(feature_row.keys()),
                    )
                    request_metrics_writer.writeheader()
                request_metrics_writer.writerow(feature_row)

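                # Parent/child chat bookkeeping: count how many leading blocks a
                # child request shares with its parent and whether that edge
                # crosses an input-length bucket boundary. Parent state is kept
                # only while unprocessed children remain.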
                chat_id = release_row["chat_id"]
                parent_chat_id = release_row["parent_chat_id"]
                if parent_chat_id != -1:
                    parent_state = chat_state_for_children.get(parent_chat_id)
                    if parent_state is not None:
                        shared_prefix_units = 0
                        for parent_block_id, child_block_id in zip(parent_state["hash_ids"], hash_ids):
                            if parent_block_id != child_block_id:
                                break
                            shared_prefix_units += 1
                        bucket_totals = session_bucket_totals[bucket_label]
                        bucket_totals["edge_count"] += 1
                        if shared_prefix_units > 0:
                            bucket_totals["reusable_edge_count"] += 1
                        if parent_state["bucket_label"] != bucket_label:
                            bucket_totals["cross_bucket_edge_count"] += 1
                            bucket_totals["cross_bucket_shared_prefix_units_sum"] += shared_prefix_units
                        bucket_totals["shared_prefix_units_sum"] += shared_prefix_units

                    remaining_children = child_ref_counts.get(parent_chat_id, 0) - 1
                    if remaining_children > 0:
                        child_ref_counts[parent_chat_id] = remaining_children
                    else:
                        child_ref_counts.pop(parent_chat_id, None)
                        chat_state_for_children.pop(parent_chat_id, None)

                if chat_id != -1 and child_ref_counts.get(chat_id, 0) > 0:
                    chat_state_for_children[chat_id] = {
                        "bucket_label": bucket_label,
                        "hash_ids": hash_ids,
                    }

                total_prompt_blocks += prompt_block_count
                total_global_reused_blocks += prefix_match_blocks
                bucket_reused_block_totals[bucket_label] += bucketed_prefix_match_blocks

                session_last[session_id] = {
                    "request_id": request_id,
                    "request_ready_time_ms": ready_ms,
                    "request_end_time_ms": end_ms,
                    "raw_messages": raw_messages,
                }

                processed_requests += 1
                last_request_id = request_id
                progress.update(1)

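                # The tqdm bar is refreshed on a request/time cadence; the
                # progress.json state file is flushed on a coarser interval.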
                now = time.monotonic()
                should_refresh_progress = (
                    processed_requests == 1
                    or processed_requests % PROGRESS_REFRESH_INTERVAL_REQ == 0
                    or now - last_progress_refresh_at >= PROGRESS_REFRESH_INTERVAL_S
                    or processed_requests == total_requests
                )
                if should_refresh_progress:
                    fraction_done = progress.n / progress.total if progress.total else 0.0
                    elapsed_s = max(now - started_at, 1e-9)
                    req_per_s = progress.n / elapsed_s
                    eta_s = ((progress.total - progress.n) / req_per_s) if req_per_s > 0 and progress.total else 0.0
                    total_bucket_state_count = sum(len(each_store) for each_store in bucket_stores.values())
                    postfix, peak_rss_mb = _progress_postfix(
                        process,
                        peak_rss_mb,
                        fraction_done,
                        req_s=f"{req_per_s:.1f}",
                        eta=_format_duration(eta_s),
                        blocks=len(store),
                        bucket_blocks=total_bucket_state_count,
                        sessions=len(session_last),
                    )
                    progress.set_postfix(postfix)
                    last_progress_refresh_at = now

                if processed_requests and now >= next_progress_flush_at:
                    current_rss_mb = process.memory_info().rss / (1024 * 1024)
                    peak_rss_mb = max(peak_rss_mb, current_rss_mb)
                    est_peak_mb = _estimate_peak_rss_mb(
                        current_rss_mb,
                        peak_rss_mb,
                        (processed_requests / total_requests) if total_requests else 1.0,
                    )
                    _write_progress_state(
                        progress_state_path,
                        total_requests=total_requests,
                        processed_requests=processed_requests,
                        started_at=started_at,
                        current_rss_mb=current_rss_mb,
                        peak_rss_mb=peak_rss_mb,
                        est_peak_mb=est_peak_mb,
                        source_path=f"{source_path} + {release_path}",
                        features_path=features_path,
                        last_request_id=last_request_id,
                        block_state_count=len(store),
                        bucket_state_count=total_bucket_state_count,
                    )
                    next_progress_flush_at = now + PROGRESS_FLUSH_INTERVAL_S
        finally:
            progress.close()

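    # Derived detail tables from the global block cache: reuse-gap histogram,
    # per-block lifetimes, and the alive-block timeline.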
    theoretical_block_reuse_gaps_path = details_dir / "theoretical_block_reuse_gaps.csv"
    write_csv(
        theoretical_block_reuse_gaps_path,
        [
            {"reuse_gap_ms": reuse_gap_ms, "count": count}
            for reuse_gap_ms, count in sorted(reuse_gap_counts.items())
        ],
        fieldnames=["reuse_gap_ms", "count"],
    )

    theoretical_block_lifetimes_path = details_dir / "theoretical_block_lifetimes.csv"
    alive_block_events = Counter()
    block_lifetime_rows = []
    for (
        block_hash,
        first_seen_ms,
        last_seen_ms,
        last_reuse_ms,
        first_request_id,
        last_request_id_for_block,
        last_reuse_request_id,
        reuse_count,
    ) in store.iter_blocks():
        lifecycle_end_ms = last_reuse_ms if reuse_count > 0 else first_seen_ms
        lifetime_ms = max(lifecycle_end_ms - first_seen_ms, 0)
        block_lifetime_rows.append(
            {
                "hash": block_hash,
                "first_request_id": first_request_id,
                "last_request_id": last_request_id_for_block,
                "first_seen_ms": first_seen_ms,
                "last_seen_ms": last_seen_ms,
                "last_reuse_ms": last_reuse_ms,
                "last_reuse_request_id": last_reuse_request_id,
                "reuse_count": reuse_count,
                "lifetime_ms": lifetime_ms,
                "span_end_ms": lifecycle_end_ms,
                "reused": 1 if reuse_count > 0 else 0,
            }
        )
        alive_block_events[first_seen_ms] += 1
        alive_block_events[lifecycle_end_ms + 1] -= 1
    write_csv(theoretical_block_lifetimes_path, block_lifetime_rows)

    alive_block_timeline_summary, alive_block_timeline_rows = _build_alive_block_timeline_from_events(alive_block_events)
    theoretical_alive_block_timeline_path = details_dir / "theoretical_alive_block_timeline.csv"
    write_csv(theoretical_alive_block_timeline_path, alive_block_timeline_rows)

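    # Per-bucket view of parent/child edges, including how much shared prefix
    # would be lost to cross-bucket edges if caches were partitioned by
    # input-length bucket.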
    session_bucket_boundary_rows = []
    for bucket_label, _, _ in bucket_defs:
        bucket_totals = session_bucket_totals[bucket_label]
        total_bucket_reused_blocks = bucket_reused_block_totals[bucket_label]
        session_bucket_boundary_rows.append(
            {
                "bucket": bucket_label,
                "edge_count": bucket_totals["edge_count"],
                "reusable_edge_count": bucket_totals["reusable_edge_count"],
                "cross_bucket_edge_count": bucket_totals["cross_bucket_edge_count"],
                "cross_bucket_edge_fraction": (
                    bucket_totals["cross_bucket_edge_count"] / bucket_totals["edge_count"]
                    if bucket_totals["edge_count"]
                    else 0.0
                ),
                "shared_prefix_units_sum": bucket_totals["shared_prefix_units_sum"],
                "cross_bucket_shared_prefix_units_sum": bucket_totals["cross_bucket_shared_prefix_units_sum"],
                "cross_bucket_shared_prefix_unit_fraction": (
                    bucket_totals["cross_bucket_shared_prefix_units_sum"] / bucket_totals["shared_prefix_units_sum"]
                    if bucket_totals["shared_prefix_units_sum"]
                    else 0.0
                ),
                "bucket_total_reused_blocks": total_bucket_reused_blocks,
                "reduced_reused_blocks_ratio": (
                    bucket_totals["cross_bucket_shared_prefix_units_sum"] / total_bucket_reused_blocks
                    if total_bucket_reused_blocks
                    else 0.0
                ),
            }
        )
    session_bucket_boundary_miss_path = details_dir / "session_bucket_boundary_miss.csv"
    write_csv(session_bucket_boundary_miss_path, session_bucket_boundary_rows)

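    # Summary JSON describing the detail files; schema_version and figure_count
    # are hard-coded for this layout.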
    details_summary_path = details_dir / DETAILS_SUMMARY_FILENAME
    details_summary = {
        "schema_version": 3,
        "request_count": total_requests,
        "figure_count": 13,
        "cache_analysis_mode": "release_hash_ids",
        "release_path": str(release_path),
        "bucket_definition": {"buckets": _bucket_definition_rows(bucket_defs)},
        "global_prompt_blocks": total_prompt_blocks,
        "global_reused_blocks": total_global_reused_blocks,
        "global_reuse_ratio": (total_global_reused_blocks / total_prompt_blocks) if total_prompt_blocks else 0.0,
        "alive_block_timeline_summary": alive_block_timeline_summary,
        "detail_files": [
            "request_metrics.csv",
            "theoretical_block_reuse_gaps.csv",
            "theoretical_block_lifetimes.csv",
            "theoretical_alive_block_timeline.csv",
            "session_bucket_boundary_miss.csv",
            DETAILS_SUMMARY_FILENAME,
            "progress.json",
        ],
    }
    details_summary_path.write_text(json.dumps(details_summary, ensure_ascii=False, indent=2), encoding="utf-8")

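    # Final progress flush for the completed run, then return the output paths.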
    current_rss_mb = process.memory_info().rss / (1024 * 1024)
    peak_rss_mb = max(peak_rss_mb, current_rss_mb)
    est_peak_mb = _estimate_peak_rss_mb(current_rss_mb, peak_rss_mb, 1.0)
    _write_progress_state(
        progress_state_path,
        total_requests=total_requests,
        processed_requests=processed_requests,
        started_at=started_at,
        current_rss_mb=current_rss_mb,
        peak_rss_mb=peak_rss_mb,
        est_peak_mb=est_peak_mb,
        source_path=f"{source_path} + {release_path}",
        features_path=features_path,
        last_request_id=last_request_id,
        block_state_count=len(store),
        bucket_state_count=sum(len(bucket_store) for bucket_store in bucket_stores.values()),
    )

    return {
        "details_dir": details_dir,
        "progress": progress_state_path,
        "request_metrics": request_metrics_path,
        "theoretical_block_reuse_gaps": theoretical_block_reuse_gaps_path,
        "theoretical_block_lifetimes": theoretical_block_lifetimes_path,
        "theoretical_alive_block_timeline": theoretical_alive_block_timeline_path,
        "session_bucket_boundary_miss": session_bucket_boundary_miss_path,
        "details_summary": details_summary_path,
    }
3264
trace_analyzer/study.py
Normal file
File diff suppressed because it is too large