256 lines
9.8 KiB
Python
256 lines
9.8 KiB
Python
#!/usr/bin/env python3
|
|
"""Aggregate ReplayServe Frontier run directories into CSV and Markdown."""
|
|
|
|
from __future__ import annotations
|
|
|
|
import argparse
|
|
import csv
|
|
import json
|
|
from pathlib import Path
|
|
from typing import Any
|
|
|
|
|
|
# Parent of the directory that contains this script.
REPLAYSERVE_ROOT = Path(__file__).resolve().parents[1]

# Column order of the summary CSV. Every name here is a key of the row dict
# built by summarize_run(); write_csv() emits the columns in exactly this order.
FIELDNAMES = [
    # Run identity and outcome.
    "suite_id", "sim", "fixture", "config_id",
    "status", "exit_code", "runtime_seconds",
    # Values read from the manifest's "frontier" section.
    "frontier_mode", "frontier_head", "frontier_dirty",
    # Values read from the manifest's "knobs" section.
    "attn_tp", "attn_dp", "moe_tp", "moe_ep",
    "batch_size_cap", "max_tokens_in_batch", "block_size",
    "enable_prefix_caching", "enable_chunked_prefill",
    "long_prefill_token_threshold",
    # Prefix-cache post-processing results.
    "frontier_block_hit_ratio", "replayserve_token_hit_ratio",
    "cache_metrics_available", "cache_metrics_unavailable_reason",
    "cache_metric_rows_complete", "cache_metric_rows_total",
    "cache_metric_rows_missing",
    # Completion and preemption bookkeeping.
    "completion_is_complete", "missing_latency_request_ids",
    "preemption_events", "preempted_requests",
    # Latency statistics (mean / p50 / p95 per metric).
    "ttft_mean_ms", "ttft_p50_ms", "ttft_p95_ms",
    "tpot_mean_ms", "tpot_p50_ms", "tpot_p95_ms",
    "e2e_mean_ms", "e2e_p50_ms", "e2e_p95_ms",
    # Throughput and request counts.
    "requests_per_second", "tokens_per_second", "decode_tokens_per_second",
    "completed_requests", "total_requests",
    # Location of the summarized run directory.
    "run_dir",
]
|
|
|
|
|
|
def parse_args() -> argparse.Namespace:
|
|
parser = argparse.ArgumentParser(description="Aggregate ReplayServe run outputs.")
|
|
parser.add_argument("suite_dir", type=Path, help="Run suite directory.")
|
|
parser.add_argument(
|
|
"--output-csv",
|
|
type=Path,
|
|
help="Output CSV path. Defaults to <suite_dir>/summary.csv.",
|
|
)
|
|
parser.add_argument(
|
|
"--output-md",
|
|
type=Path,
|
|
help="Output Markdown path. Defaults to <suite_dir>/summary.md.",
|
|
)
|
|
return parser.parse_args()
|
|
|
|
|
|
def load_json(path: Path) -> dict[str, Any]:
    """Load a JSON object from *path*, falling back to an empty dict.

    An empty dict is returned when the file is missing, when its content is
    not valid UTF-8 JSON, or when the top-level JSON value is not an object.
    Malformed content is reported on stderr rather than raised, so that one
    truncated file does not abort processing of every other run.

    Args:
        path: Location of the JSON file.

    Returns:
        The decoded object, or ``{}`` in any of the cases above.
    """
    import sys  # local import: only needed for the warning below

    try:
        # Open directly instead of checking exists() first; this avoids the
        # window between the check and the open.
        with path.open("r", encoding="utf-8") as handle:
            data = json.load(handle)
    except (FileNotFoundError, NotADirectoryError):
        return {}
    except ValueError as err:
        # json.JSONDecodeError and UnicodeDecodeError both derive from ValueError.
        print(f"warning: ignoring unreadable JSON in {path}: {err}", file=sys.stderr)
        return {}
    return data if isinstance(data, dict) else {}
|
|
|
|
|
|
def read_int(path: Path) -> int | None:
|
|
try:
|
|
return int(path.read_text(encoding="utf-8").strip())
|
|
except (FileNotFoundError, ValueError):
|
|
return None
|
|
|
|
|
|
def nested(data: dict[str, Any], *keys: str) -> Any:
    """Follow *keys* through nested dicts and return the value reached.

    Returns ``None`` as soon as a key has to be looked up on something that
    is not a dict, and also when the final key is absent. With no keys the
    input itself is returned.
    """
    current: Any = data
    for name in keys:
        if isinstance(current, dict):
            current = current.get(name)
        else:
            return None
    return current
|
|
|
|
|
|
def fmt(value: Any) -> str:
    """Render *value* as the text used in the summary outputs.

    ``None`` becomes an empty string, booleans become ``"true"``/``"false"``,
    floats are formatted with eight significant digits, and everything else
    goes through ``str()``.
    """
    if value is None:
        rendered = ""
    elif isinstance(value, bool):
        # Checked before any numeric handling: bool is a subclass of int.
        rendered = "true" if value else "false"
    elif isinstance(value, float):
        rendered = format(value, ".8g")
    else:
        rendered = str(value)
    return rendered
|
|
|
|
|
|
def _dict_section(container: dict[str, Any], key: str) -> dict[str, Any]:
    """Return ``container[key]`` when it is a dict, otherwise an empty dict."""
    section = container.get(key)
    return section if isinstance(section, dict) else {}


def _list_field(container: dict[str, Any], key: str) -> list[Any]:
    """Return ``container[key]`` when it is a list, otherwise an empty list."""
    items = container.get(key)
    return items if isinstance(items, list) else []


def summarize_run(run_dir: Path) -> dict[str, Any]:
    """Collect one summary row for the run stored in *run_dir*.

    Reads ``run_manifest.json``, ``run_status.json`` and
    ``postprocess_summary.json`` from the run directory, plus the system
    metrics file that the post-process summary points to. Missing files or
    sections leave the corresponding row values as ``None``.

    Args:
        run_dir: Directory of a single run.

    Returns:
        A dict keyed by the summary column names.
    """
    manifest = load_json(run_dir / "run_manifest.json")
    status_json = load_json(run_dir / "run_status.json")
    post = load_json(run_dir / "postprocess_summary.json")

    # Only a non-empty string is a usable path; any other value is treated as
    # "no system metrics" instead of failing inside Path().
    # NOTE(review): a relative path is resolved against the current working
    # directory, not against run_dir — confirm that is what producers write.
    system_metrics_path = post.get("system_metrics")
    if isinstance(system_metrics_path, str) and system_metrics_path:
        system_metrics = load_json(Path(system_metrics_path))
    else:
        system_metrics = {}

    knobs = _dict_section(manifest, "knobs")
    frontier = _dict_section(manifest, "frontier")
    prefix = _dict_section(post, "prefix_cache_postprocess")
    frontier_block = _dict_section(prefix, "frontier_block_level")
    token_weighted = _dict_section(prefix, "replayserve_token_weighted")
    missing_rows = _list_field(prefix, "rows_with_missing_cache_metrics")
    preemption = _dict_section(post, "preemption_statistics")
    completion = _dict_section(post, "completion")
    missing_latency_ids = _list_field(completion, "missing_latency_request_ids")
    simulation = _dict_section(system_metrics, "simulation_metadata")
    throughput = _dict_section(system_metrics, "throughput_metrics")

    # run_status.json takes precedence; the plain-text files are a fallback.
    exit_code = status_json.get("exit_code")
    if exit_code is None:
        exit_code = read_int(run_dir / "exit_code.txt")
    runtime = status_json.get("runtime_seconds")
    if runtime is None:
        # NOTE(review): read_int() only accepts integer text, so a fractional
        # runtime_seconds.txt yields None — confirm it holds whole seconds.
        runtime = read_int(run_dir / "runtime_seconds.txt")

    # Without an explicit status, anything but exit code 0 (including an
    # unknown exit code) counts as a failure.
    status = status_json.get("status") or ("pass" if exit_code == 0 else "fail")
    # An explicit "not complete" flag overrides whatever status was derived.
    if completion and not completion.get("is_complete", True):
        status = "incomplete"

    # Any non-blank status_short marks the run as dirty; non-string values
    # are judged by truthiness instead of raising on .strip().
    status_short = frontier.get("status_short")
    if isinstance(status_short, str):
        frontier_dirty = bool(status_short.strip())
    else:
        frontier_dirty = bool(status_short)

    return {
        "suite_id": manifest.get("suite_id"),
        "sim": manifest.get("sim"),
        "fixture": manifest.get("fixture"),
        "config_id": manifest.get("config_id"),
        "status": status,
        "exit_code": exit_code,
        "runtime_seconds": runtime,
        "frontier_mode": frontier.get("mode"),
        "frontier_head": frontier.get("head"),
        "frontier_dirty": frontier_dirty,
        "attn_tp": knobs.get("attn_tensor_parallel_size"),
        "attn_dp": knobs.get("attn_data_parallel_size"),
        "moe_tp": knobs.get("moe_tensor_parallel_size"),
        "moe_ep": knobs.get("moe_expert_parallel_size"),
        "batch_size_cap": knobs.get("batch_size_cap"),
        "max_tokens_in_batch": knobs.get("max_tokens_in_batch"),
        "block_size": knobs.get("block_size"),
        "enable_prefix_caching": knobs.get("enable_prefix_caching"),
        "enable_chunked_prefill": knobs.get("enable_chunked_prefill"),
        "long_prefill_token_threshold": knobs.get("long_prefill_token_threshold"),
        "frontier_block_hit_ratio": frontier_block.get("hit_ratio"),
        "replayserve_token_hit_ratio": token_weighted.get("hit_ratio"),
        "cache_metrics_available": prefix.get("available"),
        "cache_metrics_unavailable_reason": prefix.get("reason"),
        "cache_metric_rows_complete": prefix.get("completed_request_rows"),
        "cache_metric_rows_total": prefix.get("total_request_metric_rows"),
        "cache_metric_rows_missing": len(missing_rows),
        "completion_is_complete": completion.get("is_complete"),
        "missing_latency_request_ids": ",".join(str(value) for value in missing_latency_ids),
        "preemption_events": preemption.get("total_preemption_events"),
        "preempted_requests": preemption.get("total_preempted_requests"),
        "ttft_mean_ms": nested(system_metrics, "ttft_statistics", "mean"),
        "ttft_p50_ms": nested(system_metrics, "ttft_statistics", "p50"),
        "ttft_p95_ms": nested(system_metrics, "ttft_statistics", "p95"),
        "tpot_mean_ms": nested(system_metrics, "tpot_statistics", "mean"),
        "tpot_p50_ms": nested(system_metrics, "tpot_statistics", "p50"),
        "tpot_p95_ms": nested(system_metrics, "tpot_statistics", "p95"),
        "e2e_mean_ms": nested(system_metrics, "request_e2e_time_statistics", "mean"),
        "e2e_p50_ms": nested(system_metrics, "request_e2e_time_statistics", "p50"),
        "e2e_p95_ms": nested(system_metrics, "request_e2e_time_statistics", "p95"),
        "requests_per_second": throughput.get("requests_per_second"),
        "tokens_per_second": throughput.get("tokens_per_second"),
        "decode_tokens_per_second": throughput.get("decode_tokens_per_second"),
        "completed_requests": simulation.get("completed_requests"),
        "total_requests": simulation.get("total_requests"),
        "run_dir": str(run_dir),
    }
|
|
|
|
|
|
def write_csv(path: Path, rows: list[dict[str, Any]]) -> None:
    """Write *rows* to *path* as CSV, one column per entry of ``FIELDNAMES``.

    A header line is always written. Each value is passed through ``fmt``;
    keys missing from a row produce an empty cell, and keys that are not in
    ``FIELDNAMES`` are ignored.
    """
    # newline="" lets the csv module control line endings itself.
    with path.open("w", encoding="utf-8", newline="") as sink:
        table = csv.DictWriter(sink, fieldnames=FIELDNAMES)
        table.writeheader()
        table.writerows(
            {column: fmt(record.get(column)) for column in FIELDNAMES}
            for record in rows
        )
|
|
|
|
|
|
def _markdown_cell(value: Any) -> str:
    """Format *value* with ``fmt`` and make it safe inside a Markdown table cell."""
    text = fmt(value)
    # A table row must stay on one line, so collapse any line break to a space.
    for line_break in ("\r\n", "\n", "\r"):
        text = text.replace(line_break, " ")
    # An unescaped pipe would be read as a column separator.
    return text.replace("|", "\\|")


def write_markdown(path: Path, rows: list[dict[str, Any]], suite_dir: Path) -> None:
    """Write a Markdown report with a condensed table of *rows* to *path*.

    The report has a title built from the suite directory name, the suite
    path and run count, a table restricted to a fixed subset of the summary
    columns, and a closing note about how to read the numbers.

    Args:
        path: Destination Markdown file (overwritten).
        rows: Summary rows; keys missing from a row render as empty cells.
        suite_dir: Suite directory shown in the report header.
    """
    columns = (
        "config_id",
        "fixture",
        "status",
        "runtime_seconds",
        "enable_prefix_caching",
        "enable_chunked_prefill",
        "frontier_block_hit_ratio",
        "replayserve_token_hit_ratio",
        "cache_metric_rows_missing",
        "completion_is_complete",
        "preemption_events",
        "ttft_mean_ms",
        "tpot_mean_ms",
        "e2e_mean_ms",
        "tokens_per_second",
    )
    lines = [
        f"# Sweep Summary: {suite_dir.name}\n\n",
        f"- Suite dir: `{suite_dir}`\n",
        f"- Runs: `{len(rows)}`\n\n",
        "| " + " | ".join(columns) + " |\n",
        "|" + "|".join(["---"] * len(columns)) + "|\n",
    ]
    lines.extend(
        "| " + " | ".join(_markdown_cell(row.get(col)) for col in columns) + " |\n"
        for row in rows
    )
    lines.append("\n")
    lines.append(
        "Latency and throughput values are Frontier smoke outputs from the "
        "configured predictor/profile mode. RS3 tiny smoke uses dummy execution "
        "time, so these are harness plumbing checks, not performance claims.\n"
    )
    # Build the whole document first, then write it in a single batch.
    with path.open("w", encoding="utf-8") as handle:
        handle.writelines(lines)
|
|
|
|
|
|
def main() -> int:
    """Aggregate every run found under the suite directory.

    A run is any directory below the suite directory that contains a
    ``run_manifest.json``. One row per run is written to the CSV and
    Markdown outputs, whose paths default to ``summary.csv`` and
    ``summary.md`` inside the suite directory.

    Returns:
        ``0``, used as the process exit status.
    """
    options = parse_args()
    suite_root = options.suite_dir.resolve()

    # Sort the run directories so the output order is stable across runs.
    manifests = suite_root.glob("**/run_manifest.json")
    run_dirs = sorted(manifest.parent for manifest in manifests)
    rows = [summarize_run(run_dir) for run_dir in run_dirs]

    csv_path = options.output_csv
    if csv_path is None:
        csv_path = suite_root / "summary.csv"
    md_path = options.output_md
    if md_path is None:
        md_path = suite_root / "summary.md"

    write_csv(csv_path, rows)
    write_markdown(md_path, rows, suite_root)
    for written in (csv_path, md_path):
        print(f"wrote {written}")
    return 0
|
|
|
|
|
|
# Script entry point: the value returned by main() becomes the exit status.
if __name__ == "__main__":
    exit_status = main()
    raise SystemExit(exit_status)
|