The old filter `if row.latency_s is not None` accepted SGLang's fast input-length-aborts (latency_s ~ 0.08s, finish_reason='abort/BadRequest') as if they were successful zero-cost requests. This deflated mean/p50 of any run where the model rejected oversized inputs. Impact on existing comparisons (ts=1 4-run validation + v2): KVC v2 has 40 aborts + 5 ReadTimeouts (was reported as just 5); DP 4w has 67 aborts (was reported as 5). Both runs have abort behavior; the asymmetry (40 vs 67) is purely from SGLang's mem-fraction-derived max-input-len: KVC decode-only worker gets ~10 GB free GPU mem -> max-input=92098, DP fused worker gets ~9 GB -> max-input=87811, because DP also needs chunked-prefill workspace. The KVC-vs-DP latency-win direction holds and widens slightly under the fixed filter (lat mean delta: -0.8% -> -1.4%); see V2_DEEP_ANALYSIS_ZH §4.3 for the recomputed table. Changes: - metrics.py: new _is_failed_request(row) helper; latency/ttft/tpot stats now exclude both errors and aborts. New summary fields abort_count and failure_count expose the counts directly. - scripts/analysis/recompute_summary.py: re-derives summary.json from existing metrics.jsonl using the fixed code, with optional --diff against the old buggy summary for inspection. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
106 lines
4.0 KiB
Python
106 lines
4.0 KiB
Python
#!/usr/bin/env python3
|
|
"""Re-derive summary.json from existing metrics.jsonl using the fixed metrics.py.
|
|
|
|
Bug fixed: requests aborted by SGLang (e.g. input > max-input-len returns
|
|
a fast 400 with latency_s ~ 0.08s) were previously counted in latency_stats
|
|
as if successful, deflating mean/p50/p90. The fixed metrics.py excludes
|
|
all failed requests (errors or aborts) from latency/ttft/tpot stats and
|
|
exposes abort_count / failure_count.
|
|
|
|
Usage:
|
|
python3 scripts/analysis/recompute_summary.py path/to/metrics.jsonl ...
|
|
python3 scripts/analysis/recompute_summary.py --diff path/to/metrics.jsonl path/to/old_summary.json
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import argparse
|
|
import json
|
|
import sys
|
|
from pathlib import Path
|
|
|
|
sys.path.insert(0, str(Path(__file__).resolve().parents[2] / "src"))
|
|
|
|
from agentic_pd_hybrid.metrics import RequestMetrics, write_summary_json
|
|
|
|
|
|
def load_rows(metrics_path: Path) -> list[RequestMetrics]:
|
|
rows = []
|
|
field_names = {f for f in RequestMetrics.__dataclass_fields__}
|
|
with metrics_path.open() as handle:
|
|
for line in handle:
|
|
line = line.strip()
|
|
if not line:
|
|
continue
|
|
raw = json.loads(line)
|
|
kwargs = {k: raw.get(k) for k in field_names}
|
|
rows.append(RequestMetrics(**kwargs))
|
|
return rows
|
|
|
|
|
|
def main() -> None:
|
|
parser = argparse.ArgumentParser()
|
|
parser.add_argument("metrics_paths", nargs="+", type=Path)
|
|
parser.add_argument(
|
|
"--out",
|
|
type=Path,
|
|
default=None,
|
|
help="output summary path (default: alongside metrics with .recomputed_summary.json)",
|
|
)
|
|
parser.add_argument(
|
|
"--diff",
|
|
action="store_true",
|
|
help="print before/after diff against the old <metrics>.summary.json",
|
|
)
|
|
args = parser.parse_args()
|
|
|
|
for metrics_path in args.metrics_paths:
|
|
rows = load_rows(metrics_path)
|
|
out_path = args.out or metrics_path.with_suffix(".recomputed_summary.json")
|
|
write_summary_json(
|
|
out_path,
|
|
rows,
|
|
trace_path=metrics_path,
|
|
router_url=None,
|
|
)
|
|
new = json.load(out_path.open())
|
|
print(f"\n=== {metrics_path} ===")
|
|
print(f" written: {out_path}")
|
|
print(f" total rows: {new['request_count']}")
|
|
print(f" error_count: {new['error_count']}")
|
|
print(f" abort_count: {new.get('abort_count', '?')}")
|
|
print(f" failure_count: {new.get('failure_count', '?')}")
|
|
ls = new.get("latency_stats_s", {}) or {}
|
|
ts = new.get("ttft_stats_s", {}) or {}
|
|
print(f" lat: n={ls.get('count')} mean={ls.get('mean'):.4f} p50={ls.get('p50'):.4f} p90={ls.get('p90'):.4f} p99={ls.get('p99'):.4f}")
|
|
print(f" ttft: n={ts.get('count')} mean={ts.get('mean'):.4f} p50={ts.get('p50'):.4f} p90={ts.get('p90'):.4f} p99={ts.get('p99'):.4f}")
|
|
|
|
if args.diff:
|
|
# find old summary (sibling file)
|
|
candidates = [
|
|
metrics_path.parent / f"{metrics_path.stem}.summary.json",
|
|
metrics_path.with_suffix(".summary.json"),
|
|
]
|
|
old_path = next((p for p in candidates if p.exists()), None)
|
|
if old_path:
|
|
old = json.load(old_path.open())
|
|
print(f" vs old {old_path}:")
|
|
old_ls = old.get("latency_stats_s", {}) or {}
|
|
old_ts = old.get("ttft_stats_s", {}) or {}
|
|
for k in ("count", "mean", "p50", "p90", "p99"):
|
|
o = old_ls.get(k)
|
|
n = ls.get(k)
|
|
if o is not None and n is not None:
|
|
delta = n - o
|
|
print(f" lat.{k}: {o:.4f} -> {n:.4f} ({delta:+.4f})")
|
|
for k in ("count", "mean", "p50", "p90", "p99"):
|
|
o = old_ts.get(k)
|
|
n = ts.get(k)
|
|
if o is not None and n is not None:
|
|
delta = n - o
|
|
print(f" ttft.{k}: {o:.4f} -> {n:.4f} ({delta:+.4f})")
|
|
|
|
|
|
if __name__ == "__main__":
|
|
main()
|