A1: replayer instrumentation for cross-process join

RequestMetrics gains absolute unix timestamps (t_dispatch_unix,
t_first_token_unix, t_finish_unix), the proxy_request_id, the chosen
endpoint URL, and the trace hash_ids. The replayer now sends the header
X-Request-Id: <session_id>:<turn_id>:<chat_id>:<idx> so that proxy
breakdown rows can be joined to metrics by exact key.

Required by Batch 0 (online sequentiality proof) and Batch 1 (reuse
decomposition); the existing metrics.jsonl could not establish either.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
2026-05-25 16:18:52 +08:00
parent e5761fa6f3
commit d57e338366
3 changed files with 96 additions and 1 deletion

View File

@@ -125,6 +125,8 @@ async def _dispatch_request(
}
start = time.perf_counter()
t_dispatch_unix = time.time()
t_first_token_unix: float | None = None
ttft_s = None
n_output = 0
cached_tokens = 0
@@ -133,7 +135,10 @@ async def _dispatch_request(
token_times: list[float] = []
output_token_ids: list[int] = []
req_headers = {"X-Session-Id": req.session_id}
req_headers = {
"X-Session-Id": req.session_id,
"X-Request-Id": req.request_id,
}
async with sem:
try:
@@ -169,11 +174,13 @@ async def _dispatch_request(
if clean_ids:
if ttft_s is None:
ttft_s = now - start
t_first_token_unix = time.time()
output_token_ids.extend(clean_ids)
token_times.extend([now] * len(clean_ids))
elif delta:
if ttft_s is None:
ttft_s = now - start
t_first_token_unix = time.time()
token_times.append(now)
fr = choices[0].get("finish_reason")
if fr:
@@ -187,6 +194,7 @@ async def _dispatch_request(
err = repr(exc)[:300]
end = time.perf_counter()
t_finish_unix = time.time()
e2e = end - start
if output_token_ids:
n_output = len(output_token_ids)
@@ -222,6 +230,12 @@ async def _dispatch_request(
requested_output_tokens=req.output_length,
finish_reason=finish_reason,
error=err,
t_dispatch_unix=t_dispatch_unix,
t_first_token_unix=t_first_token_unix,
t_finish_unix=t_finish_unix,
proxy_request_id=req.request_id,
endpoint_url=endpoint,
trace_hash_ids=req.hash_ids,
),
output_token_ids=output_token_ids,
)