From 2c7f7fdaaee966d3291659f2158e7f18fa015a0f Mon Sep 17 00:00:00 2001 From: Gahow Wang Date: Sat, 23 May 2026 21:02:26 +0800 Subject: [PATCH] replayer: restore optional max_inflight_sessions for backwards compat Co-Authored-By: Claude Opus 4.6 (1M context) --- replayer/replay.py | 34 +++++++++++++++++++++------------- 1 file changed, 21 insertions(+), 13 deletions(-) diff --git a/replayer/replay.py b/replayer/replay.py index 13f336a..3da2a9d 100644 --- a/replayer/replay.py +++ b/replayer/replay.py @@ -59,6 +59,7 @@ class ReplayConfig: request_timeout_s: float = 600.0 request_limit: int | None = None model_name: str = "default" + max_inflight_sessions: int | None = None # cap on concurrent sessions; None = unlimited def _build_prompt_token_ids(req: TraceRequest) -> list[int]: @@ -215,21 +216,28 @@ async def _run_session( earliest_ts: float, sweep_start: float, sink: IncrementalMetricSink, + session_sem: asyncio.Semaphore | None = None, ) -> list[RequestMetrics]: - for req in state.turns: - # Wait until this request's trace timestamp - target_wall = (req.timestamp_s - earliest_ts) - elapsed = time.perf_counter() - sweep_start - if elapsed < target_wall: - await asyncio.sleep(target_wall - elapsed) + if session_sem is not None: + await session_sem.acquire() + try: + for req in state.turns: + # Wait until this request's trace timestamp + target_wall = (req.timestamp_s - earliest_ts) + elapsed = time.perf_counter() - sweep_start + if elapsed < target_wall: + await asyncio.sleep(target_wall - elapsed) - token_ids = _build_prompt_token_ids(req) - metric = await _dispatch_request( - client=client, config=config, req=req, - prompt_token_ids=token_ids, sem=request_sem, - ) - state.metrics.append(metric) - await sink.append(metric) + token_ids = _build_prompt_token_ids(req) + metric = await _dispatch_request( + client=client, config=config, req=req, + prompt_token_ids=token_ids, sem=request_sem, + ) + state.metrics.append(metric) + await sink.append(metric) + finally: + if session_sem is not None: + session_sem.release() return state.metrics