Fix retry syntax: the async generator can't use `return` here, so use `break` + `try`/`finally` instead

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-05-23 22:37:32 +08:00
parent daeb95eca0
commit a7514fc3d5

View File

@@ -446,28 +446,24 @@ async def _handle_combined(api, req_data, token_ids, input_length, session_id, h
async def generate():
prefill_done = False
last_err = None
for attempt in range(MAX_STREAM_RETRIES):
try:
async with inst.client.stream("POST", api, json=req_data, headers=headers) as resp:
resp.raise_for_status()
async for chunk in resp.aiter_bytes():
if not prefill_done:
inst.pending_prefill_tokens -= estimated_new
inst.ongoing_decode_tokens += input_length
breakdown["t_first_token"] = _time.monotonic()
prefill_done = True
yield chunk
inst.record_prefix(token_ids)
return
except (httpx.ConnectError, httpx.RemoteProtocolError) as e:
last_err = e
if prefill_done:
raise
if attempt < MAX_STREAM_RETRIES - 1:
try:
for attempt in range(MAX_STREAM_RETRIES):
try:
async with inst.client.stream("POST", api, json=req_data, headers=headers) as resp:
resp.raise_for_status()
async for chunk in resp.aiter_bytes():
if not prefill_done:
inst.pending_prefill_tokens -= estimated_new
inst.ongoing_decode_tokens += input_length
breakdown["t_first_token"] = _time.monotonic()
prefill_done = True
yield chunk
inst.record_prefix(token_ids)
break
except (httpx.ConnectError, httpx.RemoteProtocolError):
if prefill_done or attempt >= MAX_STREAM_RETRIES - 1:
raise
await asyncio.sleep(RETRY_DELAY_S)
if last_err:
raise last_err
finally:
if not prefill_done:
inst.pending_prefill_tokens -= estimated_new