Fix retry syntax: an async generator can't use return here, so use break plus try/finally instead
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -446,28 +446,24 @@ async def _handle_combined(api, req_data, token_ids, input_length, session_id, h
|
||||
|
||||
async def generate():
|
||||
prefill_done = False
|
||||
last_err = None
|
||||
for attempt in range(MAX_STREAM_RETRIES):
|
||||
try:
|
||||
async with inst.client.stream("POST", api, json=req_data, headers=headers) as resp:
|
||||
resp.raise_for_status()
|
||||
async for chunk in resp.aiter_bytes():
|
||||
if not prefill_done:
|
||||
inst.pending_prefill_tokens -= estimated_new
|
||||
inst.ongoing_decode_tokens += input_length
|
||||
breakdown["t_first_token"] = _time.monotonic()
|
||||
prefill_done = True
|
||||
yield chunk
|
||||
inst.record_prefix(token_ids)
|
||||
return
|
||||
except (httpx.ConnectError, httpx.RemoteProtocolError) as e:
|
||||
last_err = e
|
||||
if prefill_done:
|
||||
raise
|
||||
if attempt < MAX_STREAM_RETRIES - 1:
|
||||
try:
|
||||
for attempt in range(MAX_STREAM_RETRIES):
|
||||
try:
|
||||
async with inst.client.stream("POST", api, json=req_data, headers=headers) as resp:
|
||||
resp.raise_for_status()
|
||||
async for chunk in resp.aiter_bytes():
|
||||
if not prefill_done:
|
||||
inst.pending_prefill_tokens -= estimated_new
|
||||
inst.ongoing_decode_tokens += input_length
|
||||
breakdown["t_first_token"] = _time.monotonic()
|
||||
prefill_done = True
|
||||
yield chunk
|
||||
inst.record_prefix(token_ids)
|
||||
break
|
||||
except (httpx.ConnectError, httpx.RemoteProtocolError):
|
||||
if prefill_done or attempt >= MAX_STREAM_RETRIES - 1:
|
||||
raise
|
||||
await asyncio.sleep(RETRY_DELAY_S)
|
||||
if last_err:
|
||||
raise last_err
|
||||
finally:
|
||||
if not prefill_done:
|
||||
inst.pending_prefill_tokens -= estimated_new
|
||||
|
||||
Reference in New Issue
Block a user