Wrap socket/timeout errors in HTTP client as HttpClientError

stream_chat_completion (and the LLM stream/chat paths) only caught HTTPError, so a
request exceeding request_timeout_s raised a raw TimeoutError mid-stream that escaped
_run_one_request (which only catches HttpClientError), propagated through the probe,
and crashed the whole trial ("failed: timed out"). A timed-out request is a failed
request (SLO miss), not a trial crash. Catch OSError (covers TimeoutError, URLError,
ConnectionError) after HTTPError and wrap it. Exposed by lowering request_timeout_s
to 180s on the 27B run.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
2026-06-15 22:58:28 +08:00
parent 3541065675
commit 2fcaf80450
2 changed files with 38 additions and 0 deletions

View File

@@ -179,6 +179,9 @@ def chat_completion(
except urllib.error.HTTPError as exc: except urllib.error.HTTPError as exc:
detail = exc.read().decode("utf-8", errors="replace") detail = exc.read().decode("utf-8", errors="replace")
raise HttpClientError(f"llm_completion failed: {exc.code} {detail}") from exc raise HttpClientError(f"llm_completion failed: {exc.code} {detail}") from exc
except OSError as exc:
# TimeoutError (socket.timeout), URLError, ConnectionError all subclass OSError.
raise HttpClientError(f"llm_completion failed: {exc}") from exc
def stream_text_completion( def stream_text_completion(
@@ -232,6 +235,8 @@ def stream_text_completion(
except urllib.error.HTTPError as exc: except urllib.error.HTTPError as exc:
detail = exc.read().decode("utf-8", errors="replace") detail = exc.read().decode("utf-8", errors="replace")
raise HttpClientError(f"stream_text_completion failed: {exc.code} {detail}") from exc raise HttpClientError(f"stream_text_completion failed: {exc.code} {detail}") from exc
except OSError as exc:
raise HttpClientError(f"stream_text_completion failed: {exc}") from exc
return "".join(parts) return "".join(parts)
@@ -293,6 +298,10 @@ def stream_chat_completion(
except urllib.error.HTTPError as exc: except urllib.error.HTTPError as exc:
detail = exc.read().decode("utf-8", errors="replace") detail = exc.read().decode("utf-8", errors="replace")
raise HttpClientError(f"stream_chat_completion failed: {exc.code} {detail}") from exc raise HttpClientError(f"stream_chat_completion failed: {exc.code} {detail}") from exc
except OSError as exc:
# A request that exceeds request_timeout_s raises TimeoutError mid-stream;
# treat it as a failed request (SLO miss), not a crashed trial.
raise HttpClientError(f"stream_chat_completion failed: {exc}") from exc
ttft_ms = None if first_token_at is None else (first_token_at - start) * 1000.0 ttft_ms = None if first_token_at is None else (first_token_at - start) * 1000.0
if completion_tokens is None and chunk_token_count > 0: if completion_tokens is None and chunk_token_count > 0:
completion_tokens = chunk_token_count completion_tokens = chunk_token_count

View File

@@ -16,6 +16,7 @@ from aituner.cli import main as cli_main
from aituner.compare import _aggregate_summary, load_compare_spec, run_compare from aituner.compare import _aggregate_summary, load_compare_spec, run_compare
from aituner.engine import build_launch_recipe from aituner.engine import build_launch_recipe
from aituner.http_client import ( from aituner.http_client import (
HttpClientError,
StreamMetrics, StreamMetrics,
_auth_headers, _auth_headers,
_openai_url, _openai_url,
@@ -560,6 +561,34 @@ class CoreFlowTests(unittest.TestCase):
self.assertTrue(ev(0, 3900).passed) self.assertTrue(ev(0, 3900).passed)
self.assertFalse(ev(0, 4100).passed) self.assertFalse(ev(0, 4100).passed)
def test_streaming_socket_timeout_is_a_failed_request_not_a_crash(self) -> None:
# A request that exceeds request_timeout_s raises TimeoutError mid-stream;
# it must surface as HttpClientError (a failed request), never escape to
# crash the trial.
with mock.patch(
"aituner.http_client._urlopen", side_effect=TimeoutError("timed out")
):
with self.assertRaises(HttpClientError):
stream_chat_completion(
base_url="http://127.0.0.1:1/v1",
body={"messages": [{"role": "user", "content": "hi"}], "stream": True},
timeout_s=0.5,
)
outcome = _run_one_request(
TraceRequest(
row_id="r",
arrival_s=0.0,
sampling_u=1.0,
body={"messages": [{"role": "user", "content": "hi"}], "stream": True},
prompt_tokens_hint=10,
completion_tokens_hint=None,
),
base_url="http://127.0.0.1:1/v1",
timeout_s=0.5,
)
self.assertFalse(outcome.success)
self.assertIn("timed out", outcome.error)
def test_lca_similarity_matrix_separates_different_profiles(self) -> None: def test_lca_similarity_matrix_separates_different_profiles(self) -> None:
window = WindowRecord( window = WindowRecord(
window_id="base", window_id="base",