Wrap socket/timeout errors in HTTP client as HttpClientError
stream_chat_completion (and the LLM stream/chat paths) only caught HTTPError, so a
request exceeding request_timeout_s raised a raw TimeoutError mid-stream that escaped
_run_one_request (which only catches HttpClientError), propagated through the probe,
and crashed the whole trial ("failed: timed out"). A timed-out request is a failed
request (SLO miss), not a trial crash. Catch OSError (covers TimeoutError, URLError,
ConnectionError) after HTTPError and wrap it in HttpClientError. Exposed by lowering request_timeout_s
to 180s on the 27B run.
Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
@@ -16,6 +16,7 @@ from aituner.cli import main as cli_main
|
||||
from aituner.compare import _aggregate_summary, load_compare_spec, run_compare
|
||||
from aituner.engine import build_launch_recipe
|
||||
from aituner.http_client import (
|
||||
HttpClientError,
|
||||
StreamMetrics,
|
||||
_auth_headers,
|
||||
_openai_url,
|
||||
@@ -560,6 +561,34 @@ class CoreFlowTests(unittest.TestCase):
|
||||
self.assertTrue(ev(0, 3900).passed)
|
||||
self.assertFalse(ev(0, 4100).passed)
|
||||
|
||||
def test_streaming_socket_timeout_is_a_failed_request_not_a_crash(self) -> None:
    # Regression guard: a request that outlives request_timeout_s raises
    # TimeoutError mid-stream. That must be reported as HttpClientError (one
    # failed request) rather than escaping and crashing the trial.
    simulated_timeout = TimeoutError("timed out")
    with mock.patch("aituner.http_client._urlopen", side_effect=simulated_timeout):
        # Direct client call: the raw timeout has to come back wrapped.
        with self.assertRaises(HttpClientError):
            stream_chat_completion(
                base_url="http://127.0.0.1:1/v1",
                body={"messages": [{"role": "user", "content": "hi"}], "stream": True},
                timeout_s=0.5,
            )

        # Request runner: the same timeout has to produce a failed outcome
        # instead of propagating an exception.
        request = TraceRequest(
            row_id="r",
            arrival_s=0.0,
            sampling_u=1.0,
            body={"messages": [{"role": "user", "content": "hi"}], "stream": True},
            prompt_tokens_hint=10,
            completion_tokens_hint=None,
        )
        result = _run_one_request(
            request,
            base_url="http://127.0.0.1:1/v1",
            timeout_s=0.5,
        )

    self.assertFalse(result.success)
    self.assertIn("timed out", result.error)
|
||||
|
||||
def test_lca_similarity_matrix_separates_different_profiles(self) -> None:
|
||||
window = WindowRecord(
|
||||
window_id="base",
|
||||
|
||||
Reference in New Issue
Block a user