Make early-stop engine relaunch opt-in

2026-04-26 01:26:26 +08:00
parent d76ac49198
commit a53445868e
3 changed files with 48 additions and 26 deletions
--- a/src/aituner/llm.py
+++ b/src/aituner/llm.py
@@ -578,27 +578,39 @@ def call_llm_for_proposal(
 ) -> str:
    if policy.endpoint is None:
        raise RuntimeError("study.llm.endpoint is not configured")
-    if policy.endpoint.stream:
-        return stream_text_completion(
-            base_url=policy.endpoint.base_url,
-            api_key_env=policy.endpoint.api_key_env,
-            provider=policy.endpoint.provider,
-            wire_api=policy.endpoint.wire_api,
-            model=policy.endpoint.model,
-            messages=[{"role": "user", "content": prompt}],
-            timeout_s=policy.endpoint.timeout_s,
-            system_prompt=policy.system_prompt,
-            reasoning_effort=policy.endpoint.reasoning_effort,
-        )
-    response = chat_completion(
-        base_url=policy.endpoint.base_url,
-        api_key_env=policy.endpoint.api_key_env,
-        provider=policy.endpoint.provider,
-        wire_api=policy.endpoint.wire_api,
-        model=policy.endpoint.model,
-        messages=[{"role": "user", "content": prompt}],
-        timeout_s=policy.endpoint.timeout_s,
-        system_prompt=policy.system_prompt,
-        reasoning_effort=policy.endpoint.reasoning_effort,
-    )
-    return _extract_response_text(response)
+    last_error: Exception | None = None
+    for attempt in range(2):
+        try:
+            if policy.endpoint.stream:
+                text = stream_text_completion(
+                    base_url=policy.endpoint.base_url,
+                    api_key_env=policy.endpoint.api_key_env,
+                    provider=policy.endpoint.provider,
+                    wire_api=policy.endpoint.wire_api,
+                    model=policy.endpoint.model,
+                    messages=[{"role": "user", "content": prompt}],
+                    timeout_s=policy.endpoint.timeout_s,
+                    system_prompt=policy.system_prompt,
+                    reasoning_effort=policy.endpoint.reasoning_effort,
+                )
+            else:
+                response = chat_completion(
+                    base_url=policy.endpoint.base_url,
+                    api_key_env=policy.endpoint.api_key_env,
+                    provider=policy.endpoint.provider,
+                    wire_api=policy.endpoint.wire_api,
+                    model=policy.endpoint.model,
+                    messages=[{"role": "user", "content": prompt}],
+                    timeout_s=policy.endpoint.timeout_s,
+                    system_prompt=policy.system_prompt,
+                    reasoning_effort=policy.endpoint.reasoning_effort,
+                )
+                text = _extract_response_text(response)
+            if text.strip():
+                return text
+            last_error = RuntimeError("LLM response content is empty")
+        except Exception as exc:  # noqa: BLE001
+            last_error = exc
+        if attempt == 0:
+            continue
+    raise RuntimeError(f"LLM proposal failed after retry: {last_error}") from last_error
--- a/src/aituner/spec.py
+++ b/src/aituner/spec.py
@@ -327,6 +327,7 @@ class TraceSpec:
    replay_time_scale: float = 1.0
    early_stop_max_lag_s: float | None = None
    early_stop_max_elapsed_s: float | None = None
+    restart_engine_after_early_stop: bool = False

    @classmethod
    def from_dict(cls, data: Mapping[str, Any]) -> "TraceSpec":
@@ -389,6 +390,14 @@ class TraceSpec:
                if data.get("early_stop_max_elapsed_s") is not None
                else None
            ),
+            restart_engine_after_early_stop=(
+                _require_bool(
+                    data.get("restart_engine_after_early_stop"),
+                    context="trace.restart_engine_after_early_stop",
+                )
+                if data.get("restart_engine_after_early_stop") is not None
+                else False
+            ),
        )


--- a/src/aituner/worker.py
+++ b/src/aituner/worker.py
@@ -367,6 +367,7 @@ def run_trial(trial_spec_path: Path) -> dict[str, Any]:
            def evaluator(threshold: float) -> ThresholdProbe[ProbePayload]:
                nonlocal process
                selected = select_requests_for_threshold(requests, threshold=threshold)
+                restart_after_early_stop = study.trace.restart_engine_after_early_stop
                outcomes, early_stopped, early_stop_reason = _replay_requests(
                    selected,
                    base_url=recipe.base_url,
@@ -376,7 +377,7 @@ def run_trial(trial_spec_path: Path) -> dict[str, Any]:
                    max_lag_s=study.trace.early_stop_max_lag_s,
                    max_elapsed_s=study.trace.early_stop_max_elapsed_s,
                    evaluate_outcome=lambda outcome: evaluate_request(outcome, study.slo),
-                    drain_inflight_on_early_stop=False,
+                    drain_inflight_on_early_stop=not restart_after_early_stop,
                )
                evaluations, summary = summarize_evaluations(outcomes, study.slo)
                request_rate = (
@@ -423,7 +424,7 @@ def run_trial(trial_spec_path: Path) -> dict[str, Any]:
                }
                probe_history.append(probe_record)
                StudyStore.write_json(Path(trial.probe_log_path), probe_history)
-                if early_stopped:
+                if early_stopped and restart_after_early_stop:
                    _terminate_process_tree(process, timeout_s=30.0)
                    process = launch_process()
                    _wait_for_server_or_exit(