Make early-stop engine relaunch opt-in
This commit is contained in:
@@ -578,8 +578,11 @@ def call_llm_for_proposal(
|
||||
) -> str:
|
||||
if policy.endpoint is None:
|
||||
raise RuntimeError("study.llm.endpoint is not configured")
|
||||
last_error: Exception | None = None
|
||||
for attempt in range(2):
|
||||
try:
|
||||
if policy.endpoint.stream:
|
||||
return stream_text_completion(
|
||||
text = stream_text_completion(
|
||||
base_url=policy.endpoint.base_url,
|
||||
api_key_env=policy.endpoint.api_key_env,
|
||||
provider=policy.endpoint.provider,
|
||||
@@ -590,6 +593,7 @@ def call_llm_for_proposal(
|
||||
system_prompt=policy.system_prompt,
|
||||
reasoning_effort=policy.endpoint.reasoning_effort,
|
||||
)
|
||||
else:
|
||||
response = chat_completion(
|
||||
base_url=policy.endpoint.base_url,
|
||||
api_key_env=policy.endpoint.api_key_env,
|
||||
@@ -601,4 +605,12 @@ def call_llm_for_proposal(
|
||||
system_prompt=policy.system_prompt,
|
||||
reasoning_effort=policy.endpoint.reasoning_effort,
|
||||
)
|
||||
return _extract_response_text(response)
|
||||
text = _extract_response_text(response)
|
||||
if text.strip():
|
||||
return text
|
||||
last_error = RuntimeError("LLM response content is empty")
|
||||
except Exception as exc: # noqa: BLE001
|
||||
last_error = exc
|
||||
if attempt == 0:
|
||||
continue
|
||||
raise RuntimeError(f"LLM proposal failed after retry: {last_error}") from last_error
|
||||
|
||||
@@ -327,6 +327,7 @@ class TraceSpec:
|
||||
replay_time_scale: float = 1.0
|
||||
early_stop_max_lag_s: float | None = None
|
||||
early_stop_max_elapsed_s: float | None = None
|
||||
restart_engine_after_early_stop: bool = False
|
||||
|
||||
@classmethod
|
||||
def from_dict(cls, data: Mapping[str, Any]) -> "TraceSpec":
|
||||
@@ -389,6 +390,14 @@ class TraceSpec:
|
||||
if data.get("early_stop_max_elapsed_s") is not None
|
||||
else None
|
||||
),
|
||||
restart_engine_after_early_stop=(
|
||||
_require_bool(
|
||||
data.get("restart_engine_after_early_stop"),
|
||||
context="trace.restart_engine_after_early_stop",
|
||||
)
|
||||
if data.get("restart_engine_after_early_stop") is not None
|
||||
else False
|
||||
),
|
||||
)
|
||||
|
||||
|
||||
|
||||
@@ -367,6 +367,7 @@ def run_trial(trial_spec_path: Path) -> dict[str, Any]:
|
||||
def evaluator(threshold: float) -> ThresholdProbe[ProbePayload]:
|
||||
nonlocal process
|
||||
selected = select_requests_for_threshold(requests, threshold=threshold)
|
||||
restart_after_early_stop = study.trace.restart_engine_after_early_stop
|
||||
outcomes, early_stopped, early_stop_reason = _replay_requests(
|
||||
selected,
|
||||
base_url=recipe.base_url,
|
||||
@@ -376,7 +377,7 @@ def run_trial(trial_spec_path: Path) -> dict[str, Any]:
|
||||
max_lag_s=study.trace.early_stop_max_lag_s,
|
||||
max_elapsed_s=study.trace.early_stop_max_elapsed_s,
|
||||
evaluate_outcome=lambda outcome: evaluate_request(outcome, study.slo),
|
||||
drain_inflight_on_early_stop=False,
|
||||
drain_inflight_on_early_stop=not restart_after_early_stop,
|
||||
)
|
||||
evaluations, summary = summarize_evaluations(outcomes, study.slo)
|
||||
request_rate = (
|
||||
@@ -423,7 +424,7 @@ def run_trial(trial_spec_path: Path) -> dict[str, Any]:
|
||||
}
|
||||
probe_history.append(probe_record)
|
||||
StudyStore.write_json(Path(trial.probe_log_path), probe_history)
|
||||
if early_stopped:
|
||||
if early_stopped and restart_after_early_stop:
|
||||
_terminate_process_tree(process, timeout_s=30.0)
|
||||
process = launch_process()
|
||||
_wait_for_server_or_exit(
|
||||
|
||||
Reference in New Issue
Block a user