Make early-stop engine relaunch opt-in
This commit is contained in:
@@ -578,8 +578,11 @@ def call_llm_for_proposal(
|
|||||||
) -> str:
|
) -> str:
|
||||||
if policy.endpoint is None:
|
if policy.endpoint is None:
|
||||||
raise RuntimeError("study.llm.endpoint is not configured")
|
raise RuntimeError("study.llm.endpoint is not configured")
|
||||||
|
last_error: Exception | None = None
|
||||||
|
for attempt in range(2):
|
||||||
|
try:
|
||||||
if policy.endpoint.stream:
|
if policy.endpoint.stream:
|
||||||
return stream_text_completion(
|
text = stream_text_completion(
|
||||||
base_url=policy.endpoint.base_url,
|
base_url=policy.endpoint.base_url,
|
||||||
api_key_env=policy.endpoint.api_key_env,
|
api_key_env=policy.endpoint.api_key_env,
|
||||||
provider=policy.endpoint.provider,
|
provider=policy.endpoint.provider,
|
||||||
@@ -590,6 +593,7 @@ def call_llm_for_proposal(
|
|||||||
system_prompt=policy.system_prompt,
|
system_prompt=policy.system_prompt,
|
||||||
reasoning_effort=policy.endpoint.reasoning_effort,
|
reasoning_effort=policy.endpoint.reasoning_effort,
|
||||||
)
|
)
|
||||||
|
else:
|
||||||
response = chat_completion(
|
response = chat_completion(
|
||||||
base_url=policy.endpoint.base_url,
|
base_url=policy.endpoint.base_url,
|
||||||
api_key_env=policy.endpoint.api_key_env,
|
api_key_env=policy.endpoint.api_key_env,
|
||||||
@@ -601,4 +605,12 @@ def call_llm_for_proposal(
|
|||||||
system_prompt=policy.system_prompt,
|
system_prompt=policy.system_prompt,
|
||||||
reasoning_effort=policy.endpoint.reasoning_effort,
|
reasoning_effort=policy.endpoint.reasoning_effort,
|
||||||
)
|
)
|
||||||
return _extract_response_text(response)
|
text = _extract_response_text(response)
|
||||||
|
if text.strip():
|
||||||
|
return text
|
||||||
|
last_error = RuntimeError("LLM response content is empty")
|
||||||
|
except Exception as exc: # noqa: BLE001
|
||||||
|
last_error = exc
|
||||||
|
if attempt == 0:
|
||||||
|
continue
|
||||||
|
raise RuntimeError(f"LLM proposal failed after retry: {last_error}") from last_error
|
||||||
|
|||||||
@@ -327,6 +327,7 @@ class TraceSpec:
|
|||||||
replay_time_scale: float = 1.0
|
replay_time_scale: float = 1.0
|
||||||
early_stop_max_lag_s: float | None = None
|
early_stop_max_lag_s: float | None = None
|
||||||
early_stop_max_elapsed_s: float | None = None
|
early_stop_max_elapsed_s: float | None = None
|
||||||
|
restart_engine_after_early_stop: bool = False
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def from_dict(cls, data: Mapping[str, Any]) -> "TraceSpec":
|
def from_dict(cls, data: Mapping[str, Any]) -> "TraceSpec":
|
||||||
@@ -389,6 +390,14 @@ class TraceSpec:
|
|||||||
if data.get("early_stop_max_elapsed_s") is not None
|
if data.get("early_stop_max_elapsed_s") is not None
|
||||||
else None
|
else None
|
||||||
),
|
),
|
||||||
|
restart_engine_after_early_stop=(
|
||||||
|
_require_bool(
|
||||||
|
data.get("restart_engine_after_early_stop"),
|
||||||
|
context="trace.restart_engine_after_early_stop",
|
||||||
|
)
|
||||||
|
if data.get("restart_engine_after_early_stop") is not None
|
||||||
|
else False
|
||||||
|
),
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -367,6 +367,7 @@ def run_trial(trial_spec_path: Path) -> dict[str, Any]:
|
|||||||
def evaluator(threshold: float) -> ThresholdProbe[ProbePayload]:
|
def evaluator(threshold: float) -> ThresholdProbe[ProbePayload]:
|
||||||
nonlocal process
|
nonlocal process
|
||||||
selected = select_requests_for_threshold(requests, threshold=threshold)
|
selected = select_requests_for_threshold(requests, threshold=threshold)
|
||||||
|
restart_after_early_stop = study.trace.restart_engine_after_early_stop
|
||||||
outcomes, early_stopped, early_stop_reason = _replay_requests(
|
outcomes, early_stopped, early_stop_reason = _replay_requests(
|
||||||
selected,
|
selected,
|
||||||
base_url=recipe.base_url,
|
base_url=recipe.base_url,
|
||||||
@@ -376,7 +377,7 @@ def run_trial(trial_spec_path: Path) -> dict[str, Any]:
|
|||||||
max_lag_s=study.trace.early_stop_max_lag_s,
|
max_lag_s=study.trace.early_stop_max_lag_s,
|
||||||
max_elapsed_s=study.trace.early_stop_max_elapsed_s,
|
max_elapsed_s=study.trace.early_stop_max_elapsed_s,
|
||||||
evaluate_outcome=lambda outcome: evaluate_request(outcome, study.slo),
|
evaluate_outcome=lambda outcome: evaluate_request(outcome, study.slo),
|
||||||
drain_inflight_on_early_stop=False,
|
drain_inflight_on_early_stop=not restart_after_early_stop,
|
||||||
)
|
)
|
||||||
evaluations, summary = summarize_evaluations(outcomes, study.slo)
|
evaluations, summary = summarize_evaluations(outcomes, study.slo)
|
||||||
request_rate = (
|
request_rate = (
|
||||||
@@ -423,7 +424,7 @@ def run_trial(trial_spec_path: Path) -> dict[str, Any]:
|
|||||||
}
|
}
|
||||||
probe_history.append(probe_record)
|
probe_history.append(probe_record)
|
||||||
StudyStore.write_json(Path(trial.probe_log_path), probe_history)
|
StudyStore.write_json(Path(trial.probe_log_path), probe_history)
|
||||||
if early_stopped:
|
if early_stopped and restart_after_early_stop:
|
||||||
_terminate_process_tree(process, timeout_s=30.0)
|
_terminate_process_tree(process, timeout_s=30.0)
|
||||||
process = launch_process()
|
process = launch_process()
|
||||||
_wait_for_server_or_exit(
|
_wait_for_server_or_exit(
|
||||||
|
|||||||
Reference in New Issue
Block a user