Relaunch engine after early-stopped probes

This commit is contained in:
2026-04-26 00:32:39 +08:00
parent 440f5b491b
commit d76ac49198

View File

@@ -341,15 +341,18 @@ def run_trial(trial_spec_path: Path) -> dict[str, Any]:
artifact_dir.mkdir(parents=True, exist_ok=True)
engine_log_path = Path(trial.engine_log_path)
with engine_log_path.open("w", encoding="utf-8") as engine_log:
process = subprocess.Popen( # noqa: S603
recipe.argv,
cwd=recipe.cwd,
env=recipe.env,
stdout=engine_log,
stderr=subprocess.STDOUT,
text=True,
start_new_session=True,
)
def launch_process() -> subprocess.Popen[str]:
    """Spawn the engine subprocess for this trial and return its handle.

    The command line, working directory and environment are taken from the
    enclosing ``recipe``. The child's stdout is written to the already-open
    ``engine_log`` file and its stderr is merged into that same stream.
    Every call starts a fresh process, so the helper can be used for the
    initial launch and for any later relaunch.
    """
    engine_process = subprocess.Popen(  # noqa: S603
        recipe.argv,
        cwd=recipe.cwd,
        env=recipe.env,
        stdout=engine_log,
        stderr=subprocess.STDOUT,
        text=True,
        # Run the child in its own session; presumably this lets
        # _terminate_process_tree stop the engine together with its
        # descendants -- TODO confirm against that helper.
        start_new_session=True,
    )
    return engine_process
process = launch_process()
probe_history: list[dict[str, Any]] = []
failure_stage = "engine_launch"
try:
@@ -362,6 +365,7 @@ def run_trial(trial_spec_path: Path) -> dict[str, Any]:
failure_stage = "probe_search"
def evaluator(threshold: float) -> ThresholdProbe[ProbePayload]:
nonlocal process
selected = select_requests_for_threshold(requests, threshold=threshold)
outcomes, early_stopped, early_stop_reason = _replay_requests(
selected,
@@ -372,6 +376,7 @@ def run_trial(trial_spec_path: Path) -> dict[str, Any]:
max_lag_s=study.trace.early_stop_max_lag_s,
max_elapsed_s=study.trace.early_stop_max_elapsed_s,
evaluate_outcome=lambda outcome: evaluate_request(outcome, study.slo),
drain_inflight_on_early_stop=False,
)
evaluations, summary = summarize_evaluations(outcomes, study.slo)
request_rate = (
@@ -418,6 +423,15 @@ def run_trial(trial_spec_path: Path) -> dict[str, Any]:
}
probe_history.append(probe_record)
StudyStore.write_json(Path(trial.probe_log_path), probe_history)
if early_stopped:
_terminate_process_tree(process, timeout_s=30.0)
process = launch_process()
_wait_for_server_or_exit(
process,
base_url=recipe.base_url,
healthcheck_path=recipe.healthcheck_path,
ready_timeout_s=recipe.ready_timeout_s,
)
return ThresholdProbe(
threshold=threshold,
feasible=payload.feasible,