Tear down the engine on SIGTERM instead of orphaning it

Killing `study tune` with a default SIGTERM skipped the `finally` blocks, leaving the vLLM engine and its EngineCore workers (which inherit the AITUNER_* marker env) alive on the GPUs. This has happened twice, each time leaking GPU memory that needed a root reset. Install a SIGTERM handler in run_trial that raises KeyboardInterrupt so that _terminate_process_tree runs, ignore SIGTERM during teardown so that a second signal can't re-orphan the engine, and restore the prior handler afterward. The handler installation is main-thread-guarded, and the signal-to-KeyboardInterrupt conversion is unit-tested.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
2026-06-16 09:08:06 +08:00
parent 93ce339d61
commit b17b213575
2 changed files with 61 additions and 0 deletions
--- a/tests/test_core_flow.py
+++ b/tests/test_core_flow.py
@@ -55,6 +55,8 @@ from aituner.store import StudyStore
 from aituner.trace import load_trace_requests, summarize_window
 from aituner.worker import (
    _adaptive_replay_set,
+    _install_sigterm_as_keyboardinterrupt,
+    _restore_sigterm,
    _should_extend_on_boundary,
    _best_feasible_probe_record,
    _latency_summary,
@@ -589,6 +591,18 @@ class CoreFlowTests(unittest.TestCase):
        self.assertFalse(outcome.success)
        self.assertIn("timed out", outcome.error)

+    def test_sigterm_is_converted_to_keyboardinterrupt(self) -> None:
+        # SIGTERM must surface as KeyboardInterrupt so that a killed `study tune`
+        # runs the engine-teardown `finally` instead of orphaning vLLM EngineCore workers.
+        import signal as _signal
+
+        previous = _install_sigterm_as_keyboardinterrupt()
+        try:
+            with self.assertRaises(KeyboardInterrupt):
+                _signal.raise_signal(_signal.SIGTERM)
+        finally:
+            _restore_sigterm(previous)
+
    def test_lca_similarity_matrix_separates_different_profiles(self) -> None:
        window = WindowRecord(
            window_id="base",