Add deeper infeasible probe diagnostics

This commit is contained in:
2026-04-05 01:44:38 +08:00
parent 0aa607a4f1
commit 84c5d6bd80
5 changed files with 249 additions and 3 deletions

View File

@@ -12,11 +12,16 @@ from aituner.http_client import _openai_url, _should_bypass_proxy
from aituner.job import append_job, build_trial_job
from aituner.llm import build_prompt, parse_proposal_text
from aituner.search import ThresholdProbe, binary_search_max_feasible
from aituner.slo import RequestOutcome, summarize_evaluations
from aituner.slo import RequestOutcome, evaluate_request, summarize_evaluations
from aituner.spec import Proposal, StudyState, TrialSummary, load_study_spec
from aituner.store import StudyStore
from aituner.trace import load_trace_requests, summarize_window
from aituner.worker import _replay_requests, _terminate_process_tree, _wait_for_server_or_exit
from aituner.worker import (
_latency_summary,
_replay_requests,
_terminate_process_tree,
_wait_for_server_or_exit,
)
from aituner.trace import TraceRequest
@@ -406,6 +411,31 @@ class CoreFlowTests(unittest.TestCase):
self.assertGreaterEqual(result.best_threshold, 0.5)
self.assertIsNotNone(result.best_feasible_payload)
def test_binary_search_continues_below_tolerance_when_all_infeasible(self) -> None:
    """When every probe is infeasible, the search must keep halving past
    the tolerance width until the probe budget is exhausted, recording
    each attempted threshold along the way."""
    probed_thresholds = []

    def record_infeasible(threshold):
        # Log the threshold the search asked about, then report failure.
        probed_thresholds.append(threshold)
        return ThresholdProbe(
            threshold=threshold,
            feasible=False,
            payload={"threshold": threshold},
        )

    result = binary_search_max_feasible(
        low=0.0,
        high=1.0,
        tolerance=0.1,
        max_probes=6,
        evaluator=record_infeasible,
    )

    # No probe ever succeeded, so there is no winning payload.
    self.assertIsNone(result.best_feasible_payload)
    # The full probe budget was consumed despite the shrinking interval.
    self.assertEqual(len(result.probes), 6)
    # Each failed probe halves the upper bound: 1/2, 1/4, ..., 1/64
    # (powers of two are exact in binary floating point).
    self.assertEqual(
        probed_thresholds,
        [1.0 / (2 ** k) for k in range(1, 7)],
    )
def test_trace_max_requests_uses_window_wide_downsample(self) -> None:
with tempfile.TemporaryDirectory() as tmp:
tmp_path = Path(tmp)
@@ -899,6 +929,35 @@ class CoreFlowTests(unittest.TestCase):
self.assertEqual(len(replayed), 2)
self.assertEqual(replayed[1].error, "slo_pass_rate_unrecoverable")
def test_latency_summary_reports_quantiles_and_slo(self) -> None:
    """_latency_summary should expose per-metric mean/quantile latency
    statistics alongside the study's SLO target pass rate."""
    study = load_study_spec(_write_study_assets(Path(tempfile.mkdtemp())))

    fast_request = RequestOutcome(
        request_id="r1",
        success=True,
        ttft_ms=100.0,
        tpot_ms=10.0,
        prompt_tokens=100,
        completion_tokens=10,
    )
    slow_request = RequestOutcome(
        request_id="r2",
        success=True,
        ttft_ms=200.0,
        tpot_ms=20.0,
        prompt_tokens=5000,
        completion_tokens=10,
    )
    outcomes = [fast_request, slow_request]
    evaluations = [evaluate_request(outcome, study.slo) for outcome in outcomes]

    summary = _latency_summary(outcomes=outcomes, evaluations=evaluations, study=study)

    self.assertEqual(summary["observed_request_count"], 2)
    # TTFT stats over {100, 200}: mean 150, p50 at the lower sample,
    # p99 at the upper sample.
    ttft_stats = summary["ttft_ms"]
    self.assertEqual(ttft_stats["mean"], 150.0)
    self.assertEqual(ttft_stats["p50"], 100.0)
    self.assertEqual(ttft_stats["p99"], 200.0)
    # TPOT mean over {10, 20}.
    self.assertEqual(summary["tpot_ms"]["mean"], 15.0)
    # The SLO section echoes the study's configured target pass rate.
    self.assertEqual(summary["slo"]["target_pass_rate"], 0.95)
def test_wait_for_server_or_exit_fails_fast_when_process_exits(self) -> None:
process = mock.Mock()
process.poll.return_value = 17