Add deeper infeasible probe diagnostics

This commit is contained in:
2026-04-05 01:44:38 +08:00
parent 0aa607a4f1
commit 84c5d6bd80
5 changed files with 249 additions and 3 deletions

View File

@@ -12,11 +12,16 @@ from aituner.http_client import _openai_url, _should_bypass_proxy
from aituner.job import append_job, build_trial_job
from aituner.llm import build_prompt, parse_proposal_text
from aituner.search import ThresholdProbe, binary_search_max_feasible
from aituner.slo import RequestOutcome, summarize_evaluations
from aituner.slo import RequestOutcome, evaluate_request, summarize_evaluations
from aituner.spec import Proposal, StudyState, TrialSummary, load_study_spec
from aituner.store import StudyStore
from aituner.trace import load_trace_requests, summarize_window
from aituner.worker import _replay_requests, _terminate_process_tree, _wait_for_server_or_exit
from aituner.worker import (
_latency_summary,
_replay_requests,
_terminate_process_tree,
_wait_for_server_or_exit,
)
from aituner.trace import TraceRequest
@@ -406,6 +411,31 @@ class CoreFlowTests(unittest.TestCase):
self.assertGreaterEqual(result.best_threshold, 0.5)
self.assertIsNotNone(result.best_feasible_payload)
def test_binary_search_continues_below_tolerance_when_all_infeasible(self) -> None:
    """When every probe is infeasible, the search must keep halving past
    the tolerance width until the probe budget is exhausted, recording
    each attempted threshold along the way."""
    probed_thresholds = []

    def record_infeasible(threshold):
        # Log the threshold the search asked about, then report failure.
        probed_thresholds.append(threshold)
        return ThresholdProbe(
            threshold=threshold,
            feasible=False,
            payload={"threshold": threshold},
        )

    result = binary_search_max_feasible(
        low=0.0,
        high=1.0,
        tolerance=0.1,
        max_probes=6,
        evaluator=record_infeasible,
    )

    # No probe ever succeeded, so there is no winning payload.
    self.assertIsNone(result.best_feasible_payload)
    # The full probe budget was consumed despite the shrinking interval.
    self.assertEqual(len(result.probes), 6)
    # Each failed probe halves the upper bound: 1/2, 1/4, ..., 1/64
    # (powers of two are exact in binary floating point).
    self.assertEqual(
        probed_thresholds,
        [1.0 / (2 ** k) for k in range(1, 7)],
    )
def test_trace_max_requests_uses_window_wide_downsample(self) -> None:
with tempfile.TemporaryDirectory() as tmp:
tmp_path = Path(tmp)
@@ -899,6 +929,35 @@ class CoreFlowTests(unittest.TestCase):
self.assertEqual(len(replayed), 2)
self.assertEqual(replayed[1].error, "slo_pass_rate_unrecoverable")
def test_latency_summary_reports_quantiles_and_slo(self) -> None:
    """_latency_summary should expose per-metric mean/quantile latency
    statistics alongside the study's SLO target pass rate."""
    study = load_study_spec(_write_study_assets(Path(tempfile.mkdtemp())))

    fast_request = RequestOutcome(
        request_id="r1",
        success=True,
        ttft_ms=100.0,
        tpot_ms=10.0,
        prompt_tokens=100,
        completion_tokens=10,
    )
    slow_request = RequestOutcome(
        request_id="r2",
        success=True,
        ttft_ms=200.0,
        tpot_ms=20.0,
        prompt_tokens=5000,
        completion_tokens=10,
    )
    outcomes = [fast_request, slow_request]
    evaluations = [evaluate_request(outcome, study.slo) for outcome in outcomes]

    summary = _latency_summary(outcomes=outcomes, evaluations=evaluations, study=study)

    self.assertEqual(summary["observed_request_count"], 2)
    # TTFT stats over {100, 200}: mean 150, p50 at the lower sample,
    # p99 at the upper sample.
    ttft_stats = summary["ttft_ms"]
    self.assertEqual(ttft_stats["mean"], 150.0)
    self.assertEqual(ttft_stats["p50"], 100.0)
    self.assertEqual(ttft_stats["p99"], 200.0)
    # TPOT mean over {10, 20}.
    self.assertEqual(summary["tpot_ms"]["mean"], 15.0)
    # The SLO section echoes the study's configured target pass rate.
    self.assertEqual(summary["slo"]["target_pass_rate"], 0.95)
def test_wait_for_server_or_exit_fails_fast_when_process_exits(self) -> None:
process = mock.Mock()
process.poll.return_value = 17