Add deeper infeasible probe diagnostics
This commit is contained in:
@@ -12,11 +12,16 @@ from aituner.http_client import _openai_url, _should_bypass_proxy
|
||||
from aituner.job import append_job, build_trial_job
|
||||
from aituner.llm import build_prompt, parse_proposal_text
|
||||
from aituner.search import ThresholdProbe, binary_search_max_feasible
|
||||
from aituner.slo import RequestOutcome, summarize_evaluations
|
||||
from aituner.slo import RequestOutcome, evaluate_request, summarize_evaluations
|
||||
from aituner.spec import Proposal, StudyState, TrialSummary, load_study_spec
|
||||
from aituner.store import StudyStore
|
||||
from aituner.trace import load_trace_requests, summarize_window
|
||||
from aituner.worker import _replay_requests, _terminate_process_tree, _wait_for_server_or_exit
|
||||
from aituner.worker import (
|
||||
_latency_summary,
|
||||
_replay_requests,
|
||||
_terminate_process_tree,
|
||||
_wait_for_server_or_exit,
|
||||
)
|
||||
from aituner.trace import TraceRequest
|
||||
|
||||
|
||||
@@ -406,6 +411,31 @@ class CoreFlowTests(unittest.TestCase):
|
||||
self.assertGreaterEqual(result.best_threshold, 0.5)
|
||||
self.assertIsNotNone(result.best_feasible_payload)
|
||||
|
||||
def test_binary_search_continues_below_tolerance_when_all_infeasible(self) -> None:
    """The search keeps halving past the tolerance when every probe is infeasible."""
    probed_thresholds = []

    def evaluator(threshold):
        # Record every threshold the search asks about and always report
        # it as infeasible, so the search can never settle on a result.
        probed_thresholds.append(threshold)
        return ThresholdProbe(
            threshold=threshold,
            feasible=False,
            payload={"threshold": threshold},
        )

    result = binary_search_max_feasible(
        low=0.0,
        high=1.0,
        tolerance=0.1,
        max_probes=6,
        evaluator=evaluator,
    )

    # No probe ever succeeded, so there is no feasible payload to report.
    self.assertIsNone(result.best_feasible_payload)
    # The full probe budget is spent rather than stopping at the tolerance.
    self.assertEqual(len(result.probes), 6)
    # Each infeasible probe halves the candidate: 0.5, 0.25, ... down to 1/64.
    self.assertEqual(
        probed_thresholds,
        [0.5, 0.25, 0.125, 0.0625, 0.03125, 0.015625],
    )
|
||||
|
||||
def test_trace_max_requests_uses_window_wide_downsample(self) -> None:
|
||||
with tempfile.TemporaryDirectory() as tmp:
|
||||
tmp_path = Path(tmp)
|
||||
@@ -899,6 +929,35 @@ class CoreFlowTests(unittest.TestCase):
|
||||
self.assertEqual(len(replayed), 2)
|
||||
self.assertEqual(replayed[1].error, "slo_pass_rate_unrecoverable")
|
||||
|
||||
def test_latency_summary_reports_quantiles_and_slo(self) -> None:
    """_latency_summary reports per-metric quantiles plus the SLO target.

    Uses exactly two successful outcomes so mean/p50/p99 are hand-computable:
    ttft mean = (100+200)/2 = 150, tpot mean = (10+20)/2 = 15.
    """
    # Fix: the original used tempfile.mkdtemp(), which leaked the directory
    # after the test. Use the context-managed form, consistent with the
    # other tests in this class.
    with tempfile.TemporaryDirectory() as tmp:
        study = load_study_spec(_write_study_assets(Path(tmp)))
        outcomes = [
            RequestOutcome(
                request_id="r1",
                success=True,
                ttft_ms=100.0,
                tpot_ms=10.0,
                prompt_tokens=100,
                completion_tokens=10,
            ),
            RequestOutcome(
                request_id="r2",
                success=True,
                ttft_ms=200.0,
                tpot_ms=20.0,
                prompt_tokens=5000,
                completion_tokens=10,
            ),
        ]
        evaluations = [evaluate_request(item, study.slo) for item in outcomes]
        summary = _latency_summary(outcomes=outcomes, evaluations=evaluations, study=study)
        self.assertEqual(summary["observed_request_count"], 2)
        self.assertEqual(summary["ttft_ms"]["mean"], 150.0)
        self.assertEqual(summary["ttft_ms"]["p50"], 100.0)
        self.assertEqual(summary["ttft_ms"]["p99"], 200.0)
        self.assertEqual(summary["tpot_ms"]["mean"], 15.0)
        self.assertEqual(summary["slo"]["target_pass_rate"], 0.95)
|
||||
|
||||
def test_wait_for_server_or_exit_fails_fast_when_process_exits(self) -> None:
|
||||
process = mock.Mock()
|
||||
process.poll.return_value = 17
|
||||
|
||||
Reference in New Issue
Block a user