Report latency stats for infeasible baseline
This commit is contained in:
@@ -3040,6 +3040,22 @@ class CoreFlowTests(unittest.TestCase):
|
||||
"request_rate": 1.0,
|
||||
"pass_rate": 0.5,
|
||||
"early_stop_reason": "slo_pass_rate_unrecoverable",
|
||||
"latency_summary": {
|
||||
"ttft_ms": {
|
||||
"count": 2,
|
||||
"mean": 1200.0,
|
||||
"p50": 1100.0,
|
||||
"p95": 1900.0,
|
||||
"p99": 1980.0,
|
||||
},
|
||||
"tpot_ms": {
|
||||
"count": 2,
|
||||
"mean": 35.0,
|
||||
"p50": 32.0,
|
||||
"p95": 48.0,
|
||||
"p99": 49.0,
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
(trial_root / "result.json").write_text(json.dumps(result), encoding="utf-8")
|
||||
@@ -3068,6 +3084,15 @@ class CoreFlowTests(unittest.TestCase):
|
||||
self.assertEqual(len(state.trials), 1)
|
||||
self.assertEqual(state.tuning_stop_reason, "baseline_all_infeasible")
|
||||
self.assertIn("lowest_sampled_request_rate=1", state.tuning_stop_diagnosis)
|
||||
self.assertIn("lowest_probe_ttft_ms", state.tuning_stop_diagnosis)
|
||||
self.assertEqual(
|
||||
state.tuning_stop_details["lowest_probe_latency_ms"]["ttft"]["p95"],
|
||||
1900.0,
|
||||
)
|
||||
self.assertEqual(
|
||||
state.tuning_stop_details["lowest_probe_latency_ms"]["tpot"]["p99"],
|
||||
49.0,
|
||||
)
|
||||
|
||||
with mock.patch("aituner.cli.run_trial") as run_trial_mock:
|
||||
with mock.patch("aituner.cli.call_llm_for_proposal") as llm_mock:
|
||||
|
||||
Reference in New Issue
Block a user