Ignore non-SLO probe bookkeeping in bottleneck diagnosis
This commit is contained in:
@@ -50,3 +50,5 @@ The active run is now seeded from the real run5 baseline and continues from `tri
|
||||
## Follow-up Fix
|
||||
|
||||
The seeded prompt exposed a generic diagnosis issue: if the best feasible probe had no latency failures, the harness could miss the prior infeasible probe that showed the real bottleneck at higher load. The harness now scans the probe sequence backward and uses the nearest non-trivial bottleneck before falling back to the best feasible probe. This keeps decode-only runs focused on `decode_tpot` after a feasible low-load point, without adding testcase thresholds.
|
||||
|
||||
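A second generic diagnosis bug was fixed: when the only recorded failure reasons are non-SLO bookkeeping entries such as `probe_elapsed_s>...`, the TTFT, TPOT, and request-failure counts are all zero, and the old comparison `ttft_count >= max(tpot_count, request_failed_count)` was trivially true, so the diagnosis collapsed to `ttft_prefill`. The harness now returns `none_obvious` in that case.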
|
||||
|
||||
@@ -341,6 +341,8 @@ def _active_bottleneck(probe: dict[str, Any] | None) -> str:
|
||||
and not str(k).startswith("tpot")
|
||||
and not str(k).startswith("probe_elapsed_s>")
|
||||
)
|
||||
if ttft_count == 0 and tpot_count == 0 and request_failed_count == 0:
|
||||
return "none_obvious"
|
||||
if ttft_count >= max(tpot_count, request_failed_count):
|
||||
return "ttft_prefill"
|
||||
if tpot_count >= max(ttft_count, request_failed_count):
|
||||
|
||||
@@ -576,7 +576,10 @@ class CoreFlowTests(unittest.TestCase):
|
||||
"payload": {
|
||||
"request_rate": 1.0,
|
||||
"pass_rate": 1.0,
|
||||
"latency_summary": {"failed_reason_counts": {}},
|
||||
"early_stop_reason": "probe_elapsed_s>1200.0",
|
||||
"latency_summary": {
|
||||
"failed_reason_counts": {"probe_elapsed_s>1200.0": 1}
|
||||
},
|
||||
},
|
||||
},
|
||||
],
|
||||
|
||||
Reference in New Issue
Block a user