From c9089cf4f0ae12b2f91577ee73d244c72c5e1f37 Mon Sep 17 00:00:00 2001
From: Gahow Wang <gahow.wang@gmail.com>
Date: Tue, 28 Apr 2026 06:58:38 +0800
Subject: [PATCH] Ignore non-SLO probe bookkeeping in bottleneck diagnosis

---
 docs/qwen235b-thinking-decode/harness-20260428.md | 2 ++
 src/aituner/harness.py                            | 2 ++
 tests/test_core_flow.py                           | 5 ++++-
 3 files changed, 8 insertions(+), 1 deletion(-)

diff --git a/docs/qwen235b-thinking-decode/harness-20260428.md b/docs/qwen235b-thinking-decode/harness-20260428.md
index b3abb77..192e960 100644
--- a/docs/qwen235b-thinking-decode/harness-20260428.md
+++ b/docs/qwen235b-thinking-decode/harness-20260428.md
@@ -50,3 +50,5 @@ The active run is now seeded from the real run5 baseline and continues from `tri
 ## Follow-up Fix
 
 The seeded prompt exposed a generic diagnosis issue: if the best feasible probe had no latency failures, the harness could miss the prior infeasible probe that showed the real bottleneck at higher load. The harness now scans the probe sequence backward and uses the nearest non-trivial bottleneck before falling back to the best feasible probe. This keeps decode-only runs focused on `decode_tpot` after a feasible low-load point, without adding testcase thresholds.
+
+A second generic diagnosis bug was fixed: when the only recorded failure reasons are non-SLO bookkeeping counts such as `probe_elapsed_s>...` (so the TTFT, TPOT, and request-failure counts are all zero), the harness now reports `none_obvious` instead of defaulting to `ttft_prefill`.
diff --git a/src/aituner/harness.py b/src/aituner/harness.py
index fa829de..8f9b10d 100644
--- a/src/aituner/harness.py
+++ b/src/aituner/harness.py
@@ -341,6 +341,8 @@ def _active_bottleneck(probe: dict[str, Any] | None) -> str:
         and not str(k).startswith("tpot")
         and not str(k).startswith("probe_elapsed_s>")
     )
+    if ttft_count == 0 and tpot_count == 0 and request_failed_count == 0:
+        return "none_obvious"
     if ttft_count >= max(tpot_count, request_failed_count):
         return "ttft_prefill"
     if tpot_count >= max(ttft_count, request_failed_count):
diff --git a/tests/test_core_flow.py b/tests/test_core_flow.py
index ef83fc5..68fd1bc 100644
--- a/tests/test_core_flow.py
+++ b/tests/test_core_flow.py
@@ -576,7 +576,10 @@ class CoreFlowTests(unittest.TestCase):
                                 "payload": {
                                     "request_rate": 1.0,
                                     "pass_rate": 1.0,
-                                    "latency_summary": {"failed_reason_counts": {}},
+                                    "early_stop_reason": "probe_elapsed_s>1200.0",
+                                    "latency_summary": {
+                                        "failed_reason_counts": {"probe_elapsed_s>1200.0": 1}
+                                    },
                                 },
                             },
                         ],