Switch 27B TP A/B to length-aware TTFT SLO (4000 ms + 0.125 ms per input token), widen search high from 0.25 to 0.5

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
2026-06-15 20:35:23 +08:00
parent ed2bbe0323
commit 7678c7d5e8
1 changed files with 4 additions and 15 deletions
--- a/configs/examples/dash0_qwen27b_tp_ab.json
+++ b/configs/examples/dash0_qwen27b_tp_ab.json
@@ -145,20 +145,9 @@
  "slo": {
    "target_pass_rate": 0.95,
    "ttft_rule": {
-      "kind": "step_ms",
-      "buckets": [
-        {
-          "max_input_tokens": 4096,
-          "threshold_ms": 2000
-        },
-        {
-          "max_input_tokens": 32768,
-          "threshold_ms": 4000
-        },
-        {
-          "threshold_ms": 6000
-        }
-      ]
+      "kind": "linear_ms",
+      "intercept_ms": 4000,
+      "per_token_ms": 0.125
    },
    "tpot_rule": {
      "kind": "fixed_ms",
@@ -167,7 +156,7 @@
  },
  "search": {
    "low": 0.0,
-    "high": 0.25,
+    "high": 0.5,
    "tolerance": 0.001,
    "max_probes": 7,
    "sample_seed": 20260325