Switch 27B TP A/B to length-aware TTFT SLO (4000 ms + 0.125 ms per input token, i.e. 4 s + L_in/8000 s); widen search high from 0.25 to 0.5

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
2026-06-15 20:35:23 +08:00
parent ed2bbe0323
commit 7678c7d5e8

View File

@@ -145,20 +145,9 @@
"slo": {
"target_pass_rate": 0.95,
"ttft_rule": {
"kind": "step_ms",
"buckets": [
{
"max_input_tokens": 4096,
"threshold_ms": 2000
},
{
"max_input_tokens": 32768,
"threshold_ms": 4000
},
{
"threshold_ms": 6000
}
]
"kind": "linear_ms",
"intercept_ms": 4000,
"per_token_ms": 0.125
},
"tpot_rule": {
"kind": "fixed_ms",
@@ -167,7 +156,7 @@
},
"search": {
"low": 0.0,
"high": 0.25,
"high": 0.5,
"tolerance": 0.001,
"max_probes": 7,
"sample_seed": 20260325