Switch 27B TP A/B to length-aware TTFT SLO (4 s + L_in/8k s, i.e. 4000 ms + 0.125 ms per input token); widen search upper bound ("high") from 0.25 to 0.5
Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
@@ -145,20 +145,9 @@
|
||||
"slo": {
|
||||
"target_pass_rate": 0.95,
|
||||
"ttft_rule": {
|
||||
-"kind": "step_ms",
|
||||
-"buckets": [
|
||||
-{
|
||||
-"max_input_tokens": 4096,
|
||||
-"threshold_ms": 2000
|
||||
-},
|
||||
-{
|
||||
-"max_input_tokens": 32768,
|
||||
-"threshold_ms": 4000
|
||||
-},
|
||||
-{
|
||||
-"threshold_ms": 6000
|
||||
-}
|
||||
-]
|
||||
+"kind": "linear_ms",
|
||||
+"intercept_ms": 4000,
|
||||
+"per_token_ms": 0.125
|
||||
},
|
||||
"tpot_rule": {
|
||||
"kind": "fixed_ms",
|
||||
@@ -167,7 +156,7 @@
|
||||
},
|
||||
"search": {
|
||||
"low": 0.0,
|
||||
-"high": 0.25,
|
||||
+"high": 0.5,
|
||||
"tolerance": 0.001,
|
||||
"max_probes": 7,
|
||||
"sample_seed": 20260325
|
||||
|
||||
Reference in New Issue
Block a user