Add qwen235b prefill-only tuning support
This commit is contained in:
@@ -0,0 +1,13 @@
{
  "observation": "This is the prefill-only baseline aligned to run_qwen235b.sh. Keep the internal vLLM launch shape unchanged and replay the thinking trace with completion forced to 1 token.",
  "diagnosis": "A baseline measurement is required before proposing prefill-only TTFT improvements. Preserve all current envs and flags from run_qwen235b.sh to establish the first feasible sampling_u/request_rate point under the TTFT-only SLO.",
  "config_patch": {
    "env_patch": {},
    "flag_patch": {}
  },
  "expected_effects": [
    "Establish a launch-safe prefill-only baseline for qwen3-235b thinking traffic",
    "Seed later trials from the first feasible sampling_u if one exists"
  ],
  "why_not_previous_failures": "No previous failures in this study."
}
Reference in New Issue
Block a user