Add qwen235b prefill-only tuning support

This commit is contained in:
2026-04-11 21:00:02 +08:00
parent 5e54e9c8f5
commit 3f20ddf87e
6 changed files with 280 additions and 1 deletions

View File

@@ -0,0 +1,13 @@
{
"observation": "This is the prefill-only baseline aligned to run_qwen235b.sh. Keep the internal vLLM launch shape unchanged and replay the thinking trace with completion forced to 1 token.",
"diagnosis": "A baseline measurement is required before proposing prefill-only TTFT improvements. Preserve all current environment variables and flags from run_qwen235b.sh to establish the first feasible sampling_u/request_rate point under the TTFT-only SLO.",
"config_patch": {
"env_patch": {},
"flag_patch": {}
},
"expected_effects": [
"Establish a launch-safe prefill-only baseline for qwen3-235b thinking traffic",
"Seed later trials from the first feasible sampling_u if one exists"
],
"why_not_previous_failures": "No previous failures in this study."
}