Prioritize topology exploration in decode tuning

This commit is contained in:
2026-04-10 10:25:41 +08:00
parent d582a8ed1b
commit 9422d43737
3 changed files with 117 additions and 4 deletions

View File

@@ -135,10 +135,7 @@
"disable-log-requests": true
},
"tunable_envs": [
-"VLLM_ENABLE_TORCH_COMPILE",
-"VLLM_ENABLE_TBO_OPT",
-"VLLM_USE_FLASHINFER_SAMPLER",
-"CUDA_DEVICE_MAX_CONNECTIONS"
+"VLLM_ENABLE_TORCH_COMPILE"
],
"tunable_flags": [
"tensor-parallel-size",