Prioritize topology exploration in decode tuning

2026-04-10 10:25:41 +08:00
parent d582a8ed1b
commit 9422d43737
3 changed files with 117 additions and 4 deletions
--- a/configs/examples/dash0_qwen235b_decode_thinking_run2_tpot40.json
+++ b/configs/examples/dash0_qwen235b_decode_thinking_run2_tpot40.json
@@ -135,10 +135,7 @@
      "disable-log-requests": true
    },
    "tunable_envs": [
-      "VLLM_ENABLE_TORCH_COMPILE",
-      "VLLM_ENABLE_TBO_OPT",
-      "VLLM_USE_FLASHINFER_SAMPLER",
-      "CUDA_DEVICE_MAX_CONNECTIONS"
+      "VLLM_ENABLE_TORCH_COMPILE"
    ],
    "tunable_flags": [
      "tensor-parallel-size",