Fix topology-aware incumbents for qwen27b tuning
@@ -1,7 +1,7 @@
 {
   "study_id": "dash0-qwen27b-tight-slo-10min-run4-chat-0-8k",
   "hardware": {
-    "gpu_count": 4,
+    "gpu_count": 8,
     "gpu_model": "H20",
     "host_candidates": [
       "dash0"
@@ -26,7 +26,6 @@
     "/home/admin/resource/model/464482ce/qwen3.5-27b/256k-0223-internal"
   ],
   "base_envs": {
-    "CUDA_VISIBLE_DEVICES": "4,5,6,7",
     "VLLM_DISABLE_COMPILE_CACHE": "1",
     "DS_LLM_IGNORE_WARMUP": "1",
     "DS_LLM_IGNORE_CHECK_WARMUP": "1",
@@ -73,10 +72,7 @@
     "mamba-cache-dtype": "float32",
     "skip-mm-profiling": true,
     "quantization": "fp8",
-    "tensor-parallel-size": 4,
-    "data-parallel-size": 1,
-    "expert-parallel-size": 1,
-    "max-num-seqs": 16,
+    "tensor-parallel-size": 1,
     "disable-log-requests": true
   },
   "tunable_envs": [
@@ -100,7 +96,7 @@
     "require_enable_expert_parallel_when_ep_gt_one": true,
     "validate_cuda_graph_sizes_divisible_by_tp_when_tp_ep_reduce_scatter": true,
     "allowed_tp_dp_products": [1, 2, 4, 8],
-    "allowed_tensor_parallel_sizes": [1, 2, 4],
+    "allowed_tensor_parallel_sizes": [1, 2, 4, 8],
     "allowed_data_parallel_sizes": [1, 2, 4, 8],
     "allowed_expert_parallel_sizes": [1]
   },
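Note (not part of the diff above): a minimal sketch of the kind of topology check the "constraints" block implies, assuming the tuner validates incumbents against the allowed parallel-size lists, the TP x DP product list, and the host's gpu_count. The function and field names (check_incumbent_topology, Topology) are hypothetical, not this repo's API.

# Hypothetical sketch; constraint values taken from the config after this commit.
from dataclasses import dataclass


@dataclass
class Topology:
    tensor_parallel_size: int
    data_parallel_size: int
    expert_parallel_size: int


def check_incumbent_topology(topo: Topology, gpu_count: int) -> bool:
    """Return True if an incumbent's parallel layout fits the tuning constraints."""
    allowed_tp = [1, 2, 4, 8]            # allowed_tensor_parallel_sizes
    allowed_dp = [1, 2, 4, 8]            # allowed_data_parallel_sizes
    allowed_ep = [1]                     # allowed_expert_parallel_sizes
    allowed_tp_dp_products = [1, 2, 4, 8]

    tp = topo.tensor_parallel_size
    dp = topo.data_parallel_size
    ep = topo.expert_parallel_size

    if tp not in allowed_tp or dp not in allowed_dp or ep not in allowed_ep:
        return False
    if tp * dp not in allowed_tp_dp_products:
        return False
    # The layout must fit on the visible GPUs (gpu_count is now 8 on dash0).
    return tp * dp * ep <= gpu_count


# With gpu_count bumped from 4 to 8, a TP=8 incumbent becomes valid:
print(check_incumbent_topology(Topology(8, 1, 1), gpu_count=8))  # True
print(check_incumbent_topology(Topology(8, 1, 1), gpu_count=4))  # False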