Fix topology-aware incumbents for qwen27b tuning

This commit is contained in:
2026-04-11 00:32:41 +08:00
parent 06d4c380b3
commit a4d54442db
5 changed files with 282 additions and 22 deletions

View File

@@ -1,7 +1,7 @@
{
"study_id": "dash0-qwen27b-tight-slo-10min-run4-chat-0-8k",
"hardware": {
"gpu_count": 4,
"gpu_count": 8,
"gpu_model": "H20",
"host_candidates": [
"dash0"
@@ -26,7 +26,6 @@
"/home/admin/resource/model/464482ce/qwen3.5-27b/256k-0223-internal"
],
"base_envs": {
"CUDA_VISIBLE_DEVICES": "4,5,6,7",
"VLLM_DISABLE_COMPILE_CACHE": "1",
"DS_LLM_IGNORE_WARMUP": "1",
"DS_LLM_IGNORE_CHECK_WARMUP": "1",
@@ -73,10 +72,7 @@
"mamba-cache-dtype": "float32",
"skip-mm-profiling": true,
"quantization": "fp8",
"tensor-parallel-size": 4,
"data-parallel-size": 1,
"expert-parallel-size": 1,
"max-num-seqs": 16,
"tensor-parallel-size": 1,
"disable-log-requests": true
},
"tunable_envs": [
@@ -100,7 +96,7 @@
"require_enable_expert_parallel_when_ep_gt_one": true,
"validate_cuda_graph_sizes_divisible_by_tp_when_tp_ep_reduce_scatter": true,
"allowed_tp_dp_products": [1, 2, 4, 8],
"allowed_tensor_parallel_sizes": [1, 2, 4],
"allowed_tensor_parallel_sizes": [1, 2, 4, 8],
"allowed_data_parallel_sizes": [1, 2, 4, 8],
"allowed_expert_parallel_sizes": [1]
},