Fix topology-aware incumbents for qwen27b tuning
@@ -1,7 +1,7 @@
 {
   "study_id": "dash0-qwen27b-tight-slo-10min-run4-chat-0-8k",
   "hardware": {
-    "gpu_count": 4,
+    "gpu_count": 8,
     "gpu_model": "H20",
     "host_candidates": [
       "dash0"
@@ -26,7 +26,6 @@
     "/home/admin/resource/model/464482ce/qwen3.5-27b/256k-0223-internal"
   ],
   "base_envs": {
-    "CUDA_VISIBLE_DEVICES": "4,5,6,7",
     "VLLM_DISABLE_COMPILE_CACHE": "1",
     "DS_LLM_IGNORE_WARMUP": "1",
     "DS_LLM_IGNORE_CHECK_WARMUP": "1",
@@ -73,10 +72,7 @@
     "mamba-cache-dtype": "float32",
     "skip-mm-profiling": true,
     "quantization": "fp8",
-    "tensor-parallel-size": 4,
-    "data-parallel-size": 1,
-    "expert-parallel-size": 1,
-    "max-num-seqs": 16,
+    "tensor-parallel-size": 1,
     "disable-log-requests": true
   },
   "tunable_envs": [
@@ -100,7 +96,7 @@
     "require_enable_expert_parallel_when_ep_gt_one": true,
     "validate_cuda_graph_sizes_divisible_by_tp_when_tp_ep_reduce_scatter": true,
     "allowed_tp_dp_products": [1, 2, 4, 8],
-    "allowed_tensor_parallel_sizes": [1, 2, 4],
+    "allowed_tensor_parallel_sizes": [1, 2, 4, 8],
     "allowed_data_parallel_sizes": [1, 2, 4, 8],
     "allowed_expert_parallel_sizes": [1]
   },
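Note (not part of the diff above): a minimal sketch of the kind of topology check the "constraints" block implies, assuming the tuner validates incumbents against the allowed parallel-size lists, the TP x DP product list, and the host's gpu_count. The function and field names (check_incumbent_topology, Topology) are hypothetical, not this repo's API.

# Hypothetical sketch; constraint values taken from the config after this commit.
from dataclasses import dataclass


@dataclass
class Topology:
    tensor_parallel_size: int
    data_parallel_size: int
    expert_parallel_size: int


def check_incumbent_topology(topo: Topology, gpu_count: int) -> bool:
    """Return True if an incumbent's parallel layout fits the tuning constraints."""
    allowed_tp = [1, 2, 4, 8]            # allowed_tensor_parallel_sizes
    allowed_dp = [1, 2, 4, 8]            # allowed_data_parallel_sizes
    allowed_ep = [1]                     # allowed_expert_parallel_sizes
    allowed_tp_dp_products = [1, 2, 4, 8]

    tp = topo.tensor_parallel_size
    dp = topo.data_parallel_size
    ep = topo.expert_parallel_size

    if tp not in allowed_tp or dp not in allowed_dp or ep not in allowed_ep:
        return False
    if tp * dp not in allowed_tp_dp_products:
        return False
    # The layout must fit on the visible GPUs (gpu_count is now 8 on dash0).
    return tp * dp * ep <= gpu_count


# With gpu_count bumped from 4 to 8, a TP=8 incumbent becomes valid:
print(check_incumbent_topology(Topology(8, 1, 1), gpu_count=8))  # True
print(check_incumbent_topology(Topology(8, 1, 1), gpu_count=4))  # False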