Add topology-aware tuning constraints
This commit is contained in:
@@ -141,11 +141,24 @@
|
||||
"CUDA_DEVICE_MAX_CONNECTIONS"
|
||||
],
|
||||
"tunable_flags": [
|
||||
"tensor-parallel-size",
|
||||
"data-parallel-size",
|
||||
"expert-parallel-size",
|
||||
"gpu-memory-utilization",
|
||||
"max-num-batched-tokens",
|
||||
"max-num-seqs",
|
||||
"block-size"
|
||||
],
|
||||
"topology_constraints": {
|
||||
"require_tp_dp_product_equals_gpu_count": true,
|
||||
"require_ep_size_leq_tp_dp_product": true,
|
||||
"require_ep_size_divides_tp_dp_product": true,
|
||||
"require_enable_expert_parallel_when_ep_gt_one": true,
|
||||
"validate_cuda_graph_sizes_divisible_by_tp_when_tp_ep_reduce_scatter": true,
|
||||
"allowed_tensor_parallel_sizes": [1, 2, 4, 8],
|
||||
"allowed_data_parallel_sizes": [1, 2, 4, 8],
|
||||
"allowed_expert_parallel_sizes": [1, 2, 4, 8]
|
||||
},
|
||||
"python_executable": "python3"
|
||||
},
|
||||
"trace": {
|
||||
|
||||
Reference in New Issue
Block a user