Add topology-aware tuning constraints

This commit is contained in:
2026-04-09 21:07:51 +08:00
parent 7371d6635c
commit ef78fe7eb5
6 changed files with 506 additions and 2 deletions

View File

@@ -141,11 +141,24 @@
"CUDA_DEVICE_MAX_CONNECTIONS"
],
"tunable_flags": [
"tensor-parallel-size",
"data-parallel-size",
"expert-parallel-size",
"gpu-memory-utilization",
"max-num-batched-tokens",
"max-num-seqs",
"block-size"
],
"topology_constraints": {
"require_tp_dp_product_equals_gpu_count": true,
"require_ep_size_leq_tp_dp_product": true,
"require_ep_size_divides_tp_dp_product": true,
"require_enable_expert_parallel_when_ep_gt_one": true,
"validate_cuda_graph_sizes_divisible_by_tp_when_tp_ep_reduce_scatter": true,
"allowed_tensor_parallel_sizes": [1, 2, 4, 8],
"allowed_data_parallel_sizes": [1, 2, 4, 8],
"allowed_expert_parallel_sizes": [1, 2, 4, 8]
},
"python_executable": "python3"
},
"trace": {