feat: new router and benchmark setup

This commit is contained in:
2026-04-16 14:23:53 +08:00
parent c86d931d8f
commit 996511f300
35 changed files with 1480 additions and 76 deletions

View File

@@ -17,6 +17,7 @@ pub const AVAILABLE: &[&str] = &[
"h100",
"h800",
"h20",
"h20-141g",
"a100-80gb",
"a100-40gb",
"b200",
@@ -30,6 +31,9 @@ pub const AVAILABLE: &[&str] = &[
"2xh20",
"4xh20",
"8xh20",
"2xh20-141g",
"4xh20-141g",
"8xh20-141g",
"2xb200",
"4xb200",
"8xb200",
@@ -49,6 +53,7 @@ pub fn resolve(name: &str) -> Option<HardwareConfig> {
"h100" => Some(make_config(count, &H100)),
"h800" => Some(make_config(count, &H800)),
"h20" => Some(make_config(count, &H20)),
"h20141g" | "h20141gb" => Some(make_config(count, &H20_141G)),
"a10080gb" | "a100" => Some(make_config(count, &A100_80GB)),
"a10040gb" => Some(make_config(count, &A100_40GB)),
"b200" => Some(make_config(count, &B200)),
@@ -113,6 +118,15 @@ const H20: GpuBase = GpuBase {
pcie_gen: 5,
};
/// Per-GPU base parameters for the 141 GB HBM variant of the H20.
///
/// `resolve` selects this entry for the keys `"h20141g"` and `"h20141gb"`.
/// Compute throughput is modeled as identical to the base H20; only the
/// memory capacity and memory bandwidth are meant to differ.
const H20_141G: GpuBase = GpuBase {
flops: 1.48e14, // modeled as the same H20 compute envelope
fp8_flops: 2.96e14, // modeled as the same H20 FP8 throughput
fp4_flops: 0.0, // FP4 not supported; modeled as zero throughput
mem_bw: 4.8e12, // memory bandwidth of the 141 GB variant (presumably bytes/s — confirm unit)
hbm: 141.0e9, // 141 GB (decimal: 141 * 1e9 bytes)
pcie_gen: 5, // below the `pcie_gen >= 6` threshold checked in `make_config`
};
const A100_80GB: GpuBase = GpuBase {
flops: 3.12e14, // 312 TFLOPS BF16
fp8_flops: 0.0, // A100 has no FP8 tensor cores
@@ -193,7 +207,11 @@ fn make_config(n: u32, base: &GpuBase) -> HardwareConfig {
pcie_latency_us: pcie_lat,
rdma_bw: rdma_base * rdma_scale,
rdma_latency_us: rdma_lat,
intra_node_tp_bw: if base.pcie_gen >= 6 { 1.8e12 * f } else { 9.0e11 * f },
intra_node_tp_bw: if base.pcie_gen >= 6 {
1.8e12 * f
} else {
9.0e11 * f
},
intra_node_tp_latency_us: if base.pcie_gen >= 6 { 1.0 } else { 2.0 },
tp_degree: n,
max_batch_slots: 256,
@@ -227,6 +245,7 @@ mod tests {
assert!(resolve("H100").is_some());
assert!(resolve("8xB200").is_some());
assert!(resolve("8x-B200").is_some());
assert!(resolve("8xH20-141G").is_some());
assert!(resolve("a100-80gb").is_some());
assert!(resolve("A100_80GB").is_some());
assert!(resolve("a100_80gb").is_some());
@@ -258,4 +277,13 @@ mod tests {
assert!((s4.gpu_mem_bw - s1.gpu_mem_bw * 4.0).abs() < 1.0);
assert!((s8.hbm_bytes - s1.hbm_bytes * 8.0).abs() < 1.0);
}
// The 141 GB H20 variant is expected to match the base H20 on compute while
// exposing strictly more HBM capacity and memory bandwidth.
#[test]
fn h20_141g_variant_has_larger_hbm() {
    let base = resolve("8xh20").unwrap();
    let large = resolve("8xh20-141g").unwrap();

    // Compute is modeled identically, so the FLOPS figures agree to within 1.0.
    let flops_gap = (large.gpu_flops - base.gpu_flops).abs();
    assert!(flops_gap < 1.0);

    // Memory capacity and bandwidth must both be strictly larger.
    assert!(base.hbm_bytes < large.hbm_bytes);
    assert!(base.gpu_mem_bw < large.gpu_mem_bw);
}
}