feat: new router and benchmark setup
This commit is contained in:
@@ -17,6 +17,7 @@ pub const AVAILABLE: &[&str] = &[
|
||||
"h100",
|
||||
"h800",
|
||||
"h20",
|
||||
"h20-141g",
|
||||
"a100-80gb",
|
||||
"a100-40gb",
|
||||
"b200",
|
||||
@@ -30,6 +31,9 @@ pub const AVAILABLE: &[&str] = &[
|
||||
"2xh20",
|
||||
"4xh20",
|
||||
"8xh20",
|
||||
"2xh20-141g",
|
||||
"4xh20-141g",
|
||||
"8xh20-141g",
|
||||
"2xb200",
|
||||
"4xb200",
|
||||
"8xb200",
|
||||
@@ -49,6 +53,7 @@ pub fn resolve(name: &str) -> Option<HardwareConfig> {
|
||||
"h100" => Some(make_config(count, &H100)),
|
||||
"h800" => Some(make_config(count, &H800)),
|
||||
"h20" => Some(make_config(count, &H20)),
|
||||
"h20141g" | "h20141gb" => Some(make_config(count, &H20_141G)),
|
||||
"a10080gb" | "a100" => Some(make_config(count, &A100_80GB)),
|
||||
"a10040gb" => Some(make_config(count, &A100_40GB)),
|
||||
"b200" => Some(make_config(count, &B200)),
|
||||
@@ -113,6 +118,15 @@ const H20: GpuBase = GpuBase {
|
||||
pcie_gen: 5,
|
||||
};
|
||||
|
||||
/// Base per-GPU spec for the 141 GB HBM variant of the H20.
///
/// `resolve` maps the normalized keys "h20141g" / "h20141gb" to this constant.
/// Compute figures are deliberately copied from the base H20 model; only the
/// memory-related fields are meant to differ.
const H20_141G: GpuBase = GpuBase {
|
||||
flops: 1.48e14, // 1.48e14 FLOP/s (148 TFLOPS); modeled as the same H20 compute envelope
|
||||
fp8_flops: 2.96e14, // 2.96e14 FLOP/s (296 TFLOPS); modeled as the same H20 FP8 throughput
|
||||
fp4_flops: 0.0, // FP4 not supported; 0.0 marks the capability as absent
|
||||
mem_bw: 4.8e12, // memory bandwidth of the 141 GB variant; presumably bytes/s (4.8 TB/s) — TODO confirm units
|
||||
hbm: 141.0e9, // 141 GB of HBM (decimal gigabytes, 141.0e9)
|
||||
pcie_gen: 5, // below 6, so make_config takes the lower (9.0e11 * f) intra-node TP bandwidth branch
|
||||
};
|
||||
|
||||
const A100_80GB: GpuBase = GpuBase {
|
||||
flops: 3.12e14, // 312 TFLOPS BF16
|
||||
fp8_flops: 0.0, // A100 has no FP8 tensor cores
|
||||
@@ -193,7 +207,11 @@ fn make_config(n: u32, base: &GpuBase) -> HardwareConfig {
|
||||
pcie_latency_us: pcie_lat,
|
||||
rdma_bw: rdma_base * rdma_scale,
|
||||
rdma_latency_us: rdma_lat,
|
||||
intra_node_tp_bw: if base.pcie_gen >= 6 { 1.8e12 * f } else { 9.0e11 * f },
|
||||
intra_node_tp_bw: if base.pcie_gen >= 6 {
|
||||
1.8e12 * f
|
||||
} else {
|
||||
9.0e11 * f
|
||||
},
|
||||
intra_node_tp_latency_us: if base.pcie_gen >= 6 { 1.0 } else { 2.0 },
|
||||
tp_degree: n,
|
||||
max_batch_slots: 256,
|
||||
@@ -227,6 +245,7 @@ mod tests {
|
||||
assert!(resolve("H100").is_some());
|
||||
assert!(resolve("8xB200").is_some());
|
||||
assert!(resolve("8x-B200").is_some());
|
||||
assert!(resolve("8xH20-141G").is_some());
|
||||
assert!(resolve("a100-80gb").is_some());
|
||||
assert!(resolve("A100_80GB").is_some());
|
||||
assert!(resolve("a100_80gb").is_some());
|
||||
@@ -258,4 +277,13 @@ mod tests {
|
||||
assert!((s4.gpu_mem_bw - s1.gpu_mem_bw * 4.0).abs() < 1.0);
|
||||
assert!((s8.hbm_bytes - s1.hbm_bytes * 8.0).abs() < 1.0);
|
||||
}
|
||||
|
||||
// Checks the 8-GPU "h20-141g" configuration against the 8-GPU base "h20":
// compute must match (within an absolute tolerance of 1.0), while HBM
// capacity and memory bandwidth must both be strictly larger.
#[test]
|
||||
fn h20_141g_variant_has_larger_hbm() {
|
||||
let h20 = resolve("8xh20").unwrap();
|
||||
let h20_141g = resolve("8xh20-141g").unwrap();
|
||||
assert!((h20_141g.gpu_flops - h20.gpu_flops).abs() < 1.0); // same compute as the base H20
|
||||
assert!(h20_141g.hbm_bytes > h20.hbm_bytes); // more HBM capacity
|
||||
assert!(h20_141g.gpu_mem_bw > h20.gpu_mem_bw); // more memory bandwidth
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user