//! Built-in hardware presets for common GPU configurations. //! //! Presets provide baseline specs for single GPUs and tensor-parallel (TP) //! groups. All values can be overridden in the YAML config by specifying //! explicit fields alongside `type`: //! //! ```yaml //! hardware: //! type: 8xb200 //! hbm_bytes: 500.0e9 # override total HBM with actual KV budget //! ``` use crate::config::HardwareConfig; /// All recognized preset names (for help/error messages). pub const AVAILABLE: &[&str] = &[ "h100", "h800", "h20", "h20-141g", "a100-80gb", "a100-40gb", "b200", "b300", "2xh100", "4xh100", "8xh100", "2xh800", "4xh800", "8xh800", "2xh20", "4xh20", "8xh20", "2xh20-141g", "4xh20-141g", "8xh20-141g", "2xb200", "4xb200", "8xb200", "2xb300", "4xb300", "8xb300", ]; /// Resolve a hardware preset by name. /// /// Case-insensitive; hyphens, underscores, and spaces are stripped before /// matching. Accepts `NxGPU` patterns (e.g. `8xb200`). pub fn resolve(name: &str) -> Option { let key = normalize(name); let (count, gpu) = parse_count_gpu(&key); match gpu.as_str() { "h100" => Some(make_config(count, &H100)), "h800" => Some(make_config(count, &H800)), "h20" => Some(make_config(count, &H20)), "h20141g" | "h20141gb" => Some(make_config(count, &H20_141G)), "a10080gb" | "a100" => Some(make_config(count, &A100_80GB)), "a10040gb" => Some(make_config(count, &A100_40GB)), "b200" => Some(make_config(count, &B200)), "b300" => Some(make_config(count, &B300)), _ => None, } } // --------------------------------------------------------------------------- // Internals // --------------------------------------------------------------------------- fn normalize(s: &str) -> String { s.to_ascii_lowercase().replace(['-', '_', ' '], "") } /// Parse `"8xh100"` → `(8, "h100")`, `"h100"` → `(1, "h100")`. fn parse_count_gpu(s: &str) -> (u32, String) { if let Some(pos) = s.find('x') { if let Ok(n) = s[..pos].parse::() { return (n, s[pos + 1..].to_string()); } } (1, s.to_string()) } // -- Per-GPU base specs (single die, BF16 dense) ----------------------------- struct GpuBase { flops: f64, // BF16 dense FLOPS fp8_flops: f64, // FP8 dense FLOPS (0 = not supported) fp4_flops: f64, // FP4 dense FLOPS (0 = not supported) mem_bw: f64, // HBM bandwidth (B/s) hbm: f64, // Total HBM (bytes) pcie_gen: u32, // PCIe generation (4/5/6) } const H100: GpuBase = GpuBase { flops: 9.89e14, // 989 TFLOPS BF16 dense fp8_flops: 1.979e15, // 1979 TFLOPS FP8 dense fp4_flops: 0.0, // not supported mem_bw: 3.35e12, // 3.35 TB/s HBM3 hbm: 80.0e9, // 80 GB pcie_gen: 5, }; const H800: GpuBase = GpuBase { flops: 9.89e14, // same die as H100 fp8_flops: 1.979e15, fp4_flops: 0.0, mem_bw: 3.35e12, // 3.35 TB/s HBM3 hbm: 80.0e9, // 80 GB pcie_gen: 5, }; const H20: GpuBase = GpuBase { flops: 1.48e14, // 148 TFLOPS BF16 (China-export Hopper) fp8_flops: 2.96e14, // 296 TFLOPS FP8 fp4_flops: 0.0, // not supported mem_bw: 4.0e12, // 4.0 TB/s HBM3 hbm: 96.0e9, // 96 GB pcie_gen: 5, }; const H20_141G: GpuBase = GpuBase { flops: 1.48e14, // modeled as the same H20 compute envelope fp8_flops: 2.96e14, // modeled as the same H20 FP8 throughput fp4_flops: 0.0, // not supported mem_bw: 4.8e12, // 141 GB HBM variant hbm: 141.0e9, // 141 GB pcie_gen: 5, }; const A100_80GB: GpuBase = GpuBase { flops: 3.12e14, // 312 TFLOPS BF16 fp8_flops: 0.0, // A100 has no FP8 tensor cores fp4_flops: 0.0, mem_bw: 2.0e12, // 2.0 TB/s HBM2e hbm: 80.0e9, // 80 GB pcie_gen: 4, }; const A100_40GB: GpuBase = GpuBase { flops: 3.12e14, // 312 TFLOPS BF16 fp8_flops: 0.0, fp4_flops: 0.0, mem_bw: 1.555e12, // 1.555 TB/s HBM2e hbm: 40.0e9, // 40 GB pcie_gen: 4, }; // DGX B200 (8 GPU) specs: BF16 18 PFLOPS, FP8 36 PFLOPS, FP4 72 PFLOPS (dense) const B200: GpuBase = GpuBase { flops: 2.25e15, // 2250 TFLOPS BF16 dense fp8_flops: 4.5e15, // 4500 TFLOPS FP8 dense fp4_flops: 9.0e15, // 9000 TFLOPS FP4 dense mem_bw: 8.0e12, // 8.0 TB/s HBM3e hbm: 192.0e9, // 192 GB pcie_gen: 6, }; // DGX B300 (8 GPU) specs: BF16 18 PFLOPS, FP8 ~54 PFLOPS, FP4 108 PFLOPS (dense) const B300: GpuBase = GpuBase { flops: 2.25e15, // 2250 TFLOPS BF16 dense (same GB202 die as B200) fp8_flops: 6.75e15, // 6750 TFLOPS FP8 dense (estimated from FP4/2) fp4_flops: 13.5e15, // 13500 TFLOPS FP4 dense (Blackwell Ultra enhanced) mem_bw: 12.0e12, // 12 TB/s HBM3e 12-Hi hbm: 288.0e9, // 288 GB HBM3e 12-Hi pcie_gen: 6, }; /// Build a [`HardwareConfig`] from a base GPU spec × TP count. /// /// Compute, HBM bandwidth, and HBM capacity scale linearly with `n`. /// PCIe bandwidth scales linearly (one link per GPU). RDMA bandwidth /// assumes one NIC for ≤4 GPUs and two NICs for ≥8. Server DRAM is a /// reasonable default based on typical deployment sizes. fn make_config(n: u32, base: &GpuBase) -> HardwareConfig { let f = n as f64; // PCIe per-GPU bandwidth and latency by generation let (pcie_per_gpu, pcie_lat) = match base.pcie_gen { 6 => (128.0e9, 4.0), // Gen6 x16 5 => (64.0e9, 5.0), // Gen5 x16 _ => (32.0e9, 5.0), // Gen4 x16 }; // RDMA: base NIC speed by PCIe gen, scaled for multi-NIC servers let (rdma_base, rdma_lat) = match base.pcie_gen { 6 => (50.0e9, 6.0), // 400 Gbps NIC _ => (25.0e9, 8.0), // 200 Gbps NIC }; let rdma_scale = if n >= 8 { 2.0 } else { 1.0 }; // Server DRAM: rough defaults by deployment size let dram = match n { 1 => 512.0e9, 2..=4 => 1.0e12, _ => 1.5e12, }; HardwareConfig { gpu_flops: base.flops * f, gpu_fp8_flops: base.fp8_flops * f, gpu_fp4_flops: base.fp4_flops * f, gpu_mem_bw: base.mem_bw * f, hbm_bytes: base.hbm * f, dram_bytes: dram, host_dram_bw: if n >= 8 { 9.0e11 } else { 5.0e11 }, pcie_bw: pcie_per_gpu * f, pcie_latency_us: pcie_lat, rdma_bw: rdma_base * rdma_scale, rdma_latency_us: rdma_lat, intra_node_tp_bw: if base.pcie_gen >= 6 { 1.8e12 * f } else { 9.0e11 * f }, intra_node_tp_latency_us: if base.pcie_gen >= 6 { 1.0 } else { 2.0 }, tp_degree: n, max_batch_slots: 256, prefill_chunk_tokens: if n >= 4 { 4096 } else { 2048 }, } } #[cfg(test)] mod tests { use super::*; #[test] fn resolve_single_gpu() { let hw = resolve("h100").unwrap(); assert!((hw.gpu_flops - 9.89e14).abs() < 1e10); assert!((hw.hbm_bytes - 80e9).abs() < 1e6); assert_eq!(hw.prefill_chunk_tokens, 2048); } #[test] fn resolve_tp_group() { let hw = resolve("8xb200").unwrap(); assert!((hw.gpu_flops - 2.25e15 * 8.0).abs() < 1e11); assert!((hw.hbm_bytes - 192e9 * 8.0).abs() < 1e6); assert!((hw.pcie_bw - 128e9 * 8.0).abs() < 1e6); assert_eq!(hw.prefill_chunk_tokens, 4096); } #[test] fn resolve_case_and_separator_insensitive() { assert!(resolve("H100").is_some()); assert!(resolve("8xB200").is_some()); assert!(resolve("8x-B200").is_some()); assert!(resolve("8xH20-141G").is_some()); assert!(resolve("a100-80gb").is_some()); assert!(resolve("A100_80GB").is_some()); assert!(resolve("a100_80gb").is_some()); } #[test] fn resolve_unknown_returns_none() { assert!(resolve("v100").is_none()); assert!(resolve("tpu-v5").is_none()); assert!(resolve("").is_none()); } #[test] fn a100_variants() { let a80 = resolve("a100-80gb").unwrap(); let a40 = resolve("a100-40gb").unwrap(); assert!((a80.hbm_bytes - 80e9).abs() < 1e6); assert!((a40.hbm_bytes - 40e9).abs() < 1e6); assert!(a80.gpu_mem_bw > a40.gpu_mem_bw); } #[test] fn scaling_is_linear() { let s1 = resolve("h100").unwrap(); let s4 = resolve("4xh100").unwrap(); let s8 = resolve("8xh100").unwrap(); assert!((s4.gpu_flops - s1.gpu_flops * 4.0).abs() < 1.0); assert!((s8.gpu_flops - s1.gpu_flops * 8.0).abs() < 1.0); assert!((s4.gpu_mem_bw - s1.gpu_mem_bw * 4.0).abs() < 1.0); assert!((s8.hbm_bytes - s1.hbm_bytes * 8.0).abs() < 1.0); } #[test] fn h20_141g_variant_has_larger_hbm() { let h20 = resolve("8xh20").unwrap(); let h20_141g = resolve("8xh20-141g").unwrap(); assert!((h20_141g.gpu_flops - h20.gpu_flops).abs() < 1.0); assert!(h20_141g.hbm_bytes > h20.hbm_bytes); assert!(h20_141g.gpu_mem_bw > h20.gpu_mem_bw); } }