feat: update ttft modeling and add cache affinity
This commit is contained in:
@@ -188,10 +188,14 @@ fn make_config(n: u32, base: &GpuBase) -> HardwareConfig {
|
||||
gpu_mem_bw: base.mem_bw * f,
|
||||
hbm_bytes: base.hbm * f,
|
||||
dram_bytes: dram,
|
||||
host_dram_bw: if n >= 8 { 9.0e11 } else { 5.0e11 },
|
||||
pcie_bw: pcie_per_gpu * f,
|
||||
pcie_latency_us: pcie_lat,
|
||||
rdma_bw: rdma_base * rdma_scale,
|
||||
rdma_latency_us: rdma_lat,
|
||||
intra_node_tp_bw: if base.pcie_gen >= 6 { 1.8e12 * f } else { 9.0e11 * f },
|
||||
intra_node_tp_latency_us: if base.pcie_gen >= 6 { 1.0 } else { 2.0 },
|
||||
tp_degree: n,
|
||||
max_batch_slots: 256,
|
||||
prefill_chunk_tokens: if n >= 4 { 4096 } else { 2048 },
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user