feat: update ttft modeling and add cache affinity

This commit is contained in:
2026-04-15 19:08:10 +08:00
parent ff316c6873
commit a3f386c858
15 changed files with 1276 additions and 99 deletions

View File

@@ -188,10 +188,14 @@ fn make_config(n: u32, base: &GpuBase) -> HardwareConfig {
gpu_mem_bw: base.mem_bw * f,
hbm_bytes: base.hbm * f,
dram_bytes: dram,
host_dram_bw: if n >= 8 { 9.0e11 } else { 5.0e11 },
pcie_bw: pcie_per_gpu * f,
pcie_latency_us: pcie_lat,
rdma_bw: rdma_base * rdma_scale,
rdma_latency_us: rdma_lat,
intra_node_tp_bw: if base.pcie_gen >= 6 { 1.8e12 * f } else { 9.0e11 * f },
intra_node_tp_latency_us: if base.pcie_gen >= 6 { 1.0 } else { 2.0 },
tp_degree: n,
max_batch_slots: 256,
prefill_chunk_tokens: if n >= 4 { 4096 } else { 2048 },
}