fix: kvcache evict workflow

This commit is contained in:
2026-04-14 15:46:36 +08:00
parent 663ca9c5b9
commit eaf574cd4e
4 changed files with 257 additions and 59 deletions

View File

@@ -14,11 +14,12 @@ model:
hardware:
type: 8xb300
hbm_bytes: 1900.0e9 # KV-cache budget left in HBM after loading the FP4 weights (weights take ~372 GB)
dram_bytes: 1.5e12 # ~1.5 TB usable CPU DRAM per node
cluster:
num_instances: 32
num_instances: 8
meta_store:
ttl_seconds: 120.0
ttl_seconds: 300.0
router:
mode: prefix_affinity
prefix_k: 8