feat: new router and benchmark setup

This commit is contained in:
2026-04-16 14:23:53 +08:00
parent c86d931d8f
commit 996511f300
35 changed files with 1480 additions and 76 deletions

View File

@@ -7,7 +7,8 @@
# Model settings. These lines come from a diff hunk whose +/- markers and
# original indentation are not preserved in this view.
model:
# Relative path to the model's config.json.
config_json: ../models/GLM-5-NVFP4/config.json
name: glm-5-nvfp4
# NOTE(review): compute_dtype appears twice below. The hunk header
# (7 old lines -> 8 new lines) suggests the fp4 line is the removed side
# and the fp8 line, together with weight_dtype, is the added side; confirm
# against the real file. If both lines were kept in one YAML mapping this
# would be a duplicate key, and most parsers silently keep the last value.
compute_dtype: fp4 # FP4 weights → selects FP4 tensor core FLOPS
compute_dtype: fp8 # FP8 tensor-core execution
weight_dtype: fp4 # NVFP4 weights still set the HBM budget
# Presumably bytes per KV-cache element (1 byte, matching the FP8 inline
# note); verify against the consumer of this config.
dtype_bytes: 1 # FP8 KV cache
block_size_tokens: 512