feat: new router and benchmark setup
This commit is contained in:
@@ -7,7 +7,8 @@
|
||||
model:
|
||||
config_json: ../models/GLM-5-NVFP4/config.json
|
||||
name: glm-5-nvfp4
|
||||
compute_dtype: fp4 # FP4 weights → selects FP4 tensor core FLOPS
|
||||
compute_dtype: fp8 # FP8 tensor-core execution
|
||||
weight_dtype: fp4 # NVFP4 weights still set the HBM budget
|
||||
dtype_bytes: 1 # FP8 KV cache
|
||||
block_size_tokens: 512
|
||||
|
||||
|
||||
Reference in New Issue
Block a user