# File: kvcache-simulator/configs/glm5-nvfp4-fp8compute-8xb300-128k-plus-n1.yaml
# File-viewer metadata at capture time: 34 lines, 771 B, YAML.

# GLM-5-NVFP4 on 8 x B300 for the 128k++ bucket.
# NVFP4 weights, FP8 compute. Routing is effectively irrelevant at one instance.
# Model section: where the architecture description lives and which numeric
# formats the run uses. Child keys are indented so they belong to `model`
# instead of parsing as unrelated top-level keys.
model:
  # Relative path to the model's config.json.
  # NOTE(review): confirm which directory this is resolved against.
  config_json: ../models/GLM-5-NVFP4/config.json
  name: glm-5-nvfp4
  # FP8 compute over FP4 (NVFP4) weights, as stated in the file header.
  compute_dtype: fp8
  weight_dtype: fp4
  # NOTE(review): presumably the per-element byte width paired with fp8;
  # confirm against the consumer.
  dtype_bytes: 1
  # Same 512 that appears in the trace file name ("blksz_512") in trace_path.
  block_size_tokens: 512
# Hardware section. Child keys are indented so they belong to `hardware`
# instead of parsing as unrelated top-level keys.
hardware:
  type: 8xb300
  # Capacities in bytes. The exponent carries an explicit sign so that
  # YAML 1.1 loaders also resolve these as floats rather than strings;
  # the numeric values are unchanged (1.9e12 and 1.5e12).
  hbm_bytes: 1900.0e+9
  dram_bytes: 1.5e+12
  max_batch_slots: 256
# Cluster section: a single instance, per the header note that routing is
# effectively irrelevant for this configuration.
# NOTE(review): the nesting of `meta_store` and `router` under `cluster` was
# reconstructed from a copy that had lost its indentation; confirm against
# the config loader's schema.
cluster:
  num_instances: 1
  meta_store:
    ttl_seconds: 300.0
  router:
    mode: cache_affinity
    precise_probe_topk: 1
# Simulation section: input trace, output location, sampling and the input
# length filter for this bucket. Child keys are indented so they belong to
# `sim` instead of parsing as unrelated top-level keys.
sim:
  trace_path: bailian-traces/glm_coder_blksz_512_040915-040917.jsonl
  # NOTE(review): null presumably means "no cap on requests"; confirm.
  max_requests: null
  output_dir: runs/glm5_nvfp4_fp8compute_8xb300_ablation_131073_plus_n1
  sample_interval_s: 1.0
  seed: 42
  # 131073 = 128 * 1024 + 1, i.e. the first length above 128k.
  input_length_min: 131073
  # 4294967295 = 2^32 - 1.
  # NOTE(review): presumably used as "no upper bound"; confirm.
  input_length_max: 4294967295