Files
kvcache-simulator/configs/qwen3-coder-480b-8xh20.yaml

31 lines
804 B
YAML

# Qwen3-Coder-480B-A35B (MoE, GQA) on 8 x H20 (96GB each).
# Architecture auto-loaded from HuggingFace config.json.
# Model under simulation. Per the file header, the architecture is read from
# the HuggingFace config.json referenced below rather than spelled out inline.
model:
  # NOTE(review): relative path; confirm which directory the simulator
  # resolves it against.
  config_json: ../models/Qwen3-Coder-480B-A35B-Instruct-FP8/config.json
  name: qwen3-coder-480b
  dtype_bytes: 1  # FP8 inference
  # The trace named under sim.trace_path carries "blksz_512" in its file name;
  # presumably the two values must agree - confirm before changing either.
  block_size_tokens: 512
# Memory capacities for the 8 x H20 node described in the file header.
hardware:
  type: 8xh20
  # Exponents carry an explicit sign so that YAML 1.1 loaders such as PyYAML
  # resolve these values as floats; without the sign those loaders return
  # strings. The numeric values are unchanged (4.0e11 and 1.0e12).
  hbm_bytes: 400.0e+9  # KV budget after FP8 weights on 8x96GB
  dram_bytes: 1.0e+12  # ~1.0 TB usable CPU DRAM per node
# Cluster size.
# NOTE(review): presumably each instance is one 8 x H20 node as described
# under `hardware` - confirm against the simulator.
cluster:
  num_instances: 128
# Metadata store settings.
# NOTE(review): written here as its own top-level section; confirm against the
# simulator's config schema that it is not meant to nest under `cluster`.
meta_store:
  # Time-to-live in seconds; which entries expire is not visible in this file.
  ttl_seconds: 300.0
# Request router settings.
# NOTE(review): written here as its own top-level section; confirm against the
# simulator's config schema that it is not meant to nest under `cluster`.
router:
  # Routing policy name; its meaning is defined by the simulator, not here.
  mode: min_pd
  precise_probe_latency_us: 50.0  # microseconds, per the key suffix
  precise_probe_topk: 4
  # NOTE(review): presumably a weight on the load term of the routing score -
  # confirm in the router implementation.
  load_alpha: 1.0
# Simulation run settings: input trace, output location, sampling and seed.
sim:
  # NOTE(review): relative path; confirm which directory the simulator
  # resolves it against.
  trace_path: bailian-traces/qwen3_coder_blksz_512_040915-040917.jsonl
  max_requests: null  # presumably "no cap on replayed requests" - confirm
  output_dir: runs/qwen3_coder_8xh20
  sample_interval_s: 1.0  # seconds, per the key suffix
  seed: 42