# kvcache-simulator/configs/qwen3-coder-480b-8xh20.yaml

# Qwen3-Coder-480B-A35B (MoE, GQA) on 8 x H20 (96GB each).
# Architecture auto-loaded from HuggingFace config.json.

# Model under simulation.
model:
  name: qwen3-coder-480b
  # HuggingFace config.json of the FP8 checkpoint.
  config_json: ../models/Qwen3-Coder-480B-A35B-Instruct-FP8/config.json
  # 1 byte per element: FP8 inference.
  dtype_bytes: 1
  # NOTE(review): the trace named in sim.trace_path carries "blksz_512";
  # presumably the two block sizes must agree -- confirm.
  block_size_tokens: 512

# Hardware description for the simulated 8 x H20 setup.
hardware:
  type: 8xh20
  # Byte sizes are written with an explicit exponent sign (e+9, not e9).
  # YAML 1.1 resolvers such as PyYAML only recognise a float when the
  # exponent is signed; the unsigned form is loaded as a string there.
  # YAML 1.2 parsers read both forms as the same float.
  #
  # KV budget after FP8 weights on 8x96GB.
  # NOTE(review): 8 x 96 GB = 768 GB; ~480B parameters at 1 byte each
  # would leave roughly 288 GB -- confirm how the 400 GB figure was derived.
  hbm_bytes: 400.0e+9
  # ~1.0 TB usable CPU DRAM per node.
  dram_bytes: 1.0e+12

# Cluster size, request router, and metadata store settings.
cluster:
  num_instances: 128
  router:
    # NOTE(review): the meaning of "min_pd" is defined by the simulator,
    # not by this file -- see the router implementation.
    mode: min_pd
    load_alpha: 1.0
    precise_probe_topk: 4
    # Presumably microseconds, per the _us suffix -- confirm.
    precise_probe_latency_us: 50.0
  meta_store:
    # Presumably seconds, per the _seconds suffix -- confirm.
    ttl_seconds: 300.0

# Simulation run: input trace, output location, sampling, and seed.
sim:
  seed: 42
  # Input trace (.jsonl file).
  trace_path: bailian-traces/qwen3_coder_blksz_512_040915-040917.jsonl
  # null: no request cap is configured here.
  # NOTE(review): presumably this replays the whole trace -- confirm.
  max_requests: null
  output_dir: runs/qwen3_coder_8xh20
  # Presumably seconds, per the _s suffix -- confirm.
  sample_interval_s: 1.0