# Qwen2.5-Coder-7B using hardware preset.
#
# Model architecture is specified inline (no config.json needed for simple
# models). Hardware uses preset "h800" with a single override for hbm_bytes.
#
# NOTE(review): meta_store and router are nested under cluster here; that
# nesting was reconstructed from key order -- confirm against the config
# loader's schema.
---
# Inline model architecture and KV block geometry.
model:
  name: qwen2.5-coder-7b
  num_layers: 28
  hidden_size: 3584
  num_attention_heads: 28
  num_kv_heads: 4
  head_dim: 128
  intermediate_size: 18944
  dtype_bytes: 2
  block_size_tokens: 16

# Hardware preset plus per-field overrides.
hardware:
  type: h800  # single H800 SXM (80GB)
  # Exponent carries an explicit sign so that YAML 1.1 loaders (e.g. PyYAML)
  # resolve the value as a float instead of the string "60.0e9".
  hbm_bytes: 60.0e+9  # KV budget after 7B model weights

cluster:
  num_instances: 16
  meta_store:
    ttl_seconds: 60.0
  router:
    mode: ttl_aware
    precise_probe_latency_us: 50.0
    precise_probe_topk: 4
    load_alpha: 1.0

sim:
  trace_path: qwen-bailian-usagetraces-anon/qwen_coder_blksz_16.jsonl
  # presumably null means "no cap on replayed requests" -- verify with loader
  max_requests: null
  output_dir: runs/qwen7b_preset
  sample_interval_s: 1.0
  seed: 42