chore: update ablation and clean configs
This commit is contained in:
@@ -5,16 +5,17 @@ model:
|
||||
config_json: ../models/Qwen3-Coder-480B-A35B-Instruct-FP8/config.json
|
||||
name: qwen3-coder-480b
|
||||
dtype_bytes: 1 # FP8 inference
|
||||
-block_size_tokens: 16
+block_size_tokens: 512
|
||||
|
||||
hardware:
|
||||
type: 8xh20
|
||||
hbm_bytes: 400.0e9 # KV budget after FP8 weights on 8x96GB
|
||||
dram_bytes: 1.0e12 # ~1.0 TB usable CPU DRAM per node
|
||||
|
||||
cluster:
|
||||
-num_instances: 32
+num_instances: 128
|
||||
meta_store:
|
||||
-ttl_seconds: 120.0
+ttl_seconds: 300.0
|
||||
router:
|
||||
mode: min_pd
|
||||
precise_probe_latency_us: 50.0
|
||||
@@ -22,7 +23,7 @@ cluster:
|
||||
load_alpha: 1.0
|
||||
|
||||
sim:
|
||||
-trace_path: traces/qwen_coder_blksz_16.jsonl
+trace_path: bailian-traces/qwen3_coder_blksz_512_040915-040917.jsonl
|
||||
max_requests: null
|
||||
output_dir: runs/qwen3_coder_8xh20
|
||||
sample_interval_s: 1.0
|
||||
|
||||
Reference in New Issue
Block a user