Files
replaysim/configs/rs3_tiny_sweep.json

52 lines
1.4 KiB
JSON

{
"suite_id": "rs3_tiny_smoke",
"sim": "frontier_patched",
"frontier": {
"root": "/tmp/replayserve-frontier-rs1b",
"mode": "patched_scratch",
"patch": "patches/frontier-vllm-v1-prefix-cache-chunked-prefill.patch"
},
"fixtures": [
"coder_100"
],
"defaults": {
"simulation_mode": "online",
"sys_arch": "co-location",
"cluster_scheduler": "sticky_round_robin",
"replica_scheduler": "vllm_v1",
"model_name": "Qwen/Qwen3-32B",
"device": "a800",
"network_device": "a800_dgx",
"attn_tensor_parallel_size": 2,
"attn_data_parallel_size": 1,
"moe_tensor_parallel_size": 1,
"moe_expert_parallel_size": 1,
"num_pipeline_stages": 1,
"num_replicas": 1,
"batch_size_cap": 128,
"max_tokens_in_batch": 32768,
"block_size": 16,
"enable_prefix_caching": true,
"enable_chunked_prefill": true,
"long_prefill_token_threshold": 64,
"trace_max_tokens": 32768,
"num_blocks_mode": "memory_planner",
"gpu_memory_utilization": 0.9,
"non_kv_cache_overhead_bytes": 0,
"dummy_execution_time_ms": 1.0
},
"configs": [
{
"id": "fixed_prefix_on",
"description": "RS1 fixed config on patched Frontier scratch."
},
{
"id": "prefix_cache_off",
"description": "Diagnosis/control config with prefix cache disabled; all other fixed scheduler knobs unchanged.",
"overrides": {
"enable_prefix_caching": false
}
}
]
}