From e9b5e9b9575553340a687864532bbc8fe54dc0a4 Mon Sep 17 00:00:00 2001 From: Gahow Wang Date: Sun, 5 Apr 2026 02:08:27 +0800 Subject: [PATCH] Add targeted low-threshold probe specs --- .../dash0_manual_trial2_probe_0015625.json | 97 +++++++++++++++++++ .../dash0_manual_trial2_probe_003125.json | 97 +++++++++++++++++++ 2 files changed, 194 insertions(+) create mode 100644 configs/examples/dash0_manual_trial2_probe_0015625.json create mode 100644 configs/examples/dash0_manual_trial2_probe_003125.json diff --git a/configs/examples/dash0_manual_trial2_probe_0015625.json b/configs/examples/dash0_manual_trial2_probe_0015625.json new file mode 100644 index 0000000..77426da --- /dev/null +++ b/configs/examples/dash0_manual_trial2_probe_0015625.json @@ -0,0 +1,97 @@ +{ + "study_id": "dash0-qwen30b-chat-10min-manual-trial2-probe-0015625", + "hardware": { + "gpu_count": 4, + "gpu_model": "H20", + "host_candidates": [ + "dash0" + ] + }, + "model": { + "model_id": "qwen3-30b-a3b", + "served_model_name": "qwen3-30b-smoke" + }, + "engine": { + "engine_name": "vllm", + "engine_version": "0.13.0rc2.dev2111+gb44b43f43.d20260309", + "exec_path": "/usr/local/bin/vllm", + "cwd": "/home/admin/cpfs/wjh/aituner/aituner", + "host": "127.0.0.1", + "port": 18081, + "healthcheck_path": "/v1/models", + "ready_timeout_s": 900, + "request_timeout_s": 900, + "launch_args": [ + "serve", + "/home/admin/resource/model/464482ce.qwen3-30b-a3b/1m-instruct-0726-fp4" + ], + "base_envs": { + "CUDA_VISIBLE_DEVICES": "4,5,6,7", + "VLLM_FP8_USE_BLADNN": "1", + "VLLM_MOE_USE_BLADNN": "1" + }, + "base_flags": { + "host": "127.0.0.1", + "port": 18081, + "served-model-name": "qwen3-30b-smoke", + "max-model-len": 65536, + "disable-log-requests": true, + "trust-remote-code": true + }, + "tunable_envs": [ + "VLLM_ATTENTION_BACKEND" + ], + "tunable_flags": [ + "tensor-parallel-size", + "max-num-seqs", + "max-num-batched-tokens", + "gpu-memory-utilization", + "block-size" + ], + "python_executable": "python3" + }, + "trace": { + "windows_path": "/home/admin/cpfs/wjh/aituner/aituner/trace_windows/windows.json", + "window_id": "chat_w20260311_1000", + "u_field": "sampling_u", + "timestamp_field": "timestamp", + "max_concurrency": 64, + "replay_time_scale": 1.0, + "early_stop_max_lag_s": 120.0, + "early_stop_max_elapsed_s": 900.0 + }, + "slo": { + "target_pass_rate": 0.95, + "ttft_rule": { + "kind": "step_ms", + "buckets": [ + { + "max_input_tokens": 4096, + "threshold_ms": 15000 + }, + { + "max_input_tokens": 16384, + "threshold_ms": 30000 + }, + { + "threshold_ms": 45000 + } + ] + }, + "tpot_rule": { + "kind": "fixed_ms", + "threshold_ms": 1500 + } + }, + "search": { + "low": 0.0, + "high": 0.03125, + "tolerance": 0.0, + "max_probes": 1, + "sample_seed": 20260325 + }, + "llm": { + "system_prompt": "Propose a single engine config patch that increases the maximum feasible sampling_u under the SLO target.", + "max_history_trials": 8 + } +} diff --git a/configs/examples/dash0_manual_trial2_probe_003125.json b/configs/examples/dash0_manual_trial2_probe_003125.json new file mode 100644 index 0000000..c328f79 --- /dev/null +++ b/configs/examples/dash0_manual_trial2_probe_003125.json @@ -0,0 +1,97 @@ +{ + "study_id": "dash0-qwen30b-chat-10min-manual-trial2-probe-003125", + "hardware": { + "gpu_count": 4, + "gpu_model": "H20", + "host_candidates": [ + "dash0" + ] + }, + "model": { + "model_id": "qwen3-30b-a3b", + "served_model_name": "qwen3-30b-smoke" + }, + "engine": { + "engine_name": "vllm", + "engine_version": "0.13.0rc2.dev2111+gb44b43f43.d20260309", + "exec_path": "/usr/local/bin/vllm", + "cwd": "/home/admin/cpfs/wjh/aituner/aituner", + "host": "127.0.0.1", + "port": 18081, + "healthcheck_path": "/v1/models", + "ready_timeout_s": 900, + "request_timeout_s": 900, + "launch_args": [ + "serve", + "/home/admin/resource/model/464482ce.qwen3-30b-a3b/1m-instruct-0726-fp4" + ], + "base_envs": { + "CUDA_VISIBLE_DEVICES": "4,5,6,7", + "VLLM_FP8_USE_BLADNN": "1", + "VLLM_MOE_USE_BLADNN": "1" + }, + "base_flags": { + "host": "127.0.0.1", + "port": 18081, + "served-model-name": "qwen3-30b-smoke", + "max-model-len": 65536, + "disable-log-requests": true, + "trust-remote-code": true + }, + "tunable_envs": [ + "VLLM_ATTENTION_BACKEND" + ], + "tunable_flags": [ + "tensor-parallel-size", + "max-num-seqs", + "max-num-batched-tokens", + "gpu-memory-utilization", + "block-size" + ], + "python_executable": "python3" + }, + "trace": { + "windows_path": "/home/admin/cpfs/wjh/aituner/aituner/trace_windows/windows.json", + "window_id": "chat_w20260311_1000", + "u_field": "sampling_u", + "timestamp_field": "timestamp", + "max_concurrency": 64, + "replay_time_scale": 1.0, + "early_stop_max_lag_s": 120.0, + "early_stop_max_elapsed_s": 900.0 + }, + "slo": { + "target_pass_rate": 0.95, + "ttft_rule": { + "kind": "step_ms", + "buckets": [ + { + "max_input_tokens": 4096, + "threshold_ms": 15000 + }, + { + "max_input_tokens": 16384, + "threshold_ms": 30000 + }, + { + "threshold_ms": 45000 + } + ] + }, + "tpot_rule": { + "kind": "fixed_ms", + "threshold_ms": 1500 + } + }, + "search": { + "low": 0.0, + "high": 0.0625, + "tolerance": 0.0, + "max_probes": 1, + "sample_seed": 20260325 + }, + "llm": { + "system_prompt": "Propose a single engine config patch that increases the maximum feasible sampling_u under the SLO target.", + "max_history_trials": 8 + } +}