diff --git a/configs/examples/dash0_llm_10min_study_run1f.json b/configs/examples/dash0_llm_10min_study_run1f.json new file mode 100644 index 0000000..014b257 --- /dev/null +++ b/configs/examples/dash0_llm_10min_study_run1f.json @@ -0,0 +1,103 @@ +{ + "study_id": "dash0-qwen30b-chat-10min-llm-run1f", + "hardware": { + "gpu_count": 4, + "gpu_model": "H20", + "host_candidates": [ + "dash0" + ] + }, + "model": { + "model_id": "qwen3-30b-a3b", + "served_model_name": "qwen3-30b-smoke" + }, + "engine": { + "engine_name": "vllm", + "engine_version": "0.13.0rc2.dev2111+gb44b43f43.d20260309", + "exec_path": "/usr/local/bin/vllm", + "cwd": "/home/admin/cpfs/wjh/aituner/aituner", + "host": "127.0.0.1", + "port": 18081, + "healthcheck_path": "/v1/models", + "ready_timeout_s": 900, + "request_timeout_s": 900, + "launch_args": [ + "serve", + "/home/admin/resource/model/464482ce.qwen3-30b-a3b/1m-instruct-0726-fp4" + ], + "base_envs": { + "CUDA_VISIBLE_DEVICES": "4,5,6,7", + "VLLM_FP8_USE_BLADNN": "1", + "VLLM_MOE_USE_BLADNN": "1" + }, + "base_flags": { + "host": "127.0.0.1", + "port": 18081, + "served-model-name": "qwen3-30b-smoke", + "max-model-len": 65536, + "disable-log-requests": true, + "trust-remote-code": true + }, + "tunable_envs": [ + "VLLM_ATTENTION_BACKEND" + ], + "tunable_flags": [ + "tensor-parallel-size", + "max-num-seqs", + "max-num-batched-tokens", + "gpu-memory-utilization", + "block-size" + ], + "python_executable": "python3" + }, + "trace": { + "windows_path": "/home/admin/cpfs/wjh/aituner/aituner/trace_windows/windows.json", + "window_id": "chat_w20260311_1000", + "u_field": "sampling_u", + "timestamp_field": "timestamp", + "max_concurrency": 64, + "replay_time_scale": 1.0, + "early_stop_max_lag_s": 120.0, + "early_stop_max_elapsed_s": 900.0 + }, + "slo": { + "target_pass_rate": 0.95, + "ttft_rule": { + "kind": "step_ms", + "buckets": [ + { + "max_input_tokens": 4096, + "threshold_ms": 15000 + }, + { + "max_input_tokens": 16384, + "threshold_ms": 30000 + }, + { + "threshold_ms": 45000 + } + ] + }, + "tpot_rule": { + "kind": "fixed_ms", + "threshold_ms": 1500 + } + }, + "search": { + "low": 0.0, + "high": 1.0, + "tolerance": 0.1, + "max_probes": 4, + "sample_seed": 20260325 + }, + "llm": { + "system_prompt": "Propose a single engine config patch that increases the maximum feasible sampling_u under the SLO target.", + "max_history_trials": 8, + "endpoint": { + "base_url": "http://tianx.ipads-lab.se.sjtu.edu.cn:8317/v1", + "model": "gpt-5.4", + "api_key_env": "OPENAI_API_KEY", + "timeout_s": 180 + } + } +} diff --git a/src/aituner/llm.py b/src/aituner/llm.py index 7664092..ce44fad 100644 --- a/src/aituner/llm.py +++ b/src/aituner/llm.py @@ -31,7 +31,9 @@ def build_prompt( "You are tuning an OpenAI-compatible serving engine.", "Return exactly one JSON object with keys: observation, diagnosis, config_patch, expected_effects, why_not_previous_failures.", "config_patch must contain env_patch and flag_patch.", + "expected_effects must be a JSON array of short strings, not an object.", "Only use allowed tunable env keys and allowed tunable flag keys.", + "Do not wrap the JSON in markdown fences or any extra text.", "", "Study stack:", json.dumps( diff --git a/src/aituner/spec.py b/src/aituner/spec.py index fc51b8b..d7b2a5e 100644 --- a/src/aituner/spec.py +++ b/src/aituner/spec.py @@ -398,6 +398,17 @@ class Proposal: expected_effects = data.get("expected_effects") if isinstance(expected_effects, str): expected_effects_value = [expected_effects.strip()] if expected_effects.strip() else [] + elif isinstance(expected_effects, Mapping): + expected_effects_value = [] + for key, value in expected_effects.items(): + key_text = str(key).strip() + value_text = str(value).strip() + if key_text and value_text: + expected_effects_value.append(f"{key_text}: {value_text}") + elif key_text: + expected_effects_value.append(key_text) + elif value_text: + expected_effects_value.append(value_text) else: expected_effects_value = _coerce_str_list( expected_effects, context="proposal.expected_effects" diff --git a/tests/test_core_flow.py b/tests/test_core_flow.py index ba182be..6bd50e1 100644 --- a/tests/test_core_flow.py +++ b/tests/test_core_flow.py @@ -682,6 +682,23 @@ class CoreFlowTests(unittest.TestCase): ) self.assertEqual(proposal.expected_effects, ["higher throughput"]) + def test_proposal_expected_effects_accepts_object(self) -> None: + proposal = Proposal.from_dict( + { + "observation": "obs", + "diagnosis": "diag", + "config_patch": {"env_patch": {}, "flag_patch": {}}, + "expected_effects": { + "throughput": "higher", + "ttft": "lower", + }, + } + ) + self.assertEqual( + proposal.expected_effects, + ["throughput: higher", "ttft: lower"], + ) + def test_replay_requests_early_stops_when_slo_is_unrecoverable(self) -> None: requests = [ TraceRequest(