Tighten LLM proposal schema

This commit is contained in:
2026-04-04 23:24:32 +08:00
parent 00778eff42
commit 8b024c72f1
4 changed files with 133 additions and 0 deletions

View File

@@ -0,0 +1,103 @@
{
"study_id": "dash0-qwen30b-chat-10min-llm-run1f",
"hardware": {
"gpu_count": 4,
"gpu_model": "H20",
"host_candidates": [
"dash0"
]
},
"model": {
"model_id": "qwen3-30b-a3b",
"served_model_name": "qwen3-30b-smoke"
},
"engine": {
"engine_name": "vllm",
"engine_version": "0.13.0rc2.dev2111+gb44b43f43.d20260309",
"exec_path": "/usr/local/bin/vllm",
"cwd": "/home/admin/cpfs/wjh/aituner/aituner",
"host": "127.0.0.1",
"port": 18081,
"healthcheck_path": "/v1/models",
"ready_timeout_s": 900,
"request_timeout_s": 900,
"launch_args": [
"serve",
"/home/admin/resource/model/464482ce.qwen3-30b-a3b/1m-instruct-0726-fp4"
],
"base_envs": {
"CUDA_VISIBLE_DEVICES": "4,5,6,7",
"VLLM_FP8_USE_BLADNN": "1",
"VLLM_MOE_USE_BLADNN": "1"
},
"base_flags": {
"host": "127.0.0.1",
"port": 18081,
"served-model-name": "qwen3-30b-smoke",
"max-model-len": 65536,
"disable-log-requests": true,
"trust-remote-code": true
},
"tunable_envs": [
"VLLM_ATTENTION_BACKEND"
],
"tunable_flags": [
"tensor-parallel-size",
"max-num-seqs",
"max-num-batched-tokens",
"gpu-memory-utilization",
"block-size"
],
"python_executable": "python3"
},
"trace": {
"windows_path": "/home/admin/cpfs/wjh/aituner/aituner/trace_windows/windows.json",
"window_id": "chat_w20260311_1000",
"u_field": "sampling_u",
"timestamp_field": "timestamp",
"max_concurrency": 64,
"replay_time_scale": 1.0,
"early_stop_max_lag_s": 120.0,
"early_stop_max_elapsed_s": 900.0
},
"slo": {
"target_pass_rate": 0.95,
"ttft_rule": {
"kind": "step_ms",
"buckets": [
{
"max_input_tokens": 4096,
"threshold_ms": 15000
},
{
"max_input_tokens": 16384,
"threshold_ms": 30000
},
{
"threshold_ms": 45000
}
]
},
"tpot_rule": {
"kind": "fixed_ms",
"threshold_ms": 1500
}
},
"search": {
"low": 0.0,
"high": 1.0,
"tolerance": 0.1,
"max_probes": 4,
"sample_seed": 20260325
},
"llm": {
"system_prompt": "Propose a single engine config patch that increases the maximum feasible sampling_u under the SLO target.",
"max_history_trials": 8,
"endpoint": {
"base_url": "http://tianx.ipads-lab.se.sjtu.edu.cn:8317/v1",
"model": "gpt-5.4",
"api_key_env": "OPENAI_API_KEY",
"timeout_s": 180
}
}
}

View File

@@ -31,7 +31,9 @@ def build_prompt(
"You are tuning an OpenAI-compatible serving engine.", "You are tuning an OpenAI-compatible serving engine.",
"Return exactly one JSON object with keys: observation, diagnosis, config_patch, expected_effects, why_not_previous_failures.", "Return exactly one JSON object with keys: observation, diagnosis, config_patch, expected_effects, why_not_previous_failures.",
"config_patch must contain env_patch and flag_patch.", "config_patch must contain env_patch and flag_patch.",
"expected_effects must be a JSON array of short strings, not an object.",
"Only use allowed tunable env keys and allowed tunable flag keys.", "Only use allowed tunable env keys and allowed tunable flag keys.",
"Do not wrap the JSON in markdown fences or any extra text.",
"", "",
"Study stack:", "Study stack:",
json.dumps( json.dumps(

View File

@@ -398,6 +398,17 @@ class Proposal:
expected_effects = data.get("expected_effects") expected_effects = data.get("expected_effects")
if isinstance(expected_effects, str): if isinstance(expected_effects, str):
expected_effects_value = [expected_effects.strip()] if expected_effects.strip() else [] expected_effects_value = [expected_effects.strip()] if expected_effects.strip() else []
elif isinstance(expected_effects, Mapping):
expected_effects_value = []
for key, value in expected_effects.items():
key_text = str(key).strip()
value_text = str(value).strip()
if key_text and value_text:
expected_effects_value.append(f"{key_text}: {value_text}")
elif key_text:
expected_effects_value.append(key_text)
elif value_text:
expected_effects_value.append(value_text)
else: else:
expected_effects_value = _coerce_str_list( expected_effects_value = _coerce_str_list(
expected_effects, context="proposal.expected_effects" expected_effects, context="proposal.expected_effects"

View File

@@ -682,6 +682,23 @@ class CoreFlowTests(unittest.TestCase):
) )
self.assertEqual(proposal.expected_effects, ["higher throughput"]) self.assertEqual(proposal.expected_effects, ["higher throughput"])
def test_proposal_expected_effects_accepts_object(self) -> None:
    """A mapping-valued expected_effects is flattened to 'key: value' strings.

    The LLM sometimes returns expected_effects as a JSON object rather than
    the required array; from_dict must coerce each entry to "<key>: <value>",
    preserving the object's insertion order.
    """
    payload = {
        "observation": "obs",
        "diagnosis": "diag",
        "config_patch": {"env_patch": {}, "flag_patch": {}},
        "expected_effects": {
            "throughput": "higher",
            "ttft": "lower",
        },
    }
    proposal = Proposal.from_dict(payload)
    # dict insertion order is guaranteed, so the flattened list order is stable.
    expected = ["throughput: higher", "ttft: lower"]
    self.assertEqual(proposal.expected_effects, expected)
def test_replay_requests_early_stops_when_slo_is_unrecoverable(self) -> None: def test_replay_requests_early_stops_when_slo_is_unrecoverable(self) -> None:
requests = [ requests = [
TraceRequest( TraceRequest(