Tighten LLM proposal schema

2026-04-04 23:24:32 +08:00
parent 00778eff42
commit 8b024c72f1
4 changed files with 133 additions and 0 deletions
--- a/configs/examples/dash0_llm_10min_study_run1f.json
+++ b/configs/examples/dash0_llm_10min_study_run1f.json
@@ -0,0 +1,103 @@
+{
+  "study_id": "dash0-qwen30b-chat-10min-llm-run1f",
+  "hardware": {
+    "gpu_count": 4,
+    "gpu_model": "H20",
+    "host_candidates": [
+      "dash0"
+    ]
+  },
+  "model": {
+    "model_id": "qwen3-30b-a3b",
+    "served_model_name": "qwen3-30b-smoke"
+  },
+  "engine": {
+    "engine_name": "vllm",
+    "engine_version": "0.13.0rc2.dev2111+gb44b43f43.d20260309",
+    "exec_path": "/usr/local/bin/vllm",
+    "cwd": "/home/admin/cpfs/wjh/aituner/aituner",
+    "host": "127.0.0.1",
+    "port": 18081,
+    "healthcheck_path": "/v1/models",
+    "ready_timeout_s": 900,
+    "request_timeout_s": 900,
+    "launch_args": [
+      "serve",
+      "/home/admin/resource/model/464482ce.qwen3-30b-a3b/1m-instruct-0726-fp4"
+    ],
+    "base_envs": {
+      "CUDA_VISIBLE_DEVICES": "4,5,6,7",
+      "VLLM_FP8_USE_BLADNN": "1",
+      "VLLM_MOE_USE_BLADNN": "1"
+    },
+    "base_flags": {
+      "host": "127.0.0.1",
+      "port": 18081,
+      "served-model-name": "qwen3-30b-smoke",
+      "max-model-len": 65536,
+      "disable-log-requests": true,
+      "trust-remote-code": true
+    },
+    "tunable_envs": [
+      "VLLM_ATTENTION_BACKEND"
+    ],
+    "tunable_flags": [
+      "tensor-parallel-size",
+      "max-num-seqs",
+      "max-num-batched-tokens",
+      "gpu-memory-utilization",
+      "block-size"
+    ],
+    "python_executable": "python3"
+  },
+  "trace": {
+    "windows_path": "/home/admin/cpfs/wjh/aituner/aituner/trace_windows/windows.json",
+    "window_id": "chat_w20260311_1000",
+    "u_field": "sampling_u",
+    "timestamp_field": "timestamp",
+    "max_concurrency": 64,
+    "replay_time_scale": 1.0,
+    "early_stop_max_lag_s": 120.0,
+    "early_stop_max_elapsed_s": 900.0
+  },
+  "slo": {
+    "target_pass_rate": 0.95,
+    "ttft_rule": {
+      "kind": "step_ms",
+      "buckets": [
+        {
+          "max_input_tokens": 4096,
+          "threshold_ms": 15000
+        },
+        {
+          "max_input_tokens": 16384,
+          "threshold_ms": 30000
+        },
+        {
+          "threshold_ms": 45000
+        }
+      ]
+    },
+    "tpot_rule": {
+      "kind": "fixed_ms",
+      "threshold_ms": 1500
+    }
+  },
+  "search": {
+    "low": 0.0,
+    "high": 1.0,
+    "tolerance": 0.1,
+    "max_probes": 4,
+    "sample_seed": 20260325
+  },
+  "llm": {
+    "system_prompt": "Propose a single engine config patch that increases the maximum feasible sampling_u under the SLO target.",
+    "max_history_trials": 8,
+    "endpoint": {
+      "base_url": "http://tianx.ipads-lab.se.sjtu.edu.cn:8317/v1",
+      "model": "gpt-5.4",
+      "api_key_env": "OPENAI_API_KEY",
+      "timeout_s": 180
+    }
+  }
+}
--- a/src/aituner/llm.py
+++ b/src/aituner/llm.py
@@ -31,7 +31,9 @@ def build_prompt(
        "You are tuning an OpenAI-compatible serving engine.",
        "Return exactly one JSON object with keys: observation, diagnosis, config_patch, expected_effects, why_not_previous_failures.",
        "config_patch must contain env_patch and flag_patch.",
+        "expected_effects must be a JSON array of short strings, not an object.",
        "Only use allowed tunable env keys and allowed tunable flag keys.",
+        "Do not wrap the JSON in markdown fences or any extra text.",
        "",
        "Study stack:",
        json.dumps(
--- a/src/aituner/spec.py
+++ b/src/aituner/spec.py
@@ -398,6 +398,17 @@ class Proposal:
        expected_effects = data.get("expected_effects")
        if isinstance(expected_effects, str):
            expected_effects_value = [expected_effects.strip()] if expected_effects.strip() else []
+        elif isinstance(expected_effects, Mapping):
+            expected_effects_value = []
+            for key, value in expected_effects.items():
+                key_text = str(key).strip()
+                value_text = str(value).strip()
+                if key_text and value_text:
+                    expected_effects_value.append(f"{key_text}: {value_text}")
+                elif key_text:
+                    expected_effects_value.append(key_text)
+                elif value_text:
+                    expected_effects_value.append(value_text)
        else:
            expected_effects_value = _coerce_str_list(
                expected_effects, context="proposal.expected_effects"
--- a/tests/test_core_flow.py
+++ b/tests/test_core_flow.py
@@ -682,6 +682,23 @@ class CoreFlowTests(unittest.TestCase):
        )
        self.assertEqual(proposal.expected_effects, ["higher throughput"])

+    def test_proposal_expected_effects_accepts_object(self) -> None:
+        """An object-valued expected_effects is flattened to "key: value" strings."""
+        effects = {"throughput": "higher", "ttft": "lower"}
+        payload = {
+            "observation": "obs",
+            "diagnosis": "diag",
+            "config_patch": {"env_patch": {}, "flag_patch": {}},
+            "expected_effects": effects,
+        }
+
+        parsed = Proposal.from_dict(payload)
+        # One string per mapping entry, in the mapping's iteration order.
+        self.assertEqual(
+            parsed.expected_effects,
+            ["throughput: higher", "ttft: lower"],
+        )
+
    def test_replay_requests_early_stops_when_slo_is_unrecoverable(self) -> None:
        requests = [
            TraceRequest(