Support length-only trace windows

2026-04-04 21:31:11 +08:00
parent cdcca1d9d7
commit 647d241725
3 changed files with 109 additions and 2 deletions
--- a/src/aituner/spec.py
+++ b/src/aituner/spec.py
@@ -151,10 +151,12 @@ class TraceSpec:
    timestamp_field: str
    max_concurrency: int
    max_requests_per_probe: int | None = None
    synthetic_prompt_cap_tokens: int | None = None
    @classmethod
    def from_dict(cls, data: Mapping[str, Any]) -> "TraceSpec":
        max_requests = data.get("max_requests_per_probe")
        synthetic_prompt_cap = data.get("synthetic_prompt_cap_tokens")
        return cls(
            windows_path=_require_str(data.get("windows_path"), context="trace.windows_path"),
            window_id=_require_str(data.get("window_id"), context="trace.window_id"),
@@ -167,6 +169,9 @@ class TraceSpec:
                data.get("max_concurrency", 64), context="trace.max_concurrency"
            ),
            max_requests_per_probe=int(max_requests) if max_requests is not None else None,
            synthetic_prompt_cap_tokens=(
                int(synthetic_prompt_cap) if synthetic_prompt_cap is not None else None
            ),
        )
--- a/src/aituner/trace.py
+++ b/src/aituner/trace.py
@@ -81,6 +81,14 @@ def _coerce_messages(row: Mapping[str, Any]) -> list[dict[str, Any]]:
    raise TraceError("trace row is missing chat messages/prompt text")
 def _synthetic_prompt_from_tokens(token_count: int) -> str:
    if token_count <= 0:
        return "hello"
    # Keep it ASCII and structurally simple so the same trace can be replayed
    # on any OpenAI-compatible engine without extra tokenizer assets.
    return " ".join(["token"] * token_count)
 def _coerce_completion_tokens(row: Mapping[str, Any]) -> int | None:
    for key in ("max_completion_tokens", "max_tokens", "output_length", "completion_tokens"):
        value = row.get(key)
@@ -123,9 +131,24 @@ def load_trace_requests(study: StudySpec, *, study_spec_path: Path) -> tuple[Win
            sampling_u = row.get(study.trace.u_field, 1.0)
            if isinstance(sampling_u, bool) or not isinstance(sampling_u, (int, float)):
                raise TraceError(f"trace row {idx} is missing numeric {study.trace.u_field}")
            prompt_tokens_hint = _coerce_prompt_tokens(row)
            try:
                messages = _coerce_messages(row)
            except TraceError:
                capped_prompt_tokens = prompt_tokens_hint or 0
                if study.trace.synthetic_prompt_cap_tokens is not None:
                    capped_prompt_tokens = min(
                        capped_prompt_tokens, study.trace.synthetic_prompt_cap_tokens
                    )
                messages = [
                    {
                        "role": "user",
                        "content": _synthetic_prompt_from_tokens(capped_prompt_tokens),
                    }
                ]
            body: dict[str, Any] = {
                "model": study.model.served_model_name,
-                "messages": _coerce_messages(row),
+                "messages": messages,
                "stream": True,
                "stream_options": {"include_usage": True},
            }
@@ -141,7 +164,7 @@ def load_trace_requests(study: StudySpec, *, study_spec_path: Path) -> tuple[Win
                    arrival_s=float(timestamp),
                    sampling_u=float(sampling_u),
                    body=body,
-                    prompt_tokens_hint=_coerce_prompt_tokens(row),
+                    prompt_tokens_hint=prompt_tokens_hint,
                    completion_tokens_hint=completion_tokens,
                )
            )
--- a/tests/test_core_flow.py
+++ b/tests/test_core_flow.py
@@ -153,6 +153,85 @@ class CoreFlowTests(unittest.TestCase):
            self.assertIn("queueing_knee_by_bucket", prompt)
            self.assertTrue(study_root.exists())
    def test_length_only_trace_rows_are_synthesized(self) -> None:
        """A trace row with only length fields still yields a usable request.

        The row carries ``input_length``/``output_length`` but no messages,
        and the study sets ``synthetic_prompt_cap_tokens`` to 8, so the
        generated user message is expected to contain exactly 8 filler words.
        """
        length_only_row = {
            "timestamp": 0.0,
            "sampling_u": 0.1,
            "input_length": 32,
            "output_length": 16,
        }
        window_index = {
            "windows": [
                {
                    "window_id": "w1",
                    "trace_type": "chat",
                    "trace_file": "traces/chat_len_only.jsonl",
                    "window_start": 0.0,
                    "window_end": 10.0,
                }
            ]
        }
        engine_section = {
            "engine_name": "vllm",
            "exec_path": "/usr/local/bin/vllm",
            "host": "127.0.0.1",
            "port": 8000,
            "ready_timeout_s": 10,
            "request_timeout_s": 10,
            "healthcheck_path": "/v1/models",
            "launch_args": [],
            "base_envs": {},
            "base_flags": {},
            "tunable_envs": [],
            "tunable_flags": [],
        }
        search_section = {
            "low": 0.0,
            "high": 1.0,
            "tolerance": 0.1,
            "max_probes": 2,
            "sample_seed": 1,
        }

        with tempfile.TemporaryDirectory() as scratch:
            root = Path(scratch)
            windows_dir = root / "trace_windows"
            traces_dir = windows_dir / "traces"
            traces_dir.mkdir(parents=True)

            # Single JSONL record: lengths only, no chat messages or prompt.
            jsonl_file = traces_dir / "chat_len_only.jsonl"
            jsonl_file.write_text(json.dumps(length_only_row) + "\n", encoding="utf-8")

            windows_path = windows_dir / "windows.json"
            windows_path.write_text(json.dumps(window_index), encoding="utf-8")

            study_document = {
                "study_id": "study-len-only",
                "hardware": {"gpu_count": 1},
                "model": {
                    "model_id": "m1",
                    "served_model_name": "dummy-model",
                },
                "engine": engine_section,
                "trace": {
                    "windows_path": str(windows_path),
                    "window_id": "w1",
                    "max_concurrency": 1,
                    # Cap below input_length (32) so the capping is observable.
                    "synthetic_prompt_cap_tokens": 8,
                },
                "slo": {"target_pass_rate": 0.95},
                "search": search_section,
                "llm": {"system_prompt": "", "max_history_trials": 1},
            }
            study_path = root / "study.json"
            study_path.write_text(json.dumps(study_document), encoding="utf-8")

            study = load_study_spec(study_path)
            _, loaded_requests = load_trace_requests(study, study_spec_path=study_path)

            self.assertEqual(len(loaded_requests), 1)
            request_body = loaded_requests[0].body
            synthetic_content = request_body["messages"][0]["content"]
            # 32 requested tokens are capped to 8 filler words.
            self.assertEqual(synthetic_content.count("token"), 8)
            # The row's output_length of 16 is expected as max_tokens.
            self.assertEqual(request_body["max_tokens"], 16)
    def test_slo_evaluation_step_and_fixed_rules(self) -> None:
        with tempfile.TemporaryDirectory() as tmp:
            study = load_study_spec(_write_study_assets(Path(tmp)))