Add advisory harness attribution and descriptor planner MVP
This commit is contained in:
37
docs/harness-ablation/candidate-family-gap-log.md
Normal file
37
docs/harness-ablation/candidate-family-gap-log.md
Normal file
@@ -0,0 +1,37 @@
|
|||||||
|
# Candidate Family Gap Review Log
|
||||||
|
|
||||||
|
本文档维护 LLM 在 `advisory` 模式下提出 harness candidate set 之外配置、且该配置带来性能提升时的人工 review 入口。
|
||||||
|
|
||||||
|
运行时系统不会自动修改 harness,也不会把 LLM 的 out-of-set proposal 直接提升为规则。每条提升先写入 study artifact:
|
||||||
|
|
||||||
|
```text
|
||||||
|
.aituner/<study>/candidate_family_gaps/<trial-id>.json
|
||||||
|
```
|
||||||
|
|
||||||
|
然后人工 review 决定是否需要修改:
|
||||||
|
|
||||||
|
- `KnobDescriptor`
|
||||||
|
- generic operator
|
||||||
|
- acquisition / step-size policy
|
||||||
|
- evidence estimator
|
||||||
|
|
||||||
|
## Gap 分类
|
||||||
|
|
||||||
|
| 类型 | 含义 | 默认处理 |
|
||||||
|
|---|---|---|
|
||||||
|
| `same_operator_new_step` | harness 已有同 knob / 同方向候选,但 LLM 给了更好的 step/value | 优先检查 trust-region、step-size、candidate budget 和 acquisition |
|
||||||
|
| `missing_operator` | visible candidate set 中没有同 knob 或同 mechanism 的候选 | 检查是否缺 generic operator 或 descriptor 映射 |
|
||||||
|
| `missing_descriptor` | knob 不在 adapter descriptor 中 | 需要 engine adapter review |
|
||||||
|
| `missing_mechanism` | 现有机制词表无法表达该 proposal 的作用 | 需要 design review |
|
||||||
|
| `llm_independent_discovery` | LLM 发现无法归入当前 harness abstraction 的新方向 | 只作为 observation,不直接进入 harness |
|
||||||
|
|
||||||
|
## Review 原则
|
||||||
|
|
||||||
|
1. 不接受 case-specific 数值表,例如“这个 case 试 `max-num-seqs=24`”。
|
||||||
|
2. 若归类为 `same_operator_new_step`,只能修改通用 step policy,例如 grow/shrink factor、local grid budget、bracket 触发条件。
|
||||||
|
3. 若归类为 `missing_descriptor`,descriptor 只能表达 knob 语义、约束、search geometry 和 directional effects,不能表达具体目标答案。
|
||||||
|
4. 任何被接受的 gap 都需要新增 synthetic test,证明它不依赖 vLLM 常见取值或某个 bad-start case。
|
||||||
|
|
||||||
|
## Pending
|
||||||
|
|
||||||
|
当前 repo 内尚无已人工接受的 candidate family gap。实验产生的 JSON artifact 需要在这里补充 review 摘要后再进入代码设计。
|
||||||
@@ -1,10 +1,12 @@
|
|||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
import argparse
|
import argparse
|
||||||
|
import hashlib
|
||||||
import json
|
import json
|
||||||
import sys
|
import sys
|
||||||
from dataclasses import replace
|
from dataclasses import replace
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
from typing import Any
|
||||||
|
|
||||||
from .compare import run_compare
|
from .compare import run_compare
|
||||||
from .config_signature import (
|
from .config_signature import (
|
||||||
@@ -25,6 +27,7 @@ from .lca import (
|
|||||||
)
|
)
|
||||||
from .llm import build_prompt, call_llm_for_proposal, load_capability_profile, parse_proposal_text
|
from .llm import build_prompt, call_llm_for_proposal, load_capability_profile, parse_proposal_text
|
||||||
from .spec import (
|
from .spec import (
|
||||||
|
ConfigPatch,
|
||||||
Proposal,
|
Proposal,
|
||||||
SpecError,
|
SpecError,
|
||||||
StudySpec,
|
StudySpec,
|
||||||
@@ -68,6 +71,302 @@ def _reject_repeated_effective_config(
|
|||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def _effective_config_fingerprint(signature: str) -> str:
|
||||||
|
return hashlib.sha256(signature.encode("utf-8")).hexdigest()
|
||||||
|
|
||||||
|
|
||||||
|
def _proposal_effective_config_fingerprint(
|
||||||
|
*,
|
||||||
|
study: StudySpec,
|
||||||
|
state: StudyState,
|
||||||
|
proposal: Proposal,
|
||||||
|
) -> str | None:
|
||||||
|
if proposal.should_stop:
|
||||||
|
return None
|
||||||
|
signature = materialized_effective_config_signature(
|
||||||
|
study=study,
|
||||||
|
state=state,
|
||||||
|
proposal=proposal,
|
||||||
|
)
|
||||||
|
return _effective_config_fingerprint(signature)
|
||||||
|
|
||||||
|
|
||||||
|
def _visible_harness_candidates(
|
||||||
|
harness_context: dict[str, object] | None,
|
||||||
|
) -> list[dict[str, Any]]:
|
||||||
|
if not isinstance(harness_context, dict):
|
||||||
|
return []
|
||||||
|
experiment_plan = harness_context.get("experiment_plan")
|
||||||
|
if not isinstance(experiment_plan, dict):
|
||||||
|
return []
|
||||||
|
candidate_set = experiment_plan.get("candidate_set")
|
||||||
|
if not isinstance(candidate_set, dict):
|
||||||
|
return []
|
||||||
|
candidates = candidate_set.get("eligible_candidates")
|
||||||
|
if not isinstance(candidates, list):
|
||||||
|
return []
|
||||||
|
return [item for item in candidates if isinstance(item, dict)]
|
||||||
|
|
||||||
|
|
||||||
|
def _candidate_set_hash(harness_context: dict[str, object] | None) -> object:
|
||||||
|
if not isinstance(harness_context, dict):
|
||||||
|
return None
|
||||||
|
experiment_plan = harness_context.get("experiment_plan")
|
||||||
|
if not isinstance(experiment_plan, dict):
|
||||||
|
return None
|
||||||
|
candidate_set = experiment_plan.get("candidate_set")
|
||||||
|
if not isinstance(candidate_set, dict):
|
||||||
|
return None
|
||||||
|
return candidate_set.get("candidate_set_hash")
|
||||||
|
|
||||||
|
|
||||||
|
def _match_visible_harness_candidate(
|
||||||
|
*,
|
||||||
|
proposal_fingerprint: str | None,
|
||||||
|
harness_context: dict[str, object] | None,
|
||||||
|
) -> dict[str, Any] | None:
|
||||||
|
if proposal_fingerprint is None:
|
||||||
|
return None
|
||||||
|
for candidate in _visible_harness_candidates(harness_context):
|
||||||
|
if candidate.get("effective_config_fingerprint") == proposal_fingerprint:
|
||||||
|
return candidate
|
||||||
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
def _proposal_source_label(
|
||||||
|
*,
|
||||||
|
proposal_name: str,
|
||||||
|
proposal_source: Path | None,
|
||||||
|
) -> str:
|
||||||
|
if proposal_name.startswith("baseline-"):
|
||||||
|
return "baseline"
|
||||||
|
if proposal_name.startswith("harness-stop-") or proposal_name.startswith("harness-proposal-"):
|
||||||
|
return "harness"
|
||||||
|
return str(proposal_source) if proposal_source else "llm"
|
||||||
|
|
||||||
|
|
||||||
|
def _classify_proposal_attribution(
|
||||||
|
*,
|
||||||
|
study: StudySpec,
|
||||||
|
state: StudyState,
|
||||||
|
proposal: Proposal,
|
||||||
|
proposal_name: str,
|
||||||
|
proposal_source: Path | None,
|
||||||
|
harness_context: dict[str, object] | None,
|
||||||
|
) -> dict[str, Any]:
|
||||||
|
source_label = _proposal_source_label(
|
||||||
|
proposal_name=proposal_name,
|
||||||
|
proposal_source=proposal_source,
|
||||||
|
)
|
||||||
|
fingerprint = _proposal_effective_config_fingerprint(
|
||||||
|
study=study,
|
||||||
|
state=state,
|
||||||
|
proposal=proposal,
|
||||||
|
)
|
||||||
|
matched = _match_visible_harness_candidate(
|
||||||
|
proposal_fingerprint=fingerprint,
|
||||||
|
harness_context=harness_context,
|
||||||
|
)
|
||||||
|
matched_candidate_id = matched.get("candidate_id") if matched else None
|
||||||
|
if proposal.should_stop:
|
||||||
|
origin = (
|
||||||
|
"harness_stop"
|
||||||
|
if proposal_name.startswith("harness-stop-")
|
||||||
|
else "proposal_file_stop"
|
||||||
|
if proposal_source is not None
|
||||||
|
else "llm_stop"
|
||||||
|
)
|
||||||
|
elif proposal_name.startswith("baseline-"):
|
||||||
|
origin = "baseline"
|
||||||
|
elif proposal_name.startswith("harness-proposal-"):
|
||||||
|
origin = "harness_top1"
|
||||||
|
elif proposal_source is not None:
|
||||||
|
origin = "proposal_file"
|
||||||
|
elif study.llm.use_harness and harness_context is not None:
|
||||||
|
origin = "llm_selected_harness_candidate" if matched else "llm_out_of_set"
|
||||||
|
else:
|
||||||
|
origin = "llm_no_harness"
|
||||||
|
return {
|
||||||
|
"schema_version": 1,
|
||||||
|
"proposal_name": proposal_name,
|
||||||
|
"proposal_source": source_label,
|
||||||
|
"proposal_origin": origin,
|
||||||
|
"harness_candidate_policy": study.llm.harness_candidate_policy,
|
||||||
|
"candidate_set_hash": _candidate_set_hash(harness_context),
|
||||||
|
"proposal_effective_config_fingerprint": fingerprint,
|
||||||
|
"matched_effective_config_signature": matched is not None,
|
||||||
|
"matched_candidate_id": matched_candidate_id,
|
||||||
|
"matched_candidate": _compact_candidate_for_attribution(matched),
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
def _compact_candidate_for_attribution(candidate: dict[str, Any] | None) -> dict[str, Any] | None:
|
||||||
|
if not candidate:
|
||||||
|
return None
|
||||||
|
return {
|
||||||
|
"candidate_id": candidate.get("candidate_id"),
|
||||||
|
"action_id": candidate.get("action_id"),
|
||||||
|
"knob_family": candidate.get("knob_family"),
|
||||||
|
"score": candidate.get("score"),
|
||||||
|
"config_patch": candidate.get("config_patch"),
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
def _validate_harness_candidate_policy(attribution: dict[str, Any]) -> None:
|
||||||
|
if (
|
||||||
|
attribution.get("harness_candidate_policy") == "strict"
|
||||||
|
and attribution.get("proposal_origin") == "llm_out_of_set"
|
||||||
|
):
|
||||||
|
raise SpecError(
|
||||||
|
"LLM proposal is outside the visible harness eligible candidate set while "
|
||||||
|
"llm.harness_candidate_policy=strict. Use an eligible harness candidate, "
|
||||||
|
"switch to advisory mode, or disable harness context."
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def _config_patch_knob_keys(config_patch: object) -> set[str]:
|
||||||
|
if not isinstance(config_patch, dict):
|
||||||
|
return set()
|
||||||
|
keys: set[str] = set()
|
||||||
|
env_patch = config_patch.get("env_patch")
|
||||||
|
if isinstance(env_patch, dict):
|
||||||
|
keys.update(f"env:{key}" for key in env_patch)
|
||||||
|
flag_patch = config_patch.get("flag_patch")
|
||||||
|
if isinstance(flag_patch, dict):
|
||||||
|
keys.update(f"flag:{key}" for key in flag_patch)
|
||||||
|
return keys
|
||||||
|
|
||||||
|
|
||||||
|
def _proposal_knob_keys(proposal: Proposal) -> set[str]:
|
||||||
|
return {
|
||||||
|
*{f"env:{key}" for key in proposal.config_patch.env_patch},
|
||||||
|
*{f"flag:{key}" for key in proposal.config_patch.flag_patch},
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
def _nearest_visible_harness_candidates(
|
||||||
|
*,
|
||||||
|
proposal: Proposal,
|
||||||
|
harness_context: dict[str, object] | None,
|
||||||
|
limit: int = 3,
|
||||||
|
) -> list[dict[str, Any]]:
|
||||||
|
proposal_keys = _proposal_knob_keys(proposal)
|
||||||
|
scored: list[tuple[int, float, dict[str, Any]]] = []
|
||||||
|
for candidate in _visible_harness_candidates(harness_context):
|
||||||
|
candidate_keys = _config_patch_knob_keys(candidate.get("config_patch"))
|
||||||
|
overlap = len(proposal_keys & candidate_keys)
|
||||||
|
score = candidate.get("score")
|
||||||
|
candidate_score = float(score) if isinstance(score, (int, float)) else 0.0
|
||||||
|
scored.append((overlap, candidate_score, candidate))
|
||||||
|
scored.sort(key=lambda item: (item[0], item[1]), reverse=True)
|
||||||
|
return [_compact_candidate_for_attribution(item[2]) or {} for item in scored[:limit]]
|
||||||
|
|
||||||
|
|
||||||
|
def _candidate_family_gap_payload(
|
||||||
|
*,
|
||||||
|
study: StudySpec,
|
||||||
|
trial_id: str,
|
||||||
|
proposal_name: str,
|
||||||
|
proposal: Proposal,
|
||||||
|
attribution: dict[str, Any],
|
||||||
|
harness_context: dict[str, object] | None,
|
||||||
|
incumbent_rate_per_gpu: float,
|
||||||
|
result_rate_per_gpu: float,
|
||||||
|
) -> dict[str, Any]:
|
||||||
|
nearest = _nearest_visible_harness_candidates(
|
||||||
|
proposal=proposal,
|
||||||
|
harness_context=harness_context,
|
||||||
|
)
|
||||||
|
proposal_keys = _proposal_knob_keys(proposal)
|
||||||
|
has_same_knob_candidate = any(
|
||||||
|
proposal_keys
|
||||||
|
& _config_patch_knob_keys(candidate.get("config_patch") if isinstance(candidate, dict) else None)
|
||||||
|
for candidate in nearest
|
||||||
|
)
|
||||||
|
return {
|
||||||
|
"schema_version": 1,
|
||||||
|
"study_id": study.study_id,
|
||||||
|
"trial_id": trial_id,
|
||||||
|
"proposal_name": proposal_name,
|
||||||
|
"proposal_origin": attribution.get("proposal_origin"),
|
||||||
|
"gap_type": "same_operator_new_step" if has_same_knob_candidate else "missing_operator",
|
||||||
|
"review_status": "pending",
|
||||||
|
"incumbent_request_rate_per_gpu": incumbent_rate_per_gpu,
|
||||||
|
"result_request_rate_per_gpu": result_rate_per_gpu,
|
||||||
|
"absolute_gain": result_rate_per_gpu - incumbent_rate_per_gpu,
|
||||||
|
"relative_gain": (
|
||||||
|
(result_rate_per_gpu - incumbent_rate_per_gpu) / incumbent_rate_per_gpu
|
||||||
|
if incumbent_rate_per_gpu > 0
|
||||||
|
else None
|
||||||
|
),
|
||||||
|
"proposal_patch": {
|
||||||
|
"env_patch": dict(proposal.config_patch.env_patch),
|
||||||
|
"flag_patch": dict(proposal.config_patch.flag_patch),
|
||||||
|
},
|
||||||
|
"changed_knobs": sorted(proposal_keys),
|
||||||
|
"candidate_set_hash": attribution.get("candidate_set_hash"),
|
||||||
|
"nearest_harness_candidates": nearest,
|
||||||
|
"interpretation": (
|
||||||
|
"LLM changed a knob already present in the visible harness candidate set; "
|
||||||
|
"treat this as a step-size/acquisition gap until offline review accepts "
|
||||||
|
"a descriptor or operator change."
|
||||||
|
if has_same_knob_candidate
|
||||||
|
else "LLM changed knobs not represented by the visible harness candidates; "
|
||||||
|
"offline review should decide whether this is a missing operator, "
|
||||||
|
"descriptor, or mechanism."
|
||||||
|
),
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
def _result_request_rate_per_gpu(result: dict[str, object], parallel_size: int | None) -> float | None:
|
||||||
|
best_request_rate = result.get("best_request_rate")
|
||||||
|
if not isinstance(best_request_rate, (int, float)):
|
||||||
|
return None
|
||||||
|
if not isinstance(parallel_size, int) or parallel_size <= 0:
|
||||||
|
return None
|
||||||
|
return float(best_request_rate) / float(parallel_size)
|
||||||
|
|
||||||
|
|
||||||
|
def _parse_parallel_int(value: object, *, default: int = 1) -> int:
|
||||||
|
if value is None:
|
||||||
|
return default
|
||||||
|
if isinstance(value, bool):
|
||||||
|
raise SpecError("Boolean values are not valid topology settings.")
|
||||||
|
if isinstance(value, int):
|
||||||
|
return value
|
||||||
|
if isinstance(value, float) and value.is_integer():
|
||||||
|
return int(value)
|
||||||
|
if isinstance(value, str) and value.strip():
|
||||||
|
return int(value.strip())
|
||||||
|
raise SpecError(f"Unable to parse topology setting from {value!r}.")
|
||||||
|
|
||||||
|
|
||||||
|
def _parallel_size_for_config_patch(study: StudySpec, config_patch: object) -> int | None:
|
||||||
|
if not isinstance(config_patch, ConfigPatch):
|
||||||
|
return None
|
||||||
|
flags: dict[str, object] = dict(study.engine.base_flags)
|
||||||
|
flags.update(config_patch.flag_patch)
|
||||||
|
tp = _parse_parallel_int(flags.get("tensor-parallel-size"), default=1)
|
||||||
|
dp = _parse_parallel_int(flags.get("data-parallel-size"), default=1)
|
||||||
|
return tp * dp
|
||||||
|
|
||||||
|
|
||||||
|
def _is_candidate_family_gap(
|
||||||
|
*,
|
||||||
|
attribution: dict[str, Any],
|
||||||
|
incumbent_rate_per_gpu: float | None,
|
||||||
|
result_rate_per_gpu: float | None,
|
||||||
|
) -> bool:
|
||||||
|
if attribution.get("proposal_origin") != "llm_out_of_set":
|
||||||
|
return False
|
||||||
|
if not isinstance(incumbent_rate_per_gpu, (int, float)):
|
||||||
|
return False
|
||||||
|
if not isinstance(result_rate_per_gpu, (int, float)):
|
||||||
|
return False
|
||||||
|
min_gain = max(1e-6, float(incumbent_rate_per_gpu) * 0.01)
|
||||||
|
return float(result_rate_per_gpu) > float(incumbent_rate_per_gpu) + min_gain
|
||||||
|
|
||||||
|
|
||||||
def _harness_snapshot_payload(
|
def _harness_snapshot_payload(
|
||||||
*,
|
*,
|
||||||
study: StudySpec,
|
study: StudySpec,
|
||||||
@@ -418,8 +717,23 @@ def cmd_study_tune(args: argparse.Namespace) -> int:
|
|||||||
proposal=proposal,
|
proposal=proposal,
|
||||||
proposal_name=proposal_name,
|
proposal_name=proposal_name,
|
||||||
)
|
)
|
||||||
|
proposal_attribution = _classify_proposal_attribution(
|
||||||
|
study=study,
|
||||||
|
state=state,
|
||||||
|
proposal=proposal,
|
||||||
|
proposal_name=proposal_name,
|
||||||
|
proposal_source=proposal_source,
|
||||||
|
harness_context=harness_context,
|
||||||
|
)
|
||||||
|
_validate_harness_candidate_policy(proposal_attribution)
|
||||||
store.write_proposal(study.study_id, proposal_name, proposal)
|
store.write_proposal(study.study_id, proposal_name, proposal)
|
||||||
if proposal.should_stop:
|
if proposal.should_stop:
|
||||||
|
proposal_attribution["stopped"] = True
|
||||||
|
store.write_proposal_attribution(
|
||||||
|
study.study_id,
|
||||||
|
proposal_name,
|
||||||
|
proposal_attribution,
|
||||||
|
)
|
||||||
is_harness_stop = proposal_name.startswith("harness-stop-")
|
is_harness_stop = proposal_name.startswith("harness-stop-")
|
||||||
is_llm_stop = not is_harness_stop and proposal_source is None
|
is_llm_stop = not is_harness_stop and proposal_source is None
|
||||||
stop_authority = (
|
stop_authority = (
|
||||||
@@ -498,21 +812,55 @@ def cmd_study_tune(args: argparse.Namespace) -> int:
|
|||||||
and not state.trials
|
and not state.trials
|
||||||
and _is_empty_config_patch(proposal)
|
and _is_empty_config_patch(proposal)
|
||||||
)
|
)
|
||||||
|
pre_trial_best_rate_per_gpu = state.best_request_rate_per_gpu
|
||||||
trial, _ = store.materialize_trial(study=study, state=state, proposal=proposal)
|
trial, _ = store.materialize_trial(study=study, state=state, proposal=proposal)
|
||||||
|
proposal_attribution["trial_id"] = trial.trial_id
|
||||||
|
store.write_proposal_attribution(
|
||||||
|
study.study_id,
|
||||||
|
proposal_name,
|
||||||
|
proposal_attribution,
|
||||||
|
)
|
||||||
trial_spec_path = Path(trial.artifact_dir) / "trial_spec.json"
|
trial_spec_path = Path(trial.artifact_dir) / "trial_spec.json"
|
||||||
result = run_trial(trial_spec_path)
|
result = run_trial(trial_spec_path)
|
||||||
state = store.ingest_trial_results(study.study_id)
|
state = store.ingest_trial_results(study.study_id)
|
||||||
|
trial_parallel_size = _parallel_size_for_config_patch(study, trial.config_patch)
|
||||||
|
result_rate_per_gpu = _result_request_rate_per_gpu(result, trial_parallel_size)
|
||||||
|
gap_path: Path | None = None
|
||||||
|
if _is_candidate_family_gap(
|
||||||
|
attribution=proposal_attribution,
|
||||||
|
incumbent_rate_per_gpu=pre_trial_best_rate_per_gpu,
|
||||||
|
result_rate_per_gpu=result_rate_per_gpu,
|
||||||
|
):
|
||||||
|
gap_payload = _candidate_family_gap_payload(
|
||||||
|
study=study,
|
||||||
|
trial_id=trial.trial_id,
|
||||||
|
proposal_name=proposal_name,
|
||||||
|
proposal=proposal,
|
||||||
|
attribution=proposal_attribution,
|
||||||
|
harness_context=harness_context,
|
||||||
|
incumbent_rate_per_gpu=float(pre_trial_best_rate_per_gpu),
|
||||||
|
result_rate_per_gpu=float(result_rate_per_gpu),
|
||||||
|
)
|
||||||
|
gap_path = store.write_candidate_family_gap(
|
||||||
|
study.study_id,
|
||||||
|
trial.trial_id,
|
||||||
|
gap_payload,
|
||||||
|
)
|
||||||
executed.append(
|
executed.append(
|
||||||
{
|
{
|
||||||
"trial_id": trial.trial_id,
|
"trial_id": trial.trial_id,
|
||||||
"proposal_name": proposal_name,
|
"proposal_name": proposal_name,
|
||||||
"proposal_source": (
|
"proposal_source": (
|
||||||
"harness"
|
"harness"
|
||||||
if proposal_name.startswith("harness-proposal-")
|
if proposal_name.startswith("harness-proposal-")
|
||||||
else str(proposal_source) if proposal_source else "llm"
|
else str(proposal_source) if proposal_source else "llm"
|
||||||
),
|
),
|
||||||
|
"proposal_origin": proposal_attribution.get("proposal_origin"),
|
||||||
|
"matched_candidate_id": proposal_attribution.get("matched_candidate_id"),
|
||||||
|
"candidate_family_gap_path": str(gap_path) if gap_path else None,
|
||||||
"best_sampling_u": result.get("best_sampling_u"),
|
"best_sampling_u": result.get("best_sampling_u"),
|
||||||
"best_request_rate": result.get("best_request_rate"),
|
"best_request_rate": result.get("best_request_rate"),
|
||||||
|
"best_request_rate_per_gpu": result_rate_per_gpu,
|
||||||
"best_pass_rate": result.get("best_pass_rate"),
|
"best_pass_rate": result.get("best_pass_rate"),
|
||||||
"state_best_trial_id": state.best_trial_id,
|
"state_best_trial_id": state.best_trial_id,
|
||||||
"state_best_request_rate": state.best_request_rate,
|
"state_best_request_rate": state.best_request_rate,
|
||||||
|
|||||||
2
src/aituner/engine_adapters/__init__.py
Normal file
2
src/aituner/engine_adapters/__init__.py
Normal file
@@ -0,0 +1,2 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
82
src/aituner/engine_adapters/vllm.py
Normal file
82
src/aituner/engine_adapters/vllm.py
Normal file
@@ -0,0 +1,82 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from collections.abc import Iterable
|
||||||
|
|
||||||
|
from ..knob_descriptor import KnobConstraints, KnobDescriptor
|
||||||
|
|
||||||
|
|
||||||
|
def default_vllm_descriptors(*, tunable_flags: Iterable[str]) -> tuple[KnobDescriptor, ...]:
|
||||||
|
tunable = set(tunable_flags)
|
||||||
|
descriptors: list[KnobDescriptor] = []
|
||||||
|
if "max-num-seqs" in tunable:
|
||||||
|
descriptors.append(
|
||||||
|
KnobDescriptor(
|
||||||
|
name="max-num-seqs",
|
||||||
|
location="flag",
|
||||||
|
value_type="int",
|
||||||
|
mechanisms=("admission_capacity", "kv_memory_pressure"),
|
||||||
|
search_geometry="positive_capacity",
|
||||||
|
operators=("coordinate_line_search", "frontier_delta_projection"),
|
||||||
|
constraints=KnobConstraints(min_value=1, integer=True, multiple_of=8),
|
||||||
|
directional_effects={
|
||||||
|
"increase": ("admission_capacity",),
|
||||||
|
"decrease": ("kv_memory_pressure",),
|
||||||
|
},
|
||||||
|
risk_effects={
|
||||||
|
"increase": ("kv_memory_pressure", "decode_tail_latency"),
|
||||||
|
},
|
||||||
|
)
|
||||||
|
)
|
||||||
|
if "max-num-batched-tokens" in tunable:
|
||||||
|
descriptors.append(
|
||||||
|
KnobDescriptor(
|
||||||
|
name="max-num-batched-tokens",
|
||||||
|
location="flag",
|
||||||
|
value_type="int",
|
||||||
|
mechanisms=("prefill_scheduling", "decode_batching"),
|
||||||
|
search_geometry="positive_capacity",
|
||||||
|
operators=("coordinate_line_search", "frontier_delta_projection"),
|
||||||
|
constraints=KnobConstraints(min_value=1, integer=True, multiple_of=128),
|
||||||
|
directional_effects={
|
||||||
|
"increase": ("prefill_scheduling", "decode_batching"),
|
||||||
|
"decrease": ("prefill_tail_latency",),
|
||||||
|
},
|
||||||
|
risk_effects={
|
||||||
|
"increase": ("prefill_tail_latency", "kv_memory_pressure"),
|
||||||
|
},
|
||||||
|
)
|
||||||
|
)
|
||||||
|
if "gpu-memory-utilization" in tunable:
|
||||||
|
descriptors.append(
|
||||||
|
KnobDescriptor(
|
||||||
|
name="gpu-memory-utilization",
|
||||||
|
location="flag",
|
||||||
|
value_type="float",
|
||||||
|
mechanisms=("kv_memory_capacity", "launch_feasibility"),
|
||||||
|
search_geometry="bounded_fraction",
|
||||||
|
operators=("coordinate_line_search", "frontier_delta_projection"),
|
||||||
|
constraints=KnobConstraints(min_value=0.0, max_value=1.0),
|
||||||
|
directional_effects={
|
||||||
|
"increase": ("kv_memory_capacity",),
|
||||||
|
"decrease": ("launch_feasibility",),
|
||||||
|
},
|
||||||
|
risk_effects={
|
||||||
|
"increase": ("launch_feasibility",),
|
||||||
|
},
|
||||||
|
)
|
||||||
|
)
|
||||||
|
if "enable-chunked-prefill" in tunable:
|
||||||
|
descriptors.append(
|
||||||
|
KnobDescriptor(
|
||||||
|
name="enable-chunked-prefill",
|
||||||
|
location="flag",
|
||||||
|
value_type="bool",
|
||||||
|
mechanisms=("prefill_scheduling",),
|
||||||
|
search_geometry="toggle",
|
||||||
|
operators=("coordinate_line_search",),
|
||||||
|
directional_effects={
|
||||||
|
"toggle": ("prefill_scheduling",),
|
||||||
|
},
|
||||||
|
)
|
||||||
|
)
|
||||||
|
return tuple(descriptors)
|
||||||
40
src/aituner/knob_descriptor.py
Normal file
40
src/aituner/knob_descriptor.py
Normal file
@@ -0,0 +1,40 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from dataclasses import dataclass, field
|
||||||
|
from typing import Any, Literal, Mapping
|
||||||
|
|
||||||
|
|
||||||
|
KnobLocation = Literal["flag", "env"]
|
||||||
|
KnobValueType = Literal["int", "float", "bool", "enum", "str"]
|
||||||
|
SearchGeometry = Literal["linear", "positive_capacity", "bounded_fraction", "toggle"]
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass(frozen=True)
|
||||||
|
class KnobConstraints:
|
||||||
|
min_value: float | None = None
|
||||||
|
max_value: float | None = None
|
||||||
|
integer: bool = False
|
||||||
|
multiple_of: int | None = None
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass(frozen=True)
|
||||||
|
class KnobDescriptor:
|
||||||
|
"""Declarative serving-knob semantics used by generic planners.
|
||||||
|
|
||||||
|
The descriptor intentionally does not enumerate target values for continuous
|
||||||
|
or large integer knobs. It describes how a generic operator may perturb the
|
||||||
|
knob and which mechanism each direction is expected to affect.
|
||||||
|
"""
|
||||||
|
|
||||||
|
name: str
|
||||||
|
location: KnobLocation
|
||||||
|
value_type: KnobValueType
|
||||||
|
mechanisms: tuple[str, ...]
|
||||||
|
search_geometry: SearchGeometry
|
||||||
|
operators: tuple[str, ...]
|
||||||
|
constraints: KnobConstraints = field(default_factory=KnobConstraints)
|
||||||
|
directional_effects: Mapping[str, tuple[str, ...]] = field(default_factory=dict)
|
||||||
|
risk_effects: Mapping[str, tuple[str, ...]] = field(default_factory=dict)
|
||||||
|
|
||||||
|
def current_value(self, config: Mapping[str, Any]) -> Any:
|
||||||
|
return config.get(self.name)
|
||||||
@@ -317,6 +317,11 @@ def build_prompt(
|
|||||||
if parallel_candidates
|
if parallel_candidates
|
||||||
else "If TP/DP/EP are not tunable, focus on the remaining launch-safe runtime knobs."
|
else "If TP/DP/EP are not tunable, focus on the remaining launch-safe runtime knobs."
|
||||||
),
|
),
|
||||||
|
(
|
||||||
|
"Harness candidate policy is advisory: prefer a high-scoring harness candidate when it matches your diagnosis, but you may propose an out-of-set config when the harness candidate family appears to miss the right step; such proposals are audited as candidate-family gaps."
|
||||||
|
if study.llm.harness_candidate_policy == "advisory"
|
||||||
|
else "Harness candidate policy is strict: your config_patch must match one of the harness eligible candidates after effective-config materialization."
|
||||||
|
),
|
||||||
"",
|
"",
|
||||||
"Study stack:",
|
"Study stack:",
|
||||||
json.dumps(
|
json.dumps(
|
||||||
|
|||||||
213
src/aituner/mechanism_planner.py
Normal file
213
src/aituner/mechanism_planner.py
Normal file
@@ -0,0 +1,213 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import math
|
||||||
|
from dataclasses import dataclass, field
|
||||||
|
from typing import Any, Mapping
|
||||||
|
|
||||||
|
from .knob_descriptor import KnobDescriptor
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass(frozen=True)
|
||||||
|
class CoordinateSearchPolicy:
|
||||||
|
initial_relative_step: float = 1.0
|
||||||
|
initial_fraction_step: float = 0.05
|
||||||
|
grow_factor: float = 1.5
|
||||||
|
shrink_factor: float = 0.5
|
||||||
|
min_score: float = 0.0
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass(frozen=True)
|
||||||
|
class CoordinateOperatorState:
|
||||||
|
knob: str
|
||||||
|
direction: str
|
||||||
|
trust_radius: float | None = None
|
||||||
|
last_good_value: Any | None = None
|
||||||
|
last_bad_value: Any | None = None
|
||||||
|
tested_values: tuple[Any, ...] = ()
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass(frozen=True)
|
||||||
|
class MechanismCandidate:
|
||||||
|
action_id: str
|
||||||
|
knob: str
|
||||||
|
mechanism: str
|
||||||
|
operator: str
|
||||||
|
direction: str
|
||||||
|
patch: dict[str, Any]
|
||||||
|
score: float
|
||||||
|
score_factors: dict[str, float]
|
||||||
|
evidence_refs: tuple[str, ...] = ()
|
||||||
|
|
||||||
|
|
||||||
|
def coordinate_line_search_candidates(
|
||||||
|
*,
|
||||||
|
current_config: Mapping[str, Any],
|
||||||
|
descriptors: tuple[KnobDescriptor, ...],
|
||||||
|
evidence_weights: Mapping[str, float],
|
||||||
|
states: Mapping[tuple[str, str], CoordinateOperatorState] | None = None,
|
||||||
|
policy: CoordinateSearchPolicy | None = None,
|
||||||
|
) -> tuple[MechanismCandidate, ...]:
|
||||||
|
policy = policy or CoordinateSearchPolicy()
|
||||||
|
states = states or {}
|
||||||
|
candidates: list[MechanismCandidate] = []
|
||||||
|
for descriptor in descriptors:
|
||||||
|
if "coordinate_line_search" not in descriptor.operators:
|
||||||
|
continue
|
||||||
|
direction, mechanism, evidence = _choose_direction(descriptor, evidence_weights)
|
||||||
|
if direction is None or mechanism is None:
|
||||||
|
continue
|
||||||
|
if evidence < policy.min_score:
|
||||||
|
continue
|
||||||
|
state = states.get((descriptor.name, direction))
|
||||||
|
current = descriptor.current_value(current_config)
|
||||||
|
target = _propose_value(
|
||||||
|
descriptor=descriptor,
|
||||||
|
current=current,
|
||||||
|
direction=direction,
|
||||||
|
state=state,
|
||||||
|
policy=policy,
|
||||||
|
)
|
||||||
|
if target is None or target == current:
|
||||||
|
continue
|
||||||
|
risk = _direction_risk(descriptor, direction, evidence_weights)
|
||||||
|
score = max(0.0, evidence - risk)
|
||||||
|
candidates.append(
|
||||||
|
MechanismCandidate(
|
||||||
|
action_id=(
|
||||||
|
f"coordinate_line_search:{descriptor.search_geometry}:"
|
||||||
|
f"{descriptor.name}:{direction}:{_stable_token(current)}->{_stable_token(target)}"
|
||||||
|
),
|
||||||
|
knob=descriptor.name,
|
||||||
|
mechanism=mechanism,
|
||||||
|
operator="coordinate_line_search",
|
||||||
|
direction=direction,
|
||||||
|
patch={descriptor.name: target},
|
||||||
|
score=round(score, 4),
|
||||||
|
score_factors={
|
||||||
|
"mechanism_evidence": round(evidence, 4),
|
||||||
|
"direction_risk": round(risk, 4),
|
||||||
|
},
|
||||||
|
evidence_refs=(mechanism,),
|
||||||
|
)
|
||||||
|
)
|
||||||
|
candidates.sort(key=lambda item: (item.score, item.action_id), reverse=True)
|
||||||
|
return tuple(candidates)
|
||||||
|
|
||||||
|
|
||||||
|
def _choose_direction(
|
||||||
|
descriptor: KnobDescriptor,
|
||||||
|
evidence_weights: Mapping[str, float],
|
||||||
|
) -> tuple[str | None, str | None, float]:
|
||||||
|
best_direction: str | None = None
|
||||||
|
best_mechanism: str | None = None
|
||||||
|
best_weight = 0.0
|
||||||
|
for direction, mechanisms in descriptor.directional_effects.items():
|
||||||
|
for mechanism in mechanisms:
|
||||||
|
weight = float(evidence_weights.get(mechanism, 0.0))
|
||||||
|
if weight > best_weight:
|
||||||
|
best_direction = direction
|
||||||
|
best_mechanism = mechanism
|
||||||
|
best_weight = weight
|
||||||
|
return best_direction, best_mechanism, best_weight
|
||||||
|
|
||||||
|
|
||||||
|
def _direction_risk(
|
||||||
|
descriptor: KnobDescriptor,
|
||||||
|
direction: str,
|
||||||
|
evidence_weights: Mapping[str, float],
|
||||||
|
) -> float:
|
||||||
|
risks = descriptor.risk_effects.get(direction, ())
|
||||||
|
if not risks:
|
||||||
|
return 0.0
|
||||||
|
return min(0.5, 0.2 * max(float(evidence_weights.get(item, 0.0)) for item in risks))
|
||||||
|
|
||||||
|
|
||||||
|
def _propose_value(
|
||||||
|
*,
|
||||||
|
descriptor: KnobDescriptor,
|
||||||
|
current: Any,
|
||||||
|
direction: str,
|
||||||
|
state: CoordinateOperatorState | None,
|
||||||
|
policy: CoordinateSearchPolicy,
|
||||||
|
) -> Any | None:
|
||||||
|
if descriptor.search_geometry == "toggle":
|
||||||
|
if not isinstance(current, bool):
|
||||||
|
current = bool(current)
|
||||||
|
return not current
|
||||||
|
if descriptor.search_geometry == "bounded_fraction":
|
||||||
|
value = _as_float(current)
|
||||||
|
if value is None:
|
||||||
|
return None
|
||||||
|
radius = (
|
||||||
|
state.trust_radius
|
||||||
|
if state is not None and state.trust_radius is not None
|
||||||
|
else policy.initial_fraction_step
|
||||||
|
)
|
||||||
|
if direction == "decrease":
|
||||||
|
target = value - radius
|
||||||
|
else:
|
||||||
|
target = value + radius
|
||||||
|
return _canonicalize_value(descriptor, target)
|
||||||
|
if descriptor.search_geometry == "linear":
|
||||||
|
value = _as_float(current)
|
||||||
|
if value is None:
|
||||||
|
return None
|
||||||
|
radius = (
|
||||||
|
state.trust_radius
|
||||||
|
if state is not None and state.trust_radius is not None
|
||||||
|
else 1.0
|
||||||
|
)
|
||||||
|
return _canonicalize_value(
|
||||||
|
descriptor,
|
||||||
|
value - radius if direction == "decrease" else value + radius,
|
||||||
|
)
|
||||||
|
if descriptor.search_geometry == "positive_capacity":
|
||||||
|
value = _as_float(current)
|
||||||
|
if value is None or value <= 0:
|
||||||
|
min_value = descriptor.constraints.min_value
|
||||||
|
return _canonicalize_value(descriptor, min_value if min_value is not None else 1)
|
||||||
|
radius = (
|
||||||
|
state.trust_radius
|
||||||
|
if state is not None and state.trust_radius is not None
|
||||||
|
else policy.initial_relative_step
|
||||||
|
)
|
||||||
|
factor = max(1.0 + radius, 1.01)
|
||||||
|
target = value / factor if direction == "decrease" else value * factor
|
||||||
|
return _canonicalize_value(descriptor, target)
|
||||||
|
raise ValueError(f"unsupported search geometry {descriptor.search_geometry!r}")
|
||||||
|
|
||||||
|
|
||||||
|
def _canonicalize_value(descriptor: KnobDescriptor, value: float | int) -> Any:
|
||||||
|
target = float(value)
|
||||||
|
if descriptor.constraints.min_value is not None:
|
||||||
|
target = max(target, float(descriptor.constraints.min_value))
|
||||||
|
if descriptor.constraints.max_value is not None:
|
||||||
|
target = min(target, float(descriptor.constraints.max_value))
|
||||||
|
if descriptor.constraints.integer or descriptor.value_type == "int":
|
||||||
|
integer_target = int(math.ceil(target))
|
||||||
|
multiple_of = descriptor.constraints.multiple_of
|
||||||
|
if multiple_of is not None and multiple_of > 1:
|
||||||
|
integer_target = int(math.ceil(integer_target / multiple_of) * multiple_of)
|
||||||
|
if descriptor.constraints.min_value is not None:
|
||||||
|
integer_target = max(integer_target, int(descriptor.constraints.min_value))
|
||||||
|
if descriptor.constraints.max_value is not None:
|
||||||
|
integer_target = min(integer_target, int(descriptor.constraints.max_value))
|
||||||
|
return integer_target
|
||||||
|
return round(target, 6)
|
||||||
|
|
||||||
|
|
||||||
|
def _as_float(value: Any) -> float | None:
|
||||||
|
if isinstance(value, bool):
|
||||||
|
return None
|
||||||
|
if isinstance(value, (int, float)):
|
||||||
|
return float(value)
|
||||||
|
if isinstance(value, str) and value.strip():
|
||||||
|
try:
|
||||||
|
return float(value.strip())
|
||||||
|
except ValueError:
|
||||||
|
return None
|
||||||
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
def _stable_token(value: Any) -> str:
|
||||||
|
return repr(value)
|
||||||
@@ -732,6 +732,7 @@ class LLMPolicySpec:
|
|||||||
system_prompt: str
|
system_prompt: str
|
||||||
max_history_trials: int
|
max_history_trials: int
|
||||||
use_harness: bool = True
|
use_harness: bool = True
|
||||||
|
harness_candidate_policy: str = "advisory"
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def from_dict(cls, data: Mapping[str, Any] | None) -> "LLMPolicySpec":
|
def from_dict(cls, data: Mapping[str, Any] | None) -> "LLMPolicySpec":
|
||||||
@@ -743,6 +744,13 @@ class LLMPolicySpec:
|
|||||||
if payload.get("endpoint")
|
if payload.get("endpoint")
|
||||||
else None
|
else None
|
||||||
)
|
)
|
||||||
|
harness_candidate_policy = str(
|
||||||
|
payload.get("harness_candidate_policy") or "advisory"
|
||||||
|
).strip()
|
||||||
|
if harness_candidate_policy not in {"advisory", "strict"}:
|
||||||
|
raise SpecError(
|
||||||
|
"llm.harness_candidate_policy must be one of: advisory, strict."
|
||||||
|
)
|
||||||
return cls(
|
return cls(
|
||||||
endpoint=endpoint,
|
endpoint=endpoint,
|
||||||
system_prompt=str(payload.get("system_prompt") or "").strip(),
|
system_prompt=str(payload.get("system_prompt") or "").strip(),
|
||||||
@@ -754,6 +762,7 @@ class LLMPolicySpec:
|
|||||||
if payload.get("use_harness") is not None
|
if payload.get("use_harness") is not None
|
||||||
else True
|
else True
|
||||||
),
|
),
|
||||||
|
harness_candidate_policy=harness_candidate_policy,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -27,7 +27,15 @@ class StudyStore:
|
|||||||
|
|
||||||
def init_study(self, *, spec_path: Path, study: StudySpec) -> Path:
|
def init_study(self, *, spec_path: Path, study: StudySpec) -> Path:
|
||||||
root = self.study_root(study.study_id)
|
root = self.study_root(study.study_id)
|
||||||
for rel in ("prompts", "proposals", "trials", "results", "harness"):
|
for rel in (
|
||||||
|
"prompts",
|
||||||
|
"proposals",
|
||||||
|
"proposal_attributions",
|
||||||
|
"trials",
|
||||||
|
"results",
|
||||||
|
"harness",
|
||||||
|
"candidate_family_gaps",
|
||||||
|
):
|
||||||
(root / rel).mkdir(parents=True, exist_ok=True)
|
(root / rel).mkdir(parents=True, exist_ok=True)
|
||||||
(root / "study_spec.source").write_text(str(spec_path.resolve()) + "\n", encoding="utf-8")
|
(root / "study_spec.source").write_text(str(spec_path.resolve()) + "\n", encoding="utf-8")
|
||||||
self.write_json(root / "study_spec.snapshot.json", to_jsonable(study))
|
self.write_json(root / "study_spec.snapshot.json", to_jsonable(study))
|
||||||
@@ -80,6 +88,26 @@ class StudyStore:
|
|||||||
self.write_json(path, payload)
|
self.write_json(path, payload)
|
||||||
return path
|
return path
|
||||||
|
|
||||||
|
def write_proposal_attribution(
|
||||||
|
self,
|
||||||
|
study_id: str,
|
||||||
|
proposal_name: str,
|
||||||
|
payload: dict[str, Any],
|
||||||
|
) -> Path:
|
||||||
|
path = self.study_root(study_id) / "proposal_attributions" / f"{proposal_name}.json"
|
||||||
|
self.write_json(path, payload)
|
||||||
|
return path
|
||||||
|
|
||||||
|
def write_candidate_family_gap(
|
||||||
|
self,
|
||||||
|
study_id: str,
|
||||||
|
trial_id: str,
|
||||||
|
payload: dict[str, Any],
|
||||||
|
) -> Path:
|
||||||
|
path = self.study_root(study_id) / "candidate_family_gaps" / f"{trial_id}.json"
|
||||||
|
self.write_json(path, payload)
|
||||||
|
return path
|
||||||
|
|
||||||
def materialize_trial(
|
def materialize_trial(
|
||||||
self,
|
self,
|
||||||
*,
|
*,
|
||||||
|
|||||||
@@ -7410,6 +7410,231 @@ class CoreFlowTests(unittest.TestCase):
|
|||||||
(store.study_root(study.study_id) / "harness" / "candidate-set-0002.json").exists()
|
(store.study_root(study.study_id) / "harness" / "candidate-set-0002.json").exists()
|
||||||
)
|
)
|
||||||
|
|
||||||
|
def test_cli_tune_records_advisory_llm_out_of_set_candidate_family_gap(self) -> None:
|
||||||
|
with tempfile.TemporaryDirectory() as tmp:
|
||||||
|
tmp_path = Path(tmp)
|
||||||
|
study_path = _write_study_assets(tmp_path)
|
||||||
|
payload = json.loads(study_path.read_text(encoding="utf-8"))
|
||||||
|
payload["llm"]["endpoint"] = {
|
||||||
|
"provider": "custom",
|
||||||
|
"base_url": "http://llm.example/v1",
|
||||||
|
"wire_api": "chat.completions",
|
||||||
|
"model": "test-model",
|
||||||
|
"api_key_env": "OPENAI_API_KEY",
|
||||||
|
}
|
||||||
|
study_path.write_text(json.dumps(payload), encoding="utf-8")
|
||||||
|
study = load_study_spec(study_path)
|
||||||
|
store_root = tmp_path / "store"
|
||||||
|
store = StudyStore(store_root)
|
||||||
|
store.init_study(spec_path=study_path, study=study)
|
||||||
|
store.save_state(
|
||||||
|
StudyState(
|
||||||
|
study_id=study.study_id,
|
||||||
|
best_trial_id="trial-0001",
|
||||||
|
best_parallel_size=1,
|
||||||
|
best_sampling_u=0.25,
|
||||||
|
best_request_rate=1.0,
|
||||||
|
best_request_rate_per_gpu=1.0,
|
||||||
|
next_trial_index=2,
|
||||||
|
trials=[
|
||||||
|
TrialSummary(
|
||||||
|
trial_id="trial-0001",
|
||||||
|
status="completed",
|
||||||
|
parallel_size=1,
|
||||||
|
best_request_rate=1.0,
|
||||||
|
best_request_rate_per_gpu=1.0,
|
||||||
|
config_patch={
|
||||||
|
"env_patch": {},
|
||||||
|
"flag_patch": {"max-num-seqs": 8},
|
||||||
|
},
|
||||||
|
)
|
||||||
|
],
|
||||||
|
)
|
||||||
|
)
|
||||||
|
harness_context = {
|
||||||
|
"experiment_plan": {
|
||||||
|
"planner_version": "test",
|
||||||
|
"candidate_set": {
|
||||||
|
"candidate_set_hash": "candidate-set-test",
|
||||||
|
"eligible_candidates": [
|
||||||
|
{
|
||||||
|
"candidate_id": "cand-mns16",
|
||||||
|
"action_id": "coordinate_step:max-num-seqs:8->16",
|
||||||
|
"knob_family": "max-num-seqs",
|
||||||
|
"score": 0.8,
|
||||||
|
"effective_config_fingerprint": "not-the-llm-proposal",
|
||||||
|
"config_patch": {
|
||||||
|
"env_patch": {},
|
||||||
|
"flag_patch": {"max-num-seqs": 16},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"blocked_candidates": [],
|
||||||
|
},
|
||||||
|
"next_action": None,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
llm_payload = json.dumps(
|
||||||
|
{
|
||||||
|
"observation": "Harness is in the right admission direction but too conservative.",
|
||||||
|
"diagnosis": "Try a larger same-operator admission step.",
|
||||||
|
"config_patch": {"env_patch": {}, "flag_patch": {"max-num-seqs": 24}},
|
||||||
|
"expected_effects": ["test whether admission capacity was underexplored"],
|
||||||
|
"why_not_previous_failures": "new value and no launch failure evidence",
|
||||||
|
"should_stop": False,
|
||||||
|
}
|
||||||
|
)
|
||||||
|
|
||||||
|
def fake_run_trial(trial_spec_path: Path) -> dict[str, object]:
|
||||||
|
trial_payload = json.loads(trial_spec_path.read_text(encoding="utf-8"))
|
||||||
|
trial_root = Path(trial_payload["artifact_dir"])
|
||||||
|
result = {
|
||||||
|
"study_id": trial_payload["study_id"],
|
||||||
|
"trial_id": trial_payload["trial_id"],
|
||||||
|
"status": "completed",
|
||||||
|
"best_sampling_u": 0.5,
|
||||||
|
"best_request_rate": 2.0,
|
||||||
|
"best_pass_rate": 1.0,
|
||||||
|
"best_request_count": 2,
|
||||||
|
"probes": [],
|
||||||
|
}
|
||||||
|
(trial_root / "result.json").write_text(json.dumps(result), encoding="utf-8")
|
||||||
|
return result
|
||||||
|
|
||||||
|
buffer = io.StringIO()
|
||||||
|
with mock.patch("aituner.cli.build_harness_context", return_value=harness_context):
|
||||||
|
with mock.patch("aituner.llm.build_harness_context", return_value=harness_context):
|
||||||
|
with mock.patch("aituner.cli.call_llm_for_proposal", return_value=llm_payload):
|
||||||
|
with mock.patch("aituner.cli.run_trial", side_effect=fake_run_trial):
|
||||||
|
with contextlib.redirect_stdout(buffer):
|
||||||
|
exit_code = cli_main(
|
||||||
|
[
|
||||||
|
"study",
|
||||||
|
"tune",
|
||||||
|
"--spec",
|
||||||
|
str(study_path),
|
||||||
|
"--store-root",
|
||||||
|
str(store_root),
|
||||||
|
"--skip-baseline",
|
||||||
|
"--max-trials",
|
||||||
|
"2",
|
||||||
|
"--proposal-policy",
|
||||||
|
"llm-first",
|
||||||
|
]
|
||||||
|
)
|
||||||
|
|
||||||
|
self.assertEqual(exit_code, 0)
|
||||||
|
summary = json.loads(buffer.getvalue())
|
||||||
|
executed = summary["executed_trials"]
|
||||||
|
self.assertEqual(executed[0]["proposal_origin"], "llm_out_of_set")
|
||||||
|
self.assertTrue(executed[0]["candidate_family_gap_path"])
|
||||||
|
attribution_path = (
|
||||||
|
store.study_root(study.study_id)
|
||||||
|
/ "proposal_attributions"
|
||||||
|
/ "proposal-0002.json"
|
||||||
|
)
|
||||||
|
attribution = json.loads(attribution_path.read_text(encoding="utf-8"))
|
||||||
|
self.assertEqual(attribution["proposal_origin"], "llm_out_of_set")
|
||||||
|
self.assertEqual(attribution["harness_candidate_policy"], "advisory")
|
||||||
|
gap_path = Path(executed[0]["candidate_family_gap_path"])
|
||||||
|
gap = json.loads(gap_path.read_text(encoding="utf-8"))
|
||||||
|
self.assertEqual(gap["gap_type"], "same_operator_new_step")
|
||||||
|
self.assertEqual(gap["review_status"], "pending")
|
||||||
|
self.assertEqual(gap["changed_knobs"], ["flag:max-num-seqs"])
|
||||||
|
self.assertEqual(gap["proposal_patch"]["flag_patch"]["max-num-seqs"], 24)
|
||||||
|
self.assertEqual(gap["nearest_harness_candidates"][0]["candidate_id"], "cand-mns16")
|
||||||
|
|
||||||
|
def test_cli_tune_strict_harness_policy_rejects_llm_out_of_set_proposal(self) -> None:
|
||||||
|
with tempfile.TemporaryDirectory() as tmp:
|
||||||
|
tmp_path = Path(tmp)
|
||||||
|
study_path = _write_study_assets(tmp_path)
|
||||||
|
payload = json.loads(study_path.read_text(encoding="utf-8"))
|
||||||
|
payload["llm"]["harness_candidate_policy"] = "strict"
|
||||||
|
payload["llm"]["endpoint"] = {
|
||||||
|
"provider": "custom",
|
||||||
|
"base_url": "http://llm.example/v1",
|
||||||
|
"wire_api": "chat.completions",
|
||||||
|
"model": "test-model",
|
||||||
|
"api_key_env": "OPENAI_API_KEY",
|
||||||
|
}
|
||||||
|
study_path.write_text(json.dumps(payload), encoding="utf-8")
|
||||||
|
study = load_study_spec(study_path)
|
||||||
|
store_root = tmp_path / "store"
|
||||||
|
store = StudyStore(store_root)
|
||||||
|
store.init_study(spec_path=study_path, study=study)
|
||||||
|
store.save_state(
|
||||||
|
StudyState(
|
||||||
|
study_id=study.study_id,
|
||||||
|
best_trial_id="trial-0001",
|
||||||
|
best_parallel_size=1,
|
||||||
|
best_request_rate=1.0,
|
||||||
|
best_request_rate_per_gpu=1.0,
|
||||||
|
next_trial_index=2,
|
||||||
|
trials=[
|
||||||
|
TrialSummary(
|
||||||
|
trial_id="trial-0001",
|
||||||
|
status="completed",
|
||||||
|
parallel_size=1,
|
||||||
|
best_request_rate=1.0,
|
||||||
|
best_request_rate_per_gpu=1.0,
|
||||||
|
config_patch={"env_patch": {}, "flag_patch": {"max-num-seqs": 8}},
|
||||||
|
)
|
||||||
|
],
|
||||||
|
)
|
||||||
|
)
|
||||||
|
harness_context = {
|
||||||
|
"experiment_plan": {
|
||||||
|
"candidate_set": {
|
||||||
|
"candidate_set_hash": "candidate-set-test",
|
||||||
|
"eligible_candidates": [
|
||||||
|
{
|
||||||
|
"candidate_id": "cand-mns16",
|
||||||
|
"effective_config_fingerprint": "not-the-llm-proposal",
|
||||||
|
"config_patch": {
|
||||||
|
"env_patch": {},
|
||||||
|
"flag_patch": {"max-num-seqs": 16},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
],
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
llm_payload = json.dumps(
|
||||||
|
{
|
||||||
|
"observation": "Try an out-of-set candidate.",
|
||||||
|
"diagnosis": "strict mode should reject this.",
|
||||||
|
"config_patch": {"env_patch": {}, "flag_patch": {"max-num-seqs": 24}},
|
||||||
|
"expected_effects": ["should not run"],
|
||||||
|
"why_not_previous_failures": "",
|
||||||
|
"should_stop": False,
|
||||||
|
}
|
||||||
|
)
|
||||||
|
stderr = io.StringIO()
|
||||||
|
with mock.patch("aituner.cli.build_harness_context", return_value=harness_context):
|
||||||
|
with mock.patch("aituner.llm.build_harness_context", return_value=harness_context):
|
||||||
|
with mock.patch("aituner.cli.call_llm_for_proposal", return_value=llm_payload):
|
||||||
|
with mock.patch("aituner.cli.run_trial") as run_trial_mock:
|
||||||
|
with contextlib.redirect_stderr(stderr):
|
||||||
|
exit_code = cli_main(
|
||||||
|
[
|
||||||
|
"study",
|
||||||
|
"tune",
|
||||||
|
"--spec",
|
||||||
|
str(study_path),
|
||||||
|
"--store-root",
|
||||||
|
str(store_root),
|
||||||
|
"--skip-baseline",
|
||||||
|
"--max-trials",
|
||||||
|
"2",
|
||||||
|
"--proposal-policy",
|
||||||
|
"llm-first",
|
||||||
|
]
|
||||||
|
)
|
||||||
|
|
||||||
|
self.assertEqual(exit_code, 2)
|
||||||
|
run_trial_mock.assert_not_called()
|
||||||
|
self.assertIn("llm.harness_candidate_policy=strict", stderr.getvalue())
|
||||||
|
|
||||||
def test_cli_tune_evaluates_baseline_before_llm_proposal(self) -> None:
|
def test_cli_tune_evaluates_baseline_before_llm_proposal(self) -> None:
|
||||||
with tempfile.TemporaryDirectory() as tmp:
|
with tempfile.TemporaryDirectory() as tmp:
|
||||||
tmp_path = Path(tmp)
|
tmp_path = Path(tmp)
|
||||||
|
|||||||
68
tests/test_mechanism_planner.py
Normal file
68
tests/test_mechanism_planner.py
Normal file
@@ -0,0 +1,68 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import unittest
|
||||||
|
|
||||||
|
from aituner.engine_adapters.vllm import default_vllm_descriptors
|
||||||
|
from aituner.knob_descriptor import KnobConstraints, KnobDescriptor
|
||||||
|
from aituner.mechanism_planner import coordinate_line_search_candidates
|
||||||
|
|
||||||
|
|
||||||
|
class MechanismPlannerTests(unittest.TestCase):
|
||||||
|
def test_coordinate_search_uses_mechanism_not_knob_name(self) -> None:
|
||||||
|
vllm_descriptor = default_vllm_descriptors(tunable_flags=("max-num-seqs",))[0]
|
||||||
|
sglang_descriptor = KnobDescriptor(
|
||||||
|
name="max-running-requests",
|
||||||
|
location="flag",
|
||||||
|
value_type="int",
|
||||||
|
mechanisms=("admission_capacity", "kv_memory_pressure"),
|
||||||
|
search_geometry="positive_capacity",
|
||||||
|
operators=("coordinate_line_search",),
|
||||||
|
constraints=KnobConstraints(min_value=1, integer=True, multiple_of=8),
|
||||||
|
directional_effects={
|
||||||
|
"increase": ("admission_capacity",),
|
||||||
|
"decrease": ("kv_memory_pressure",),
|
||||||
|
},
|
||||||
|
)
|
||||||
|
|
||||||
|
vllm_candidates = coordinate_line_search_candidates(
|
||||||
|
current_config={"max-num-seqs": 8},
|
||||||
|
descriptors=(vllm_descriptor,),
|
||||||
|
evidence_weights={"admission_capacity": 0.9},
|
||||||
|
)
|
||||||
|
sglang_candidates = coordinate_line_search_candidates(
|
||||||
|
current_config={"max-running-requests": 8},
|
||||||
|
descriptors=(sglang_descriptor,),
|
||||||
|
evidence_weights={"admission_capacity": 0.9},
|
||||||
|
)
|
||||||
|
|
||||||
|
self.assertEqual(vllm_candidates[0].patch, {"max-num-seqs": 16})
|
||||||
|
self.assertEqual(sglang_candidates[0].patch, {"max-running-requests": 16})
|
||||||
|
self.assertEqual(vllm_candidates[0].mechanism, "admission_capacity")
|
||||||
|
self.assertEqual(sglang_candidates[0].mechanism, "admission_capacity")
|
||||||
|
|
||||||
|
def test_positive_capacity_can_decrease_for_memory_pressure(self) -> None:
|
||||||
|
descriptor = default_vllm_descriptors(tunable_flags=("max-num-seqs",))[0]
|
||||||
|
|
||||||
|
candidates = coordinate_line_search_candidates(
|
||||||
|
current_config={"max-num-seqs": 64},
|
||||||
|
descriptors=(descriptor,),
|
||||||
|
evidence_weights={"kv_memory_pressure": 0.8},
|
||||||
|
)
|
||||||
|
|
||||||
|
self.assertEqual(candidates[0].direction, "decrease")
|
||||||
|
self.assertEqual(candidates[0].patch, {"max-num-seqs": 32})
|
||||||
|
|
||||||
|
def test_bounded_fraction_respects_constraints(self) -> None:
|
||||||
|
descriptor = default_vllm_descriptors(tunable_flags=("gpu-memory-utilization",))[0]
|
||||||
|
|
||||||
|
candidates = coordinate_line_search_candidates(
|
||||||
|
current_config={"gpu-memory-utilization": 0.98},
|
||||||
|
descriptors=(descriptor,),
|
||||||
|
evidence_weights={"kv_memory_capacity": 0.8},
|
||||||
|
)
|
||||||
|
|
||||||
|
self.assertEqual(candidates[0].patch, {"gpu-memory-utilization": 1.0})
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
unittest.main()
|
||||||
Reference in New Issue
Block a user