Minimize no-harness ablation prompt
@@ -148,7 +148,11 @@ def cmd_study_llm_propose(args: argparse.Namespace) -> int:
         state=state,
         capability_profile=capability_profile,
     )
-    proposal_text = call_llm_for_proposal(policy=study.llm, prompt=prompt)
+    proposal_text = call_llm_for_proposal(
+        policy=study.llm,
+        prompt=prompt,
+        use_harness=study.llm.use_harness,
+    )
     proposal = parse_proposal_text(proposal_text, study)
     name = args.proposal_name or f"proposal-{state.next_trial_index:04d}"
     path = store.write_proposal(study.study_id, name, proposal)
@@ -300,7 +304,11 @@ def cmd_study_tune(args: argparse.Namespace) -> int:
            "No proposal files provided, study.llm.endpoint is not configured, "
            "and the harness stop guard did not fire."
        )
-    proposal_text = call_llm_for_proposal(policy=study.llm, prompt=prompt)
+    proposal_text = call_llm_for_proposal(
+        policy=study.llm,
+        prompt=prompt,
+        use_harness=study.llm.use_harness,
+    )
     proposal_name = f"proposal-{state.next_trial_index:04d}"
     raw_proposal_path = store.study_root(study.study_id) / "proposals" / f"{proposal_name}.raw.txt"
     raw_proposal_path.write_text(proposal_text, encoding="utf-8")
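
Both hunks above make the same change at the two call sites: the study-level use_harness flag is now passed through explicitly instead of the call assuming harness mode. A minimal runnable sketch of the pattern, assuming a simplified stand-in for the real LLMPolicySpec (the dataclass and stubbed transport below are illustrative, not the actual definitions):

    from dataclasses import dataclass


    @dataclass
    class LLMPolicy:
        """Simplified stand-in for LLMPolicySpec."""
        endpoint: str | None
        system_prompt: str
        use_harness: bool = True  # per-study ablation switch


    def call_llm_for_proposal(*, policy: LLMPolicy, prompt: str, use_harness: bool = True) -> str:
        if policy.endpoint is None:
            raise RuntimeError("study.llm.endpoint is not configured")
        # With the harness disabled, no harness system prompt accompanies the request.
        system_prompt = policy.system_prompt if use_harness else ""
        return f"system={system_prompt!r} prompt_chars={len(prompt)}"  # stubbed transport


    policy = LLMPolicy(endpoint="http://localhost:8000/v1", system_prompt="harness rules", use_harness=False)
    # Each call site forwards the flag from the study config rather than relying on the default:
    print(call_llm_for_proposal(policy=policy, prompt="...", use_harness=policy.use_harness))

Keeping use_harness=True as the parameter default means callers that never touch the ablation path keep their behavior unchanged.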
@@ -212,16 +212,102 @@ def build_prompt(
     )
     launch_failures = _launch_failure_history(state)
     parallel_candidates = _enumerate_parallel_candidates(study)
-    sections = [
+    common_preamble = [
         "You are tuning an OpenAI-compatible serving engine.",
         "Return exactly one JSON object with keys: observation, diagnosis, config_patch, expected_effects, why_not_previous_failures, should_stop.",
         "config_patch must contain env_patch and flag_patch.",
         "expected_effects must be a JSON array of short strings, not an object.",
-        "should_stop must be a boolean. Use true only when the harness convergence guard says another adjacent probe is not justified.",
+        (
+            "should_stop must be a boolean. Use true only when the harness convergence guard says another adjacent probe is not justified."
+            if study.llm.use_harness
+            else "should_stop must be a boolean. Use false unless no valid config can be proposed."
+        ),
         "Only use allowed tunable env keys and allowed tunable flag keys.",
         "Do not wrap the JSON in markdown fences or any extra text.",
         "Do not repeat a config that previously failed to launch unless the new patch explicitly removes the failing knob.",
         "Treat previous engine launch failures as hard negative evidence. If you touch TP/DP/EP, keep the proposal inside the topology constraints exactly.",
+    ]
+    if not study.llm.use_harness:
+        sections = [
+            *common_preamble,
+            "",
+            "Study context:",
+            json.dumps(
+                {
+                    "study_id": study.study_id,
+                    "objective": "maximize feasible request_rate_per_gpu at the SLO target",
+                    "current_best": {
+                        "trial_id": state.best_trial_id,
+                        "best_parallel_size": state.best_parallel_size,
+                        "best_sampling_u": state.best_sampling_u,
+                        "best_request_rate": state.best_request_rate,
+                        "best_request_rate_per_gpu": state.best_request_rate_per_gpu,
+                    },
+                    "hardware": {
+                        "gpu_count": study.hardware.gpu_count,
+                        "gpu_model": study.hardware.gpu_model,
+                    },
+                    "model": {
+                        "model_id": study.model.model_id,
+                        "served_model_name": study.model.served_model_name,
+                    },
+                    "trace": {
+                        "window_id": study.trace.window_id,
+                        "request_mode": study.trace.request_mode,
+                        "completion_tokens_override": study.trace.completion_tokens_override,
+                        "input_length_filter": (
+                            {
+                                "min_input_tokens": study.trace.input_length_filter.min_input_tokens,
+                                "max_input_tokens": study.trace.input_length_filter.max_input_tokens,
+                            }
+                            if study.trace.input_length_filter is not None
+                            else None
+                        ),
+                    },
+                    "engine": {
+                        "engine_name": study.engine.engine_name,
+                        "engine_version": study.engine.engine_version,
+                        "base_flags": study.engine.base_flags,
+                        "base_envs": study.engine.base_envs,
+                        "allowed_flag_keys": study.engine.tunable_flags,
+                        "allowed_env_keys": study.engine.tunable_envs,
+                        "topology_constraints": (
+                            study.engine.topology_constraints.__dict__
+                            if study.engine.topology_constraints is not None
+                            else None
+                        ),
+                    },
+                },
+                ensure_ascii=False,
+                indent=2,
+            ),
+            "",
+            "SLO:",
+            json.dumps(
+                {
+                    "target_pass_rate": study.slo.target_pass_rate,
+                    "ttft_rule": study.slo.ttft_rule,
+                    "tpot_rule": study.slo.tpot_rule,
+                    "objective_notes": objective_notes,
+                },
+                default=lambda value: value.__dict__,
+                ensure_ascii=False,
+                indent=2,
+            ),
+            "",
+            "Trial history:",
+            json.dumps(history, ensure_ascii=False, indent=2),
+            "",
+            "Known launch failures:",
+            json.dumps(launch_failures, ensure_ascii=False, indent=2),
+            "",
+            "Tested config signatures:",
+            json.dumps(_tested_config_signatures(state), ensure_ascii=False, indent=2),
+        ]
+        return "\n".join(sections)
+
+    sections = [
+        *common_preamble,
         (
             "TP/DP/EP are part of the tunable space for this study. Prioritize exploring legal topology changes in parallel space before runtime-only knobs unless recent history already proves a topology variant is worse or fails to launch."
             if parallel_candidates
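
Condensed, build_prompt now has two assembly paths instead of one list with harness/ablation branches woven through it. A sketch of the control flow under simplified names (raw_context and harness_sections stand in for the literal section lists in the hunk above):

    def assemble_prompt(
        use_harness: bool,
        common_preamble: list[str],
        raw_context: list[str],
        harness_sections: list[str],
    ) -> str:
        if not use_harness:
            # Ablation path: shared output-format rules plus the raw study
            # stack (study context, SLO, trial history, launch failures,
            # tested config signatures) and nothing else -- no harness
            # guidance, window summaries, or parallel-space hints.
            return "\n".join([*common_preamble, *raw_context])
        # Harness path: the same preamble, then the topology hints and
        # harness guidance sections.
        return "\n".join([*common_preamble, *harness_sections])

The early return is what lets the following hunks delete the if study.llm.use_harness: / else: wrapping: once the no-harness case has exited, the rest of the function only ever builds the harness prompt.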
@@ -314,7 +400,6 @@ def build_prompt(
         "Tested config signatures:",
         json.dumps(_tested_config_signatures(state), ensure_ascii=False, indent=2),
     ]
-    if study.llm.use_harness:
     sections.extend(
         [
             "",
@@ -329,15 +414,6 @@ def build_prompt(
             "",
         ]
     )
-    else:
-        sections.extend(
-            [
-                "",
-                "Harnesses:",
-                "Disabled by llm.use_harness=false for ablation.",
-                "",
-            ]
-        )
     sections.extend(
         [
             "The primary cross-topology comparison metric is request_rate_per_gpu, not raw request_rate.",
@@ -348,11 +424,7 @@ def build_prompt(
                 else "The evaluator runs each proposal over the full configured search range so raw per-iteration performance is measured directly."
            ),
            "Do not assume a configuration with fewer GPUs should inherit the global incumbent sampling_u.",
-            (
-                "Follow the active harness. Prefer stop over a weak exploratory proposal once a good incumbent has converged."
-                if study.llm.use_harness
-                else "For this ablation, reason from the raw study stack, trial history, launch failures, and tested config signatures without harness hints."
-            ),
+            "Follow the active harness. Prefer stop over a weak exploratory proposal once a good incumbent has converged.",
         ]
     )
     return "\n".join(sections)
@@ -604,6 +676,7 @@ def call_llm_for_proposal(
     *,
     policy: LLMPolicySpec,
     prompt: str,
+    use_harness: bool = True,
 ) -> str:
     if policy.endpoint is None:
         raise RuntimeError("study.llm.endpoint is not configured")
@@ -611,6 +684,7 @@ def call_llm_for_proposal(
     max_attempts = 4
     for attempt in range(max_attempts):
         try:
+            system_prompt = policy.system_prompt if use_harness else ""
             if policy.endpoint.stream:
                 text = stream_text_completion(
                     base_url=policy.endpoint.base_url,
@@ -620,7 +694,7 @@ def call_llm_for_proposal(
                     model=policy.endpoint.model,
                     messages=[{"role": "user", "content": prompt}],
                     timeout_s=policy.endpoint.timeout_s,
-                    system_prompt=policy.system_prompt,
+                    system_prompt=system_prompt,
                     reasoning_effort=policy.endpoint.reasoning_effort,
                 )
             else:
@@ -632,7 +706,7 @@ def call_llm_for_proposal(
                     model=policy.endpoint.model,
                     messages=[{"role": "user", "content": prompt}],
                     timeout_s=policy.endpoint.timeout_s,
-                    system_prompt=policy.system_prompt,
+                    system_prompt=system_prompt,
                     reasoning_effort=policy.endpoint.reasoning_effort,
                 )
             text = _extract_response_text(response)
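
The new system_prompt selection is computed once per attempt and feeds both the streaming and non-streaming branches below it. A runnable sketch of that placement, with the transport stubbed out and RetryableError a hypothetical stand-in for whatever the real loop catches:

    class RetryableError(Exception):
        pass


    def _send(prompt: str, system_prompt: str, stream: bool) -> str:
        # Stub for stream_text_completion / the non-streaming request path.
        mode = "stream" if stream else "batch"
        return f"[{mode}] system={system_prompt!r} prompt_chars={len(prompt)}"


    def call_with_retries(prompt: str, configured_system_prompt: str, *, use_harness: bool, stream: bool) -> str:
        max_attempts = 4
        for attempt in range(max_attempts):
            try:
                # Harness off => an empty system prompt, so the ablation run
                # sees only the user prompt assembled by build_prompt.
                system_prompt = configured_system_prompt if use_harness else ""
                return _send(prompt, system_prompt, stream)
            except RetryableError:
                if attempt == max_attempts - 1:
                    raise
        raise RuntimeError("unreachable")


    print(call_with_retries("tune this engine", "harness rules", use_harness=False, stream=True))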
@@ -1002,9 +1002,16 @@ class CoreFlowTests(unittest.TestCase):
             capability_profile=None,
         )
         self.assertFalse(study.llm.use_harness)
-        self.assertIn("Disabled by llm.use_harness=false", prompt)
+        self.assertIn("Study context:", prompt)
+        self.assertIn("Trial history:", prompt)
+        self.assertIn("Known launch failures:", prompt)
         self.assertNotIn('"paper_alignment"', prompt)
-        self.assertIn("without harness hints", prompt)
+        self.assertNotIn("Harnesses:", prompt)
+        self.assertNotIn("Disabled by llm.use_harness=false", prompt)
+        self.assertNotIn("without harness hints", prompt)
+        self.assertNotIn("Window summary:", prompt)
+        self.assertNotIn("Parallel space candidates:", prompt)
+        self.assertNotIn("Prioritize exploring legal topology changes", prompt)

     def test_harness_uses_prior_infeasible_probe_for_active_bottleneck(self) -> None:
         with tempfile.TemporaryDirectory() as tmp: