Ablation substrate: scale=0.5 + out=128 + 6 probes (TP1 measurable, tractable)

replay_time_scale=0.2 made TP1 uniformly infeasible, so it produced no baseline. Cap decode
at 128 tokens (completion_tokens_override) and use a mild 2x compression
(replay_time_scale=0.5) so that TP1 registers a real, fast baseline, and raise max_probes
from 3 to 6 to span TP1's low and TP4's high feasibility boundaries. Both configs are
identical except for use_harness.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
2026-06-16 20:29:30 +08:00
parent a1cbab0e69
commit 5965f4fbbc
2 changed files with 12 additions and 10 deletions

View File

@@ -130,9 +130,9 @@
"min_input_tokens": 0, "min_input_tokens": 0,
"max_input_tokens": 8192 "max_input_tokens": 8192
}, },
"replay_time_scale": 0.2, "replay_time_scale": 0.5,
"early_stop_max_lag_s": 30.0, "early_stop_max_lag_s": 45.0,
"early_stop_max_elapsed_s": 180.0, "early_stop_max_elapsed_s": 320.0,
"adaptive_stop": { "adaptive_stop": {
"enabled": true, "enabled": true,
"tau": 0.9, "tau": 0.9,
@@ -141,7 +141,8 @@
"max_checks": 20, "max_checks": 20,
"min_fraction": 0.1, "min_fraction": 0.1,
"boundary_delta": 0.02 "boundary_delta": 0.02
} },
"completion_tokens_override": 128
}, },
"slo": { "slo": {
"target_pass_rate": 0.95, "target_pass_rate": 0.95,
@@ -159,7 +160,7 @@
"low": 0.0, "low": 0.0,
"high": 0.25, "high": 0.25,
"tolerance": 0.001, "tolerance": 0.001,
"max_probes": 3, "max_probes": 6,
"sample_seed": 20260325, "sample_seed": 20260325,
"inherit_incumbent_floor": true "inherit_incumbent_floor": true
}, },

View File

@@ -130,9 +130,9 @@
"min_input_tokens": 0, "min_input_tokens": 0,
"max_input_tokens": 8192 "max_input_tokens": 8192
}, },
"replay_time_scale": 0.2, "replay_time_scale": 0.5,
"early_stop_max_lag_s": 30.0, "early_stop_max_lag_s": 45.0,
"early_stop_max_elapsed_s": 180.0, "early_stop_max_elapsed_s": 320.0,
"adaptive_stop": { "adaptive_stop": {
"enabled": true, "enabled": true,
"tau": 0.9, "tau": 0.9,
@@ -141,7 +141,8 @@
"max_checks": 20, "max_checks": 20,
"min_fraction": 0.1, "min_fraction": 0.1,
"boundary_delta": 0.02 "boundary_delta": 0.02
} },
"completion_tokens_override": 128
}, },
"slo": { "slo": {
"target_pass_rate": 0.95, "target_pass_rate": 0.95,
@@ -159,7 +160,7 @@
"low": 0.0, "low": 0.0,
"high": 0.25, "high": 0.25,
"tolerance": 0.001, "tolerance": 0.001,
"max_probes": 3, "max_probes": 6,
"sample_seed": 20260325, "sample_seed": 20260325,
"inherit_incumbent_floor": true "inherit_incumbent_floor": true
}, },