Ablation substrate: scale=0.5 + out=128 + 6 probes (TP1 measurable, tractable)

replay_time_scale=0.2 made TP1 uniformly infeasible (no baseline). Bound decode to 128
tokens (completion_tokens_override=128) and use a mild 2x time compression
(replay_time_scale=0.5) so that TP1 registers a real, fast baseline, with 6 probes
(max_probes, up from 3) to span TP1's low and TP4's high feasibility boundaries.
The two configs are identical except for use_harness.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
2026-06-16 20:29:30 +08:00
parent a1cbab0e69
commit 5965f4fbbc
2 changed files with 12 additions and 10 deletions

View File

@@ -130,9 +130,9 @@
"min_input_tokens": 0,
"max_input_tokens": 8192
},
"replay_time_scale": 0.2,
"early_stop_max_lag_s": 30.0,
"early_stop_max_elapsed_s": 180.0,
"replay_time_scale": 0.5,
"early_stop_max_lag_s": 45.0,
"early_stop_max_elapsed_s": 320.0,
"adaptive_stop": {
"enabled": true,
"tau": 0.9,
@@ -141,7 +141,8 @@
"max_checks": 20,
"min_fraction": 0.1,
"boundary_delta": 0.02
}
},
"completion_tokens_override": 128
},
"slo": {
"target_pass_rate": 0.95,
@@ -159,7 +160,7 @@
"low": 0.0,
"high": 0.25,
"tolerance": 0.001,
"max_probes": 3,
"max_probes": 6,
"sample_seed": 20260325,
"inherit_incumbent_floor": true
},

View File

@@ -130,9 +130,9 @@
"min_input_tokens": 0,
"max_input_tokens": 8192
},
"replay_time_scale": 0.2,
"early_stop_max_lag_s": 30.0,
"early_stop_max_elapsed_s": 180.0,
"replay_time_scale": 0.5,
"early_stop_max_lag_s": 45.0,
"early_stop_max_elapsed_s": 320.0,
"adaptive_stop": {
"enabled": true,
"tau": 0.9,
@@ -141,7 +141,8 @@
"max_checks": 20,
"min_fraction": 0.1,
"boundary_delta": 0.02
}
},
"completion_tokens_override": 128
},
"slo": {
"target_pass_rate": 0.95,
@@ -159,7 +160,7 @@
"low": 0.0,
"high": 0.25,
"tolerance": 0.001,
"max_probes": 3,
"max_probes": 6,
"sample_seed": 20260325,
"inherit_incumbent_floor": true
},