Add --max-single-turn-ratio to control single-turn session fraction
Single-turn sessions with unique prefixes get a 0% cache hit rate, which dilutes APC in benchmarks. --max-single-turn-ratio caps their fraction of the sampled sessions, boosting multi-turn density and theoretical APC.
Example: --sample-ratio 0.008 --max-single-turn-ratio 0.3
Before: 9.2% multi-turn, APC=70.5%
After: 70.0% multi-turn, APC=85.0%, sharing=53.3%
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -69,15 +69,15 @@ def sample_sessions(
|
||||
*,
|
||||
sample_ratio: float | None = None,
|
||||
target_requests: int | None = None,
|
||||
max_single_turn_ratio: float | None = None,
|
||||
seed: int,
|
||||
) -> list[str]:
|
||||
"""Sample sessions preserving KV cache reuse."""
|
||||
rng = random.Random(seed)
|
||||
|
||||
if sample_ratio is not None:
|
||||
return _sample_window_then_thin(rows_by_session, sample_ratio, rng)
|
||||
|
||||
if target_requests is not None:
|
||||
selected = _sample_window_then_thin(rows_by_session, sample_ratio, rng)
|
||||
elif target_requests is not None:
|
||||
all_sids = list(rows_by_session.keys())
|
||||
rng.shuffle(all_sids)
|
||||
selected = []
|
||||
@@ -87,9 +87,35 @@ def sample_sessions(
|
||||
total += len(rows_by_session[sid])
|
||||
if total >= target_requests:
|
||||
break
|
||||
else:
|
||||
raise ValueError("Must specify --sample-ratio or --target-requests")
|
||||
|
||||
if max_single_turn_ratio is not None:
|
||||
selected = _cap_single_turn(rows_by_session, selected,
|
||||
max_single_turn_ratio, rng)
|
||||
|
||||
return selected
|
||||
|
||||
|
||||
def _cap_single_turn(
|
||||
rows_by_session: dict[str, list[dict]],
|
||||
selected: list[str],
|
||||
max_ratio: float,
|
||||
rng: random.Random,
|
||||
) -> list[str]:
|
||||
"""Thin single-turn sessions so they are at most max_ratio of total sessions."""
|
||||
multi = [s for s in selected if len(rows_by_session[s]) > 1]
|
||||
single = [s for s in selected if len(rows_by_session[s]) == 1]
|
||||
|
||||
# max_ratio of TOTAL sessions should be single-turn
|
||||
# n_single / (n_single + n_multi) <= max_ratio
|
||||
# n_single <= max_ratio * n_multi / (1 - max_ratio)
|
||||
max_single = int(max_ratio * len(multi) / (1 - max_ratio))
|
||||
if len(single) <= max_single:
|
||||
return selected
|
||||
|
||||
raise ValueError("Must specify --sample-ratio or --target-requests")
|
||||
rng.shuffle(single)
|
||||
return multi + single[:max_single]
|
||||
|
||||
|
||||
def _sample_window_then_thin(
|
||||
@@ -217,6 +243,8 @@ def main() -> None:
|
||||
help="Fraction of sessions to sample (e.g. 0.016 for 8/500 GPU ratio)")
|
||||
p.add_argument("--target-requests", type=int, default=None,
|
||||
help="Target number of requests (legacy, no sharing preservation)")
|
||||
p.add_argument("--max-single-turn-ratio", type=float, default=None,
|
||||
help="Cap single-turn sessions to this fraction of total (e.g. 0.3)")
|
||||
p.add_argument("--seed", type=int, default=42)
|
||||
args = p.parse_args()
|
||||
|
||||
@@ -233,6 +261,7 @@ def main() -> None:
|
||||
rows_by_session,
|
||||
sample_ratio=args.sample_ratio,
|
||||
target_requests=args.target_requests,
|
||||
max_single_turn_ratio=args.max_single_turn_ratio,
|
||||
seed=args.seed,
|
||||
)
|
||||
|
||||
|
||||
Reference in New Issue
Block a user