Add --max-single-turn-ratio to control single-turn session fraction

Single-turn sessions with unique prefixes get a 0% cache hit rate, diluting
APC in benchmarks.  --max-single-turn-ratio caps their fraction of the
sampled sessions, boosting multi-turn density and theoretical APC.

Example: --sample-ratio 0.008 --max-single-turn-ratio 0.3
  Before: 9.2% multi-turn, APC=70.5%
  After:  70.0% multi-turn, APC=85.0%, sharing=53.3%

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-05-23 14:17:25 +08:00
parent 1e1e2e774d
commit d8dc9dc0ce

View File

@@ -69,15 +69,15 @@ def sample_sessions(
*,
sample_ratio: float | None = None,
target_requests: int | None = None,
max_single_turn_ratio: float | None = None,
seed: int,
) -> list[str]:
"""Sample sessions preserving KV cache reuse."""
rng = random.Random(seed)
if sample_ratio is not None:
return _sample_window_then_thin(rows_by_session, sample_ratio, rng)
if target_requests is not None:
selected = _sample_window_then_thin(rows_by_session, sample_ratio, rng)
elif target_requests is not None:
all_sids = list(rows_by_session.keys())
rng.shuffle(all_sids)
selected = []
@@ -87,9 +87,35 @@ def sample_sessions(
total += len(rows_by_session[sid])
if total >= target_requests:
break
else:
raise ValueError("Must specify --sample-ratio or --target-requests")
if max_single_turn_ratio is not None:
selected = _cap_single_turn(rows_by_session, selected,
max_single_turn_ratio, rng)
return selected
def _cap_single_turn(
    rows_by_session: dict[str, list[dict]],
    selected: list[str],
    max_ratio: float,
    rng: random.Random,
) -> list[str]:
    """Thin single-turn sessions so they are at most max_ratio of total sessions.

    Args:
        rows_by_session: Maps each session id to its list of rows. A session
            with exactly one row counts as single-turn, one with more than
            one row as multi-turn.
        selected: Session ids chosen so far.
        max_ratio: Largest allowed fraction of single-turn sessions in the
            result; must lie in [0, 1].
        rng: Random source used to pick which single-turn sessions survive.

    Returns:
        ``selected`` itself when the cap is already satisfied; otherwise all
        multi-turn sessions followed by a random subset of the single-turn
        ones.

    Raises:
        ValueError: If ``max_ratio`` is outside [0, 1] (NaN included).
    """
    # Out-of-range ratios would make max_single negative, and a negative
    # slice bound silently keeps the wrong sessions, so reject them up front.
    if not 0.0 <= max_ratio <= 1.0:
        raise ValueError(
            f"--max-single-turn-ratio must be between 0 and 1, got {max_ratio}"
        )
    # A ratio of 1 allows every session to be single-turn: nothing to cap,
    # and the formula below would divide by zero.
    if max_ratio == 1.0:
        return selected

    multi = [s for s in selected if len(rows_by_session[s]) > 1]
    single = [s for s in selected if len(rows_by_session[s]) == 1]
    # NOTE: a session with zero rows is in neither list, so it is kept on the
    # early return below but dropped when thinning happens.

    # max_ratio of TOTAL sessions should be single-turn
    #   n_single / (n_single + n_multi) <= max_ratio
    #   n_single <= max_ratio * n_multi / (1 - max_ratio)
    max_single = int(max_ratio * len(multi) / (1 - max_ratio))
    if len(single) <= max_single:
        return selected

    # Shuffle, then truncate, so the surviving single-turn sessions are a
    # random subset determined by the caller's rng.
    rng.shuffle(single)
    return multi + single[:max_single]
def _sample_window_then_thin(
@@ -217,6 +243,8 @@ def main() -> None:
help="Fraction of sessions to sample (e.g. 0.016 for 8/500 GPU ratio)")
p.add_argument("--target-requests", type=int, default=None,
help="Target number of requests (legacy, no sharing preservation)")
p.add_argument("--max-single-turn-ratio", type=float, default=None,
help="Cap single-turn sessions to this fraction of total (e.g. 0.3)")
p.add_argument("--seed", type=int, default=42)
args = p.parse_args()
@@ -233,6 +261,7 @@ def main() -> None:
rows_by_session,
sample_ratio=args.sample_ratio,
target_requests=args.target_requests,
max_single_turn_ratio=args.max_single_turn_ratio,
seed=args.seed,
)