Adds the project's first test suite. Covers the
score_candidate() pure function from the previous refactor
commit, validating the qualitative properties that
KVC_ROUTER_ALGORITHM.md §3.1 and §4.2 rely on.
Tests / properties:
- determinism: same args -> same tuple
- shape: 4-int tuple
- primary term: overlap dominates pure sticky
- primary term: sticky_bonus credited
- tie-2 inflight: lower wins
- tie-3 assigned: lower wins
- strict lex order: sticky wins position-1 over fresh-idle
- load_floor disabled by default
- load_floor gated off when sticky=True
- load_floor zero during warmup (mean=0)
- load_floor proportional to deficit (200/100/0 at 0/50/100% load)
- load_floor does not underflow when overloaded
- real per-session overlap beats load_floor on warm D
- boilerplate overlap loses to load_floor on cold D
  (the cold-D fix from E1_E2_FIX_DESIGN §Q2)
Test infrastructure:
- tests/ package with README explaining the GPU-free
  scope and the run instruction
- pyproject.toml [dependency-groups] test = [pytest>=8]
  (install via `uv sync --group test`)
- pyproject.toml [tool.pytest.ini_options] sets testpaths
Verified locally: 14/14 passing under pytest 9.0.3 in an
isolated 3.13 venv. No SGLang / GPU touched.
190 lines
6.8 KiB
Python
190 lines
6.8 KiB
Python
"""Unit tests for Algorithm 1 (KvAwarePolicy score_candidate).

Reference: docs/KVC_ROUTER_ALGORITHM.md §3.1. The lex-score is

    (overlap + sticky_bonus*sticky + floor_bonus,
     sticky,
     -inflight,
     -assigned)

These tests pin down the qualitative properties that the algorithm's
correctness arguments rely on. They run without SGLang/GPU.
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
from agentic_pd_hybrid.policies import score_candidate
|
|
|
|
|
|
def _score(**overrides):
    """Call ``score_candidate`` with baseline arguments plus per-test overrides.

    Each keyword in ``overrides`` replaces the matching baseline value; any
    argument a test does not mention keeps its baseline (zero overlap,
    non-sticky, zero load counters, ``sticky_bonus=1``,
    ``load_floor_bonus=0``).
    """
    baseline = {
        "overlap": 0,
        "sticky": False,
        "inflight": 0,
        "assigned": 0,
        "mean_assigned": 0.0,
        "sticky_bonus": 1,
        "load_floor_bonus": 0,
    }
    return score_candidate(**{**baseline, **overrides})
|
|
|
|
|
|
# -- Determinism ----------------------------------------------------------------
|
|
|
|
|
|
def test_score_is_pure():
    """Identical keyword arguments must yield identical tuples (no hidden state)."""
    kwargs = {"overlap": 3, "sticky": True, "inflight": 1, "assigned": 7}
    first = _score(**kwargs)
    second = _score(**kwargs)
    assert first == second
|
|
|
|
|
|
def test_score_returns_4_tuple():
    """The all-defaults score is a tuple holding exactly four ints."""
    result = _score()
    assert isinstance(result, tuple)
    assert len(result) == 4
    for component in result:
        assert isinstance(component, int)
|
|
|
|
|
|
# -- Primary term: overlap dominates sticky --------------------------------------
|
|
|
|
|
|
def test_overlap_strictly_dominates_pure_sticky():
    """Overlap above ``sticky_bonus`` on a non-sticky D beats a sticky-only D.

    Theorem-2 building block, checked with the default ``sticky_bonus=1``
    against a sticky D that has zero overlap.
    """
    # overlap=2 exceeds sticky_bonus=1, so the primary term alone decides.
    with_overlap = _score(overlap=2, sticky=False)
    sticky_without_overlap = _score(overlap=0, sticky=True)
    assert with_overlap > sticky_without_overlap
|
|
|
|
|
|
def test_overlap_plus_sticky_beats_overlap_alone():
    """Given two D's with equal overlap, the sticky one wins.

    ``sticky_bonus`` contributes to the primary term, and the sticky flag
    also wins tie-1.
    """
    shared_overlap = 5
    sticky_score = _score(overlap=shared_overlap, sticky=True)
    fresh_score = _score(overlap=shared_overlap, sticky=False)
    assert sticky_score > fresh_score
|
|
|
|
|
|
# -- Tie breakers ----------------------------------------------------------------
|
|
|
|
|
|
def test_tiebreaker_inflight_lower_wins():
    """With primary and sticky tied, the D with fewer in-flight requests wins."""
    tied = {"overlap": 3, "sticky": False, "assigned": 10}
    idle = _score(inflight=0, **tied)
    busy = _score(inflight=5, **tied)
    assert idle > busy
|
|
|
|
|
|
def test_tiebreaker_assigned_lower_wins():
    """With primary, sticky and inflight tied, the rarely-picked D wins."""
    tied = {"overlap": 3, "sticky": False, "inflight": 2}
    rarely_picked = _score(assigned=1, **tied)
    often_picked = _score(assigned=99, **tied)
    assert rarely_picked > often_picked
|
|
|
|
|
|
def test_tiebreaker_strict_lex_order():
    """Sticky beats non-sticky on tie-1 even when the non-sticky D is idle.

    The lex order is strict: position 1 (sticky) outranks positions 2 and 3
    (inflight, assigned), so a busy sticky D still wins once the primary
    terms are equal.
    """
    # sticky_bonus=1 is added to position 0, so equal overlaps would let the
    # sticky D win on the primary term and never reach tie-1.  Give the
    # sticky D one less overlap so both primaries are 4 (3 + 1 vs 4).
    sticky_busy = _score(overlap=3, sticky=True, inflight=10, assigned=10)
    fresh_idle = _score(overlap=4, sticky=False, inflight=0, assigned=0)

    # Guard the precondition: without equal primaries this test would not
    # exercise position 1 at all.
    assert sticky_busy[0] == fresh_idle[0]
    # Sticky wins position 1 despite losing positions 2 and 3.
    assert sticky_busy > fresh_idle
|
|
|
|
|
|
# -- Load-floor bonus ------------------------------------------------------------
|
|
|
|
|
|
def test_load_floor_disabled_by_default():
    """With the default load_floor_bonus=0, nothing is added to the primary."""
    under_loaded = _score(overlap=0, sticky=False, mean_assigned=10, assigned=0)
    primary = under_loaded[0]
    assert primary == 0
|
|
|
|
|
|
def test_load_floor_gated_off_when_sticky():
    """A sticky D gets no load-floor boost even when load_floor_bonus > 0.

    Otherwise a session would migrate away from its warm D under load.
    """
    score = _score(
        overlap=0,
        sticky=True,
        mean_assigned=10,
        assigned=0,
        load_floor_bonus=200,
    )
    primary = score[0]
    # overlap (0) + sticky_bonus (1) + floor (0) = 1
    assert primary == 1
|
|
|
|
|
|
def test_load_floor_zero_when_mean_zero():
    """Warmup case: with mean_assigned=0 no D receives a floor boost.

    Selection then degenerates to the lex tiebreak by iteration order.
    """
    score = _score(
        overlap=0,
        sticky=False,
        mean_assigned=0,
        assigned=0,
        load_floor_bonus=200,
    )
    primary = score[0]
    assert primary == 0
|
|
|
|
|
|
def test_load_floor_proportional_to_deficit():
    """floor_bonus = K * deficit / mean, where deficit = max(0, mean - assigned).

    Checked with K=200 and mean=10 at 0%, 50% and 100% of the mean load.
    """
    assigned_levels = [0, 5, 10]
    primaries = [
        _score(
            overlap=0,
            sticky=False,
            mean_assigned=10,
            assigned=assigned,
            load_floor_bonus=200,
        )[0]
        for assigned in assigned_levels
    ]
    # assigned=0  -> deficit 10 -> int(200 * 10 / 10) = 200
    # assigned=5  -> deficit 5  -> int(200 * 5 / 10)  = 100
    # assigned=10 -> deficit 0  -> 0
    assert primaries == [200, 100, 0]
|
|
|
|
|
|
def test_load_floor_does_not_underflow_when_overloaded():
    """When assigned > mean the deficit clamps to 0, so the bonus is never negative."""
    overloaded = _score(
        overlap=0,
        sticky=False,
        mean_assigned=10,
        assigned=50,
        load_floor_bonus=200,
    )
    primary = overloaded[0]
    assert primary == 0
|
|
|
|
|
|
# -- Routing intent: real overlap beats load-floor bonus -------------------------
|
|
|
|
|
|
def test_real_prefix_overlap_beats_load_floor_on_warm_d():
    """Real per-session prefix overlap must outweigh the cold-D floor bonus.

    E1_E2_FIX_DESIGN_ZH §Q2: with overlap=800 (a per-session prefix) and
    load_floor_bonus=200, a warm D should still win against a cold D that
    receives the full floor bonus.
    """
    floor_settings = {"mean_assigned": 10, "load_floor_bonus": 200}
    warm_d = _score(overlap=800, sticky=True, assigned=10, **floor_settings)
    cold_d = _score(overlap=0, sticky=False, assigned=0, **floor_settings)

    # warm primary = 800 + 1 + 0 = 801; cold primary = 0 + 0 + 200 = 200.
    assert warm_d[0] == 801
    assert cold_d[0] == 200
    assert warm_d > cold_d
|
|
|
|
|
|
def test_boilerplate_overlap_loses_to_load_floor_for_cold_d():
    """The floor bonus must outweigh cross-session boilerplate overlap.

    Same §Q2 scenario: with load_floor_bonus=200 and a worst-case boilerplate
    overlap of about 50, a fresh cold D should still win against a D that is
    only slightly warm from boilerplate.
    """
    floor_settings = {"sticky": False, "mean_assigned": 10, "load_floor_bonus": 200}
    # primary = 50 + 0 + 0 = 50 (assigned equals the mean, so no deficit).
    boilerplate_warm = _score(overlap=50, assigned=10, **floor_settings)
    # primary = 0 + 0 + 200 = 200.
    cold_d = _score(overlap=0, assigned=0, **floor_settings)
    assert cold_d > boilerplate_warm
|