# agentic-pd-hybrid/tests/test_policy_scoring.py

"""Unit tests for Algorithm 1 (KvAwarePolicy score_candidate).

Reference: docs/KVC_ROUTER_ALGORITHM.md §3.1. The lex-score is

    (overlap + sticky_bonus*sticky + floor_bonus,
     sticky,
     -inflight,
     -assigned)

These tests pin down the qualitative properties that the algorithm's
correctness arguments rely on. They run without SGLang/GPU.
"""

from __future__ import annotations

from agentic_pd_hybrid.policies import score_candidate


def _score(**overrides):
    """Call ``score_candidate`` with baseline kwargs, overridden per test.

    Every keyword in ``overrides`` replaces the baseline value of the same
    name (unknown names are forwarded as extra keywords). The return value of
    ``score_candidate`` is passed back unchanged.
    """
    baseline = {
        "overlap": 0,
        "sticky": False,
        "inflight": 0,
        "assigned": 0,
        "mean_assigned": 0.0,
        "sticky_bonus": 1,
        "load_floor_bonus": 0,
    }
    # Later entries win, so per-test overrides shadow the baseline.
    return score_candidate(**{**baseline, **overrides})


# -- Determinism ----------------------------------------------------------------


def test_score_is_pure():
    """Scoring twice with identical kwargs yields equal results.

    A mismatch would indicate hidden state inside the scorer.
    """
    kwargs = {"overlap": 3, "sticky": True, "inflight": 1, "assigned": 7}
    first, second = _score(**kwargs), _score(**kwargs)
    assert first == second


def test_score_returns_4_tuple():
    """The all-defaults score is a tuple with exactly four ``int`` entries."""
    score = _score()
    assert isinstance(score, tuple)
    assert len(score) == 4
    # isinstance-based check: ``bool`` entries also count as ``int`` here.
    for component in score:
        assert isinstance(component, int)


# -- Primary term: overlap dominates sticky --------------------------------------


def test_overlap_strictly_dominates_pure_sticky():
    """Theorem-2 building block: overlap outranks stickiness alone.

    With the helper's default sticky_bonus=1, a non-sticky D with overlap=2
    must score strictly above a sticky D with overlap=0.
    """
    with_overlap = _score(overlap=2, sticky=False)
    only_sticky = _score(overlap=0, sticky=True)
    assert with_overlap > only_sticky


def test_overlap_plus_sticky_beats_overlap_alone():
    """At equal overlap the sticky D outranks the non-sticky D.

    Stickiness feeds into the primary term via sticky_bonus and is also the
    first tie-breaker.
    """
    by_sticky = {flag: _score(overlap=5, sticky=flag) for flag in (True, False)}
    assert by_sticky[True] > by_sticky[False]


# -- Tie breakers ----------------------------------------------------------------


def test_tiebreaker_inflight_lower_wins():
    """With primary and sticky tied, fewer in-flight requests ranks higher."""
    shared = {"overlap": 3, "sticky": False, "assigned": 10}
    idle = _score(inflight=0, **shared)
    busy = _score(inflight=5, **shared)
    assert idle > busy


def test_tiebreaker_assigned_lower_wins():
    """With primary, sticky and inflight tied, the less-assigned D ranks higher."""
    shared = {"overlap": 3, "sticky": False, "inflight": 2}
    seldom_picked = _score(assigned=1, **shared)
    often_picked = _score(assigned=99, **shared)
    assert seldom_picked > often_picked


def test_tiebreaker_strict_lex_order():
    """Position 1 (sticky) outranks positions 2/3 when the primary term ties.

    sticky_bonus=1 is folded into position 0, so the two candidates must
    differ in overlap by exactly that bonus for their primaries to be equal:
    the sticky D has overlap=3 (3 + 1 = 4), the fresh D has overlap=4
    (4 + 0 = 4). The sticky D must then win on position 1 even though the
    fresh D has lower inflight and assigned counts.
    """
    sticky_busy = _score(overlap=3, sticky=True, inflight=10, assigned=10)
    fresh_idle = _score(overlap=4, sticky=False, inflight=0, assigned=0)
    # Guard the premise: if the primaries differed, the comparison below would
    # be decided at position 0 and would not exercise the sticky tie-breaker.
    assert sticky_busy[0] == fresh_idle[0] == 4
    assert sticky_busy > fresh_idle


# -- Load-floor bonus ------------------------------------------------------------


def test_load_floor_disabled_by_default():
    """Default load_floor_bonus=0 adds nothing to an under-loaded D's primary."""
    primary = _score(overlap=0, sticky=False, mean_assigned=10, assigned=0)[0]
    assert primary == 0


def test_load_floor_gated_off_when_sticky():
    """A sticky D gets no load-floor boost even when load_floor_bonus > 0.

    Without this gate a session would migrate away from its warm D under load.
    """
    kwargs = {
        "overlap": 0,
        "sticky": True,
        "mean_assigned": 10,
        "assigned": 0,
        "load_floor_bonus": 200,
    }
    primary = _score(**kwargs)[0]
    # Expected primary: overlap (0) + sticky_bonus (1) + floor (0) == 1.
    assert primary == 1


def test_load_floor_zero_when_mean_zero():
    """Warmup case: with mean_assigned=0 no D receives a floor boost.

    Selection then degenerates to the lex tie-break by iteration order.
    """
    kwargs = {
        "overlap": 0,
        "sticky": False,
        "mean_assigned": 0,
        "assigned": 0,
        "load_floor_bonus": 200,
    }
    primary = _score(**kwargs)[0]
    assert primary == 0


def test_load_floor_proportional_to_deficit():
    """The floor bonus scales with the deficit below the mean.

    Expected bonus is int(K * max(0, mean - assigned) / mean) with K=200 and
    mean=10; overlap=0 and sticky=False keep it the only primary contribution.
    """
    # (assigned, expected primary):
    #   0  -> deficit 10 -> int(200 * 10 / 10) = 200
    #   5  -> deficit 5  -> int(200 * 5 / 10)  = 100
    #   10 -> deficit 0  -> 0
    cases = [(0, 200), (5, 100), (10, 0)]
    scores = [
        _score(
            overlap=0,
            sticky=False,
            mean_assigned=10,
            assigned=assigned,
            load_floor_bonus=200,
        )
        for assigned, _expected in cases
    ]
    for score, (_assigned, expected) in zip(scores, cases):
        assert score[0] == expected


def test_load_floor_does_not_underflow_when_overloaded():
    """An over-loaded D (assigned > mean) gets a zero bonus, never a negative one."""
    kwargs = {
        "overlap": 0,
        "sticky": False,
        "mean_assigned": 10,
        "assigned": 50,
        "load_floor_bonus": 200,
    }
    primary = _score(**kwargs)[0]
    assert primary == 0


# -- Routing intent: real overlap beats load-floor bonus -------------------------


def test_real_prefix_overlap_beats_load_floor_on_warm_d():
    """Real per-session overlap must outweigh the cold-D floor bonus.

    Per E1_E2_FIX_DESIGN_ZH §Q2, load_floor_bonus should be sized so that a
    genuine per-session prefix (overlap=800 here) on a warm D still beats a
    cold D that only has the floor bonus (load_floor_bonus=200).
    """
    common = {"mean_assigned": 10, "load_floor_bonus": 200}
    warm_d = _score(overlap=800, sticky=True, assigned=10, **common)
    cold_d = _score(overlap=0, sticky=False, assigned=0, **common)
    # Warm primary: 800 (overlap) + 1 (sticky bonus) + 0 (floor) = 801.
    assert warm_d[0] == 801
    # Cold primary: 0 (overlap) + 0 (not sticky) + 200 (full floor) = 200.
    assert cold_d[0] == 200
    assert warm_d > cold_d


def test_boilerplate_overlap_loses_to_load_floor_for_cold_d():
    """The floor bonus must outweigh cross-session boilerplate overlap.

    Per E1_E2_FIX_DESIGN_ZH §Q2: with load_floor_bonus=200 and a worst-case
    boilerplate overlap of about 50, an under-loaded cold D should outrank a D
    that is only slightly warm from boilerplate.
    """
    common = {"sticky": False, "mean_assigned": 10, "load_floor_bonus": 200}
    # Primary 50 + 0 + 0 = 50: assigned equals the mean, so there is no deficit.
    boilerplate_warm = _score(overlap=50, assigned=10, **common)
    # Primary 0 + 0 + 200 = 200: full deficit earns the whole floor bonus.
    cold_d = _score(overlap=0, assigned=0, **common)
    assert cold_d > boilerplate_warm