Adds the algorithm-layer guarantee tests for
docs/KVC_ROUTER_ALGORITHM.md §4.1. The full Dispatch loop
lives in replay.py (HTTP + mooncake), but the policy-layer
guarantee is testable in isolation: under any reject
sequence, select() must keep returning a valid worker.
Cases:
- select returns a valid decision even after every (s,d)
is past τ_reject (degenerate fallback)
- |D|·τ_reject rejects suffice to explore every D
(cannot trap a session on one D under universal
rejection)
- degenerate fallback picks the least-rejected D
(Algorithm 1 line 4)
- per-(session, D) isolation: session A's blacklist
does not affect session B
- migration_reject_threshold=0 disables blacklist
- select() does NOT silently bump the reject counter
(the only mutator is record_admission_reject)
Adds tests/_fixtures.py with minimal make_topology() and
make_request() helpers that skip build_single_node_topology's
GPU-budget validation (irrelevant in unit tests).
Verified locally: 20/20 passing under pytest 9.0.3. The
six new tests cover only Algorithm 1's policy-layer
half of Theorem 1; the reset-on-success half lives in
Algorithm 3 (replay.py) and is a future test target.
67 lines
1.8 KiB
Python
67 lines
1.8 KiB
Python
"""Lightweight fixtures for algorithm-layer tests.

Builds minimal TraceRequest / SingleNodeTopology / RoutingState instances
without invoking build_single_node_topology() (which validates GPU budgets
we don't care about in unit tests).
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
from agentic_pd_hybrid.topology import SingleNodeTopology, WorkerSpec
|
|
from agentic_pd_hybrid.trace import TraceRequest
|
|
|
|
|
|
def make_topology(decode_count: int = 3, prefill_count: int = 1) -> SingleNodeTopology:
    """Assemble a small ``SingleNodeTopology`` by calling its constructor directly.

    Worker layout produced here:

    * prefill worker ``i``: ordinal ``i``, GPU id ``i``, port ``30000 + i``
    * decode worker ``i``: ordinal ``i``, GPU id ``prefill_count + i``,
      port ``31000 + i``

    Every worker and the router use host ``127.0.0.1``; the router port is
    ``8000``, there are no direct workers, the transfer backend is
    ``"mooncake"`` and ``trust_remote_code`` is ``True``.

    Args:
        decode_count: Number of decode workers to create.
        prefill_count: Number of prefill workers to create; also the offset
            applied to decode workers' GPU ids so they follow the prefill ones.

    Returns:
        The constructed ``SingleNodeTopology``.
    """
    loopback = "127.0.0.1"

    def _worker(role: str, index: int, gpu: int, base_port: int) -> WorkerSpec:
        # One single-GPU worker on the loopback host; the port is offset by ordinal.
        return WorkerSpec(
            role=role,
            ordinal=index,
            gpu_ids=(gpu,),
            host=loopback,
            port=base_port + index,
        )

    prefill_specs = tuple(
        _worker("prefill", idx, idx, 30000) for idx in range(prefill_count)
    )
    # Decode GPU ids start right after the ones handed to the prefill workers.
    decode_specs = tuple(
        _worker("decode", idx, prefill_count + idx, 31000)
        for idx in range(decode_count)
    )

    return SingleNodeTopology(
        model_path="/dev/null/test-model",
        prefill_workers=prefill_specs,
        decode_workers=decode_specs,
        direct_workers=(),
        router_host=loopback,
        router_port=8000,
        transfer_backend="mooncake",
        trust_remote_code=True,
    )
|
|
|
|
|
|
def make_request(
    *,
    session_id: str = "sess-1",
    turn_id: int = 0,
    hash_ids: tuple[int, ...] = (),
    input_length: int = 1024,
    output_length: int = 64,
) -> TraceRequest:
    """Create a ``TraceRequest`` whose identifying fields derive from ``turn_id``.

    All arguments are keyword-only.

    Args:
        session_id: Session identifier; also the prefix of ``request_id``.
        turn_id: Turn index. It drives ``request_id`` (``"<session_id>-t<turn_id>"``),
            ``chat_id`` (``int(turn_id)``), ``parent_chat_id`` (``-1`` when
            ``turn_id == 0``, otherwise ``int(turn_id - 1)``) and
            ``timestamp_s`` (``float(turn_id)``).
        hash_ids: Passed through unchanged as ``hash_ids``.
        input_length: Passed through unchanged as ``input_length``.
        output_length: Passed through unchanged as ``output_length``.

    Returns:
        The constructed ``TraceRequest`` with ``request_type`` set to ``"user"``.
    """
    # Derived fields are computed in the same order the constructor arguments
    # list them, so a bad ``turn_id`` fails at the same coercion as before.
    req_id = f"{session_id}-t{turn_id}"
    chat = int(turn_id)
    if turn_id == 0:
        # The first turn has no predecessor.
        parent = -1
    else:
        parent = int(turn_id - 1)
    stamp = float(turn_id)

    return TraceRequest(
        request_id=req_id,
        session_id=session_id,
        chat_id=chat,
        parent_chat_id=parent,
        timestamp_s=stamp,
        input_length=input_length,
        output_length=output_length,
        request_type="user",
        turn_id=turn_id,
        hash_ids=hash_ids,
    )
|