# agentic-pd-hybrid/tests/_fixtures.py

"""Lightweight fixtures for algorithm-layer tests.

Builds minimal TraceRequest / SingleNodeTopology / RoutingState instances
without invoking build_single_node_topology() (which validates GPU budgets
we don't care about in unit tests).
"""

from __future__ import annotations

from agentic_pd_hybrid.topology import SingleNodeTopology, WorkerSpec
from agentic_pd_hybrid.trace import TraceRequest


def make_topology(decode_count: int = 3, prefill_count: int = 1) -> SingleNodeTopology:
    """Assemble a small loopback-only ``SingleNodeTopology`` for tests.

    The topology is constructed directly from ``WorkerSpec`` tuples.

    Layout produced:
      * prefill worker ``i`` -> GPU ``i``, port ``30000 + i``
      * decode worker ``i``  -> GPU ``prefill_count + i``, port ``31000 + i``

    Args:
        decode_count: Number of decode workers to create.
        prefill_count: Number of prefill workers to create.

    Returns:
        A ``SingleNodeTopology`` with no direct workers, the router at
        ``127.0.0.1:8000``, and the ``"mooncake"`` transfer backend.
    """
    loopback = "127.0.0.1"

    def build_workers(role: str, count: int, first_gpu: int, base_port: int):
        # One spec per ordinal; each worker owns exactly one GPU id.
        return tuple(
            WorkerSpec(
                role=role,
                ordinal=idx,
                gpu_ids=(first_gpu + idx,),
                host=loopback,
                port=base_port + idx,
            )
            for idx in range(count)
        )

    # Prefill workers are built first, then decode workers, whose GPU ids
    # start right after the prefill range.
    prefill_specs = build_workers("prefill", prefill_count, 0, 30000)
    decode_specs = build_workers("decode", decode_count, prefill_count, 31000)

    return SingleNodeTopology(
        model_path="/dev/null/test-model",
        prefill_workers=prefill_specs,
        decode_workers=decode_specs,
        direct_workers=(),
        router_host=loopback,
        router_port=8000,
        transfer_backend="mooncake",
        trust_remote_code=True,
    )


def make_request(
    *,
    session_id: str = "sess-1",
    turn_id: int = 0,
    hash_ids: tuple[int, ...] = (),
    input_length: int = 1024,
    output_length: int = 64,
) -> TraceRequest:
    """Create a ``TraceRequest`` for one turn of a session.

    All arguments are keyword-only. Several fields are derived from
    ``turn_id``:
      * ``request_id`` is ``"<session_id>-t<turn_id>"``
      * ``chat_id`` is ``int(turn_id)``
      * ``parent_chat_id`` is ``-1`` for turn 0, otherwise ``int(turn_id - 1)``
      * ``timestamp_s`` is ``float(turn_id)``

    ``request_type`` is always ``"user"``; the remaining arguments are
    passed through unchanged.

    Returns:
        The constructed ``TraceRequest``.
    """
    # Turn 0 has no predecessor, signalled by a parent id of -1.
    if turn_id == 0:
        parent_id = -1
    else:
        parent_id = int(turn_id - 1)

    fields = {
        "request_id": f"{session_id}-t{turn_id}",
        "session_id": session_id,
        "chat_id": int(turn_id),
        "parent_chat_id": parent_id,
        "timestamp_s": float(turn_id),
        "input_length": input_length,
        "output_length": output_length,
        "request_type": "user",
        "turn_id": turn_id,
        "hash_ids": hash_ids,
    }
    return TraceRequest(**fields)