# aituner/tests/test_mechanism_planner.py

from __future__ import annotations
import unittest
from aituner.engine_adapters.vllm import default_vllm_descriptors
from aituner.knob_descriptor import KnobConstraints, KnobDescriptor
from aituner.mechanism_planner import (
CoordinateSearchPolicy,
coordinate_line_search_candidates,
)
class MechanismPlannerTests(unittest.TestCase):
    """Checks on ``coordinate_line_search_candidates`` for several knob descriptors."""

    @staticmethod
    def _first_vllm_descriptor(flag: str):
        """Return the first default vLLM descriptor produced for ``flag``."""
        return default_vllm_descriptors(tunable_flags=(flag,))[0]

    def test_coordinate_search_uses_mechanism_not_knob_name(self) -> None:
        """Differently named knobs with the same evidence get the same first move.

        One descriptor comes from the vLLM defaults and the other is built by
        hand under a different knob name; with ``admission_capacity`` evidence
        both are expected to go from 8 to 16 and report that mechanism.
        """
        from_defaults = self._first_vllm_descriptor("max-num-seqs")

        limits = KnobConstraints(min_value=1, integer=True, multiple_of=8)
        effects = {
            "increase": ("admission_capacity",),
            "decrease": ("kv_memory_pressure",),
        }
        hand_built = KnobDescriptor(
            name="max-running-requests",
            location="flag",
            value_type="int",
            mechanisms=("admission_capacity", "kv_memory_pressure"),
            search_geometry="positive_capacity",
            operators=("coordinate_line_search",),
            constraints=limits,
            directional_effects=effects,
        )

        vllm_results = coordinate_line_search_candidates(
            current_config={"max-num-seqs": 8},
            descriptors=(from_defaults,),
            evidence_weights={"admission_capacity": 0.9},
        )
        sglang_results = coordinate_line_search_candidates(
            current_config={"max-running-requests": 8},
            descriptors=(hand_built,),
            evidence_weights={"admission_capacity": 0.9},
        )

        # Patches are checked first, then the mechanism attributed to each.
        self.assertEqual(vllm_results[0].patch, {"max-num-seqs": 16})
        self.assertEqual(sglang_results[0].patch, {"max-running-requests": 16})
        self.assertEqual(vllm_results[0].mechanism, "admission_capacity")
        self.assertEqual(sglang_results[0].mechanism, "admission_capacity")

    def test_positive_capacity_can_decrease_for_memory_pressure(self) -> None:
        """With only ``kv_memory_pressure`` evidence the first move is 64 -> 32."""
        knob = self._first_vllm_descriptor("max-num-seqs")
        results = coordinate_line_search_candidates(
            current_config={"max-num-seqs": 64},
            descriptors=(knob,),
            evidence_weights={"kv_memory_pressure": 0.8},
        )

        leading = results[0]
        self.assertEqual(leading.direction, "decrease")
        self.assertEqual(leading.patch, {"max-num-seqs": 32})

    def test_bounded_fraction_respects_constraints(self) -> None:
        """The first candidate for ``gpu-memory-utilization`` moves 0.96 -> 0.97."""
        knob = self._first_vllm_descriptor("gpu-memory-utilization")
        results = coordinate_line_search_candidates(
            current_config={"gpu-memory-utilization": 0.96},
            descriptors=(knob,),
            evidence_weights={"kv_memory_capacity": 0.8},
        )

        expected_patch = {"gpu-memory-utilization": 0.97}
        self.assertEqual(results[0].patch, expected_patch)

    def test_coordinate_search_can_emit_larger_same_operator_steps(self) -> None:
        """Step multipliers ``(1.0, 2.0)`` yield patches to both 16 and 24."""
        knob = self._first_vllm_descriptor("max-num-seqs")
        two_step_policy = CoordinateSearchPolicy(step_multipliers=(1.0, 2.0))
        results = coordinate_line_search_candidates(
            current_config={"max-num-seqs": 8},
            descriptors=(knob,),
            evidence_weights={"admission_capacity": 0.9},
            policy=two_step_policy,
        )

        emitted = []
        for item in results:
            emitted.append(item.patch)

        for target in (16, 24):
            self.assertIn({"max-num-seqs": target}, emitted)
if __name__ == "__main__":
    # Hand off to the unittest runner when this file is executed as a script.
    unittest.main()