86 lines
3.4 KiB
Python
86 lines
3.4 KiB
Python
from __future__ import annotations
|
|
|
|
import unittest
|
|
|
|
from aituner.engine_adapters.vllm import default_vllm_descriptors
|
|
from aituner.knob_descriptor import KnobConstraints, KnobDescriptor
|
|
from aituner.mechanism_planner import (
|
|
CoordinateSearchPolicy,
|
|
coordinate_line_search_candidates,
|
|
)
|
|
|
|
|
|
class MechanismPlannerTests(unittest.TestCase):
    """Exercise ``coordinate_line_search_candidates`` through knob descriptors.

    Each test hands the planner a current config, a single descriptor and a
    mapping of mechanism evidence weights, then inspects the proposed patches.
    """

    def test_coordinate_search_uses_mechanism_not_knob_name(self) -> None:
        # Two knobs with different flag names receive the same
        # "admission_capacity" evidence; both are expected to move 8 -> 16.
        vllm_knobs = default_vllm_descriptors(tunable_flags=("max-num-seqs",))
        vllm_knob = vllm_knobs[0]

        # Hand-built descriptor (the SGLang-style flag name) so the test does
        # not depend on an adapter for the second knob.
        sglang_limits = KnobConstraints(min_value=1, integer=True, multiple_of=8)
        sglang_knob = KnobDescriptor(
            name="max-running-requests",
            location="flag",
            value_type="int",
            mechanisms=("admission_capacity", "kv_memory_pressure"),
            search_geometry="positive_capacity",
            operators=("coordinate_line_search",),
            constraints=sglang_limits,
            directional_effects={
                "increase": ("admission_capacity",),
                "decrease": ("kv_memory_pressure",),
            },
        )

        vllm_proposals = coordinate_line_search_candidates(
            current_config={"max-num-seqs": 8},
            descriptors=(vllm_knob,),
            evidence_weights={"admission_capacity": 0.9},
        )
        sglang_proposals = coordinate_line_search_candidates(
            current_config={"max-running-requests": 8},
            descriptors=(sglang_knob,),
            evidence_weights={"admission_capacity": 0.9},
        )

        # Patches are checked first, then mechanisms, each in vLLM -> SGLang order.
        expected_patches = (
            (vllm_proposals, {"max-num-seqs": 16}),
            (sglang_proposals, {"max-running-requests": 16}),
        )
        for proposals, wanted_patch in expected_patches:
            self.assertEqual(proposals[0].patch, wanted_patch)
        for proposals in (vllm_proposals, sglang_proposals):
            self.assertEqual(proposals[0].mechanism, "admission_capacity")

    def test_positive_capacity_can_decrease_for_memory_pressure(self) -> None:
        # With "kv_memory_pressure" evidence the top proposal should shrink
        # max-num-seqs (64 -> 32) rather than grow it.
        knob = default_vllm_descriptors(tunable_flags=("max-num-seqs",))[0]

        proposals = coordinate_line_search_candidates(
            current_config={"max-num-seqs": 64},
            descriptors=(knob,),
            evidence_weights={"kv_memory_pressure": 0.8},
        )

        top = proposals[0]
        self.assertEqual(top.direction, "decrease")
        self.assertEqual(top.patch, {"max-num-seqs": 32})

    def test_bounded_fraction_respects_constraints(self) -> None:
        # Starting from 0.96, the top proposal is expected to be exactly 0.97.
        # NOTE(review): the bound/step that yields 0.97 lives in the vLLM
        # adapter's descriptor, which is not visible from this file.
        knob = default_vllm_descriptors(tunable_flags=("gpu-memory-utilization",))[0]

        proposals = coordinate_line_search_candidates(
            current_config={"gpu-memory-utilization": 0.96},
            descriptors=(knob,),
            evidence_weights={"kv_memory_capacity": 0.8},
        )

        top_patch = proposals[0].patch
        self.assertEqual(top_patch, {"gpu-memory-utilization": 0.97})

    def test_coordinate_search_can_emit_larger_same_operator_steps(self) -> None:
        # A policy with step multipliers (1.0, 2.0) should yield both the
        # 8 -> 16 and the 8 -> 24 patch somewhere in the proposals.
        knob = default_vllm_descriptors(tunable_flags=("max-num-seqs",))[0]
        two_step_policy = CoordinateSearchPolicy(step_multipliers=(1.0, 2.0))

        proposals = coordinate_line_search_candidates(
            current_config={"max-num-seqs": 8},
            descriptors=(knob,),
            evidence_weights={"admission_capacity": 0.9},
            policy=two_step_policy,
        )

        proposed_patches = [proposal.patch for proposal in proposals]
        for wanted_patch in ({"max-num-seqs": 16}, {"max-num-seqs": 24}):
            self.assertIn(wanted_patch, proposed_patches)
|
|
|
|
|
|
# Run the test suite when this module is executed directly as a script.
if __name__ == "__main__":
    unittest.main()
|