from __future__ import annotations

import unittest

from aituner.engine_adapters.vllm import default_vllm_descriptors
from aituner.knob_descriptor import KnobConstraints, KnobDescriptor
from aituner.mechanism_planner import (
    CoordinateSearchPolicy,
    coordinate_line_search_candidates,
)


def _plan(current, descriptor, weights, **extra):
    """Run the coordinate line search for a single descriptor.

    Thin wrapper that forwards ``current`` as ``current_config``, wraps
    ``descriptor`` in a one-element tuple for ``descriptors``, and passes
    ``weights`` as ``evidence_weights``. Any additional keyword arguments
    (for example ``policy``) are forwarded untouched.
    """
    return coordinate_line_search_candidates(
        current_config=current,
        descriptors=(descriptor,),
        evidence_weights=weights,
        **extra,
    )


class MechanismPlannerTests(unittest.TestCase):
    """Exercise ``coordinate_line_search_candidates`` on single-knob configs."""

    @staticmethod
    def _vllm_descriptor(flag: str):
        """Return the first default vLLM descriptor when only ``flag`` is tunable."""
        return default_vllm_descriptors(tunable_flags=(flag,))[0]

    def test_coordinate_search_uses_mechanism_not_knob_name(self) -> None:
        """Two differently named knobs with the same evidence both step 8 -> 16."""
        vllm_knob = self._vllm_descriptor("max-num-seqs")
        capacity_limits = KnobConstraints(
            min_value=1,
            integer=True,
            multiple_of=8,
        )
        sglang_knob = KnobDescriptor(
            name="max-running-requests",
            location="flag",
            value_type="int",
            mechanisms=("admission_capacity", "kv_memory_pressure"),
            search_geometry="positive_capacity",
            operators=("coordinate_line_search",),
            constraints=capacity_limits,
            directional_effects={
                "increase": ("admission_capacity",),
                "decrease": ("kv_memory_pressure",),
            },
        )

        # Each call gets its own weights dict, exactly as separate literals would.
        from_vllm = _plan(
            {"max-num-seqs": 8},
            vllm_knob,
            {"admission_capacity": 0.9},
        )
        from_sglang = _plan(
            {"max-running-requests": 8},
            sglang_knob,
            {"admission_capacity": 0.9},
        )

        self.assertEqual(from_vllm[0].patch, {"max-num-seqs": 16})
        self.assertEqual(from_sglang[0].patch, {"max-running-requests": 16})
        self.assertEqual(from_vllm[0].mechanism, "admission_capacity")
        self.assertEqual(from_sglang[0].mechanism, "admission_capacity")

    def test_positive_capacity_can_decrease_for_memory_pressure(self) -> None:
        """``kv_memory_pressure`` evidence halves ``max-num-seqs`` from 64 to 32."""
        proposals = _plan(
            {"max-num-seqs": 64},
            self._vllm_descriptor("max-num-seqs"),
            {"kv_memory_pressure": 0.8},
        )

        self.assertEqual(proposals[0].direction, "decrease")
        self.assertEqual(proposals[0].patch, {"max-num-seqs": 32})

    def test_bounded_fraction_respects_constraints(self) -> None:
        """``gpu-memory-utilization`` at 0.96 is expected to move to 0.97."""
        proposals = _plan(
            {"gpu-memory-utilization": 0.96},
            self._vllm_descriptor("gpu-memory-utilization"),
            {"kv_memory_capacity": 0.8},
        )

        # NOTE(review): exact float equality; presumably the planner rounds
        # the proposed value -- confirm against the implementation.
        self.assertEqual(proposals[0].patch, {"gpu-memory-utilization": 0.97})

    def test_coordinate_search_can_emit_larger_same_operator_steps(self) -> None:
        """Step multipliers ``(1.0, 2.0)`` produce both the 16 and the 24 patch."""
        two_step_policy = CoordinateSearchPolicy(step_multipliers=(1.0, 2.0))
        proposals = _plan(
            {"max-num-seqs": 8},
            self._vllm_descriptor("max-num-seqs"),
            {"admission_capacity": 0.9},
            policy=two_step_policy,
        )

        emitted = [proposal.patch for proposal in proposals]
        self.assertIn({"max-num-seqs": 16}, emitted)
        self.assertIn({"max-num-seqs": 24}, emitted)


if __name__ == "__main__":
    unittest.main()