Add generic decode-only harness guidance
This commit is contained in:
@@ -486,6 +486,52 @@ class CoreFlowTests(unittest.TestCase):
|
||||
self.assertIn("There is no TTFT SLO for this study.", prompt)
|
||||
self.assertIn("decode-only", prompt)
|
||||
|
||||
def test_decode_only_harness_defaults_to_decode_tpot(self) -> None:
    """Verify the harness context produced for a ``decode_only`` study.

    The study is written with ``request_mode="decode_only"``, no TTFT
    rule, a fixed 20 ms TPOT rule, and four tunable engine flags under
    TP/DP topology constraints.  The test then checks that every one of
    those flags is reported as an active knob family and that the
    proposal rules carry the decode-only TTFT guidance text.
    """
    expected_flags = (
        "tensor-parallel-size",
        "data-parallel-size",
        "max-num-seqs",
        "max-num-batched-tokens",
    )
    trace_settings = {"request_mode": "decode_only"}
    slo_settings = {
        "ttft_rule": None,
        "tpot_rule": {"kind": "fixed_ms", "threshold_ms": 20},
    }
    topology = {
        "allowed_tensor_parallel_sizes": [1, 2, 4, 8],
        "allowed_data_parallel_sizes": [1, 2, 4, 8],
        "allowed_tp_dp_products": [8],
        "require_tp_dp_product_equals_gpu_count": True,
    }
    engine_settings = {
        "tunable_flags": list(expected_flags),
        "topology_constraints": topology,
    }

    with tempfile.TemporaryDirectory() as scratch:
        spec_path = _write_study_assets(
            Path(scratch),
            trace_overrides=trace_settings,
            slo_overrides=slo_settings,
            engine_overrides=engine_settings,
        )
        study = load_study_spec(spec_path)
        window, requests = load_trace_requests(
            study, study_spec_path=spec_path
        )
        window_summary = summarize_window(requests, window)
        fresh_state = StudyState(study_id=study.study_id)
        context = build_harness_context(
            study=study,
            window_summary=window_summary,
            state=fresh_state,
        )

        # Collect the knob families the harness marks as currently active.
        active_families = set()
        for entry in context["knob_harnesses"]:
            family = entry["knob_family"]
            if entry["active_now"]:
                active_families.add(family)

        for flag in expected_flags:
            self.assertIn(flag, active_families)

        rules_text = "\n".join(context["proposal_rules"])
        self.assertIn("For decode_only studies, ignore TTFT", rules_text)
|
||||
|
||||
def test_load_study_spec_rejects_mismatched_served_model_name(self) -> None:
|
||||
with tempfile.TemporaryDirectory() as tmp:
|
||||
tmp_path = Path(tmp)
|
||||
|
||||
Reference in New Issue
Block a user