Add generic decode-only harness guidance

This commit is contained in:
2026-04-28 06:46:18 +08:00
parent 71902b9fc2
commit 39aa47fbf1
3 changed files with 124 additions and 7 deletions

View File

@@ -486,6 +486,52 @@ class CoreFlowTests(unittest.TestCase):
self.assertIn("There is no TTFT SLO for this study.", prompt)
self.assertIn("decode-only", prompt)
def test_decode_only_harness_defaults_to_decode_tpot(self) -> None:
    # Scenario: a decode_only trace with no TTFT rule and a fixed 20 ms TPOT
    # rule. Every tunable flag listed below must be reported as an active
    # knob family, and the proposal rules must carry the decode-only
    # "ignore TTFT" guidance.
    expected_flags = (
        "tensor-parallel-size",
        "data-parallel-size",
        "max-num-seqs",
        "max-num-batched-tokens",
    )
    trace_settings = {"request_mode": "decode_only"}
    slo_settings = {
        "ttft_rule": None,
        "tpot_rule": {"kind": "fixed_ms", "threshold_ms": 20},
    }
    engine_settings = {
        # Pass a fresh list so the expectation tuple above stays untouched.
        "tunable_flags": list(expected_flags),
        "topology_constraints": {
            "allowed_tensor_parallel_sizes": [1, 2, 4, 8],
            "allowed_data_parallel_sizes": [1, 2, 4, 8],
            "allowed_tp_dp_products": [8],
            "require_tp_dp_product_equals_gpu_count": True,
        },
    }

    with tempfile.TemporaryDirectory() as tmp:
        tmp_path = Path(tmp)
        study_path = _write_study_assets(
            tmp_path,
            trace_overrides=trace_settings,
            slo_overrides=slo_settings,
            engine_overrides=engine_settings,
        )
        study = load_study_spec(study_path)
        window, requests = load_trace_requests(study, study_spec_path=study_path)

        window_summary = summarize_window(requests, window)
        fresh_state = StudyState(study_id=study.study_id)
        context = build_harness_context(
            study=study,
            window_summary=window_summary,
            state=fresh_state,
        )

        # Collect the knob families the harness marks as active right now.
        active = set()
        for entry in context["knob_harnesses"]:
            if entry["active_now"]:
                active.add(entry["knob_family"])

        for flag in expected_flags:
            self.assertIn(flag, active)

        # The rules are matched as one newline-joined blob of text.
        rules_text = "\n".join(context["proposal_rules"])
        self.assertIn("For decode_only studies, ignore TTFT", rules_text)
def test_load_study_spec_rejects_mismatched_served_model_name(self) -> None:
with tempfile.TemporaryDirectory() as tmp:
tmp_path = Path(tmp)