Make harness runtime refinement memory safe

This commit is contained in:
2026-05-06 17:37:31 +08:00
parent cf2e741550
commit 5d96689ea6
3 changed files with 136 additions and 4 deletions

View File

@@ -751,7 +751,6 @@ class CoreFlowTests(unittest.TestCase):
proposal.config_patch.flag_patch,
{
"tensor-parallel-size": 2,
"gpu-memory-utilization": 0.95,
"enable-chunked-prefill": True,
"max-num-batched-tokens": 16384,
},