Make harness runtime refinement memory safe

2026-05-06 17:37:31 +08:00
parent cf2e741550
commit 5d96689ea6
3 changed files with 136 additions and 4 deletions
--- a/tests/test_core_flow.py
+++ b/tests/test_core_flow.py
@@ -751,7 +751,6 @@ class CoreFlowTests(unittest.TestCase):
                proposal.config_patch.flag_patch,
                {
                    "tensor-parallel-size": 2,
-                    "gpu-memory-utilization": 0.95,
                    "enable-chunked-prefill": True,
                    "max-num-batched-tokens": 16384,
                },