Make harness runtime refinement memory safe
This commit is contained in:
@@ -751,7 +751,6 @@ class CoreFlowTests(unittest.TestCase):
|
||||
proposal.config_patch.flag_patch,
|
||||
{
|
||||
"tensor-parallel-size": 2,
|
||||
"gpu-memory-utilization": 0.95,
|
||||
"enable-chunked-prefill": True,
|
||||
"max-num-batched-tokens": 16384,
|
||||
},
|
||||
|
||||
Reference in New Issue
Block a user