Use full state for frontier projection
This commit is contained in:
@@ -2856,6 +2856,67 @@ class CoreFlowTests(unittest.TestCase):
|
||||
},
|
||||
},
|
||||
),
|
||||
TrialSummary(
|
||||
trial_id="trial-0006",
|
||||
status="completed",
|
||||
parallel_size=4,
|
||||
best_request_rate=8.0,
|
||||
best_request_rate_per_gpu=2.0,
|
||||
config_patch={
|
||||
"env_patch": {},
|
||||
"flag_patch": {
|
||||
"tensor-parallel-size": 4,
|
||||
"gpu-memory-utilization": 0.9,
|
||||
"max-num-seqs": 16,
|
||||
},
|
||||
},
|
||||
),
|
||||
TrialSummary(
|
||||
trial_id="trial-0007",
|
||||
status="completed",
|
||||
parallel_size=4,
|
||||
best_request_rate=8.0,
|
||||
best_request_rate_per_gpu=2.0,
|
||||
config_patch={
|
||||
"env_patch": {},
|
||||
"flag_patch": {
|
||||
"tensor-parallel-size": 4,
|
||||
"gpu-memory-utilization": 0.92,
|
||||
},
|
||||
},
|
||||
),
|
||||
TrialSummary(
|
||||
trial_id="trial-0008",
|
||||
status="completed",
|
||||
parallel_size=4,
|
||||
best_request_rate=8.0,
|
||||
best_request_rate_per_gpu=2.0,
|
||||
config_patch={
|
||||
"env_patch": {},
|
||||
"flag_patch": {
|
||||
"tensor-parallel-size": 4,
|
||||
"gpu-memory-utilization": 0.9,
|
||||
"max-num-batched-tokens": 16384,
|
||||
"max-num-seqs": 16,
|
||||
},
|
||||
},
|
||||
),
|
||||
TrialSummary(
|
||||
trial_id="trial-0009",
|
||||
status="completed",
|
||||
parallel_size=4,
|
||||
best_request_rate=8.0,
|
||||
best_request_rate_per_gpu=2.0,
|
||||
config_patch={
|
||||
"env_patch": {},
|
||||
"flag_patch": {
|
||||
"tensor-parallel-size": 4,
|
||||
"gpu-memory-utilization": 0.9,
|
||||
"enable-chunked-prefill": True,
|
||||
"max-num-batched-tokens": 8192,
|
||||
},
|
||||
},
|
||||
),
|
||||
],
|
||||
)
|
||||
context = build_harness_context(
|
||||
|
||||
Reference in New Issue
Block a user