15 lines
219 B
JSON
15 lines
219 B
JSON
{
  "prefill_service_by_bucket": {
    "4k": {
      "tp4_ms": 320,
      "tp8_ms": 240
    }
  },
  "queueing_knee_by_bucket": {
    "4k": {
      "tp4_tok_s_per_gpu": 1000,
      "tp8_tok_s_per_gpu": 1100
    }
  }
}