feat: new router and benchmark setup
This commit is contained in:
@@ -54,7 +54,11 @@ impl Cluster {
|
||||
router,
|
||||
block_size_tokens: model.block_size_tokens,
|
||||
kv_block_bytes: model.kv_block_bytes(),
|
||||
ttft_model: TtftModel::new(&config.hardware, &config.calibration, model.kv_block_bytes()),
|
||||
ttft_model: TtftModel::new(
|
||||
&config.hardware,
|
||||
&config.calibration,
|
||||
model.kv_block_bytes(),
|
||||
),
|
||||
}
|
||||
}
|
||||
|
||||
@@ -256,6 +260,8 @@ mod tests {
|
||||
let req = RequestRecord {
|
||||
req_id: 1,
|
||||
chat_id: 0,
|
||||
parent_chat_id: -1,
|
||||
turn: 1,
|
||||
arrival: 0.0,
|
||||
input_len: 32,
|
||||
output_len: 16,
|
||||
@@ -269,7 +275,10 @@ mod tests {
|
||||
.insert_blocks(&req.hash_ids, &mut evicted);
|
||||
|
||||
let stats = cluster.route_and_admit(&req, 0.0);
|
||||
let pure_pcie = cluster.instances[0].links.pcie.cost(cluster.kv_block_bytes * 2);
|
||||
let pure_pcie = cluster.instances[0]
|
||||
.links
|
||||
.pcie
|
||||
.cost(cluster.kv_block_bytes * 2);
|
||||
|
||||
assert!(stats.ready_at > pure_pcie);
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user