feat: new router and benchmark setup
This commit is contained in:
@@ -58,8 +58,11 @@ impl Router for EstimatedTtftRouter {
|
||||
let miss_tokens = residency.miss_blocks.saturating_mul(inst.block_size_tokens);
|
||||
let kv_prepare = self.ttft_model.kv_prepare_time_s(residency);
|
||||
let first_token_tail = self.ttft_model.first_token_tail_s();
|
||||
let cost =
|
||||
drain + scheduler + kv_prepare + inst.compute.prefill_time(miss_tokens) + first_token_tail;
|
||||
let cost = drain
|
||||
+ scheduler
|
||||
+ kv_prepare
|
||||
+ inst.compute.prefill_time(miss_tokens)
|
||||
+ first_token_tail;
|
||||
|
||||
candidates.push(CandidateInfo {
|
||||
instance: inst.id,
|
||||
@@ -72,7 +75,8 @@ impl Router for EstimatedTtftRouter {
|
||||
|
||||
// Minimise (cost, queue_len, -local_prefix).
|
||||
let ql = inst.queue_len();
|
||||
let reusable = residency.l0_hit_blocks + residency.l1_hit_blocks + residency.remote_hit_blocks;
|
||||
let reusable =
|
||||
residency.l0_hit_blocks + residency.l1_hit_blocks + residency.remote_hit_blocks;
|
||||
let better = cost < best_cost
|
||||
|| (cost == best_cost && ql < best_queue)
|
||||
|| (cost == best_cost && ql == best_queue && reusable > best_reuse);
|
||||
|
||||
Reference in New Issue
Block a user