feat: new router and benchmark setup

This commit is contained in:
2026-04-16 14:23:53 +08:00
parent c86d931d8f
commit 996511f300
35 changed files with 1480 additions and 76 deletions

View File

@@ -58,8 +58,11 @@ impl Router for EstimatedTtftRouter {
let miss_tokens = residency.miss_blocks.saturating_mul(inst.block_size_tokens);
let kv_prepare = self.ttft_model.kv_prepare_time_s(residency);
let first_token_tail = self.ttft_model.first_token_tail_s();
let cost =
drain + scheduler + kv_prepare + inst.compute.prefill_time(miss_tokens) + first_token_tail;
let cost = drain
+ scheduler
+ kv_prepare
+ inst.compute.prefill_time(miss_tokens)
+ first_token_tail;
candidates.push(CandidateInfo {
instance: inst.id,
@@ -72,7 +75,8 @@ impl Router for EstimatedTtftRouter {
// Minimise (cost, queue_len, -local_prefix).
let ql = inst.queue_len();
let reusable = residency.l0_hit_blocks + residency.l1_hit_blocks + residency.remote_hit_blocks;
let reusable =
residency.l0_hit_blocks + residency.l1_hit_blocks + residency.remote_hit_blocks;
let better = cost < best_cost
|| (cost == best_cost && ql < best_queue)
|| (cost == best_cost && ql == best_queue && reusable > best_reuse);