//! KV-aware routing via meta-store candidate selection + precise probing.
//!
//! The global meta store is used as a *candidate pre-filter*: we score
//! every instance's predicted prefix from the store, take the top-K by
//! (predicted_prefix DESC, load ASC), and then exact-probe those K
//! candidates' actual L0+L1 caches to get the true longest prefix. This
//! catches two cases where the meta store is wrong:
//!
//! - the store is stale (a block was evicted from L0/L1 but its TTL has
//!   not yet expired),
//! - the store undercounts because some blocks' TTLs expired individually.
//!
//! Because the candidate set is sourced from the meta store rather than
//! from a load ranking, this router's candidate set is a strict superset
//! of what `ttl_aware` considers: any instance the meta store would pick
//! is a candidate here, and the exact probe can only move the decision
//! toward a genuinely better instance. Each probe adds `probe_latency_s`
//! to the request's effective arrival time.
//!
//! If the meta store returns a zero prefix for every instance (e.g. cold
//! start, or a request whose blocks have never been seen), we fall back
//! to the top-K least-loaded instances so the request is still placed.

use crate::cluster::meta_store::MetaStore;
use crate::instance::Instance;
use crate::router::{CandidateInfo, RouteDecision, Router};
use crate::trace::RequestRecord;

pub struct PreciseRouter {
    /// Number of meta-store candidates to exact-probe.
    pub topk: u32,
    /// Latency charged per exact probe, in seconds.
    pub probe_latency_s: f64,
    /// Weight of queue length relative to KV blocks in the load score.
    pub alpha: f64,
}

impl PreciseRouter {
    pub fn new(topk: u32, probe_latency_s: f64, alpha: f64) -> Self {
        Self { topk, probe_latency_s, alpha }
    }

    /// Load score: KV blocks in use plus `alpha` times the queue length.
    fn load_of(&self, inst: &Instance) -> f64 {
        inst.kv_blocks_used as f64 + self.alpha * inst.queue_len() as f64
    }
}

impl Router for PreciseRouter {
    fn name(&self) -> &'static str {
        "precise"
    }

    fn route(
        &mut self,
        req: &RequestRecord,
        instances: &[Instance],
        meta: &MetaStore,
        now: f64,
    ) -> RouteDecision {
        let n = instances.len();
        let k = (self.topk as usize).min(n).max(1);

        // 1. Meta-store candidate set: rank all instances by
        //    (predicted_prefix DESC, load ASC) and take the top-K.
        let meta_scores = meta.score_prefix(&req.hash_ids, now, n);
        let any_meta_hit = meta_scores.iter().any(|&p| p > 0);

        let mut ranked: Vec<usize> = (0..n).collect();
        if any_meta_hit {
            ranked.sort_by(|&a, &b| {
                let pa = meta_scores[a];
                let pb = meta_scores[b];
                // prefix desc, then load asc
                pb.cmp(&pa).then_with(|| {
                    self.load_of(&instances[a])
                        .partial_cmp(&self.load_of(&instances[b]))
                        .unwrap_or(std::cmp::Ordering::Equal)
                })
            });
        } else {
            // Cold-start fallback: pure load order.
            ranked.sort_by(|&a, &b| {
                self.load_of(&instances[a])
                    .partial_cmp(&self.load_of(&instances[b]))
                    .unwrap_or(std::cmp::Ordering::Equal)
            });
        }
        let probed = &ranked[..k];

        // 2. Exact-probe each candidate and pick
        //    argmax(exact_prefix, tiebreak: -load).
        let mut candidates = Vec::with_capacity(k);
        let mut best = instances[probed[0]].id;
        // Max-key: (exact prefix, negated load), so tuple ordering gives
        // "longest prefix first, then least loaded".
        let mut best_key: (i64, f64) = (i64::MIN, f64::NEG_INFINITY);
        for &i in probed {
            let inst = &instances[i];
            let l0 = inst.cache.l0.longest_prefix_peek(&req.hash_ids);
            let l1 = inst.cache.l1.longest_prefix_peek(&req.hash_ids[l0..]);
            let predicted = (l0 + l1) as u32;
            let load = self.load_of(inst);
            candidates.push(CandidateInfo {
                instance: inst.id,
                predicted_prefix: predicted,
                load_blocks: inst.kv_blocks_used,
                queue_len: inst.queue_len(),
            });
            let key = (predicted as i64, -load);
            if key > best_key {
                best_key = key;
                best = inst.id;
            }
        }

        RouteDecision {
            req_id: req.req_id,
            mode: "precise",
            chosen: best,
            probe_overhead_s: k as f64 * self.probe_latency_s,
            candidates,
            reason: "exact-probe top-K meta-store candidates",
        }
    }
}
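
// ---------------------------------------------------------------------------
// Illustrative sketch: the selection key used at both stages.
//
// The meta-store ranking (step 1) and the exact-probe argmax (step 2) both
// order candidates lexicographically by (prefix DESC, load ASC). The tests
// below exercise that ordering on plain tuples, independent of `Instance`
// and `MetaStore`; the prefix lengths and load values are made up purely
// for illustration.
// ---------------------------------------------------------------------------
#[cfg(test)]
mod selection_key_tests {
    /// Encode "prefix DESC, load ASC" as a max-key: (prefix, -load).
    fn key(prefix: u32, load: f64) -> (i64, f64) {
        (prefix as i64, -load)
    }

    #[test]
    fn longer_prefix_beats_lower_load() {
        // A 12-block prefix on a heavily loaded instance still wins over
        // a 3-block prefix on an idle one.
        assert!(key(12, 800.0) > key(3, 10.0));
    }

    #[test]
    fn equal_prefix_falls_back_to_load() {
        // With identical prefixes, the less-loaded instance wins.
        assert!(key(7, 50.0) > key(7, 200.0));
    }
}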