//! KV-aware routing via meta-store candidate selection + precise probing.
//!
//! The global meta store is used as a *candidate pre-filter*: we score
//! every instance's predicted prefix from the store, take the top-K by
//! (predicted_prefix DESC, load ASC), and then exact-probe those K
//! candidates' actual L0+L1 caches to get the true longest prefix. This
//! catches two cases where the meta store is wrong:
//!
//! - the store is stale (block evicted from L0/L1 but TTL not yet up),
//! - the store undercounts because some blocks' TTLs expired individually.
//!
//! Because the candidate set is sourced from the meta store rather than
//! from a load ranking, this router is a strict superset of `ttl_aware`:
//! any instance the meta store would pick is a candidate here, and the
//! exact probe can only move the decision toward a truthfully better
//! instance. Each probe adds `probe_latency_s` to the request's
//! effective arrival time.
//!
//! If the meta store returns a zero-length prefix for every instance
//! (e.g. cold start, or a request whose blocks have never been seen), we
//! fall back to the top-K least-loaded instances so the request is still
//! placed.

use crate::cluster::meta_store::MetaStore;
use crate::instance::Instance;
use crate::router::{CandidateInfo, RouteDecision, Router};
use crate::trace::RequestRecord;

pub struct PreciseRouter {
    /// Number of meta-store candidates to exact-probe.
    pub topk: u32,
    /// Latency charged per probe, added to the request's arrival time.
    pub probe_latency_s: f64,
    /// Weight of queue length relative to KV blocks in the load score.
    pub alpha: f64,
}

impl PreciseRouter {
    pub fn new(topk: u32, probe_latency_s: f64, alpha: f64) -> Self {
        Self { topk, probe_latency_s, alpha }
    }

    /// Load score: KV blocks in use plus `alpha`-weighted queue length.
    fn load_of(&self, inst: &Instance) -> f64 {
        inst.kv_blocks_used as f64 + self.alpha * inst.queue_len() as f64
    }
}

impl Router for PreciseRouter {
    fn name(&self) -> &'static str {
        "precise"
    }

    fn route(
        &mut self,
        req: &RequestRecord,
        instances: &[Instance],
        meta: &MetaStore,
        now: f64,
    ) -> RouteDecision {
        let n = instances.len();
        let k = (self.topk as usize).min(n).max(1);

        // 1. Meta-store candidate set: rank all instances by
        //    (predicted_prefix DESC, load ASC) and take the top-K.
        let meta_scores = meta.score_prefix(&req.hash_ids, now, n);
        let any_meta_hit = meta_scores.iter().any(|&p| p > 0);

        let mut ranked: Vec<usize> = (0..n).collect();
        if any_meta_hit {
            ranked.sort_by(|&a, &b| {
                // Predicted prefix descending, then load ascending.
                meta_scores[b].cmp(&meta_scores[a]).then_with(|| {
                    self.load_of(&instances[a])
                        .partial_cmp(&self.load_of(&instances[b]))
                        .unwrap_or(std::cmp::Ordering::Equal)
                })
            });
        } else {
            // Cold-start fallback: pure load order.
            ranked.sort_by(|&a, &b| {
                self.load_of(&instances[a])
                    .partial_cmp(&self.load_of(&instances[b]))
                    .unwrap_or(std::cmp::Ordering::Equal)
            });
        }
        let probed = &ranked[..k];

        // 2. Exact-probe each candidate and pick
        //    argmax(exact_prefix), tie-broken by lowest load.
        let mut candidates = Vec::with_capacity(k);
        let mut best = instances[probed[0]].id;
        // Key is (exact_prefix, -load): the lexicographic maximum gives the
        // longest prefix, breaking ties toward the least-loaded instance.
        let mut best_key = (i64::MIN, f64::NEG_INFINITY);
        for &i in probed {
            let inst = &instances[i];
            let l0 = inst.cache.l0.longest_prefix_peek(&req.hash_ids);
            let l1 = inst.cache.l1.longest_prefix_peek(&req.hash_ids[l0..]);
            let predicted = (l0 + l1) as u32;
            let load = self.load_of(inst);
            candidates.push(CandidateInfo {
                instance: inst.id,
                predicted_prefix: predicted,
                load_blocks: inst.kv_blocks_used,
                queue_len: inst.queue_len(),
            });
            let key = (predicted as i64, -load);
            if key > best_key {
                best_key = key;
                best = inst.id;
            }
        }

        RouteDecision {
            req_id: req.req_id,
            mode: "precise",
            chosen: best,
            probe_overhead_s: k as f64 * self.probe_latency_s,
            candidates,
            reason: "exact-probe top-K meta-store candidates",
        }
    }
}
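
// A minimal self-contained check of the candidate-ranking key used above:
// (predicted_prefix DESC, load ASC). The prefix/load numbers here are
// made-up illustration values, not taken from any trace or crate data.
#[cfg(test)]
mod selection_key_tests {
    #[test]
    fn prefix_desc_then_load_asc() {
        let prefix = [2u32, 5, 5];
        let load = [1.0f64, 4.0, 2.0];
        let mut ranked: Vec<usize> = (0..3).collect();
        ranked.sort_by(|&a, &b| {
            // Longer predicted prefix first; ties go to the lower load.
            prefix[b]
                .cmp(&prefix[a])
                .then_with(|| load[a].partial_cmp(&load[b]).unwrap())
        });
        // Instances 1 and 2 tie on prefix (5); 2 wins on lower load.
        assert_eq!(ranked, vec![2, 1, 0]);
    }
}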