kvcache-simulator/src/router/cache_load.rs

//! Load-filtered cache-aware routing.
//!
//! **Step 1** — filter: sort all instances by `queue_len` ascending and take the
//! least-loaded quarter (at least 2 instances, or every instance if fewer exist).
//!
//! **Step 2** — select: among that pool, pick the instance with the highest
//! local L0 prefix score (the meta store is not consulted).  Tiebreak on lowest
//! `queue_len`.
//!
//! This cleanly separates concerns: step 1 guarantees the request won't land
//! on a saturated instance, while step 2 maximises cache reuse within the
//! load-safe pool.  The 1/4 fraction keeps the pool large enough that good
//! cache candidates are rarely excluded.

use crate::cluster::meta_store::MetaStore;
use crate::instance::Instance;
use crate::router::{local_l0_scores, CandidateInfo, RouteDecision, Router};
use crate::trace::RequestRecord;

/// Stateless router: a unit struct with no fields, so every value is
/// interchangeable and copying it is free.
#[derive(Debug, Clone, Copy)]
pub struct CacheLoadRouter;

impl CacheLoadRouter {
    pub fn new() -> Self {
        Self
    }
}

impl Default for CacheLoadRouter {
    fn default() -> Self {
        Self::new()
    }
}

impl Router for CacheLoadRouter {
    fn name(&self) -> &'static str {
        "cache_load"
    }

    fn route(
        &mut self,
        req: &RequestRecord,
        instances: &[Instance],
        _meta: &MetaStore,
        _now: f64,
    ) -> RouteDecision {
        let n = instances.len();
        let scores = local_l0_scores(req, instances);

        // Step 1: least-loaded 1/4 of instances (by queue_len).
        let pool_size = (n / 4).max(2).min(n);
        let mut indices: Vec<usize> = (0..n).collect();
        indices.sort_by_key(|&i| instances[i].queue_len());
        let pool = &indices[..pool_size];

        // Step 2: among the pool, pick highest prefix score.
        // Tiebreak: lowest queue_len.
        let mut best_idx = pool[0];
        let mut best_prefix = scores[pool[0]];
        let mut best_queue = instances[pool[0]].queue_len();

        for &i in &pool[1..] {
            let p = scores[i];
            let q = instances[i].queue_len();
            if p > best_prefix || (p == best_prefix && q < best_queue) {
                best_idx = i;
                best_prefix = p;
                best_queue = q;
            }
        }

        let mut candidates = Vec::with_capacity(pool_size);
        for &i in pool {
            candidates.push(CandidateInfo {
                instance: instances[i].id,
                predicted_prefix: scores[i],
                load_blocks: instances[i].kv_blocks_used,
                queue_len: instances[i].queue_len(),
            });
        }

        crate::router::local_route_decision(
            req.req_id,
            "cache_load",
            instances[best_idx].id,
            0.0,
            candidates,
            "least-loaded 1/4, then best local L0 prefix",
        )
    }
}