Files
kvcache-simulator/src/router/cache_load.rs

90 lines
2.7 KiB
Rust

//! Load-filtered cache-aware routing.
//!
//! **Step 1** — filter: sort all instances by `queue_len` ascending and take the
//! least-loaded quarter (`n / 4`, rounded down), but never fewer than 2
//! instances — or all of them when fewer than 2 exist.
//!
//! **Step 2** — select: among that pool, pick the instance with the highest
//! local L0 prefix score (as returned by `local_l0_scores`; the meta store is
//! not consulted). Tiebreak on lowest `queue_len`.
//!
//! This cleanly separates concerns: step 1 guarantees the request won't land
//! on a saturated instance, while step 2 maximises cache reuse within the
//! load-safe pool. The 1/4 fraction keeps the pool large enough that good
//! cache candidates are rarely excluded.
use crate::cluster::meta_store::MetaStore;
use crate::instance::Instance;
use crate::router::{local_l0_scores, CandidateInfo, RouteDecision, Router};
use crate::trace::RequestRecord;
/// Load-filtered, cache-aware router.
///
/// The type is a unit struct: it carries no state of its own, so every value
/// of it is interchangeable.
#[derive(Debug, Clone)]
pub struct CacheLoadRouter;

impl CacheLoadRouter {
    /// Creates a new router.
    pub fn new() -> Self {
        Self
    }
}

impl Default for CacheLoadRouter {
    /// Equivalent to [`CacheLoadRouter::new`].
    fn default() -> Self {
        Self::new()
    }
}
impl Router for CacheLoadRouter {
fn name(&self) -> &'static str {
"cache_load"
}
fn route(
&mut self,
req: &RequestRecord,
instances: &[Instance],
_meta: &MetaStore,
_now: f64,
) -> RouteDecision {
let n = instances.len();
let scores = local_l0_scores(req, instances);
// Step 1: least-loaded 1/4 of instances (by queue_len).
let pool_size = (n / 4).max(2).min(n);
let mut indices: Vec<usize> = (0..n).collect();
indices.sort_by_key(|&i| instances[i].queue_len());
let pool = &indices[..pool_size];
// Step 2: among the pool, pick highest prefix score.
// Tiebreak: lowest queue_len.
let mut best_idx = pool[0];
let mut best_prefix = scores[pool[0]];
let mut best_queue = instances[pool[0]].queue_len();
for &i in &pool[1..] {
let p = scores[i];
let q = instances[i].queue_len();
if p > best_prefix || (p == best_prefix && q < best_queue) {
best_idx = i;
best_prefix = p;
best_queue = q;
}
}
let mut candidates = Vec::with_capacity(pool_size);
for &i in pool {
candidates.push(CandidateInfo {
instance: instances[i].id,
predicted_prefix: scores[i],
load_blocks: instances[i].kv_blocks_used,
queue_len: instances[i].queue_len(),
});
}
crate::router::local_route_decision(
req.req_id,
"cache_load",
instances[best_idx].id,
0.0,
candidates,
"least-loaded 1/4, then best local L0 prefix",
)
}
}