//! Offline oracle analyzers for upper-bound KV-cache hit rates.
//!
//! Two analyses, both treating the cluster as a single aggregated cache so
//! the result is independent of routing — i.e. they answer the question
//! "what is the best the cluster could possibly do?":
//!
//! 1. **Unlimited capacity**: longest-prefix-match against an unbounded
//!    cache. The only misses are blocks that the prefix walk encounters for
//!    the first time. Sets the absolute ceiling.
//!
//! 2. **Belady (offline optimal eviction) at finite capacity**: classic
//!    OPT replacement — evict the cached block whose *next* access is
//!    furthest in the future. Run alongside an LRU baseline at the same
//!    capacity so the gap tells you how much room LRU is leaving.
//!
//! Hit accounting uses prefix-match semantics matching the rest of the
//! simulator: a block at position k in a request counts as a hit only if
//! all positions 0..k are also in the cache.

use ahash::{AHashMap, AHashSet};
use serde::Serialize;
use std::collections::BinaryHeap;

use crate::instance::kv_cache::LruBlocks;
use crate::trace::RequestRecord;

#[derive(Debug, Clone, Serialize)]
pub struct OracleResult {
    pub num_requests: u64,
    pub total_blocks: u64,
    pub unique_blocks: u64,
    pub unlimited: TierResult,
    pub belady_finite: TierResult,
    pub lru_finite: TierResult,
}

#[derive(Debug, Clone, Serialize, Default)]
pub struct TierResult {
    pub label: String,
    pub capacity_blocks: u64,
    pub hits: u64,
    pub misses: u64,
    pub hit_rate: f64,
}

impl TierResult {
    fn from_counts(label: &str, capacity_blocks: u64, hits: u64, total: u64) -> Self {
        let misses = total.saturating_sub(hits);
        TierResult {
            label: label.to_string(),
            capacity_blocks,
            hits,
            misses,
            hit_rate: if total == 0 { 0.0 } else { hits as f64 / total as f64 },
        }
    }
}
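/// Run all three oracle analyses over `records`, using `capacity_blocks`
/// for the Belady and LRU tiers.
///
/// Minimal usage sketch. `load_trace` is a hypothetical helper standing in
/// for however the caller obtains its `Vec<RequestRecord>`, and 4096 is an
/// arbitrary capacity:
///
/// ```ignore
/// // `load_trace` is hypothetical; any source of `RequestRecord`s works.
/// let records: Vec<RequestRecord> = load_trace("trace.jsonl");
/// let result = analyze(&records, 4096);
/// // The unlimited tier is a ceiling for any finite-capacity policy.
/// assert!(result.unlimited.hit_rate >= result.belady_finite.hit_rate);
/// println!("belady@4096: {:.3}", result.belady_finite.hit_rate);
/// ```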
pub fn analyze(records: &[RequestRecord], capacity_blocks: u64) -> OracleResult {
    // total / unique counters
    let total_blocks: u64 = records.iter().map(|r| r.hash_ids.len() as u64).sum();
    let mut unique = AHashSet::new();
    for r in records {
        for &h in &r.hash_ids {
            unique.insert(h);
        }
    }

    // 1. Unlimited cache
    let unlimited_hits = run_unlimited(records);
    let unlimited = TierResult::from_counts("unlimited", u64::MAX, unlimited_hits, total_blocks);

    // 2. Precompute next-use index for Belady
    let next_use = build_next_use(records);

    // 3. Belady at the given capacity
    let belady_hits = run_belady(records, &next_use, capacity_blocks as usize);
    let belady = TierResult::from_counts("belady", capacity_blocks, belady_hits, total_blocks);

    // 4. LRU baseline at the same capacity
    let lru_hits = run_lru(records, capacity_blocks as usize);
    let lru = TierResult::from_counts("lru", capacity_blocks, lru_hits, total_blocks);

    OracleResult {
        num_requests: records.len() as u64,
        total_blocks,
        unique_blocks: unique.len() as u64,
        unlimited,
        belady_finite: belady,
        lru_finite: lru,
    }
}

fn run_unlimited(records: &[RequestRecord]) -> u64 {
    let mut seen: AHashSet<u64> = AHashSet::with_capacity(1 << 18);
    let mut hits: u64 = 0;
    for r in records {
        // Longest prefix match against `seen`
        for &h in &r.hash_ids {
            if seen.contains(&h) {
                hits += 1;
            } else {
                break;
            }
        }
        for &h in &r.hash_ids {
            seen.insert(h);
        }
    }
    hits
}

fn run_lru(records: &[RequestRecord], capacity: usize) -> u64 {
    if capacity == 0 {
        return 0;
    }
    let mut cache = LruBlocks::new(capacity);
    let mut hits: u64 = 0;
    let mut evicted = Vec::new();
    for r in records {
        hits += cache.longest_prefix(&r.hash_ids) as u64;
        // `insert_blocks` reports which hashes it evicted; the oracle does
        // not need them, so the buffer is only reused to satisfy the API.
        evicted.clear();
        cache.insert_blocks(&r.hash_ids, &mut evicted);
    }
    hits
}

/// For each (request_idx, position_in_hash_ids) compute the next request
/// index whose `hash_ids` contains the same block (`u32::MAX` if none).
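///
/// Worked example on a hypothetical two-request trace (the hash values are
/// arbitrary, chosen only for illustration):
///
/// ```text
/// requests:  [7, 8]          then  [7]
/// next_use:  [1, u32::MAX]         [u32::MAX]
/// ```
///
/// Block 7 recurs in request 1, so its entry in request 0 points there;
/// block 8 never recurs, and nothing follows request 1.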
fn build_next_use(records: &[RequestRecord]) -> Vec<Vec<u32>> {
    let n = records.len();
    let mut next_use: Vec<Vec<u32>> = Vec::with_capacity(n);
    for r in records {
        next_use.push(vec![u32::MAX; r.hash_ids.len()]);
    }
    // Walk the trace backwards so `last_seen` always holds the index of the
    // *next* occurrence relative to the request being filled in.
    let mut last_seen: AHashMap<u64, u32> = AHashMap::with_capacity(1 << 18);
    for i in (0..n).rev() {
        let r = &records[i];
        for (j, &h) in r.hash_ids.iter().enumerate() {
            next_use[i][j] = *last_seen.get(&h).unwrap_or(&u32::MAX);
        }
        for &h in &r.hash_ids {
            last_seen.insert(h, i as u32);
        }
    }
    next_use
}

/// Belady (offline OPT) eviction over the trace.
///
/// Implementation: lazy-deletion max-heap keyed by next-use index. Each
/// cache entry has a version; the heap may contain stale entries from
/// previous insertions, which we skip on pop.
fn run_belady(records: &[RequestRecord], next_use: &[Vec<u32>], capacity: usize) -> u64 {
    if capacity == 0 {
        return 0;
    }
    // block_hash -> (current_version, current_next_use)
    let mut in_cache: AHashMap<u64, (u64, u32)> = AHashMap::with_capacity(capacity);
    // (next_use, version, block_hash) — BinaryHeap is a max-heap, which is what
    // we want for "evict the entry whose next access is furthest".
    let mut heap: BinaryHeap<(u32, u64, u64)> = BinaryHeap::with_capacity(capacity);
    let mut version: u64 = 0;
    let mut hits: u64 = 0;

    for (i, r) in records.iter().enumerate() {
        // 1. Longest-prefix hit accounting against current cache.
        for &h in &r.hash_ids {
            if in_cache.contains_key(&h) {
                hits += 1;
            } else {
                break;
            }
        }

        // 2. Insert / update each block in the request with its new next-use.
        for (j, &h) in r.hash_ids.iter().enumerate() {
            let nu = next_use[i][j];
            if let Some(slot) = in_cache.get_mut(&h) {
                // Already cached: bump the version so older heap entries for
                // this block become stale, then record the new next-use.
                version += 1;
                slot.0 = version;
                slot.1 = nu;
                heap.push((nu, version, h));
                continue;
            }

            // Need to make room?
            if in_cache.len() == capacity {
                // Evict the entry with the maximum next_use, skipping stale
                // heap entries left behind by earlier updates.
                loop {
                    let (nu_top, ver_top, h_top) = match heap.pop() {
                        Some(x) => x,
                        None => break,
                    };
                    if let Some(&(cur_ver, cur_nu)) = in_cache.get(&h_top) {
                        if cur_ver == ver_top && cur_nu == nu_top {
                            in_cache.remove(&h_top);
                            break;
                        }
                    }
                    // stale; loop
                }
            }

            version += 1;
            in_cache.insert(h, (version, nu));
            heap.push((nu, version, h));
        }
    }
    hits
}

#[cfg(test)]
mod tests {
    use super::*;

    fn req(id: u64, t: f64, hashes: Vec<u64>) -> RequestRecord {
        RequestRecord {
            req_id: id,
            chat_id: id as i64,
            arrival: t,
            input_len: (hashes.len() as u32) * 16,
            output_len: 16,
            hash_ids: hashes,
        }
    }

    #[test]
    fn unlimited_first_occurrence_misses() {
        let recs = vec![
            req(0, 0.0, vec![1, 2, 3]),
            req(1, 1.0, vec![1, 2, 3, 4]),
            req(2, 2.0, vec![1, 2, 3, 4, 5]),
        ];
        let out = analyze(&recs, 100);
        // total = 3 + 4 + 5 = 12
        assert_eq!(out.total_blocks, 12);
        // unique = {1,2,3,4,5} = 5
        assert_eq!(out.unique_blocks, 5);
        // unlimited hits = 0 (req 0 all miss) + 3 (req 1 has [1,2,3] cached, then 4 miss)
        //                + 4 (req 2 has [1,2,3,4] cached, then 5 miss) = 7
        assert_eq!(out.unlimited.hits, 7);
        assert!((out.unlimited.hit_rate - 7.0 / 12.0).abs() < 1e-9);
    }

    #[test]
    fn belady_beats_lru_when_lru_thrashes() {
        // Capacity 2. Pattern designed so LRU thrashes but Belady keeps the
        // useful block: A B A C A B A C ...
        let mut recs = Vec::new();
        let pattern = [1u64, 2, 1, 3, 1, 2, 1, 3];
        for (i, &h) in pattern.iter().enumerate() {
            recs.push(req(i as u64, i as f64, vec![h]));
        }
        let out = analyze(&recs, 2);
        assert!(
            out.belady_finite.hits >= out.lru_finite.hits,
            "belady should be at least as good as lru: belady={} lru={}",
            out.belady_finite.hits,
            out.lru_finite.hits
        );
    }

    #[test]
    fn unlimited_is_upper_bound() {
        let recs = vec![
            req(0, 0.0, vec![10, 20, 30]),
            req(1, 1.0, vec![10, 20, 30, 40, 50]),
            req(2, 2.0, vec![60]),
            req(3, 3.0, vec![10, 20, 30, 40, 50, 60]),
        ];
        let out = analyze(&recs, 3);
        assert!(out.unlimited.hit_rate >= out.belady_finite.hit_rate);
        assert!(out.belady_finite.hit_rate >= out.lru_finite.hit_rate - 1e-9);
    }
}