kvcache-simulator/src/oracle.rs

//! Offline oracle analyzers for upper-bound KV-cache hit rates.
//!
//! Two analyses, both treating the cluster as a single aggregated cache so
//! the result is independent of routing — i.e. they answer the question
//! "what is the best the cluster could possibly do?":
//!
//! 1. **Unlimited capacity**: longest-prefix-match against an unbounded
//!    cache. The only misses are blocks that the prefix walk encounters for
//!    the first time. Sets the absolute ceiling.
//!
//! 2. **Belady (offline optimal eviction) at finite capacity**: classic
//!    OPT replacement — evict the cached block whose *next* access is
//!    furthest in the future. Run alongside an LRU baseline at the same
//!    capacity so the gap tells you how much room LRU is leaving.
//!
//! Hit accounting uses prefix-match semantics matching the rest of the
//! simulator: a block at position k in a request counts as a hit only if
//! all positions 0..k are also in the cache.

use ahash::{AHashMap, AHashSet};
use serde::Serialize;
use std::collections::BinaryHeap;

use crate::instance::kv_cache::LruBlocks;
use crate::trace::RequestRecord;

/// Summary produced by [`analyze`] for one trace: trace-wide counters plus
/// one [`TierResult`] per simulated cache policy.
#[derive(Debug, Clone, Serialize)]
pub struct OracleResult {
    /// Number of request records in the analyzed trace.
    pub num_requests: u64,
    /// Sum of `hash_ids.len()` over all records, i.e. every block reference
    /// is counted, repeats included.
    pub total_blocks: u64,
    /// Number of distinct block hashes appearing anywhere in the trace.
    pub unique_blocks: u64,
    /// Unbounded-cache result (labelled `"unlimited"`, capacity reported as
    /// `u64::MAX`).
    pub unlimited: TierResult,
    /// Belady eviction at the requested capacity (labelled `"belady"`).
    pub belady_finite: TierResult,
    /// LRU baseline at the same capacity (labelled `"lru"`).
    pub lru_finite: TierResult,
}

/// Hit/miss statistics for one simulated cache configuration.
#[derive(Debug, Clone, Serialize, Default)]
pub struct TierResult {
    /// Name of the policy this row describes (e.g. `"unlimited"`, `"belady"`,
    /// `"lru"`).
    pub label: String,
    /// Cache capacity, in blocks, that this row is reported for.
    pub capacity_blocks: u64,
    /// Number of block references counted as hits.
    pub hits: u64,
    /// Block references that were not hits (`total - hits`, saturating at 0).
    pub misses: u64,
    /// `hits / total` as a fraction; `0.0` when there were no block
    /// references at all.
    pub hit_rate: f64,
}

impl TierResult {
    fn from_counts(label: &str, capacity_blocks: u64, hits: u64, total: u64) -> Self {
        let misses = total.saturating_sub(hits);
        TierResult {
            label: label.to_string(),
            capacity_blocks,
            hits,
            misses,
            hit_rate: if total == 0 {
                0.0
            } else {
                hits as f64 / total as f64
            },
        }
    }
}

/// Runs every oracle analysis over `records` and collects the results.
///
/// * `records` - the trace, in replay order.
/// * `capacity_blocks` - capacity (in blocks) used for the finite Belady and
///   LRU simulations. It is reported unchanged in the corresponding
///   [`TierResult`]s.
///
/// Returns trace-wide counters together with the unlimited-cache, Belady and
/// LRU tiers. All hit rates are relative to the total number of block
/// references in the trace.
pub fn analyze(records: &[RequestRecord], capacity_blocks: u64) -> OracleResult {
    // Trace-wide counters, gathered in a single pass over the records.
    let mut total_blocks: u64 = 0;
    let mut unique = AHashSet::new();
    for r in records {
        total_blocks += r.hash_ids.len() as u64;
        for &h in &r.hash_ids {
            unique.insert(h);
        }
    }
    let unique_count = unique.len();
    // The set is only needed for its size; release it before simulating.
    drop(unique);

    // A cache able to hold every distinct block of the trace never has to
    // evict, so any capacity above `unique_count` simulates exactly like
    // `unique_count`. Clamping first means the narrowing cast below can no
    // longer truncate (the clamped value is at most a `usize`), and a huge
    // "effectively unlimited" capacity is never handed to simulators that
    // size their tables by it.
    let sim_capacity = capacity_blocks.min(unique_count as u64) as usize;

    // 1. Unlimited cache
    let unlimited_hits = run_unlimited(records);
    let unlimited = TierResult::from_counts("unlimited", u64::MAX, unlimited_hits, total_blocks);

    // 2. Precompute next-use index for Belady
    let next_use = build_next_use(records);

    // 3. Belady at the given capacity
    let belady_hits = run_belady(records, &next_use, sim_capacity);
    let belady = TierResult::from_counts("belady", capacity_blocks, belady_hits, total_blocks);

    // 4. LRU baseline at the same capacity
    let lru_hits = run_lru(records, sim_capacity);
    let lru = TierResult::from_counts("lru", capacity_blocks, lru_hits, total_blocks);

    OracleResult {
        num_requests: records.len() as u64,
        total_blocks,
        unique_blocks: unique_count as u64,
        unlimited,
        belady_finite: belady,
        lru_finite: lru,
    }
}

/// Counts prefix hits against a cache that never evicts.
///
/// For each record, the hit count grows by the length of the longest leading
/// run of `hash_ids` that were all present before the record was processed.
/// Afterwards every block of the record is added to the cache, including the
/// ones past the first miss.
fn run_unlimited(records: &[RequestRecord]) -> u64 {
    let mut known: AHashSet<u64> = AHashSet::with_capacity(1 << 18);
    let mut total_hits = 0u64;
    for rec in records {
        // Stop at the first block that has never been seen: later blocks
        // cannot count as hits under prefix-match semantics.
        let matched = rec
            .hash_ids
            .iter()
            .take_while(|id| known.contains(*id))
            .count();
        total_hits += matched as u64;

        rec.hash_ids.iter().for_each(|&id| {
            known.insert(id);
        });
    }
    total_hits
}

/// Replays the trace through an [`LruBlocks`] cache of `capacity` blocks and
/// returns the summed `longest_prefix` results.
///
/// A capacity of zero short-circuits to zero hits without constructing a
/// cache. For every record the prefix length is queried first and the
/// record's blocks are inserted afterwards. The buffer handed to
/// `insert_blocks` is cleared before each call and never read here.
fn run_lru(records: &[RequestRecord], capacity: usize) -> u64 {
    if capacity == 0 {
        return 0;
    }
    let mut lru = LruBlocks::new(capacity);
    // Reused across iterations so each insert does not allocate a fresh Vec.
    let mut scratch = Vec::new();
    records
        .iter()
        .map(|rec| {
            let matched = lru.longest_prefix(&rec.hash_ids) as u64;
            scratch.clear();
            lru.insert_blocks(&rec.hash_ids, &mut scratch);
            matched
        })
        .sum()
}

/// Builds the next-use table consumed by the Belady simulation.
///
/// The result has one row per record and one entry per position of that
/// record's `hash_ids`. Entry `[i][j]` is the index of the nearest record
/// after `i` whose `hash_ids` contains the same block, or `u32::MAX` when the
/// block does not appear again. A block repeated inside one record gets the
/// same value at each of its positions.
fn build_next_use(records: &[RequestRecord]) -> Vec<Vec<u32>> {
    let mut table: Vec<Vec<u32>> = records
        .iter()
        .map(|rec| vec![u32::MAX; rec.hash_ids.len()])
        .collect();

    // block hash -> index of the closest later record that references it.
    let mut upcoming: AHashMap<u64, u32> = AHashMap::with_capacity(1 << 18);

    // Walk the trace backwards so `upcoming` only ever describes records
    // that come after the one being filled in.
    for (idx, rec) in records.iter().enumerate().rev() {
        for (slot, block) in table[idx].iter_mut().zip(&rec.hash_ids) {
            if let Some(&later) = upcoming.get(block) {
                *slot = later;
            }
        }
        // Register this record only after its own row is complete, so a
        // block never reports the current record as its next use.
        let here = idx as u32;
        for &block in &rec.hash_ids {
            upcoming.insert(block, here);
        }
    }
    table
}

/// Belady (offline OPT) eviction over the trace; returns the number of
/// prefix hits.
///
/// * `records` - the trace, in replay order.
/// * `next_use` - table where `next_use[i][j]` is the next record index that
///   references block `records[i].hash_ids[j]` (`u32::MAX` if none). Indexed
///   directly, so it must cover every position of every record.
/// * `capacity` - maximum number of cached blocks; `0` yields zero hits.
///
/// Per record, hits are counted first as the longest leading run of blocks
/// already cached. Then every block of the record is inserted or refreshed
/// with its new next-use. Insertion is unconditional: when the cache is full,
/// the entry with the largest next-use is evicted to make room, even if the
/// incoming block is itself never used again.
///
/// Implementation: lazy-deletion max-heap keyed by next-use index. Each
/// cache entry has a version; the heap may contain stale entries from
/// previous insertions, which we skip on pop.
fn run_belady(records: &[RequestRecord], next_use: &[Vec<u32>], capacity: usize) -> u64 {
    if capacity == 0 {
        return 0;
    }

    // `capacity` may be an "effectively unlimited" sentinel such as
    // `usize::MAX`. Using it directly as a preallocation size would overflow
    // or attempt an enormous allocation. The cache can never hold more
    // blocks than the trace references, so that count bounds the hint.
    let total_refs = records
        .iter()
        .fold(0usize, |acc, r| acc.saturating_add(r.hash_ids.len()));
    let prealloc = capacity.min(total_refs);

    // block_hash -> (current_version, current_next_use)
    let mut in_cache: AHashMap<u64, (u64, u32)> = AHashMap::with_capacity(prealloc);
    // (next_use, version, block_hash) — BinaryHeap is a max-heap, which is
    // what we want for "evict the entry whose next access is furthest".
    let mut heap: BinaryHeap<(u32, u64, u64)> = BinaryHeap::with_capacity(prealloc);
    let mut version: u64 = 0;
    let mut hits: u64 = 0;

    for (i, r) in records.iter().enumerate() {
        // 1. Longest-prefix hit accounting against the current cache.
        hits += r
            .hash_ids
            .iter()
            .take_while(|h| in_cache.contains_key(*h))
            .count() as u64;

        // 2. Insert / update each block in the request with its new next-use.
        for (j, &h) in r.hash_ids.iter().enumerate() {
            let nu = next_use[i][j];
            // Every processed block gets a fresh, unique version so that
            // older heap entries for the same hash can be recognised as stale.
            version += 1;

            if let Some(slot) = in_cache.get_mut(&h) {
                *slot = (version, nu);
                heap.push((nu, version, h));
                continue;
            }

            // Need to make room?
            if in_cache.len() == capacity {
                // Evict the max next-use entry, skipping heap entries that no
                // longer match the live (version, next_use) of their block.
                while let Some((nu_top, ver_top, h_top)) = heap.pop() {
                    if in_cache.get(&h_top) == Some(&(ver_top, nu_top)) {
                        in_cache.remove(&h_top);
                        break;
                    }
                }
            }
            in_cache.insert(h, (version, nu));
            heap.push((nu, version, h));
        }
    }

    hits
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a minimal record: `chat_id` mirrors `id`, `input_len` is 16 per
    /// block hash, and `output_len` is fixed at 16.
    fn req(id: u64, t: f64, hashes: Vec<u64>) -> RequestRecord {
        let input_len = (hashes.len() as u32) * 16;
        RequestRecord {
            req_id: id,
            chat_id: id as i64,
            arrival: t,
            input_len,
            output_len: 16,
            hash_ids: hashes,
        }
    }

    #[test]
    fn unlimited_first_occurrence_misses() {
        let trace = vec![
            req(0, 0.0, vec![1, 2, 3]),
            req(1, 1.0, vec![1, 2, 3, 4]),
            req(2, 2.0, vec![1, 2, 3, 4, 5]),
        ];
        let report = analyze(&trace, 100);

        // 3 + 4 + 5 block references over the blocks {1, 2, 3, 4, 5}.
        assert_eq!(report.total_blocks, 12);
        assert_eq!(report.unique_blocks, 5);

        // Request 0 misses everything, request 1 hits [1, 2, 3] and misses 4,
        // request 2 hits [1, 2, 3, 4] and misses 5: 0 + 3 + 4 hits.
        assert_eq!(report.unlimited.hits, 7);
        let expected_rate = 7.0 / 12.0;
        assert!((report.unlimited.hit_rate - expected_rate).abs() < 1e-9);
    }

    #[test]
    fn belady_beats_lru_when_lru_thrashes() {
        // Capacity 2 with the single-block access pattern A B A C A B A C,
        // chosen so that LRU thrashes while Belady can keep the useful block.
        let trace: Vec<RequestRecord> = [1u64, 2, 1, 3, 1, 2, 1, 3]
            .iter()
            .enumerate()
            .map(|(i, &h)| req(i as u64, i as f64, vec![h]))
            .collect();
        let report = analyze(&trace, 2);
        assert!(
            report.belady_finite.hits >= report.lru_finite.hits,
            "belady should be at least as good as lru: belady={} lru={}",
            report.belady_finite.hits,
            report.lru_finite.hits
        );
    }

    #[test]
    fn unlimited_is_upper_bound() {
        let trace = vec![
            req(0, 0.0, vec![10, 20, 30]),
            req(1, 1.0, vec![10, 20, 30, 40, 50]),
            req(2, 2.0, vec![60]),
            req(3, 3.0, vec![10, 20, 30, 40, 50, 60]),
        ];
        let report = analyze(&trace, 3);
        let (unl, opt, lru) = (
            report.unlimited.hit_rate,
            report.belady_finite.hit_rate,
            report.lru_finite.hit_rate,
        );
        assert!(unl >= opt);
        assert!(opt >= lru - 1e-9);
    }
}