279 lines
9.1 KiB
Rust
279 lines
9.1 KiB
Rust
//! Offline oracle analyzers for upper-bound KV-cache hit rates.
//!
//! Two analyses, both treating the cluster as a single aggregated cache so
//! the result is independent of routing — i.e. they answer the question
//! "what is the best the cluster could possibly do?":
//!
//! 1. **Unlimited capacity**: longest-prefix-match against an unbounded
//!    cache. A block misses only when it, or an earlier block of the same
//!    request, has not appeared in any previous request. Sets the absolute
//!    ceiling.
//!
//! 2. **Belady (offline optimal eviction) at finite capacity**: classic
//!    OPT replacement — evict the cached block whose *next* access is
//!    furthest in the future. Run alongside an LRU baseline at the same
//!    capacity so the gap tells you how much room LRU is leaving.
//!
//! Hit accounting uses prefix-match semantics matching the rest of the
//! simulator: a block at position k in a request counts as a hit only if
//! all positions 0..k are also in the cache.
|
|
|
|
use ahash::{AHashMap, AHashSet};
|
|
use serde::Serialize;
|
|
use std::collections::BinaryHeap;
|
|
|
|
use crate::instance::kv_cache::LruBlocks;
|
|
use crate::trace::RequestRecord;
|
|
|
|
#[derive(Debug, Clone, Serialize)]
|
|
pub struct OracleResult {
|
|
pub num_requests: u64,
|
|
pub total_blocks: u64,
|
|
pub unique_blocks: u64,
|
|
pub unlimited: TierResult,
|
|
pub belady_finite: TierResult,
|
|
pub lru_finite: TierResult,
|
|
}
|
|
|
|
#[derive(Debug, Clone, Serialize, Default)]
|
|
pub struct TierResult {
|
|
pub label: String,
|
|
pub capacity_blocks: u64,
|
|
pub hits: u64,
|
|
pub misses: u64,
|
|
pub hit_rate: f64,
|
|
}
|
|
|
|
impl TierResult {
|
|
fn from_counts(label: &str, capacity_blocks: u64, hits: u64, total: u64) -> Self {
|
|
let misses = total.saturating_sub(hits);
|
|
TierResult {
|
|
label: label.to_string(),
|
|
capacity_blocks,
|
|
hits,
|
|
misses,
|
|
hit_rate: if total == 0 {
|
|
0.0
|
|
} else {
|
|
hits as f64 / total as f64
|
|
},
|
|
}
|
|
}
|
|
}
|
|
|
|
pub fn analyze(records: &[RequestRecord], capacity_blocks: u64) -> OracleResult {
|
|
// total / unique counters
|
|
let total_blocks: u64 = records.iter().map(|r| r.hash_ids.len() as u64).sum();
|
|
let mut unique = AHashSet::new();
|
|
for r in records {
|
|
for &h in &r.hash_ids {
|
|
unique.insert(h);
|
|
}
|
|
}
|
|
|
|
// 1. Unlimited cache
|
|
let unlimited_hits = run_unlimited(records);
|
|
let unlimited = TierResult::from_counts("unlimited", u64::MAX, unlimited_hits, total_blocks);
|
|
|
|
// 2. Precompute next-use index for Belady
|
|
let next_use = build_next_use(records);
|
|
|
|
// 3. Belady at the given capacity
|
|
let belady_hits = run_belady(records, &next_use, capacity_blocks as usize);
|
|
let belady = TierResult::from_counts("belady", capacity_blocks, belady_hits, total_blocks);
|
|
|
|
// 4. LRU baseline at the same capacity
|
|
let lru_hits = run_lru(records, capacity_blocks as usize);
|
|
let lru = TierResult::from_counts("lru", capacity_blocks, lru_hits, total_blocks);
|
|
|
|
OracleResult {
|
|
num_requests: records.len() as u64,
|
|
total_blocks,
|
|
unique_blocks: unique.len() as u64,
|
|
unlimited,
|
|
belady_finite: belady,
|
|
lru_finite: lru,
|
|
}
|
|
}
|
|
|
|
fn run_unlimited(records: &[RequestRecord]) -> u64 {
|
|
let mut seen: AHashSet<u64> = AHashSet::with_capacity(1 << 18);
|
|
let mut hits: u64 = 0;
|
|
for r in records {
|
|
// Longest prefix match against `seen`
|
|
for &h in &r.hash_ids {
|
|
if seen.contains(&h) {
|
|
hits += 1;
|
|
} else {
|
|
break;
|
|
}
|
|
}
|
|
for &h in &r.hash_ids {
|
|
seen.insert(h);
|
|
}
|
|
}
|
|
hits
|
|
}
|
|
|
|
fn run_lru(records: &[RequestRecord], capacity: usize) -> u64 {
|
|
if capacity == 0 {
|
|
return 0;
|
|
}
|
|
let mut cache = LruBlocks::new(capacity);
|
|
let mut hits: u64 = 0;
|
|
let mut evicted = Vec::new();
|
|
for r in records {
|
|
hits += cache.longest_prefix(&r.hash_ids) as u64;
|
|
evicted.clear();
|
|
cache.insert_blocks(&r.hash_ids, &mut evicted);
|
|
}
|
|
hits
|
|
}
|
|
|
|
/// For each (request_idx, position_in_hash_ids) compute the next request
|
|
/// index whose `hash_ids` contains the same block (`u32::MAX` if none).
|
|
fn build_next_use(records: &[RequestRecord]) -> Vec<Vec<u32>> {
|
|
let n = records.len();
|
|
let mut next_use: Vec<Vec<u32>> = Vec::with_capacity(n);
|
|
for r in records {
|
|
next_use.push(vec![u32::MAX; r.hash_ids.len()]);
|
|
}
|
|
let mut last_seen: AHashMap<u64, u32> = AHashMap::with_capacity(1 << 18);
|
|
for i in (0..n).rev() {
|
|
let r = &records[i];
|
|
for (j, &h) in r.hash_ids.iter().enumerate() {
|
|
next_use[i][j] = *last_seen.get(&h).unwrap_or(&u32::MAX);
|
|
}
|
|
for &h in &r.hash_ids {
|
|
last_seen.insert(h, i as u32);
|
|
}
|
|
}
|
|
next_use
|
|
}
|
|
|
|
/// Belady (offline OPT) eviction over the trace.
|
|
///
|
|
/// Implementation: lazy-deletion max-heap keyed by next-use index. Each
|
|
/// cache entry has a version; the heap may contain stale entries from
|
|
/// previous insertions, which we skip on pop.
|
|
fn run_belady(records: &[RequestRecord], next_use: &[Vec<u32>], capacity: usize) -> u64 {
|
|
if capacity == 0 {
|
|
return 0;
|
|
}
|
|
// block_hash -> (current_version, current_next_use)
|
|
let mut in_cache: AHashMap<u64, (u64, u32)> = AHashMap::with_capacity(capacity);
|
|
// (next_use, version, block_hash) — BinaryHeap is max-heap, which is what
|
|
// we want for "evict the entry whose next access is furthest".
|
|
let mut heap: BinaryHeap<(u32, u64, u64)> = BinaryHeap::with_capacity(capacity);
|
|
let mut version: u64 = 0;
|
|
let mut hits: u64 = 0;
|
|
|
|
for (i, r) in records.iter().enumerate() {
|
|
// 1. Longest-prefix hit accounting against current cache.
|
|
for &h in &r.hash_ids {
|
|
if in_cache.contains_key(&h) {
|
|
hits += 1;
|
|
} else {
|
|
break;
|
|
}
|
|
}
|
|
|
|
// 2. Insert / update each block in the request with its new next-use.
|
|
for (j, &h) in r.hash_ids.iter().enumerate() {
|
|
let nu = next_use[i][j];
|
|
if let Some(slot) = in_cache.get_mut(&h) {
|
|
version += 1;
|
|
slot.0 = version;
|
|
slot.1 = nu;
|
|
heap.push((nu, version, h));
|
|
continue;
|
|
}
|
|
// Need to make room?
|
|
if in_cache.len() == capacity {
|
|
// Evict max next_use entry, skipping stale heap entries.
|
|
loop {
|
|
let (nu_top, ver_top, h_top) = match heap.pop() {
|
|
Some(x) => x,
|
|
None => break,
|
|
};
|
|
if let Some(&(cur_ver, cur_nu)) = in_cache.get(&h_top) {
|
|
if cur_ver == ver_top && cur_nu == nu_top {
|
|
in_cache.remove(&h_top);
|
|
break;
|
|
}
|
|
}
|
|
// stale; loop
|
|
}
|
|
}
|
|
version += 1;
|
|
in_cache.insert(h, (version, nu));
|
|
heap.push((nu, version, h));
|
|
}
|
|
}
|
|
|
|
hits
|
|
}
|
|
|
|
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a record whose only interesting content is `hashes`;
    /// the input length is derived as 16 tokens per block.
    fn req(id: u64, t: f64, hashes: Vec<u64>) -> RequestRecord {
        let input_len = (hashes.len() as u32) * 16;
        RequestRecord {
            req_id: id,
            chat_id: id as i64,
            arrival: t,
            input_len,
            output_len: 16,
            hash_ids: hashes,
        }
    }

    /// Turns a list of block lists into requests numbered 0, 1, 2, ...
    /// arriving at t = 0.0, 1.0, 2.0, ...
    fn trace(requests: Vec<Vec<u64>>) -> Vec<RequestRecord> {
        requests
            .into_iter()
            .enumerate()
            .map(|(i, hashes)| req(i as u64, i as f64, hashes))
            .collect()
    }

    #[test]
    fn unlimited_first_occurrence_misses() {
        let recs = trace(vec![vec![1, 2, 3], vec![1, 2, 3, 4], vec![1, 2, 3, 4, 5]]);
        let out = analyze(&recs, 100);

        // 3 + 4 + 5 block references over the five distinct blocks 1..=5.
        assert_eq!(out.total_blocks, 12);
        assert_eq!(out.unique_blocks, 5);

        // Request 0 misses everything, request 1 reuses [1,2,3] before
        // missing 4, request 2 reuses [1,2,3,4] before missing 5: 0 + 3 + 4.
        assert_eq!(out.unlimited.hits, 7);
        let expected_rate = 7.0 / 12.0;
        assert!((out.unlimited.hit_rate - expected_rate).abs() < 1e-9);
    }

    #[test]
    fn belady_beats_lru_when_lru_thrashes() {
        // Capacity 2 with a block that recurs between two alternating
        // others: A B A C A B A C. Belady must never do worse than LRU.
        let pattern: [u64; 8] = [1, 2, 1, 3, 1, 2, 1, 3];
        let recs = trace(pattern.iter().map(|&h| vec![h]).collect());

        let out = analyze(&recs, 2);
        let (belady, lru) = (out.belady_finite.hits, out.lru_finite.hits);
        assert!(
            belady >= lru,
            "belady should be at least as good as lru: belady={} lru={}",
            belady,
            lru
        );
    }

    #[test]
    fn unlimited_is_upper_bound() {
        let recs = trace(vec![
            vec![10, 20, 30],
            vec![10, 20, 30, 40, 50],
            vec![60],
            vec![10, 20, 30, 40, 50, 60],
        ]);
        let out = analyze(&recs, 3);

        // Expected ordering: unlimited >= belady >= lru (small float slack
        // on the second comparison).
        assert!(out.unlimited.hit_rate >= out.belady_finite.hit_rate);
        assert!(out.belady_finite.hit_rate >= out.lru_finite.hit_rate - 1e-9);
    }
}
|