chore
This commit is contained in:
43
src/main.rs
43
src/main.rs
@@ -479,20 +479,39 @@ fn cmd_oracle(
|
|||||||
cfg.sim.trace_path, cfg.sim.max_requests
|
cfg.sim.trace_path, cfg.sim.max_requests
|
||||||
);
|
);
|
||||||
let reader = TraceReader::open(&cfg.sim.trace_path, cfg.sim.max_requests)?;
|
let reader = TraceReader::open(&cfg.sim.trace_path, cfg.sim.max_requests)?;
|
||||||
let mut records: Vec<_> = reader.collect::<Result<Vec<_>, _>>()?;
|
let records: Vec<_> = reader.collect::<Result<Vec<_>, _>>()?;
|
||||||
let raw_count = records.len();
|
|
||||||
driver::apply_input_length_filter(&mut records, &cfg.sim);
|
// Build a count-mask: all records feed the cache, but only records inside
|
||||||
if records.len() != raw_count {
|
// the input-length range contribute to hit/miss accounting. This way a
|
||||||
|
// 128k+ bucket still benefits from prefix blocks populated by shorter
|
||||||
|
// requests, matching the real mixed-workload ceiling.
|
||||||
|
let lo = cfg.sim.input_length_min.unwrap_or(0);
|
||||||
|
let hi = cfg.sim.input_length_max.unwrap_or(u32::MAX);
|
||||||
|
let has_filter = lo > 0 || hi < u32::MAX;
|
||||||
|
let count_mask: Option<Vec<bool>> = if has_filter {
|
||||||
|
let mask: Vec<bool> = records
|
||||||
|
.iter()
|
||||||
|
.map(|r| r.input_len >= lo && r.input_len <= hi)
|
||||||
|
.collect();
|
||||||
|
let counted = mask.iter().filter(|&&v| v).count();
|
||||||
eprintln!(
|
eprintln!(
|
||||||
"[oracle] input_length filter [{}, {}] kept {}/{} requests",
|
"[oracle] input_length filter [{}, {}] counting {}/{} requests \
|
||||||
cfg.sim.input_length_min.unwrap_or(0),
|
(all {} used for cache state)",
|
||||||
cfg.sim
|
lo,
|
||||||
.input_length_max
|
if hi == u32::MAX {
|
||||||
.map_or("∞".to_string(), |v| v.to_string()),
|
"∞".to_string()
|
||||||
|
} else {
|
||||||
|
hi.to_string()
|
||||||
|
},
|
||||||
|
counted,
|
||||||
|
records.len(),
|
||||||
records.len(),
|
records.len(),
|
||||||
raw_count,
|
|
||||||
);
|
);
|
||||||
}
|
Some(mask)
|
||||||
|
} else {
|
||||||
|
None
|
||||||
|
};
|
||||||
|
|
||||||
eprintln!(
|
eprintln!(
|
||||||
"[oracle] loaded {} requests; analyzing with capacity = {} blocks \
|
"[oracle] loaded {} requests; analyzing with capacity = {} blocks \
|
||||||
({} per-instance × {} instances{})",
|
({} per-instance × {} instances{})",
|
||||||
@@ -507,7 +526,7 @@ fn cmd_oracle(
|
|||||||
}
|
}
|
||||||
);
|
);
|
||||||
|
|
||||||
let result = oracle::analyze(&records, capacity);
|
let result = oracle::analyze(&records, capacity, count_mask.as_deref());
|
||||||
let json = serde_json::to_string_pretty(&result)?;
|
let json = serde_json::to_string_pretty(&result)?;
|
||||||
println!("{}", json);
|
println!("{}", json);
|
||||||
|
|
||||||
|
|||||||
118
src/oracle.rs
118
src/oracle.rs
@@ -60,33 +60,66 @@ impl TierResult {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn analyze(records: &[RequestRecord], capacity_blocks: u64) -> OracleResult {
|
/// Run the oracle analysis over `records`.
|
||||||
// total / unique counters
|
///
|
||||||
let total_blocks: u64 = records.iter().map(|r| r.hash_ids.len() as u64).sum();
|
/// When `count_mask` is `Some`, **all** records still feed the caches (so the
|
||||||
|
/// cache state reflects the full workload), but only records where
|
||||||
|
/// `count_mask[i]` is `true` contribute to hit / miss / total accounting.
|
||||||
|
/// This is the correct way to answer "what is the theoretical hit-rate for a
|
||||||
|
/// particular input-length bucket within a mixed workload?" — the cache sees
|
||||||
|
/// every request, but the metric only measures the bucket of interest.
|
||||||
|
///
|
||||||
|
/// When `count_mask` is `None`, every record is counted (original behaviour).
|
||||||
|
pub fn analyze(
|
||||||
|
records: &[RequestRecord],
|
||||||
|
capacity_blocks: u64,
|
||||||
|
count_mask: Option<&[bool]>,
|
||||||
|
) -> OracleResult {
|
||||||
|
// Build a default all-true mask when none is supplied.
|
||||||
|
let default_mask;
|
||||||
|
let mask: &[bool] = match count_mask {
|
||||||
|
Some(m) => {
|
||||||
|
assert_eq!(m.len(), records.len());
|
||||||
|
m
|
||||||
|
}
|
||||||
|
None => {
|
||||||
|
default_mask = vec![true; records.len()];
|
||||||
|
&default_mask
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
// total / unique counters — only for counted records
|
||||||
|
let mut total_blocks: u64 = 0;
|
||||||
let mut unique = AHashSet::new();
|
let mut unique = AHashSet::new();
|
||||||
for r in records {
|
let mut num_requests: u64 = 0;
|
||||||
|
for (i, r) in records.iter().enumerate() {
|
||||||
|
if mask[i] {
|
||||||
|
total_blocks += r.hash_ids.len() as u64;
|
||||||
|
num_requests += 1;
|
||||||
for &h in &r.hash_ids {
|
for &h in &r.hash_ids {
|
||||||
unique.insert(h);
|
unique.insert(h);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// 1. Unlimited cache
|
// 1. Unlimited cache
|
||||||
let unlimited_hits = run_unlimited(records);
|
let unlimited_hits = run_unlimited(records, mask);
|
||||||
let unlimited = TierResult::from_counts("unlimited", u64::MAX, unlimited_hits, total_blocks);
|
let unlimited = TierResult::from_counts("unlimited", u64::MAX, unlimited_hits, total_blocks);
|
||||||
|
|
||||||
// 2. Precompute next-use index for Belady
|
// 2. Precompute next-use index for Belady (over ALL records — eviction
|
||||||
|
// decisions must consider future accesses from the full workload)
|
||||||
let next_use = build_next_use(records);
|
let next_use = build_next_use(records);
|
||||||
|
|
||||||
// 3. Belady at the given capacity
|
// 3. Belady at the given capacity
|
||||||
let belady_hits = run_belady(records, &next_use, capacity_blocks as usize);
|
let belady_hits = run_belady(records, &next_use, capacity_blocks as usize, mask);
|
||||||
let belady = TierResult::from_counts("belady", capacity_blocks, belady_hits, total_blocks);
|
let belady = TierResult::from_counts("belady", capacity_blocks, belady_hits, total_blocks);
|
||||||
|
|
||||||
// 4. LRU baseline at the same capacity
|
// 4. LRU baseline at the same capacity
|
||||||
let lru_hits = run_lru(records, capacity_blocks as usize);
|
let lru_hits = run_lru(records, capacity_blocks as usize, mask);
|
||||||
let lru = TierResult::from_counts("lru", capacity_blocks, lru_hits, total_blocks);
|
let lru = TierResult::from_counts("lru", capacity_blocks, lru_hits, total_blocks);
|
||||||
|
|
||||||
OracleResult {
|
OracleResult {
|
||||||
num_requests: records.len() as u64,
|
num_requests,
|
||||||
total_blocks,
|
total_blocks,
|
||||||
unique_blocks: unique.len() as u64,
|
unique_blocks: unique.len() as u64,
|
||||||
unlimited,
|
unlimited,
|
||||||
@@ -95,11 +128,12 @@ pub fn analyze(records: &[RequestRecord], capacity_blocks: u64) -> OracleResult
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn run_unlimited(records: &[RequestRecord]) -> u64 {
|
fn run_unlimited(records: &[RequestRecord], mask: &[bool]) -> u64 {
|
||||||
let mut seen: AHashSet<u64> = AHashSet::with_capacity(1 << 18);
|
let mut seen: AHashSet<u64> = AHashSet::with_capacity(1 << 18);
|
||||||
let mut hits: u64 = 0;
|
let mut hits: u64 = 0;
|
||||||
for r in records {
|
for (i, r) in records.iter().enumerate() {
|
||||||
// Longest prefix match against `seen`
|
// Longest prefix match against `seen` — hits are only counted when mask[i] is true
|
||||||
|
if mask[i] {
|
||||||
for &h in &r.hash_ids {
|
for &h in &r.hash_ids {
|
||||||
if seen.contains(&h) {
|
if seen.contains(&h) {
|
||||||
hits += 1;
|
hits += 1;
|
||||||
@@ -107,6 +141,8 @@ fn run_unlimited(records: &[RequestRecord]) -> u64 {
|
|||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
// Always populate the cache so all requests contribute to cache state
|
||||||
for &h in &r.hash_ids {
|
for &h in &r.hash_ids {
|
||||||
seen.insert(h);
|
seen.insert(h);
|
||||||
}
|
}
|
||||||
@@ -114,15 +150,20 @@ fn run_unlimited(records: &[RequestRecord]) -> u64 {
|
|||||||
hits
|
hits
|
||||||
}
|
}
|
||||||
|
|
||||||
fn run_lru(records: &[RequestRecord], capacity: usize) -> u64 {
|
fn run_lru(records: &[RequestRecord], capacity: usize, mask: &[bool]) -> u64 {
|
||||||
if capacity == 0 {
|
if capacity == 0 {
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
let mut cache = LruBlocks::new(capacity);
|
let mut cache = LruBlocks::new(capacity);
|
||||||
let mut hits: u64 = 0;
|
let mut hits: u64 = 0;
|
||||||
let mut evicted = Vec::new();
|
let mut evicted = Vec::new();
|
||||||
for r in records {
|
for (i, r) in records.iter().enumerate() {
|
||||||
hits += cache.longest_prefix(&r.hash_ids) as u64;
|
// Always touch the cache (longest_prefix updates LRU recency) so that
|
||||||
|
// the eviction order reflects the real mixed workload.
|
||||||
|
let prefix_len = cache.longest_prefix(&r.hash_ids) as u64;
|
||||||
|
if mask[i] {
|
||||||
|
hits += prefix_len;
|
||||||
|
}
|
||||||
evicted.clear();
|
evicted.clear();
|
||||||
cache.insert_blocks(&r.hash_ids, &mut evicted);
|
cache.insert_blocks(&r.hash_ids, &mut evicted);
|
||||||
}
|
}
|
||||||
@@ -155,7 +196,7 @@ fn build_next_use(records: &[RequestRecord]) -> Vec<Vec<u32>> {
|
|||||||
/// Implementation: lazy-deletion max-heap keyed by next-use index. Each
|
/// Implementation: lazy-deletion max-heap keyed by next-use index. Each
|
||||||
/// cache entry has a version; the heap may contain stale entries from
|
/// cache entry has a version; the heap may contain stale entries from
|
||||||
/// previous insertions, which we skip on pop.
|
/// previous insertions, which we skip on pop.
|
||||||
fn run_belady(records: &[RequestRecord], next_use: &[Vec<u32>], capacity: usize) -> u64 {
|
fn run_belady(records: &[RequestRecord], next_use: &[Vec<u32>], capacity: usize, mask: &[bool]) -> u64 {
|
||||||
if capacity == 0 {
|
if capacity == 0 {
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
@@ -168,7 +209,8 @@ fn run_belady(records: &[RequestRecord], next_use: &[Vec<u32>], capacity: usize)
|
|||||||
let mut hits: u64 = 0;
|
let mut hits: u64 = 0;
|
||||||
|
|
||||||
for (i, r) in records.iter().enumerate() {
|
for (i, r) in records.iter().enumerate() {
|
||||||
// 1. Longest-prefix hit accounting against current cache.
|
// 1. Longest-prefix hit accounting — hits are only counted when mask[i] is true.
|
||||||
|
if mask[i] {
|
||||||
for &h in &r.hash_ids {
|
for &h in &r.hash_ids {
|
||||||
if in_cache.contains_key(&h) {
|
if in_cache.contains_key(&h) {
|
||||||
hits += 1;
|
hits += 1;
|
||||||
@@ -176,8 +218,10 @@ fn run_belady(records: &[RequestRecord], next_use: &[Vec<u32>], capacity: usize)
|
|||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// 2. Insert / update each block in the request with its new next-use.
|
// 2. Insert / update each block in the request with its new next-use.
|
||||||
|
// Always executed so the cache reflects the full workload.
|
||||||
for (j, &h) in r.hash_ids.iter().enumerate() {
|
for (j, &h) in r.hash_ids.iter().enumerate() {
|
||||||
let nu = next_use[i][j];
|
let nu = next_use[i][j];
|
||||||
if let Some(slot) = in_cache.get_mut(&h) {
|
if let Some(slot) = in_cache.get_mut(&h) {
|
||||||
@@ -237,7 +281,7 @@ mod tests {
|
|||||||
req(1, 1.0, vec![1, 2, 3, 4]),
|
req(1, 1.0, vec![1, 2, 3, 4]),
|
||||||
req(2, 2.0, vec![1, 2, 3, 4, 5]),
|
req(2, 2.0, vec![1, 2, 3, 4, 5]),
|
||||||
];
|
];
|
||||||
let out = analyze(&recs, 100);
|
let out = analyze(&recs, 100, None);
|
||||||
// total = 3 + 4 + 5 = 12
|
// total = 3 + 4 + 5 = 12
|
||||||
assert_eq!(out.total_blocks, 12);
|
assert_eq!(out.total_blocks, 12);
|
||||||
// unique = {1,2,3,4,5} = 5
|
// unique = {1,2,3,4,5} = 5
|
||||||
@@ -256,7 +300,7 @@ mod tests {
|
|||||||
for (i, &h) in pattern.iter().enumerate() {
|
for (i, &h) in pattern.iter().enumerate() {
|
||||||
recs.push(req(i as u64, i as f64, vec![h]));
|
recs.push(req(i as u64, i as f64, vec![h]));
|
||||||
}
|
}
|
||||||
let out = analyze(&recs, 2);
|
let out = analyze(&recs, 2, None);
|
||||||
assert!(
|
assert!(
|
||||||
out.belady_finite.hits >= out.lru_finite.hits,
|
out.belady_finite.hits >= out.lru_finite.hits,
|
||||||
"belady should be at least as good as lru: belady={} lru={}",
|
"belady should be at least as good as lru: belady={} lru={}",
|
||||||
@@ -273,8 +317,42 @@ mod tests {
|
|||||||
req(2, 2.0, vec![60]),
|
req(2, 2.0, vec![60]),
|
||||||
req(3, 3.0, vec![10, 20, 30, 40, 50, 60]),
|
req(3, 3.0, vec![10, 20, 30, 40, 50, 60]),
|
||||||
];
|
];
|
||||||
let out = analyze(&recs, 3);
|
let out = analyze(&recs, 3, None);
|
||||||
assert!(out.unlimited.hit_rate >= out.belady_finite.hit_rate);
|
assert!(out.unlimited.hit_rate >= out.belady_finite.hit_rate);
|
||||||
assert!(out.belady_finite.hit_rate >= out.lru_finite.hit_rate - 1e-9);
|
assert!(out.belady_finite.hit_rate >= out.lru_finite.hit_rate - 1e-9);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn count_mask_filters_accounting_not_cache() {
|
||||||
|
// req 0 populates blocks [1,2,3] but is not counted.
|
||||||
|
// req 1 has prefix [1,2,3,4] — the first 3 blocks are cache hits
|
||||||
|
// because req 0 populated them, even though req 0 is masked out.
|
||||||
|
let recs = vec![
|
||||||
|
req(0, 0.0, vec![1, 2, 3]),
|
||||||
|
req(1, 1.0, vec![1, 2, 3, 4]),
|
||||||
|
];
|
||||||
|
let mask = vec![false, true];
|
||||||
|
let out = analyze(&recs, 100, Some(&mask));
|
||||||
|
// Only req 1 is counted: total = 4, hits = 3 (prefix [1,2,3] hit)
|
||||||
|
assert_eq!(out.num_requests, 1);
|
||||||
|
assert_eq!(out.total_blocks, 4);
|
||||||
|
assert_eq!(out.unlimited.hits, 3);
|
||||||
|
assert!((out.unlimited.hit_rate - 3.0 / 4.0).abs() < 1e-9);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn count_mask_none_matches_all_true() {
|
||||||
|
let recs = vec![
|
||||||
|
req(0, 0.0, vec![1, 2, 3]),
|
||||||
|
req(1, 1.0, vec![1, 2, 3, 4]),
|
||||||
|
req(2, 2.0, vec![1, 2, 3, 4, 5]),
|
||||||
|
];
|
||||||
|
let out_none = analyze(&recs, 10, None);
|
||||||
|
let all_true = vec![true; recs.len()];
|
||||||
|
let out_all = analyze(&recs, 10, Some(&all_true));
|
||||||
|
assert_eq!(out_none.unlimited.hits, out_all.unlimited.hits);
|
||||||
|
assert_eq!(out_none.belady_finite.hits, out_all.belady_finite.hits);
|
||||||
|
assert_eq!(out_none.lru_finite.hits, out_all.lru_finite.hits);
|
||||||
|
assert_eq!(out_none.total_blocks, out_all.total_blocks);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user