KVCache simulator for LLM serving cluster routing research
Discrete-event simulator for evaluating KV cache-aware routing policies in prefill-disaggregated LLM serving clusters. Models a two-tier KV cache hierarchy (L0 GPU HBM + L1 CPU DRAM) with RDMA/PCIe link contention, architecture-derived roofline compute (MoE, MLA, DSA), and a cluster-wide meta-store for prefix-aware routing decisions. Includes 11 routing policies (random, round_robin, least_loaded, least_tokens, ttl_aware, precise, min_pd, cache_load, cache_score, estimated_ttft, prefix_affinity), HuggingFace config.json auto-parsing, built-in GPU hardware presets (H100/H800/H20/A100/B200), and ablation tooling for systematic policy comparison across real Alibaba serving traces. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
113
src/sim/engine.rs
Normal file
113
src/sim/engine.rs
Normal file
@@ -0,0 +1,113 @@
|
||||
//! Discrete-event engine.
|
||||
//!
|
||||
//! Single-threaded virtual time `f64` seconds. Events are stored in a min-heap
|
||||
//! keyed by `(time, seq)` so equal-time events fire in insertion order.
|
||||
|
||||
use std::cmp::Ordering;
|
||||
use std::collections::BinaryHeap;
|
||||
|
||||
use super::events::Event;
|
||||
|
||||
#[derive(Debug)]
|
||||
struct Slot {
|
||||
time: f64,
|
||||
seq: u64,
|
||||
event: Event,
|
||||
}
|
||||
|
||||
// `Ord` has `Eq` as a supertrait, so `Slot` must opt in explicitly (`f64`
// prevents deriving it). Equality itself is defined by the `PartialEq` impl
// for `Slot`, which compares `time` and `seq` and ignores the event payload.
impl Eq for Slot {}
|
||||
impl PartialEq for Slot {
|
||||
fn eq(&self, other: &Self) -> bool {
|
||||
self.time == other.time && self.seq == other.seq
|
||||
}
|
||||
}
|
||||
impl Ord for Slot {
|
||||
fn cmp(&self, other: &Self) -> Ordering {
|
||||
// Reverse so BinaryHeap acts as a min-heap.
|
||||
other
|
||||
.time
|
||||
.partial_cmp(&self.time)
|
||||
.unwrap_or(Ordering::Equal)
|
||||
.then_with(|| other.seq.cmp(&self.seq))
|
||||
}
|
||||
}
|
||||
impl PartialOrd for Slot {
|
||||
fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
|
||||
Some(self.cmp(other))
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Default)]
|
||||
pub struct EventQueue {
|
||||
heap: BinaryHeap<Slot>,
|
||||
seq: u64,
|
||||
now: f64,
|
||||
}
|
||||
|
||||
impl EventQueue {
|
||||
pub fn new() -> Self {
|
||||
Self::default()
|
||||
}
|
||||
|
||||
pub fn now(&self) -> f64 {
|
||||
self.now
|
||||
}
|
||||
|
||||
pub fn schedule(&mut self, time: f64, event: Event) {
|
||||
let t = time.max(self.now);
|
||||
self.seq += 1;
|
||||
self.heap.push(Slot { time: t, seq: self.seq, event });
|
||||
}
|
||||
|
||||
pub fn pop(&mut self) -> Option<(f64, Event)> {
|
||||
let slot = self.heap.pop()?;
|
||||
if slot.time > self.now {
|
||||
self.now = slot.time;
|
||||
}
|
||||
Some((slot.time, slot.event))
|
||||
}
|
||||
|
||||
pub fn len(&self) -> usize {
|
||||
self.heap.len()
|
||||
}
|
||||
|
||||
pub fn is_empty(&self) -> bool {
|
||||
self.heap.is_empty()
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;
    use crate::types::InstanceId;

    /// Events scheduled out of order are popped in ascending time order.
    #[test]
    fn pops_in_time_order() {
        let mut queue = EventQueue::new();
        for (time, instance) in [(2.0, 0 as InstanceId), (1.0, 1), (1.5, 2)] {
            queue.schedule(time, Event::BatchTick { instance });
        }

        let times: Vec<f64> = (0..3).map(|_| queue.pop().unwrap().0).collect();

        assert!(times[0] <= times[1] && times[1] <= times[2]);
        assert!((times[0] - 1.0).abs() < 1e-12);
        assert!((times[2] - 2.0).abs() < 1e-12);
    }

    /// Events sharing a timestamp are popped in the order they were scheduled.
    #[test]
    fn equal_time_fifo() {
        let mut queue = EventQueue::new();
        for instance in [7, 8] {
            queue.schedule(1.0, Event::BatchTick { instance });
        }

        let first = queue.pop().unwrap().1;
        let second = queue.pop().unwrap().1;

        match (first, second) {
            (Event::BatchTick { instance: a }, Event::BatchTick { instance: b }) => {
                assert_eq!(a, 7);
                assert_eq!(b, 8);
            }
            _ => panic!("wrong events"),
        }
    }
}
|
||||
Reference in New Issue
Block a user