Files
kvcache-simulator/src/trace.rs

110 lines
3.2 KiB
Rust

//! Streaming JSONL reader for the qwen-bailian trace format.
//!
//! Schema (per upstream README):
//! chat_id: i64
//! parent_chat_id: i64 (-1 = root)
//! timestamp: f64 (seconds since trace start)
//! input_length: i64
//! output_length: i64
//! type: string (text/search/image/file)
//! turn: i64
//! hash_ids: [i64] (16-token blocks, salted SipHash)
use anyhow::{Context, Result};
use serde::Deserialize;
use std::fs::File;
use std::io::{BufRead, BufReader};
use std::path::Path;
/// Deserialization target mirroring one JSON line of the trace.
///
/// The container-level `#[serde(default)]` makes every field optional: a key
/// that is absent from a line is filled from this struct's derived `Default`
/// (0, 0.0, or an empty vector). Keys that are not declared here, such as the
/// schema's `type`, are not captured.
///
/// NOTE(review): a line without `parent_chat_id` therefore deserializes to 0,
/// not the -1 root marker described in the module docs — confirm this is the
/// intended fallback.
#[derive(Debug, Clone, Default, Deserialize)]
#[serde(default)]
struct RawRecord {
    chat_id: i64,
    parent_chat_id: i64,
    timestamp: f64,
    input_length: i64,
    output_length: i64,
    turn: i64,
    hash_ids: Vec<i64>,
}
/// One request as yielded by the `TraceReader` iterator.
///
/// Each value is built from a single deserialized trace line; field types are
/// narrowed or converted from the raw `i64`/`f64` JSON values as noted below.
#[derive(Debug, Clone)]
pub struct RequestRecord {
/// Sequential id assigned by the reader, starting at 0 and incremented once
/// per successfully parsed line.
pub req_id: u64,
/// `chat_id` copied unchanged from the trace line.
pub chat_id: i64,
/// `parent_chat_id` copied unchanged from the trace line (the module-level
/// schema documents -1 as "root").
pub parent_chat_id: i64,
/// `turn` copied unchanged from the trace line.
pub turn: i64,
/// The trace line's `timestamp` (seconds since trace start per the
/// module-level schema).
pub arrival: f64,
/// The trace line's `input_length`; negative values are stored as 0.
pub input_len: u32,
/// The trace line's `output_length`; negative values are stored as 0.
pub output_len: u32,
/// The trace line's `hash_ids`, each `i64` cast to `u64` (a negative value
/// keeps its two's-complement bit pattern).
pub hash_ids: Vec<u64>,
}
/// Streaming reader over a JSONL trace file.
///
/// Created with `TraceReader::open`; iterating it yields one
/// `Result<RequestRecord>` per non-blank line.
pub struct TraceReader {
/// Buffered handle on the trace file.
inner: BufReader<File>,
/// Id handed to the next successfully parsed record; equals the number of
/// records yielded so far.
next_id: u64,
/// Line buffer that is cleared and reused for every read instead of
/// allocating a fresh `String` per line.
line_buf: String,
/// Optional cap on the number of records to yield; `None` reads until end
/// of file.
max_requests: Option<u64>,
}
impl TraceReader {
pub fn open<P: AsRef<Path>>(path: P, max_requests: Option<u64>) -> Result<Self> {
let path = path.as_ref();
let f = File::open(path).with_context(|| format!("opening trace {}", path.display()))?;
Ok(Self {
inner: BufReader::with_capacity(1 << 20, f),
next_id: 0,
line_buf: String::with_capacity(4096),
max_requests,
})
}
}
impl Iterator for TraceReader {
    type Item = Result<RequestRecord>;

    /// Reads lines until one parses into a record, then yields it.
    ///
    /// * Returns `None` once `max_requests` records have been yielded, or at
    ///   end of file.
    /// * Lines that are empty after trimming whitespace are skipped.
    /// * An I/O error or a JSON parse error is yielded as `Some(Err(..))`; a
    ///   failed line does not consume a request id.
    /// * `input_length` / `output_length` are saturated into the `u32` range:
    ///   negative values become 0 and values above `u32::MAX` become
    ///   `u32::MAX` (a bare `as u32` cast would silently wrap them).
    fn next(&mut self) -> Option<Self::Item> {
        if let Some(cap) = self.max_requests {
            if self.next_id >= cap {
                return None;
            }
        }

        // Clamp into u32's range first so the final cast is lossless.
        let to_len = |v: i64| v.clamp(0, i64::from(u32::MAX)) as u32;

        loop {
            self.line_buf.clear();
            match self.inner.read_line(&mut self.line_buf) {
                // Zero bytes read means end of file.
                Ok(0) => return None,
                Ok(_) => {}
                Err(e) => return Some(Err(e.into())),
            }

            let trimmed = self.line_buf.trim();
            if trimmed.is_empty() {
                continue;
            }

            let raw: RawRecord = match serde_json::from_str(trimmed) {
                Ok(r) => r,
                Err(e) => return Some(Err(anyhow::anyhow!("trace parse: {e}"))),
            };

            // Ids are only handed out to lines that parsed successfully.
            let id = self.next_id;
            self.next_id += 1;

            return Some(Ok(RequestRecord {
                req_id: id,
                chat_id: raw.chat_id,
                parent_chat_id: raw.parent_chat_id,
                turn: raw.turn,
                arrival: raw.timestamp,
                input_len: to_len(raw.input_length),
                output_len: to_len(raw.output_length),
                // Hash ids are opaque bit patterns; reinterpret i64 as u64
                // rather than clamping so negative values stay distinct.
                hash_ids: raw.hash_ids.into_iter().map(|h| h as u64).collect(),
            }));
        }
    }
}