chore: update ablation and clean configs

This commit is contained in:
2026-04-15 14:48:59 +08:00
parent eaf574cd4e
commit 365ceac3be
15 changed files with 879 additions and 324 deletions

View File

@@ -3,6 +3,7 @@ use clap::{Args, Parser, Subcommand};
use std::path::PathBuf;
use kvcache_simulator::config::{Config, RouterMode};
use kvcache_simulator::replay::ReplayEvictPolicy;
use kvcache_simulator::{driver, oracle, trace::TraceReader};
#[derive(Debug, Parser)]
@@ -74,7 +75,8 @@ enum Cmd {
#[command(flatten)]
overrides: ConfigOverrides,
},
/// Run the same trace under multiple routers and compare summaries.
/// Run the same trace under multiple routers and fixed-placement eviction
/// policies, then compare cache-hit summaries.
Ablate {
#[arg(short, long)]
config: PathBuf,
@@ -85,6 +87,10 @@ enum Cmd {
default_value = "random,least_loaded,least_tokens,ttl_aware,min_pd,cache_load,cache_score,estimated_ttft,prefix_affinity"
)]
routers: String,
/// Comma-separated eviction policies for ablation aggregation.
/// Currently only `lru` is supported.
#[arg(long, default_value = "lru")]
evict_policies: String,
#[command(flatten)]
overrides: ConfigOverrides,
},
@@ -125,8 +131,9 @@ fn main() -> Result<()> {
Cmd::Ablate {
config,
routers,
evict_policies,
overrides,
} => cmd_ablate(&config, &routers, &overrides),
} => cmd_ablate(&config, &routers, &evict_policies, &overrides),
Cmd::Validate { config, overrides } => cmd_validate(&config, &overrides),
Cmd::Oracle {
config,
@@ -134,7 +141,13 @@ fn main() -> Result<()> {
capacity_blocks,
per_instance,
out,
} => cmd_oracle(&config, &overrides, capacity_blocks, per_instance, out.as_deref()),
} => cmd_oracle(
&config,
&overrides,
capacity_blocks,
per_instance,
out.as_deref(),
),
}
}
@@ -151,7 +164,12 @@ fn cmd_run(path: &PathBuf, overrides: &ConfigOverrides) -> Result<()> {
Ok(())
}
fn cmd_ablate(path: &PathBuf, routers: &str, overrides: &ConfigOverrides) -> Result<()> {
fn cmd_ablate(
path: &PathBuf,
routers: &str,
evict_policies: &str,
overrides: &ConfigOverrides,
) -> Result<()> {
let base = load(path, overrides)?;
let modes: Vec<RouterMode> = routers
.split(',')
@@ -160,15 +178,27 @@ fn cmd_ablate(path: &PathBuf, routers: &str, overrides: &ConfigOverrides) -> Res
.map(RouterMode::parse)
.collect::<Result<Vec<_>>>()
.with_context(|| format!("parsing --routers='{routers}'"))?;
let mut all = Vec::new();
for mode in modes {
let mut cfg = base.clone();
cfg.cluster.router.mode = mode;
let sub = mode.as_str().to_string();
eprintln!("[ablate] running router={}", sub);
let out = driver::run(&cfg, Some(&sub))?;
all.push(out.summary);
}
let policies: Vec<ReplayEvictPolicy> = evict_policies
.split(',')
.map(|s| s.trim())
.filter(|s| !s.is_empty())
.map(ReplayEvictPolicy::parse)
.collect::<Result<Vec<_>>>()
.with_context(|| format!("parsing --evict-policies='{evict_policies}'"))?;
eprintln!(
"[ablate] routers={} evict_policies={}",
modes
.iter()
.map(RouterMode::as_str)
.collect::<Vec<_>>()
.join(","),
policies
.iter()
.map(ReplayEvictPolicy::as_str)
.collect::<Vec<_>>()
.join(",")
);
let all = driver::ablate_fixed_placement(&base, &modes, &policies)?;
let agg_path = std::path::Path::new(&base.sim.output_dir).join("ablation.json");
std::fs::create_dir_all(&base.sim.output_dir)?;
std::fs::write(&agg_path, serde_json::to_string_pretty(&all)?)?;
@@ -181,13 +211,25 @@ fn cmd_validate(path: &PathBuf, overrides: &ConfigOverrides) -> Result<()> {
use kvcache_simulator::instance::compute::ComputeModel;
let cfg = load(path, overrides)?;
eprintln!("config OK: {}", cfg.model.name);
eprintln!("mode = {}", if cfg.model.is_arch_mode() { "architecture-derived" } else { "legacy manual" });
eprintln!(
"mode = {}",
if cfg.model.is_arch_mode() {
"architecture-derived"
} else {
"legacy manual"
}
);
let cm = ComputeModel::new(&cfg.model, &cfg.hardware);
eprintln!("compute: {}", cm.describe());
eprintln!("kv_block_bytes = {} ({:.2} MB{})",
eprintln!(
"kv_block_bytes = {} ({:.2} MB{})",
cfg.model.kv_block_bytes(),
cfg.model.kv_block_bytes() as f64 / 1e6,
if cfg.model.mla.is_some() { ", MLA compressed" } else { "" },
if cfg.model.mla.is_some() {
", MLA compressed"
} else {
""
},
);
let block_bytes = cfg.model.kv_block_bytes() as f64;
let hbm_blocks = (cfg.hardware.hbm_bytes / block_bytes) as u64;
@@ -251,7 +293,11 @@ fn cmd_oracle(
capacity,
per_instance_blocks,
cfg.cluster.num_instances,
if per_instance { ", per-instance mode" } else { "" }
if per_instance {
", per-instance mode"
} else {
""
}
);
let result = oracle::analyze(&records, capacity);