chore: update ablation and clean configs
This commit is contained in:
80
src/main.rs
80
src/main.rs
@@ -3,6 +3,7 @@ use clap::{Args, Parser, Subcommand};
|
||||
use std::path::PathBuf;
|
||||
|
||||
use kvcache_simulator::config::{Config, RouterMode};
|
||||
use kvcache_simulator::replay::ReplayEvictPolicy;
|
||||
use kvcache_simulator::{driver, oracle, trace::TraceReader};
|
||||
|
||||
#[derive(Debug, Parser)]
|
||||
@@ -74,7 +75,8 @@ enum Cmd {
|
||||
#[command(flatten)]
|
||||
overrides: ConfigOverrides,
|
||||
},
|
||||
/// Run the same trace under multiple routers and compare summaries.
|
||||
/// Run the same trace under multiple routers and fixed-placement eviction
|
||||
/// policies, then compare cache-hit summaries.
|
||||
Ablate {
|
||||
#[arg(short, long)]
|
||||
config: PathBuf,
|
||||
@@ -85,6 +87,10 @@ enum Cmd {
|
||||
default_value = "random,least_loaded,least_tokens,ttl_aware,min_pd,cache_load,cache_score,estimated_ttft,prefix_affinity"
|
||||
)]
|
||||
routers: String,
|
||||
/// Comma-separated eviction policies for ablation aggregation.
|
||||
/// Currently only `lru` is supported.
|
||||
#[arg(long, default_value = "lru")]
|
||||
evict_policies: String,
|
||||
#[command(flatten)]
|
||||
overrides: ConfigOverrides,
|
||||
},
|
||||
@@ -125,8 +131,9 @@ fn main() -> Result<()> {
|
||||
Cmd::Ablate {
|
||||
config,
|
||||
routers,
|
||||
evict_policies,
|
||||
overrides,
|
||||
} => cmd_ablate(&config, &routers, &overrides),
|
||||
} => cmd_ablate(&config, &routers, &evict_policies, &overrides),
|
||||
Cmd::Validate { config, overrides } => cmd_validate(&config, &overrides),
|
||||
Cmd::Oracle {
|
||||
config,
|
||||
@@ -134,7 +141,13 @@ fn main() -> Result<()> {
|
||||
capacity_blocks,
|
||||
per_instance,
|
||||
out,
|
||||
} => cmd_oracle(&config, &overrides, capacity_blocks, per_instance, out.as_deref()),
|
||||
} => cmd_oracle(
|
||||
&config,
|
||||
&overrides,
|
||||
capacity_blocks,
|
||||
per_instance,
|
||||
out.as_deref(),
|
||||
),
|
||||
}
|
||||
}
|
||||
|
||||
@@ -151,7 +164,12 @@ fn cmd_run(path: &PathBuf, overrides: &ConfigOverrides) -> Result<()> {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn cmd_ablate(path: &PathBuf, routers: &str, overrides: &ConfigOverrides) -> Result<()> {
|
||||
fn cmd_ablate(
|
||||
path: &PathBuf,
|
||||
routers: &str,
|
||||
evict_policies: &str,
|
||||
overrides: &ConfigOverrides,
|
||||
) -> Result<()> {
|
||||
let base = load(path, overrides)?;
|
||||
let modes: Vec<RouterMode> = routers
|
||||
.split(',')
|
||||
@@ -160,15 +178,27 @@ fn cmd_ablate(path: &PathBuf, routers: &str, overrides: &ConfigOverrides) -> Res
|
||||
.map(RouterMode::parse)
|
||||
.collect::<Result<Vec<_>>>()
|
||||
.with_context(|| format!("parsing --routers='{routers}'"))?;
|
||||
let mut all = Vec::new();
|
||||
for mode in modes {
|
||||
let mut cfg = base.clone();
|
||||
cfg.cluster.router.mode = mode;
|
||||
let sub = mode.as_str().to_string();
|
||||
eprintln!("[ablate] running router={}", sub);
|
||||
let out = driver::run(&cfg, Some(&sub))?;
|
||||
all.push(out.summary);
|
||||
}
|
||||
let policies: Vec<ReplayEvictPolicy> = evict_policies
|
||||
.split(',')
|
||||
.map(|s| s.trim())
|
||||
.filter(|s| !s.is_empty())
|
||||
.map(ReplayEvictPolicy::parse)
|
||||
.collect::<Result<Vec<_>>>()
|
||||
.with_context(|| format!("parsing --evict-policies='{evict_policies}'"))?;
|
||||
eprintln!(
|
||||
"[ablate] routers={} evict_policies={}",
|
||||
modes
|
||||
.iter()
|
||||
.map(RouterMode::as_str)
|
||||
.collect::<Vec<_>>()
|
||||
.join(","),
|
||||
policies
|
||||
.iter()
|
||||
.map(ReplayEvictPolicy::as_str)
|
||||
.collect::<Vec<_>>()
|
||||
.join(",")
|
||||
);
|
||||
let all = driver::ablate_fixed_placement(&base, &modes, &policies)?;
|
||||
let agg_path = std::path::Path::new(&base.sim.output_dir).join("ablation.json");
|
||||
std::fs::create_dir_all(&base.sim.output_dir)?;
|
||||
std::fs::write(&agg_path, serde_json::to_string_pretty(&all)?)?;
|
||||
@@ -181,13 +211,25 @@ fn cmd_validate(path: &PathBuf, overrides: &ConfigOverrides) -> Result<()> {
|
||||
use kvcache_simulator::instance::compute::ComputeModel;
|
||||
let cfg = load(path, overrides)?;
|
||||
eprintln!("config OK: {}", cfg.model.name);
|
||||
eprintln!("mode = {}", if cfg.model.is_arch_mode() { "architecture-derived" } else { "legacy manual" });
|
||||
eprintln!(
|
||||
"mode = {}",
|
||||
if cfg.model.is_arch_mode() {
|
||||
"architecture-derived"
|
||||
} else {
|
||||
"legacy manual"
|
||||
}
|
||||
);
|
||||
let cm = ComputeModel::new(&cfg.model, &cfg.hardware);
|
||||
eprintln!("compute: {}", cm.describe());
|
||||
eprintln!("kv_block_bytes = {} ({:.2} MB{})",
|
||||
eprintln!(
|
||||
"kv_block_bytes = {} ({:.2} MB{})",
|
||||
cfg.model.kv_block_bytes(),
|
||||
cfg.model.kv_block_bytes() as f64 / 1e6,
|
||||
if cfg.model.mla.is_some() { ", MLA compressed" } else { "" },
|
||||
if cfg.model.mla.is_some() {
|
||||
", MLA compressed"
|
||||
} else {
|
||||
""
|
||||
},
|
||||
);
|
||||
let block_bytes = cfg.model.kv_block_bytes() as f64;
|
||||
let hbm_blocks = (cfg.hardware.hbm_bytes / block_bytes) as u64;
|
||||
@@ -251,7 +293,11 @@ fn cmd_oracle(
|
||||
capacity,
|
||||
per_instance_blocks,
|
||||
cfg.cluster.num_instances,
|
||||
if per_instance { ", per-instance mode" } else { "" }
|
||||
if per_instance {
|
||||
", per-instance mode"
|
||||
} else {
|
||||
""
|
||||
}
|
||||
);
|
||||
|
||||
let result = oracle::analyze(&records, capacity);
|
||||
|
||||
Reference in New Issue
Block a user