test: resolve real_training corpus default via CARGO_MANIFEST_DIR
Cargo runs tests with the working directory set to the crate directory, so the bare relative default `data/tinystories-valid-3mb.txt` did not resolve. Anchor the default to the repo root via `CARGO_MANIFEST_DIR` (that is, `$CARGO_MANIFEST_DIR/../../data/tinystories-valid-3mb.txt`) so the test runs out of the box; it can still be overridden with `XTRAIN_CORPUS`.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
@@ -46,9 +46,15 @@ fn trains_on_tinystories() {
|
|||||||
std::env::var("XTRAIN_TOKENIZER")
|
std::env::var("XTRAIN_TOKENIZER")
|
||||||
.unwrap_or_else(|_| "/opt/wjh/models/gpt2/tokenizer.json".into()),
|
.unwrap_or_else(|_| "/opt/wjh/models/gpt2/tokenizer.json".into()),
|
||||||
);
|
);
|
||||||
let corpus_path = PathBuf::from(
|
// Default resolves relative to the repo root (cargo runs tests with cwd =
|
||||||
std::env::var("XTRAIN_CORPUS").unwrap_or_else(|_| "data/tinystories-valid-3mb.txt".into()),
|
// crate dir, so `../../data/...` from crates/xtrain-train); override with
|
||||||
);
|
// XTRAIN_CORPUS for any other location.
|
||||||
|
let corpus_path = PathBuf::from(std::env::var("XTRAIN_CORPUS").unwrap_or_else(|_| {
|
||||||
|
format!(
|
||||||
|
"{}/../../data/tinystories-valid-3mb.txt",
|
||||||
|
env!("CARGO_MANIFEST_DIR")
|
||||||
|
)
|
||||||
|
}));
|
||||||
|
|
||||||
let corpus = Corpus::load(&tok_path, &corpus_path);
|
let corpus = Corpus::load(&tok_path, &corpus_path);
|
||||||
println!(
|
println!(
|
||||||
|
|||||||
Reference in New Issue
Block a user