New bin export_safetensors: load an xtrain checkpoint, map every param to its HF Qwen3 tensor name, transpose 2D projection weights [in,out]->[out,in] (1D norms + [vocab,dim] embed/lm_head kept), cast to BF16 (xserv's qwen3 forward is BF16-only), and write config.json + model.safetensors + a copy of the gpt2 tokenizer.json. Sized exactly like bin/train.rs. safetensors 0.5 to match xserv. GPU body gated behind not(no_cuda). Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
28 lines
906 B
TOML
28 lines
906 B
TOML
# Manifest for the xtrain-train crate: declares the `train` and
# `export_safetensors` binaries and the crates they are built against.
[package]
name = "xtrain-train"
# `version` and `edition` are inherited from the workspace root's
# [workspace.package] table rather than being set per crate.
version.workspace = true
edition.workspace = true

[dependencies]
# Sibling xtrain crates, resolved by relative path.
xtrain-tensor = { path = "../xtrain-tensor" }
xtrain-autodiff = { path = "../xtrain-autodiff" }
xtrain-model = { path = "../xtrain-model" }
xtrain-optim = { path = "../xtrain-optim" }
xtrain-cuda = { path = "../xtrain-cuda" }
# Reuse xserv's from-scratch GPT-2/Qwen BPE (project decision). This relative
# path resolves on both ~/projects (local) and /opt/wjh/projects (dash5). The
# crate inherits xserv's workspace for its own deps (serde/regex) — Cargo reads
# the target package's workspace, not ours.
xserv-tokenizer = { path = "../../../xserv/crates/xserv-tokenizer" }
# T9 export to xserv: HF Qwen3 safetensors + BF16 weight cast.
# `half` takes its version requirement from [workspace.dependencies].
half.workspace = true
# Kept on the 0.5 series to match the safetensors version xserv uses.
safetensors = "0.5"

[[bin]]
name = "train"
path = "src/bin/train.rs"

[[bin]]
name = "export_safetensors"
path = "src/bin/export_safetensors.rs"