Files
xtrain/Cargo.lock
Gahow Wang e3912c2380 model: tiny RoPE+RMSNorm+SwiGLU transformer + overfit test
New crate xtrain-model: a from-scratch decoder built entirely from the
autodiff op set.
- Config (tiny: dim=32, 2 layers, 2 heads, head_dim=16, ffn=64).
- TinyTransformer: embedding -> N x {pre-RMSNorm -> multi-head causal
  attention (RoPE, additive causal mask, per-head SDPA) -> residual;
  pre-RMSNorm -> SwiGLU MLP -> residual} -> final RMSNorm -> LM head.
  x@W weight convention (engine GEMM is plain A@B); dim=n_heads*head_dim.
- params()/zero_grad-able leaves for the optimizer; param_to_host export.
- overfit test: char-level bring-up (embedded text -> vocab -> shifted
  targets), minimal hand-written GD (p -= lr*grad) memorises one fixed
  batch -> loss ~0 + greedy argmax matches targets. End-to-end fwd+bwd
  correctness signal. Gated #![cfg(not(no_cuda))].

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
2026-06-15 16:05:20 +08:00

144 lines
3.3 KiB
TOML

# This file is automatically @generated by Cargo.
# It is not intended for manual editing.
version = 4
[[package]]
name = "cc"
version = "1.2.64"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "dad887fd958be91b5098c0248def011f4523ab786cd411be668777e55063501f"
dependencies = [
"find-msvc-tools",
"shlex",
]
[[package]]
name = "cfg-if"
version = "1.0.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9330f8b2ff13f34540b44e946ef35111825727b38d33286ef986142615121801"
[[package]]
name = "crunchy"
version = "0.2.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "460fbee9c2c2f33933d720630a6a0bac33ba7053db5344fac858d4b8952d77d5"
[[package]]
name = "find-msvc-tools"
version = "0.1.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5baebc0774151f905a1a2cc41989300b1e6fbb29aff0ceffa1064fdd3088d582"
[[package]]
name = "half"
version = "2.7.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6ea2d84b969582b4b1864a92dc5d27cd2b77b622a8d79306834f1be5ba20d84b"
dependencies = [
"cfg-if",
"crunchy",
"zerocopy",
]
[[package]]
name = "proc-macro2"
version = "1.0.106"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8fd00f0bb2e90d81d1044c2b32617f68fcb9fa3bb7640c23e9c748e53fb30934"
dependencies = [
"unicode-ident",
]
[[package]]
name = "quote"
version = "1.0.45"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "41f2619966050689382d2b44f664f4bc593e129785a36d6ee376ddf37259b924"
dependencies = [
"proc-macro2",
]
[[package]]
name = "shlex"
version = "2.0.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f8fadd59c855ef2080decdef8ff161eb6661b86933c9d82e5ba29dc602a55aba"
[[package]]
name = "smallvec"
version = "1.15.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8ed6a63f02c8539c91a8685a86f4099661ba3da017932f6ebbea6de3f0fa7c90"
[[package]]
name = "syn"
version = "2.0.117"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e665b8803e7b1d2a727f4023456bbbbe74da67099c585258af0ad9c5013b9b99"
dependencies = [
"proc-macro2",
"quote",
"unicode-ident",
]
[[package]]
name = "unicode-ident"
version = "1.0.24"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e6e4313cd5fcd3dad5cafa179702e2b244f760991f45397d14d4ebf38247da75"
[[package]]
name = "xtrain-autodiff"
version = "0.1.0"
dependencies = [
"xtrain-cuda",
"xtrain-tensor",
]
[[package]]
name = "xtrain-cuda"
version = "0.1.0"
dependencies = [
"cc",
]
[[package]]
name = "xtrain-model"
version = "0.1.0"
dependencies = [
"xtrain-autodiff",
"xtrain-cuda",
"xtrain-tensor",
]
[[package]]
name = "xtrain-tensor"
version = "0.1.0"
dependencies = [
"half",
"smallvec",
"xtrain-autodiff",
"xtrain-cuda",
]
[[package]]
name = "zerocopy"
version = "0.8.52"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ce1022995ff5ff5d841ad7d994facc23098cd40152f2c1d11cd607c6f530653f"
dependencies = [
"zerocopy-derive",
]
[[package]]
name = "zerocopy-derive"
version = "0.8.52"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1ae7f38b72ec2a254e2b87ef277cf2cd4fb97cbebf944faa6f33354da0867930"
dependencies = [
"proc-macro2",
"quote",
"syn",
]