Files
xserv/crates/xserv-kernels/build.rs
Gahow Wang 6035ffdc0b phase 5: naive multi-head attention
- Batched GEMM via cublasGemmStridedBatchedEx
- Causal mask CUDA kernel (F32 + BF16)
- Element-wise scale CUDA kernel (F32 + BF16)
- attention() composing: batched_matmul + scale + causal_mask + softmax
- Fixed to_device/contiguous infinite recursion (GPU contiguous via CPU round-trip)
- 5 attention tests passing (max_err < 3e-7 F32)
- Total: 61 tests passing across all crates

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-05-21 21:17:23 +08:00

30 lines
1.0 KiB
Rust

use std::env;
fn main() {
let cuda_path = env::var("CUDA_HOME")
.or_else(|_| env::var("CUDA_PATH"))
.unwrap_or_else(|_| "/usr/local/cuda".to_string());
println!("cargo:rustc-link-search=native={cuda_path}/lib64");
println!("cargo:rustc-link-lib=dylib=cudart");
println!("cargo:rustc-link-lib=dylib=cublas");
cc::Build::new()
.cuda(true)
.cudart("shared")
.flag("-gencode=arch=compute_120,code=sm_120")
.include("../../csrc")
.file("../../csrc/gemm/naive.cu")
.file("../../csrc/gemm/tiled.cu")
.file("../../csrc/normalization/rmsnorm.cu")
.file("../../csrc/normalization/layernorm.cu")
.file("../../csrc/activation/activations.cu")
.file("../../csrc/reduce/softmax.cu")
.file("../../csrc/embedding/embedding.cu")
.file("../../csrc/embedding/rope.cu")
.file("../../csrc/attention/causal_mask.cu")
.compile("xserv_kernels");
println!("cargo:rerun-if-changed=../../csrc/");
}