use std::env;

/// CUDA kernel sources compiled into the `xserv_kernels` library.
///
/// Paths are relative to the directory the build script runs in (Cargo runs
/// build scripts from the package root). The order matches the order in which
/// the files are handed to the compiler.
const KERNEL_SOURCES: &[&str] = &[
    "../../csrc/gemm/naive.cu",
    "../../csrc/gemm/tiled.cu",
    "../../csrc/gemm/gemv.cu",
    "../../csrc/normalization/rmsnorm.cu",
    "../../csrc/normalization/layernorm.cu",
    "../../csrc/activation/activations.cu",
    "../../csrc/reduce/softmax.cu",
    "../../csrc/reduce/argmax.cu",
    "../../csrc/embedding/embedding.cu",
    "../../csrc/embedding/rope.cu",
    "../../csrc/attention/causal_mask.cu",
    "../../csrc/embedding/transpose.cu",
    "../../csrc/attention/flash_attention.cu",
    "../../csrc/attention/paged_attention.cu",
    "../../csrc/attention/reshape_and_cache.cu",
    "../../csrc/moe/moe_kernels.cu",
    "../../csrc/quantization/dequant_fp8.cu",
    "../../csrc/quantization/quantize_fp8.cu",
    "../../csrc/quantization/mxfp4_gemm.cu",
];

/// Build script entry point.
///
/// 1. Resolves the CUDA toolkit root from `CUDA_HOME`, then `CUDA_PATH`,
///    falling back to `/usr/local/cuda`.
/// 2. Emits Cargo link directives for `cudart`, `cublas` and `cublasLt`,
///    searching `<cuda root>/lib64`.
/// 3. Compiles every file in [`KERNEL_SOURCES`] with the `cc` crate in CUDA
///    mode into a library named `xserv_kernels`.
fn main() {
    // The toolkit location is derived from these variables, so Cargo must be
    // told to re-run the script when they change. Without this, a cached run
    // keeps emitting the old link-search path after CUDA_HOME/CUDA_PATH is
    // updated.
    println!("cargo:rerun-if-env-changed=CUDA_HOME");
    println!("cargo:rerun-if-env-changed=CUDA_PATH");

    // CUDA_HOME takes precedence over CUDA_PATH; the default install prefix is
    // used when neither is set (or neither is valid Unicode).
    let cuda_path = env::var("CUDA_HOME")
        .or_else(|_| env::var("CUDA_PATH"))
        .unwrap_or_else(|_| "/usr/local/cuda".to_string());

    println!("cargo:rustc-link-search=native={cuda_path}/lib64");
    println!("cargo:rustc-link-lib=dylib=cudart");
    println!("cargo:rustc-link-lib=dylib=cublas");
    println!("cargo:rustc-link-lib=dylib=cublasLt");

    let mut build = cc::Build::new();
    build
        .cuda(true)
        .cudart("shared")
        // Generate code for compute capability 12.0 (sm_120) only.
        .flag("-gencode=arch=compute_120,code=sm_120")
        .include("../../csrc");
    for source in KERNEL_SOURCES {
        build.file(source);
    }
    build.compile("xserv_kernels");

    // Rebuild whenever anything under the kernel source tree changes.
    println!("cargo:rerun-if-changed=../../csrc/");
}