- unsafe extern "C" blocks (Rust 2024 requirement) - unsafe blocks inside unsafe fn bodies - Use cudaMemGetInfo for accurate GPU memory reporting - Remove cc "cuda" feature (doesn't exist, built-in) - All 12 tests pass on RTX 5090 (CC 12.0, 170 SMs, 32GB) Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
74 lines
2.2 KiB
Rust
74 lines
2.2 KiB
Rust
use std::ffi::c_void;
|
|
use std::os::raw::c_char;
|
|
|
|
/// Opaque handle to a CUDA stream, passed to and from the runtime as a raw pointer.
///
/// The pointee is owned by the CUDA runtime and must never be dereferenced from Rust.
pub type CudaStream = *mut c_void;

/// Opaque handle to a CUDA event, passed to and from the runtime as a raw pointer.
///
/// The pointee is owned by the CUDA runtime and must never be dereferenced from Rust.
pub type CudaEvent = *mut c_void;
|
|
|
|
/// Copy direction host -> device (`cudaMemcpyHostToDevice` in `cudaMemcpyKind`).
pub const CUDA_MEMCPY_H2D: i32 = 1;

/// Copy direction device -> host (`cudaMemcpyDeviceToHost` in `cudaMemcpyKind`).
pub const CUDA_MEMCPY_D2H: i32 = 2;

/// Copy direction device -> device (`cudaMemcpyDeviceToDevice` in `cudaMemcpyKind`).
pub const CUDA_MEMCPY_D2D: i32 = 3;
|
|
|
|
/// Status code returned by runtime calls that completed without error (`cudaSuccess`).
pub const CUDA_SUCCESS: i32 = 0;

/// Status code returned when an allocation could not be satisfied
/// (`cudaErrorMemoryAllocation`).
pub const CUDA_ERROR_OUT_OF_MEMORY: i32 = 2;
|
|
|
|
/// Leading portion of the CUDA runtime's `cudaDeviceProp`, the out-parameter of
/// `cudaGetDeviceProperties`.
///
/// Only the first fields of the C struct are declared individually; everything after
/// `minor` is covered by the opaque `_pad` tail so the runtime has room to write the
/// rest of the structure.
///
/// The declared prefix follows the CUDA 10.0 - 12.x header layout. That layout places
/// `uuid`, `luid` and `luidDeviceNodeMask` between `name` and `totalGlobalMem`, and
/// `memPitch` between `warpSize` and `maxThreadsPerBlock`. Leaving any of them out
/// shifts every later field, so values such as `total_global_mem`, `major` and `minor`
/// would be read from the wrong offsets.
///
/// NOTE(review): newer toolkits (CUDA 13+) are believed to have removed some
/// deprecated members such as `clockRate`; confirm the layout against the headers of
/// the toolkit this crate links with.
#[repr(C)]
pub struct CudaDeviceProp {
    /// NUL-terminated ASCII device name.
    pub name: [c_char; 256],
    /// 16-byte device UUID (`cudaUUID_t`).
    pub uuid: [u8; 16],
    /// 8-byte locally unique identifier (`luid` in the C header).
    pub luid: [c_char; 8],
    /// Device node mask accompanying `luid` (`luidDeviceNodeMask`).
    pub luid_device_node_mask: u32,
    /// Global memory available on the device, in bytes.
    pub total_global_mem: usize,
    /// Shared memory available per block, in bytes.
    pub shared_mem_per_block: usize,
    /// 32-bit registers available per block.
    pub regs_per_block: i32,
    /// Warp size, in threads.
    pub warp_size: i32,
    /// Maximum pitch allowed by memory copies, in bytes (`memPitch`).
    pub mem_pitch: usize,
    /// Maximum number of threads per block.
    pub max_threads_per_block: i32,
    /// Maximum size of each dimension of a block.
    pub max_threads_dim: [i32; 3],
    /// Maximum size of each dimension of a grid.
    pub max_grid_size: [i32; 3],
    /// Clock frequency in kilohertz.
    pub clock_rate: i32,
    /// Constant memory available on the device, in bytes.
    pub total_const_mem: usize,
    /// Major compute capability.
    pub major: i32,
    /// Minor compute capability.
    pub minor: i32,
    // The C struct has many more fields (it is roughly 1 KB in total). They are not
    // read from Rust, so they are represented by a generously oversized opaque tail
    // that the runtime can safely write into.
    _pad: [u8; 4096],
}
|
|
|
|
unsafe extern "C" {
|
|
// --- Device ---
|
|
pub fn cudaGetDeviceCount(count: *mut i32) -> i32;
|
|
pub fn cudaSetDevice(device: i32) -> i32;
|
|
pub fn cudaGetDevice(device: *mut i32) -> i32;
|
|
pub fn cudaGetDeviceProperties(prop: *mut CudaDeviceProp, device: i32) -> i32;
|
|
pub fn cudaDeviceSynchronize() -> i32;
|
|
|
|
// --- Memory ---
|
|
pub fn cudaMalloc(devptr: *mut *mut u8, size: usize) -> i32;
|
|
pub fn cudaFree(devptr: *mut u8) -> i32;
|
|
pub fn cudaMallocHost(ptr: *mut *mut u8, size: usize) -> i32;
|
|
pub fn cudaFreeHost(ptr: *mut u8) -> i32;
|
|
pub fn cudaMemcpy(dst: *mut u8, src: *const u8, count: usize, kind: i32) -> i32;
|
|
pub fn cudaMemcpyAsync(
|
|
dst: *mut u8,
|
|
src: *const u8,
|
|
count: usize,
|
|
kind: i32,
|
|
stream: CudaStream,
|
|
) -> i32;
|
|
pub fn cudaMemset(devptr: *mut u8, value: i32, count: usize) -> i32;
|
|
|
|
// --- Stream ---
|
|
pub fn cudaStreamCreate(stream: *mut CudaStream) -> i32;
|
|
pub fn cudaStreamDestroy(stream: CudaStream) -> i32;
|
|
pub fn cudaStreamSynchronize(stream: CudaStream) -> i32;
|
|
|
|
// --- Error ---
|
|
pub fn cudaGetLastError() -> i32;
|
|
pub fn cudaGetErrorString(error: i32) -> *const c_char;
|
|
|
|
// --- Our test kernel ---
|
|
pub fn launch_vecadd_f32(
|
|
a: *const f32,
|
|
b: *const f32,
|
|
c: *mut f32,
|
|
n: i32,
|
|
stream: CudaStream,
|
|
);
|
|
}
|