Files
xserv/crates/xserv-cuda/src/ffi.rs
Gahow Wang c8f7bc0c3c phase 0+1: fix Rust 2024 edition compat + memory query
- unsafe extern "C" blocks (Rust 2024 requirement)
- unsafe blocks inside unsafe fn bodies
- Use cudaMemGetInfo for accurate GPU memory reporting
- Remove cc "cuda" feature (doesn't exist, built-in)
- All 12 tests pass on RTX 5090 (CC 12.0, 170 SMs, 32GB)

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-05-21 19:40:49 +08:00

74 lines
2.2 KiB
Rust

use std::ffi::c_void;
use std::os::raw::c_char;
/// Raw, untyped handle used wherever the bindings below take or return a CUDA
/// stream (see `cudaStreamCreate` / `cudaStreamDestroy`). The pointee is never
/// dereferenced from Rust; the value is only passed back to the CUDA runtime.
pub type CudaStream = *mut c_void;
/// Raw, untyped handle for a CUDA event. No event functions are declared in the
/// visible bindings, so this alias is presumably used by code elsewhere.
pub type CudaEvent = *mut c_void;
// --- Status codes returned by the runtime calls (`i32` return values) ---

/// Status value reported by a runtime call that completed without error.
pub const CUDA_SUCCESS: i32 = 0;
/// Status value reported when an allocation could not be satisfied.
pub const CUDA_ERROR_OUT_OF_MEMORY: i32 = 2;

// --- Copy directions, passed as the `kind` argument of `cudaMemcpy*` ---

/// Copy from host memory to device memory.
pub const CUDA_MEMCPY_H2D: i32 = 1;
/// Copy from device memory to host memory.
pub const CUDA_MEMCPY_D2H: i32 = 2;
/// Copy between two device memory regions.
pub const CUDA_MEMCPY_D2D: i32 = 3;
/// Leading fields of the CUDA runtime's `cudaDeviceProp`, filled in by
/// `cudaGetDeviceProperties`.
///
/// Only a prefix of the C struct is declared. Because the struct is `#[repr(C)]`
/// and is written by the runtime through a raw pointer, every declared field must
/// sit at exactly the offset the C header gives it. The previous declaration
/// omitted `uuid`, `luid`, `luidDeviceNodeMask` and `memPitch`, so every field
/// after `name` was read from the wrong offset; they are declared here so the
/// prefix matches the CUDA 10-12 layout.
///
/// NOTE(review): CUDA 13 is believed to drop `clockRate` from this struct, which
/// would shift `total_const_mem`, `major` and `minor` by one `int`. Confirm
/// against the toolkit's `driver_types.h`, or query those values with
/// `cudaDeviceGetAttribute` instead.
#[repr(C)]
pub struct CudaDeviceProp {
    /// NUL-terminated device name (`char name[256]`).
    pub name: [c_char; 256],
    /// 16-byte device identifier (`cudaUUID_t uuid`).
    pub uuid: [u8; 16],
    /// 8-byte locally unique identifier (`char luid[8]`).
    pub luid: [u8; 8],
    /// `unsigned int luidDeviceNodeMask`.
    pub luid_device_node_mask: u32,
    /// `size_t totalGlobalMem`, in bytes.
    pub total_global_mem: usize,
    /// `size_t sharedMemPerBlock`, in bytes.
    pub shared_mem_per_block: usize,
    /// `int regsPerBlock`.
    pub regs_per_block: i32,
    /// `int warpSize`.
    pub warp_size: i32,
    /// `size_t memPitch`, in bytes.
    pub mem_pitch: usize,
    /// `int maxThreadsPerBlock`.
    pub max_threads_per_block: i32,
    /// `int maxThreadsDim[3]`.
    pub max_threads_dim: [i32; 3],
    /// `int maxGridSize[3]`.
    pub max_grid_size: [i32; 3],
    /// `int clockRate` (see the CUDA 13 note on the struct).
    pub clock_rate: i32,
    /// `size_t totalConstMem`, in bytes.
    pub total_const_mem: usize,
    /// `int major`: compute capability major version.
    pub major: i32,
    /// `int minor`: compute capability minor version.
    pub minor: i32,
    // The C struct continues with many more fields (about 1 KiB in total) that
    // are not read from Rust. The tail is deliberately oversized so the runtime,
    // which writes the full C struct through the pointer, stays inside this
    // allocation.
    _pad: [u8; 4096],
}
// Raw bindings to the CUDA runtime plus the project's own test kernel launcher.
//
// Every item in this block is `unsafe` to call: the caller is responsible for
// passing valid pointers and handles. The CUDA runtime functions that return
// `i32` report a status code, where `CUDA_SUCCESS` (0) means the call succeeded.
unsafe extern "C" {
    // ------------------------------------------------------------------
    // Device
    // ------------------------------------------------------------------

    /// Writes the number of available devices to `count`.
    pub fn cudaGetDeviceCount(count: *mut i32) -> i32;

    /// Selects `device` as the current device for the calling thread.
    pub fn cudaSetDevice(device: i32) -> i32;

    /// Writes the index of the current device to `device`.
    pub fn cudaGetDevice(device: *mut i32) -> i32;

    /// Fills `prop` with the properties of `device`. `prop` must point to
    /// storage at least as large as the runtime's own `cudaDeviceProp`.
    pub fn cudaGetDeviceProperties(prop: *mut CudaDeviceProp, device: i32) -> i32;

    /// Blocks until all previously issued work on the current device is done.
    pub fn cudaDeviceSynchronize() -> i32;

    // ------------------------------------------------------------------
    // Memory
    // ------------------------------------------------------------------

    /// Allocates `size` bytes of device memory and stores the address in `devptr`.
    pub fn cudaMalloc(devptr: *mut *mut u8, size: usize) -> i32;

    /// Releases device memory obtained from `cudaMalloc`.
    pub fn cudaFree(devptr: *mut u8) -> i32;

    /// Allocates `size` bytes of page-locked host memory and stores the address in `ptr`.
    pub fn cudaMallocHost(ptr: *mut *mut u8, size: usize) -> i32;

    /// Releases host memory obtained from `cudaMallocHost`.
    pub fn cudaFreeHost(ptr: *mut u8) -> i32;

    /// Copies `count` bytes from `src` to `dst`; `kind` is one of the
    /// `CUDA_MEMCPY_*` direction constants.
    pub fn cudaMemcpy(dst: *mut u8, src: *const u8, count: usize, kind: i32) -> i32;

    /// Same arguments as `cudaMemcpy`, with the copy enqueued on `stream`.
    pub fn cudaMemcpyAsync(
        dst: *mut u8,
        src: *const u8,
        count: usize,
        kind: i32,
        stream: CudaStream,
    ) -> i32;

    /// Sets `count` bytes of device memory starting at `devptr` to `value`.
    pub fn cudaMemset(devptr: *mut u8, value: i32, count: usize) -> i32;

    // ------------------------------------------------------------------
    // Stream
    // ------------------------------------------------------------------

    /// Creates a new stream and stores its handle in `stream`.
    pub fn cudaStreamCreate(stream: *mut CudaStream) -> i32;

    /// Destroys a stream created by `cudaStreamCreate`.
    pub fn cudaStreamDestroy(stream: CudaStream) -> i32;

    /// Blocks until all work enqueued on `stream` has completed.
    pub fn cudaStreamSynchronize(stream: CudaStream) -> i32;

    // ------------------------------------------------------------------
    // Error
    // ------------------------------------------------------------------

    /// Returns the last error recorded by the runtime.
    pub fn cudaGetLastError() -> i32;

    /// Returns a NUL-terminated, runtime-owned description of `error`.
    pub fn cudaGetErrorString(error: i32) -> *const c_char;

    // ------------------------------------------------------------------
    // Our test kernel
    // ------------------------------------------------------------------

    /// Project-provided launcher for the vector-add test kernel. It is not part
    /// of the CUDA runtime and returns no status. Presumably it computes
    /// `c[i] = a[i] + b[i]` for `n` elements on `stream`, with all three
    /// pointers referring to device memory - TODO confirm against the kernel source.
    pub fn launch_vecadd_f32(
        a: *const f32,
        b: *const f32,
        c: *mut f32,
        n: i32,
        stream: CudaStream,
    );
}