phase 0+1: project scaffold + xserv-cuda crate
- Cargo workspace with xserv-cuda crate - CUDA FFI bindings (cudart: memory, stream, device, error) - GpuBuffer RAII wrapper with H2D/D2H/D2D copy - CudaStream wrapper with RAII Drop - CachingAllocator with size-bucketed free lists - PinnedBuffer for page-locked host memory - Device info query via cudaDeviceGetAttribute - Vector-add CUDA kernel smoke test - Integration test suite (11 tests) - build.rs: cc crate compiles .cu for SM 12.0 - sync-and-build.sh for remote build on dash5 - Roadmap doc (docs/00-roadmap.md) and Phase 0+1 design doc Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
16
csrc/test/vecadd.cu
Normal file
16
csrc/test/vecadd.cu
Normal file
@@ -0,0 +1,16 @@
|
||||
extern "C" {
|
||||
|
||||
// Element-wise float addition: c[i] = a[i] + b[i] for every i in [0, n).
//
// Layout: 1D launch, one thread per element. Each thread derives a single flat
// index from its block and thread coordinates; threads whose index falls at or
// beyond n exit without touching memory, so the grid may over-cover n.
// Uses no shared memory.
//
// a, b: inputs, each readable for at least n floats from device code.
// c:    output, writable for at least n floats from device code.
// n:    number of elements to process.
__global__ void vecadd_f32(const float* a, const float* b, float* c, int n) {
    const int gid = threadIdx.x + blockDim.x * blockIdx.x;
    if (gid >= n) {
        return;  // tail thread of the last block: nothing to do
    }
    c[gid] = a[gid] + b[gid];
}
|
||||
|
||||
// Host-side launcher for vecadd_f32: enqueues c[i] = a[i] + b[i], i in [0, n),
// on the given stream using 256-thread blocks and ceil(n / 256) blocks.
//
// a, b, c: pointers forwarded unchanged to the kernel.
// n:       element count; n <= 0 is a no-op (nothing is launched).
// stream:  opaque handle that is cast to cudaStream_t.
//
// The launch is asynchronous and this function returns void, so launch or
// execution errors are not reported here; callers that need them should query
// cudaGetLastError() / synchronize the stream afterwards.
void launch_vecadd_f32(const float* a, const float* b, float* c, int n, void* stream) {
    // A grid of zero (or negative) blocks is an invalid launch configuration
    // and would leave an error pending in the runtime, so bail out early.
    if (n <= 0) {
        return;
    }

    const int block = 256;
    // Ceil-div written as (n - 1) / block + 1: unlike (n + block - 1) / block
    // it cannot overflow int when n is close to INT_MAX (n >= 1 here).
    const int grid = (n - 1) / block + 1;

    vecadd_f32<<<grid, block, 0, (cudaStream_t)stream>>>(a, b, c, n);
}
|
||||
|
||||
}
|
||||
Reference in New Issue
Block a user