Initial project scaffold
This commit is contained in:
40
kernels/cuda/src/tiled_matmul.cu
Normal file
40
kernels/cuda/src/tiled_matmul.cu
Normal file
@@ -0,0 +1,40 @@
|
||||
#include "../include/common.h"
|
||||
#include "../include/cuda_utils.h"
|
||||
|
||||
namespace kernel_lab {
|
||||
|
||||
// Tiled matrix-multiply kernel -- student scaffold, currently a no-op.
//
// Parameters:
//   a, b    - read-only float input buffers.
//   c       - writable float output buffer.
//   m, n, k - 64-bit problem dimensions. Per the TODOs below, k is the
//             dimension that partial products are accumulated over.
//             NOTE(review): presumably a is m x k, b is k x n and c is m x n
//             in row-major order -- confirm against the host launcher.
//
// The body performs no loads or stores yet; every argument is explicitly
// discarded so each parameter is marked as intentionally unused until the
// TODOs are implemented.
__global__ void tiled_matmul_kernel(
    const float* a,
    const float* b,
    float* c,
    int64_t m,
    int64_t n,
    int64_t k) {
  // TODO(student): map blockIdx/threadIdx to a C tile.
  // TODO(student): cooperatively load A and B tiles into shared memory.
  // TODO(student): accumulate partial products across the K dimension.

  // Placeholder statements: evaluate and discard each argument.
  static_cast<void>(a);
  static_cast<void>(b);
  static_cast<void>(c);
  static_cast<void>(m);
  static_cast<void>(n);
  static_cast<void>(k);
}
|
||||
|
||||
// Host entry point for the tiled matrix-multiply lab (student scaffold).
//
// Validates both operands and then fails unconditionally with a TODO message,
// because the kernel launch has not been implemented yet.
//
// Parameters:
//   a - left operand; must be 2D, float32, and pass the LAB_CHECK_* macros.
//   b - right operand; same requirements, and b.size(0) must equal a.size(1).
//
// Returns:
//   Nothing useful at present: control never reaches the trailing return,
//   which exists only so the function has a return statement on every path.
//
// Raises (via TORCH_CHECK / LAB_CHECK_* failures):
//   - when either tensor fails LAB_CHECK_CUDA or LAB_CHECK_CONTIGUOUS
//     (NOTE(review): presumably "is a CUDA tensor" / "is contiguous" --
//     the macros are defined outside this file, confirm there);
//   - when either tensor is not 2D, the inner dimensions differ, or either
//     dtype is not float32;
//   - when the two tensors live on different devices;
//   - always, with the TODO(student) message, once validation passes.
torch::Tensor tiled_matmul_cuda(torch::Tensor a, torch::Tensor b) {
  LAB_CHECK_CUDA(a);
  LAB_CHECK_CUDA(b);
  LAB_CHECK_CONTIGUOUS(a);
  LAB_CHECK_CONTIGUOUS(b);
  TORCH_CHECK(a.dim() == 2 && b.dim() == 2, "tiled_matmul_cuda expects 2D tensors");
  TORCH_CHECK(a.size(1) == b.size(0), "inner dimensions must match");
  TORCH_CHECK(a.scalar_type() == torch::kFloat32, "tiled_matmul_cuda currently assumes float32");
  TORCH_CHECK(b.scalar_type() == torch::kFloat32, "tiled_matmul_cuda currently assumes float32");

  // Two CUDA tensors may still sit on different GPUs; a single kernel launch
  // cannot safely dereference both, so reject the mismatch up front. Placed
  // after the pre-existing checks so their error precedence is unchanged.
  TORCH_CHECK(
      a.device() == b.device(),
      "tiled_matmul_cuda expects both tensors on the same device");

  TORCH_CHECK(
      false,
      "TODO(student): implement tiled_matmul_cuda in kernels/cuda/src/tiled_matmul.cu.");

  // Unreachable while the check above always fails; kept so the function
  // still has a return statement for its declared return type.
  return torch::Tensor();
}
|
||||
|
||||
} // namespace kernel_lab
|
||||
Reference in New Issue
Block a user