Files
xtrain/csrc/ops/elementwise.cu
Gahow Wang 63dc05fd10 tensor: add scale elementwise CUDA kernel + FFI
New csrc/ops/elementwise.cu (out[i]=in[i]*alpha), compiled by
xtrain-cuda/build.rs and exposed via launch_scale_f32 FFI, gated behind
not(no_cuda) like the existing vecadd smoke test.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
2026-06-15 15:13:06 +08:00

18 lines
499 B
Plaintext

extern "C" {

// Element-wise scale kernel: out[i] = in[i] * alpha for every i in [0, n).
//
// Parameters:
//   in    - source elements, dereferenced on the device (must be device-accessible)
//   out   - destination elements, dereferenced on the device
//   alpha - scalar multiplier applied to each element
//   n     - number of elements; nothing is read or written when n <= 0
//
// In-place safe: `out` may be the very same buffer as `in`, because every
// index is read and then written by exactly one thread. For that reason the
// pointers are deliberately NOT marked __restrict__.
//
// Launch layout: 1-D grid of 1-D blocks (only the .x components are used), no
// shared memory. The grid-stride loop keeps the result correct for any grid
// size, including launches with fewer than n threads.
__global__ void scale_f32(const float* in, float* out, float alpha, int n) {
    // 64-bit index math so neither the starting index nor the stride can wrap,
    // whatever launch configuration the caller picked.
    const long long stride = static_cast<long long>(gridDim.x) * blockDim.x;
    long long i = static_cast<long long>(blockIdx.x) * blockDim.x + threadIdx.x;
    for (; i < n; i += stride) {
        out[i] = in[i] * alpha;
    }
}

// Host-side FFI entry point: enqueues scale_f32 over n elements.
//
// Parameters:
//   in, out - pointers forwarded unchanged to the kernel (`out` may equal `in`)
//   alpha   - scalar multiplier
//   n       - element count; n <= 0 is a no-op (no kernel is launched)
//   stream  - cudaStream_t handle carried as void* across the FFI boundary;
//             a null handle selects the default stream
//
// The launch is only enqueued: this function does not synchronize and, having
// a void return, does not report launch errors. Callers that need them should
// query cudaGetLastError() after the call and synchronize the stream before
// reading `out` on the host.
void launch_scale_f32(const float* in, float* out, float alpha, int n, void* stream) {
    // A zero-sized grid is an invalid launch configuration; launching it would
    // leave an error behind that surfaces at some later, unrelated CUDA check.
    if (n <= 0) {
        return;
    }

    const int threads_per_block = 256;
    // Ceil-div written as (n - 1) / b + 1 so it cannot overflow for n close
    // to INT_MAX, unlike (n + b - 1) / b.
    const int blocks = (n - 1) / threads_per_block + 1;

    scale_f32<<<blocks, threads_per_block, 0, static_cast<cudaStream_t>(stream)>>>(
        in, out, alpha, n);
}

}  // extern "C"