Initial project scaffold
This commit is contained in:
44
kernels/triton/vector_add.py
Normal file
44
kernels/triton/vector_add.py
Normal file
@@ -0,0 +1,44 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import torch
|
||||
|
||||
# Triton is an optional dependency: when it cannot be imported, both module
# handles are bound to None and TRITON_AVAILABLE records the outcome so the
# rest of the file can branch on it.
try:
    import triton
    import triton.language as tl
except ImportError:  # pragma: no cover - depends on local environment
    triton = tl = None
    TRITON_AVAILABLE = False
else:
    TRITON_AVAILABLE = True
|
||||
|
||||
|
||||
if TRITON_AVAILABLE:

    @triton.jit
    def vector_add_kernel(
        x_ptr,
        y_ptr,
        out_ptr,
        num_elements,
        block_size: tl.constexpr,
    ):
        # Scaffold for the element-wise add kernel. Only the index arithmetic
        # is provided; the load / add / store steps are left to the student.

        # Index of this program instance along launch axis 0.
        block_index = tl.program_id(axis=0)
        # First offset handled by this program instance.
        block_start = block_index * block_size
        # One offset per lane: block_start, block_start + 1, ...
        offsets = block_start + tl.arange(0, block_size)
        # True only for offsets below num_elements.
        mask = offsets < num_elements
        # TODO(student): load x and y using masked tl.load calls.
        # TODO(student): add the vectors.
        # TODO(student): write the result with tl.store.
        pass
|
||||
|
||||
|
||||
def triton_vector_add(x: torch.Tensor, y: torch.Tensor, block_size: int = 1024) -> torch.Tensor:
    """Student entrypoint for the Triton vector add task.

    In this scaffold the function only validates its inputs; the kernel
    launch is left as an exercise, so a call that passes every check still
    ends in ``NotImplementedError``.

    Args:
        x: First input tensor.
        y: Second input tensor; must have the same shape as ``x``.
        block_size: Currently unused by the scaffold; presumably forwarded to
            ``vector_add_kernel`` once the launch is implemented.

    Returns:
        Nothing yet; the output tensor is to be returned by the student
        implementation.

    Raises:
        RuntimeError: If Triton could not be imported.
        ValueError: If the shapes differ or either tensor is not on CUDA.
        NotImplementedError: Whenever all validation checks pass.
    """
    triton_missing = not TRITON_AVAILABLE
    if triton_missing:
        raise RuntimeError("Triton is not installed in this environment.")

    shapes_match = x.shape == y.shape
    if not shapes_match:
        raise ValueError(f"shape mismatch: {x.shape} vs {y.shape}")

    both_on_cuda = x.is_cuda and y.is_cuda
    if not both_on_cuda:
        raise ValueError("Triton kernels in this lab expect CUDA tensors.")

    raise NotImplementedError("TODO(student): launch vector_add_kernel and return the output tensor.")
|
||||
|
||||
Reference in New Issue
Block a user