Initial project scaffold

2026-04-10 13:15:06 +00:00
commit a4a6b1f1c8
94 changed files with 3964 additions and 0 deletions
--- a/kernels/triton/vector_add.py
+++ b/kernels/triton/vector_add.py
@@ -0,0 +1,44 @@
+from __future__ import annotations
+
+import torch
+
+try:
+    import triton
+    import triton.language as tl
+except ImportError:  # pragma: no cover - depends on local environment
+    triton = None
+    tl = None
+
+
+TRITON_AVAILABLE = triton is not None  # False when the guarded triton import above hit ImportError
+
+
+if TRITON_AVAILABLE:
+
+    @triton.jit
+    def vector_add_kernel(
+        x_ptr,  # presumably a pointer to the first input vector; not read until the TODOs are done
+        y_ptr,  # presumably a pointer to the second input vector; not read until the TODOs are done
+        out_ptr,  # presumably a pointer to the output vector; not written until the TODOs are done
+        num_elements,  # exclusive upper bound that offsets are compared against to build the mask
+        block_size: tl.constexpr,  # constexpr count of offsets each program instance covers
+    ):
+        pid = tl.program_id(axis=0)  # this program instance's index along launch-grid axis 0
+        offsets = pid * block_size + tl.arange(0, block_size)  # block_size consecutive indices from pid * block_size
+        mask = offsets < num_elements  # True only where the offset is below num_elements
+        # TODO(student): load x and y using masked tl.load calls.
+        # TODO(student): add the vectors.
+        # TODO(student): write the result with tl.store.
+        pass  # placeholder: as written the kernel neither loads nor stores anything
+
+
+def triton_vector_add(x: torch.Tensor, y: torch.Tensor, block_size: int = 1024) -> torch.Tensor:
+    """Student entrypoint for the Triton vector add task; validates x and y, then raises until the launch is written."""
+    if not TRITON_AVAILABLE:  # checked first, before either tensor is inspected
+        raise RuntimeError("Triton is not installed in this environment.")
+    if x.shape != y.shape:  # shapes must be equal; no broadcasting is attempted
+        raise ValueError(f"shape mismatch: {x.shape} vs {y.shape}")
+    if not x.is_cuda or not y.is_cuda:  # rejects the call if either input is not a CUDA tensor
+        raise ValueError("Triton kernels in this lab expect CUDA tensors.")
+    raise NotImplementedError("TODO(student): launch vector_add_kernel and return the output tensor.")  # block_size unused so far
+