# Build script for the kernel_lab extension: a shared library made of one C++
# binding translation unit and several CUDA kernel sources, linked against Torch.
cmake_minimum_required(VERSION 3.25)

# Default CUDA target architecture.
#
# This must run BEFORE project(): enabling the CUDA language initialises
# CMAKE_CUDA_ARCHITECTURES (from the compiler default or the CUDAARCHS
# environment variable), so a `NOT DEFINED` check placed after project() can
# never be true and the default below would be silently ignored.
# An explicit -DCMAKE_CUDA_ARCHITECTURES=... or a CUDAARCHS environment
# variable still takes precedence over this default.
if(NOT DEFINED CMAKE_CUDA_ARCHITECTURES AND NOT DEFINED ENV{CUDAARCHS})
  set(CMAKE_CUDA_ARCHITECTURES 120 CACHE STRING "Target CUDA architectures")
endif()

project(kernel_lab LANGUAGES CXX CUDA)

# C++17 for both host and device code. The *_STANDARD_REQUIRED switches turn a
# toolchain that cannot provide C++17 into a configure-time error instead of a
# silent fallback to an older standard.
set(CMAKE_CXX_STANDARD 17)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
set(CMAKE_CUDA_STANDARD 17)
set(CMAKE_CUDA_STANDARD_REQUIRED ON)
set(CMAKE_POSITION_INDEPENDENT_CODE ON)

find_package(Torch REQUIRED)

add_library(kernel_lab_extension SHARED
  binding/binding.cpp
  src/vector_add.cu
  src/row_softmax.cu
  src/tiled_matmul.cu
  src/online_softmax.cu
  src/flash_attention_fwd.cu
)

target_include_directories(kernel_lab_extension PRIVATE include)

# TORCH_LIBRARIES is provided by find_package(Torch); quoted so the list is
# forwarded as a single argument.
target_link_libraries(kernel_lab_extension PRIVATE "${TORCH_LIBRARIES}")

# Per-target standard requirements, mirroring the project-wide defaults above.
target_compile_features(kernel_lab_extension PRIVATE cxx_std_17 cuda_std_17)

set_target_properties(kernel_lab_extension PROPERTIES
  # Empty prefix: the output file is named without the platform "lib" prefix.
  PREFIX ""
  CUDA_SEPARABLE_COMPILATION ON
)