cmake_minimum_required(VERSION 3.18 FATAL_ERROR)
set(CMAKE_MODULE_PATH ${CMAKE_CURRENT_SOURCE_DIR}/cmake ${CMAKE_MODULE_PATH})

# Silence warnings that ATen sources trip on non-MSVC compilers.
if(NOT MSVC)
  string(APPEND CMAKE_CXX_FLAGS " -Wno-ignored-qualifiers")
  string(APPEND CMAKE_C_FLAGS " -Wno-ignored-qualifiers")
  string(APPEND CMAKE_CXX_FLAGS " -Wno-absolute-value")
  string(APPEND CMAKE_C_FLAGS " -Wno-absolute-value")
endif()

# Can be compiled standalone
if(NOT AT_INSTALL_BIN_DIR OR NOT AT_INSTALL_LIB_DIR OR NOT AT_INSTALL_INCLUDE_DIR OR NOT AT_INSTALL_SHARE_DIR)
  set(AT_INSTALL_BIN_DIR "bin" CACHE PATH "AT install binary subdirectory")
  set(AT_INSTALL_LIB_DIR "lib" CACHE PATH "AT install library subdirectory")
  set(AT_INSTALL_INCLUDE_DIR "include" CACHE PATH "AT install include subdirectory")
  set(AT_INSTALL_SHARE_DIR "share" CACHE PATH "AT install share subdirectory")
endif()

# These flags are used in Config but set externally. We must normalize them to
# 0/1 otherwise `#if ON` will be evaluated to false.
macro(set_bool OUT IN)
  if(${IN})
    set(${OUT} 1)
  else()
    set(${OUT} 0)
  endif()
endmacro()

set_bool(AT_BUILD_WITH_BLAS USE_BLAS)
set_bool(AT_BUILD_WITH_LAPACK USE_LAPACK)
set_bool(AT_BLAS_F2C BLAS_F2C)
set_bool(AT_BLAS_USE_CBLAS_DOT BLAS_USE_CBLAS_DOT)
set_bool(AT_MAGMA_ENABLED USE_MAGMA)
set_bool(CAFFE2_STATIC_LINK_CUDA_INT CAFFE2_STATIC_LINK_CUDA)
set_bool(AT_CUDNN_ENABLED CAFFE2_USE_CUDNN)
set_bool(AT_CUSPARSELT_ENABLED CAFFE2_USE_CUSPARSELT)

configure_file(Config.h.in "${CMAKE_CURRENT_SOURCE_DIR}/Config.h")
# TODO: Do not generate CUDAConfig.h for ROCm BUILDS
# At the moment, `jit_macros.h` include CUDAConfig.h for both CUDA and HIP builds
if(USE_CUDA OR USE_ROCM)
  configure_file(cuda/CUDAConfig.h.in "${CMAKE_CURRENT_SOURCE_DIR}/cuda/CUDAConfig.h")
endif()
if(USE_ROCM)
  configure_file(hip/HIPConfig.h.in "${CMAKE_CURRENT_SOURCE_DIR}/hip/HIPConfig.h")
endif()

# NB: If you edit these globs, you'll have to update setup.py package_data as well
file(GLOB_RECURSE ATen_CORE_HEADERS "core/*.h")
file(GLOB_RECURSE ATen_CORE_SRCS "core/*.cpp")
file(GLOB_RECURSE ATen_TRANSFORMER_HEADERS "native/transformers/*.h")
if(NOT BUILD_LITE_INTERPRETER)
  file(GLOB_RECURSE ATen_CORE_TEST_SRCS "core/*_test.cpp")
endif()
# Drop the test sources from the regular core source list.
exclude(ATen_CORE_SRCS "${ATen_CORE_SRCS}" ${ATen_CORE_TEST_SRCS})

file(GLOB base_h "*.h" "detail/*.h" "cpu/*.h" "cpu/vec/vec512/*.h" "cpu/vec/vec256/*.h" "cpu/vec/vec256/vsx/*.h" "cpu/vec/vec256/zarch/*.h" "cpu/vec/*.h" "quantized/*.h" "functorch/*.h")
file(GLOB base_cpp "*.cpp" "detail/*.cpp" "cpu/*.cpp" "functorch/*.cpp")
file(GLOB cuda_h "cuda/*.h" "cuda/detail/*.h" "cuda/*.cuh" "cuda/detail/*.cuh" "cuda/tunable/*.cuh" "cuda/tunable/*.h")
file(GLOB cuda_cpp "cuda/*.cpp" "cuda/detail/*.cpp" "cuda/tunable/*.cpp")
file(GLOB cuda_nvrtc_stub_h "cuda/nvrtc_stub/*.h")
file(GLOB cuda_nvrtc_stub_cpp "cuda/nvrtc_stub/*.cpp")
file(GLOB cuda_cu "cuda/*.cu" "cuda/detail/*.cu" "cuda/tunable/*.cu")
file(GLOB cudnn_h "cudnn/*.h" "cudnn/*.cuh")
file(GLOB cudnn_cpp "cudnn/*.cpp")
file(GLOB ops_h "ops/*.h")

file(GLOB xpu_h "xpu/*.h" "xpu/detail/*.h")
file(GLOB xpu_cpp "xpu/*.cpp" "xpu/detail/*.cpp")

file(GLOB hip_h "hip/*.h" "hip/detail/*.h" "hip/*.cuh" "hip/detail/*.cuh" "hip/impl/*.h" "hip/tunable/*.cuh" "hip/tunable/*.h")
file(GLOB hip_cpp "hip/*.cpp" "hip/detail/*.cpp" "hip/impl/*.cpp" "hip/tunable/*.cpp")
list(REMOVE_ITEM hip_cpp "${CMAKE_CURRENT_SOURCE_DIR}/hip/detail/LazyNVRTC.cpp")
file(GLOB hip_hip "hip/*.hip" "hip/detail/*.hip" "hip/impl/*.hip" "hip/tunable/*.hip")
file(GLOB hip_nvrtc_stub_h "hip/nvrtc_stub/*.h")
file(GLOB hip_nvrtc_stub_cpp "hip/nvrtc_stub/*.cpp")
file(GLOB miopen_h "miopen/*.h")
file(GLOB miopen_cpp "miopen/*.cpp")

file(GLOB mkl_cpp "mkl/*.cpp")
file(GLOB mkldnn_cpp "mkldnn/*.cpp")

file(GLOB mkldnn_xpu_cpp "native/mkldnn/xpu/*.cpp" "native/mkldnn/xpu/detail/*.cpp")

file(GLOB native_cpp "native/*.cpp")
file(GLOB native_mkl_cpp "native/mkl/*.cpp")
file(GLOB native_mkldnn_cpp "native/mkldnn/*.cpp")
file(GLOB vulkan_cpp "vulkan/*.cpp")
file(GLOB native_vulkan_cpp "native/vulkan/*.cpp" "native/vulkan/api/*.cpp" "native/vulkan/impl/*.cpp" "native/vulkan/ops/*.cpp")

# Metal
file(GLOB metal_h "metal/*.h")
file(GLOB metal_cpp "metal/*.cpp")
file(GLOB_RECURSE native_metal_h "native/metal/*.h")
file(GLOB metal_test_srcs "native/metal/mpscnn/tests/*.mm")
file(GLOB_RECURSE native_metal_srcs "native/metal/*.mm" "native/metal/*.cpp")
# Metal test sources are globbed separately and removed from the main list.
exclude(native_metal_srcs "${native_metal_srcs}" ${metal_test_srcs})
file(GLOB metal_prepack_h "native/metal/MetalPrepackOpContext.h")
file(GLOB metal_prepack_cpp "native/metal/MetalPrepackOpRegister.cpp")

file(GLOB native_ao_sparse_cpp
  "native/ao_sparse/*.cpp"
  "native/ao_sparse/cpu/*.cpp"
  "native/ao_sparse/quantized/*.cpp"
  "native/ao_sparse/quantized/cpu/*.cpp")
# MPS
file(GLOB mps_cpp "mps/*.cpp")
file(GLOB mps_mm "mps/*.mm")
file(GLOB mps_h "mps/*.h")
file(GLOB_RECURSE native_mps_cpp "native/mps/*.cpp")
file(GLOB_RECURSE native_mps_mm "native/mps/*.mm")
file(GLOB_RECURSE native_mps_h "native/mps/*.h")

file(GLOB native_sparse_cpp "native/sparse/*.cpp")
file(GLOB native_quantized_cpp
  "native/quantized/*.cpp"
  "native/quantized/cpu/*.cpp")
file(GLOB native_nested_cpp "native/nested/*.cpp")
file(GLOB native_transformers_cpp "native/transformers/*.cpp")

file(GLOB native_h "native/*.h")
file(GLOB native_ao_sparse_h
  "native/ao_sparse/*.h"
  "native/ao_sparse/cpu/*.h"
  "native/ao_sparse/quantized/*.h"
  "native/ao_sparse/quantized/cpu/*.h")
file(GLOB native_quantized_h "native/quantized/*.h" "native/quantized/cpu/*.h" "native/quantized/cudnn/*.h")
file(GLOB native_cpu_h "native/cpu/*.h")
file(GLOB native_utils_h "native/utils/*.h")

file(GLOB native_cuda_cu "native/cuda/*.cu")
file(GLOB native_cuda_cpp "native/cuda/*.cpp")
file(GLOB native_cuda_h "native/cuda/*.h" "native/cuda/*.cuh")
file(GLOB native_cuda_linalg_cpp "native/cuda/linalg/*.cpp")
file(GLOB native_hip_h "native/hip/*.h" "native/hip/*.cuh")
file(GLOB native_cudnn_cpp "native/cudnn/*.cpp")
file(GLOB native_sparse_cuda_cu "native/sparse/cuda/*.cu")
file(GLOB native_sparse_cuda_cpp "native/sparse/cuda/*.cpp")
file(GLOB native_quantized_cuda_cu "native/quantized/cuda/*.cu")
file(GLOB native_quantized_cuda_cpp "native/quantized/cuda/*.cpp")
file(GLOB native_quantized_cudnn_cpp "native/quantized/cudnn/*.cpp")
file(GLOB native_nested_h "native/nested/*.h")
file(GLOB native_nested_cuda_cu "native/nested/cuda/*.cu")
file(GLOB native_nested_cuda_cpp "native/nested/cuda/*.cpp")

file(GLOB native_hip_hip "native/hip/*.hip")
file(GLOB native_hip_cpp "native/hip/*.cpp")
file(GLOB native_hip_linalg_cpp "native/hip/linalg/*.cpp")
file(GLOB native_miopen_cpp "native/miopen/*.cpp")
file(GLOB native_cudnn_hip_cpp "native/cudnn/hip/*.cpp")
file(GLOB native_nested_hip_hip "native/nested/hip/*.hip")
file(GLOB native_nested_hip_cpp "native/nested/hip/*.cpp")
file(GLOB native_sparse_hip_hip "native/sparse/hip/*.hip")
file(GLOB native_sparse_hip_cpp "native/sparse/hip/*.cpp")
file(GLOB native_quantized_hip_hip "native/quantized/hip/*.hip")
file(GLOB native_quantized_hip_cpp "native/quantized/hip/*.cpp")
file(GLOB native_transformers_cuda_cu "native/transformers/cuda/*.cu")
file(GLOB native_transformers_cuda_cpp "native/transformers/cuda/*.cpp")
file(GLOB native_transformers_hip_hip "native/transformers/hip/*.hip")
file(GLOB native_transformers_hip_cpp "native/transformers/hip/*.cpp")
file(GLOB native_quantized_cudnn_hip_cpp "native/quantized/cudnn/hip/*.cpp")
file(GLOB native_utils_cpp "native/utils/*.cpp")

# flash_attention CUDA sources
file(GLOB flash_attention_cuda_cu "native/transformers/cuda/flash_attn/*.cu")
file(GLOB flash_attention_cuda_kernels_cu "native/transformers/cuda/flash_attn/kernels/*.cu")
file(GLOB flash_attention_cuda_cpp "native/transformers/cuda/flash_attn/*.cpp")

# flash_attention HIP sources
file(GLOB flash_attention_hip_hip "native/transformers/hip/flash_attn/*.hip")
file(GLOB flash_attention_src_hip_hip "native/transformers/hip/flash_attn/src/*.hip")

# mem_eff_attention sources
file(GLOB mem_eff_attention_cuda_cu "native/transformers/cuda/mem_eff_attention/*.cu")
file(GLOB mem_eff_attention_cuda_kernels_cu "native/transformers/cuda/mem_eff_attention/kernels/*.cu")
file(GLOB mem_eff_attention_cuda_cpp "native/transformers/cuda/mem_eff_attention/*.cpp")

if(USE_FLASH_ATTENTION)
  list(APPEND native_transformers_cuda_cu ${flash_attention_cuda_cu})
  list(APPEND native_transformers_cuda_cu ${flash_attention_cuda_kernels_cu})
  list(APPEND native_transformers_cuda_cpp ${flash_attention_cuda_cpp})
  list(APPEND FLASH_ATTENTION_CUDA_SOURCES ${flash_attention_cuda_cu} ${flash_attention_cuda_kernels_cu})
  list(APPEND ATen_ATTENTION_KERNEL_SRCS ${flash_attention_cuda_kernels_cu})

  list(APPEND native_transformers_hip_hip ${flash_attention_hip_hip})
  list(APPEND native_transformers_src_hip_hip ${flash_attention_src_hip_hip})
endif()

if(USE_MEM_EFF_ATTENTION)
  list(APPEND native_transformers_cuda_cu ${mem_eff_attention_cuda_cu})
  list(APPEND native_transformers_cuda_cu ${mem_eff_attention_cuda_kernels_cu})
  list(APPEND native_transformers_cuda_cpp ${mem_eff_attention_cuda_cpp})
  list(APPEND MEM_EFF_ATTENTION_CUDA_SOURCES ${native_transformers_cuda_cu} ${mem_eff_attention_cuda_cu} ${mem_eff_attention_cuda_kernels_cu})
  list(APPEND ATen_ATTENTION_KERNEL_SRCS ${mem_eff_attention_cuda_kernels_cu})
endif()

# XNNPACK
file(GLOB native_xnnpack "native/xnnpack/*.cpp")

# Add files needed from jit folders
append_filelist("jit_core_headers" ATen_CORE_HEADERS)
append_filelist("jit_core_sources" ATen_CORE_SRCS)

add_subdirectory(quantized)
add_subdirectory(nnapi)

if(BUILD_LITE_INTERPRETER)
  set(all_cpu_cpp ${generated_sources} ${core_generated_sources} ${cpu_kernel_cpp})
  append_filelist("jit_core_sources" all_cpu_cpp)
  append_filelist("aten_cpu_source_non_codegen_list" all_cpu_cpp)
  append_filelist("aten_native_source_non_codegen_list" all_cpu_cpp)
else()
  set(
    all_cpu_cpp ${base_cpp} ${ATen_CORE_SRCS} ${native_cpp}
    ${native_ao_sparse_cpp} ${native_sparse_cpp} ${native_nested_cpp}
    ${native_quantized_cpp} ${native_mkl_cpp} ${native_mkldnn_cpp}
    ${native_transformers_cpp}
    ${native_utils_cpp} ${native_xnnpack} ${generated_sources} ${core_generated_sources}
    ${ATen_CPU_SRCS} ${ATen_QUANTIZED_SRCS} ${ATen_NNAPI_SRCS} ${cpu_kernel_cpp}
  )
endif()

if(USE_LIGHTWEIGHT_DISPATCH)
  set(all_cpu_cpp ${all_cpu_cpp} ${generated_unboxing_sources})
endif()
if(AT_MKL_ENABLED)
  set(all_cpu_cpp ${all_cpu_cpp} ${mkl_cpp})
endif()
if(AT_MKLDNN_ENABLED)
  set(all_cpu_cpp ${all_cpu_cpp} ${mkldnn_cpp})
endif()
if(USE_VULKAN)
  set(all_cpu_cpp ${all_cpu_cpp} ${vulkan_cpp} ${native_vulkan_cpp} ${vulkan_generated_cpp})
else()
  set(all_cpu_cpp ${all_cpu_cpp} ${vulkan_cpp})
endif()

if(USE_XPU)
  list(APPEND ATen_XPU_SRCS ${mkldnn_xpu_cpp})
  list(APPEND ATen_XPU_DEPENDENCY_LIBS xpu_mkldnn)

  list(APPEND ATen_XPU_DEPENDENCY_LIBS ${OCL_LIBRARY})
  list(APPEND ATen_XPU_INCLUDE ${CMAKE_CURRENT_SOURCE_DIR}/native/mkldnn/xpu)
  list(APPEND ATen_XPU_INCLUDE ${CMAKE_CURRENT_SOURCE_DIR}/native/mkldnn/xpu/detail)
  list(APPEND ATen_XPU_INCLUDE ${PROJECT_SOURCE_DIR}/third_party/ideep/mkl-dnn/include)
  list(APPEND ATen_XPU_INCLUDE ${XPU_MKLDNN_INCLUDE})

  list(APPEND ATen_XPU_INCLUDE ${SYCL_INCLUDE_DIR})
  list(APPEND ATen_XPU_DEPENDENCY_LIBS ${SYCL_LIBRARY})
endif()

# Metal
if(USE_PYTORCH_METAL_EXPORT)
  # Add files needed from exporting metal models(optimized_for_mobile)
  set(all_cpu_cpp ${all_cpu_cpp} ${metal_cpp} ${metal_prepack_cpp})
elseif(APPLE AND USE_PYTORCH_METAL)
  # Compile Metal kernels
  set(all_cpu_cpp ${all_cpu_cpp} ${metal_cpp} ${native_metal_srcs})
else()
  set(all_cpu_cpp ${all_cpu_cpp} ${metal_cpp})
endif()

if(USE_CUDA AND USE_ROCM)
  message(FATAL_ERROR "ATen does not currently support simultaneously building with CUDA and ROCM")
endif()

if(USE_CUDA)
  list(APPEND ATen_CUDA_INCLUDE ${CMAKE_CURRENT_SOURCE_DIR}/cuda)
  list(APPEND ATen_CUDA_CU_SRCS
    ${cuda_cu}
    ${native_cuda_cu}
    ${native_nested_cuda_cu}
    ${native_sparse_cuda_cu}
    ${native_quantized_cuda_cu}
    ${native_transformers_cuda_cu}
    ${cuda_generated_sources}
  )
  list(APPEND ATen_CUDA_CPP_SRCS
    ${cuda_cpp}
    ${native_cuda_cpp}
    ${native_cudnn_cpp}
    ${native_miopen_cpp}
    ${native_nested_cuda_cpp}
    ${native_quantized_cuda_cpp}
    ${native_quantized_cudnn_cpp}
    ${native_sparse_cuda_cpp}
    ${native_transformers_cuda_cpp}
  )
  set(ATen_CUDA_LINALG_SRCS ${native_cuda_linalg_cpp})
  if(NOT BUILD_LAZY_CUDA_LINALG)
    list(APPEND ATen_CUDA_CU_SRCS ${native_cuda_linalg_cpp})
  endif()
  if(CAFFE2_USE_CUDNN)
    list(APPEND ATen_CUDA_CPP_SRCS ${cudnn_cpp})
  endif()

  append_filelist("aten_cuda_cu_source_list" ATen_CUDA_CU_SRCS)
  append_filelist("aten_cuda_with_sort_by_key_source_list" ATen_CUDA_SRCS_W_SORT_BY_KEY)
  append_filelist("aten_cuda_cu_with_sort_by_key_source_list" ATen_CUDA_CU_SRCS_W_SORT_BY_KEY)

  # Keep the four CUDA source lists disjoint so no file is compiled twice.
  exclude(ATen_CUDA_CPP_SRCS "${ATen_CUDA_CPP_SRCS}"
    ${ATen_CUDA_CU_SRCS}
    ${ATen_CUDA_SRCS_W_SORT_BY_KEY} ${ATen_CUDA_CU_SRCS_W_SORT_BY_KEY})
  exclude(ATen_CUDA_CU_SRCS "${ATen_CUDA_CU_SRCS}"
    ${ATen_CUDA_SRCS_W_SORT_BY_KEY} ${ATen_CUDA_CU_SRCS_W_SORT_BY_KEY})
endif()

if(USE_ROCM)
  list(APPEND ATen_HIP_INCLUDE ${CMAKE_CURRENT_SOURCE_DIR}/hip)
  list(APPEND ATen_HIP_INCLUDE ${CMAKE_CURRENT_SOURCE_DIR}/../../../third_party/composable_kernel/include)
  list(APPEND ATen_HIP_INCLUDE ${CMAKE_CURRENT_SOURCE_DIR}/../../../third_party/composable_kernel/library/include)
  list(APPEND ATen_HIP_SRCS
    ${ATen_HIP_SRCS}
    ${hip_hip}
    ${native_hip_hip}
    ${native_nested_hip_hip}
    ${native_sparse_hip_hip}
    ${native_quantized_hip_hip}
    ${native_transformers_hip_hip} ${native_transformers_src_hip_hip}
  )
  # TODO: Codegen separate files for HIP and use those (s/cuda_generated_sources/hip_generated_sources)
  list(APPEND all_hip_cpp
    ${native_nested_hip_cpp}
    ${native_sparse_hip_cpp}
    ${native_quantized_hip_cpp}
    ${native_transformers_hip_cpp}
    ${native_quantized_cudnn_hip_cpp}
    ${hip_cpp}
    ${native_hip_cpp}
    ${native_hip_linalg_cpp}
    ${cuda_generated_sources}
    ${ATen_HIP_SRCS}
    ${native_miopen_cpp}
    ${native_cudnn_hip_cpp}
    ${miopen_cpp}
    ${all_hip_cpp}
  )
endif()
if(USE_XPU)
  list(APPEND ATen_XPU_INCLUDE ${CMAKE_CURRENT_SOURCE_DIR}/xpu)
  list(APPEND ATen_XPU_SRCS ${xpu_cpp})
endif()

list(APPEND ATen_CPU_INCLUDE ${CMAKE_CURRENT_SOURCE_DIR}/..)

if(BLAS_FOUND)
  if($ENV{TH_BINARY_BUILD})
    message(STATUS "TH_BINARY_BUILD detected. Enabling special linkage.")
    list(APPEND ATen_CPU_DEPENDENCY_LIBS
      "${BLAS_LIBRARIES};${BLAS_LIBRARIES};${BLAS_LIBRARIES}")
  else()
    list(APPEND ATen_CPU_DEPENDENCY_LIBS ${BLAS_LIBRARIES})
  endif()
endif()

if(LAPACK_FOUND)
  list(APPEND ATen_CPU_DEPENDENCY_LIBS ${LAPACK_LIBRARIES})
  if(USE_CUDA AND MSVC)
    # Although Lapack provides CPU (and thus, one might expect that ATen_cuda
    # would not need this at all), some of our libraries (magma in particular)
    # backend to CPU BLAS/LAPACK implementations, and so it is very important
    # we get the *right* implementation, because even if the symbols are the
    # same, LAPACK implementations may have different calling conventions.
    # This caused https://github.com/pytorch/pytorch/issues/7353
    #
    # We do NOT do this on Linux, since we just rely on torch_cpu to
    # provide all of the symbols we need
    list(APPEND ATen_CUDA_DEPENDENCY_LIBS ${LAPACK_LIBRARIES})
  endif()
endif()

if(UNIX AND NOT APPLE)
  include(CheckLibraryExists)
  # https://github.com/libgit2/libgit2/issues/2128#issuecomment-35649830
  check_library_exists(rt clock_gettime "time.h" NEED_LIBRT)
  if(NEED_LIBRT)
    list(APPEND ATen_CPU_DEPENDENCY_LIBS rt)
    set(CMAKE_REQUIRED_LIBRARIES ${CMAKE_REQUIRED_LIBRARIES} rt)
  endif()
endif()

if(UNIX)
  include(CheckFunctionExists)
  set(CMAKE_EXTRA_INCLUDE_FILES "sys/mman.h")
  check_function_exists(mmap HAVE_MMAP)
  if(HAVE_MMAP)
    add_definitions(-DHAVE_MMAP=1)
  endif()
  # done for lseek: https://www.gnu.org/software/libc/manual/html_node/File-Position-Primitive.html
  add_definitions(-D_FILE_OFFSET_BITS=64)
  check_function_exists(shm_open HAVE_SHM_OPEN)
  if(HAVE_SHM_OPEN)
    add_definitions(-DHAVE_SHM_OPEN=1)
  endif()
  check_function_exists(shm_unlink HAVE_SHM_UNLINK)
  if(HAVE_SHM_UNLINK)
    add_definitions(-DHAVE_SHM_UNLINK=1)
  endif()
  check_function_exists(malloc_usable_size HAVE_MALLOC_USABLE_SIZE)
  if(HAVE_MALLOC_USABLE_SIZE)
    add_definitions(-DHAVE_MALLOC_USABLE_SIZE=1)
  endif()
endif()

add_definitions(-DUSE_EXTERNAL_MZCRC)

if(NOT MSVC)
  list(APPEND ATen_CPU_DEPENDENCY_LIBS m)
endif()

if(AT_NNPACK_ENABLED)
  include_directories(${NNPACK_INCLUDE_DIRS})
  list(APPEND ATen_CPU_DEPENDENCY_LIBS nnpack) # cpuinfo is added below
endif()

if(MKLDNN_FOUND)
  list(APPEND ATen_CPU_DEPENDENCY_LIBS ${MKLDNN_LIBRARIES})
endif()

if(NOT CMAKE_SYSTEM_PROCESSOR MATCHES "^(s390x|ppc64le)$")
  list(APPEND ATen_CPU_DEPENDENCY_LIBS cpuinfo)
endif()

if(NOT EMSCRIPTEN AND NOT INTERN_BUILD_MOBILE)
  if(NOT MSVC)
    # Bump up optimization level for sleef to -O1, since at -O0 the compiler
    # excessively spills intermediate vector registers to the stack
    # and makes things run impossibly slowly
    set(OLD_CMAKE_C_FLAGS_DEBUG ${CMAKE_C_FLAGS_DEBUG})
    if(${CMAKE_C_FLAGS_DEBUG} MATCHES "-O0")
      string(REGEX REPLACE "-O0" "-O1" CMAKE_C_FLAGS_DEBUG ${OLD_CMAKE_C_FLAGS_DEBUG})
    else()
      set(CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS_DEBUG} -O1")
    endif()
  endif()

  if(NOT USE_SYSTEM_SLEEF)
    set(SLEEF_BUILD_SHARED_LIBS OFF CACHE BOOL "Build sleef static" FORCE)
    set(SLEEF_BUILD_DFT OFF CACHE BOOL "Don't build sleef DFT lib" FORCE)
    set(SLEEF_BUILD_GNUABI_LIBS OFF CACHE BOOL "Don't build sleef gnuabi libs" FORCE)
    set(SLEEF_BUILD_TESTS OFF CACHE BOOL "Don't build sleef tests" FORCE)
    set(SLEEF_BUILD_SCALAR_LIB OFF CACHE BOOL "libsleefscalar will be built." FORCE)
    if(CMAKE_SYSTEM_NAME STREQUAL "Darwin")
      if(CMAKE_SYSTEM_PROCESSOR STREQUAL "arm64" OR CMAKE_OSX_ARCHITECTURES MATCHES "arm64")
        set(DISABLE_SVE ON CACHE BOOL "Xcode's clang-12.5 crashes while trying to compile SVE code" FORCE)
      endif()
    endif()
    add_subdirectory("${CMAKE_CURRENT_SOURCE_DIR}/../../../third_party/sleef" ${CMAKE_BINARY_DIR}/sleef)
    set_property(TARGET sleef PROPERTY FOLDER "dependencies")
    list(APPEND ATen_THIRD_PARTY_INCLUDE ${CMAKE_BINARY_DIR}/include)
    link_directories(${CMAKE_BINARY_DIR}/sleef/lib)
  else()
    add_library(sleef SHARED IMPORTED)
    find_library(SLEEF_LIBRARY sleef)
    if(NOT SLEEF_LIBRARY)
      message(FATAL_ERROR "Cannot find sleef")
    endif()
    message("Found sleef: ${SLEEF_LIBRARY}")
    set_target_properties(sleef PROPERTIES IMPORTED_LOCATION "${SLEEF_LIBRARY}")
  endif()
  list(APPEND ATen_CPU_DEPENDENCY_LIBS sleef)

  if(NOT MSVC)
    # Restore the debug C flags that were tweaked for the sleef build above.
    set(CMAKE_C_FLAGS_DEBUG ${OLD_CMAKE_C_FLAGS_DEBUG})
  endif()
endif()
if(USE_CUDA AND NOT USE_ROCM)
  list(APPEND ATen_CUDA_INCLUDE ${CMAKE_CURRENT_SOURCE_DIR}/../../../third_party/cutlass/include)
  list(APPEND ATen_CUDA_INCLUDE ${CMAKE_CURRENT_SOURCE_DIR}/../../../third_party/cutlass/tools/util/include)
  if($ENV{ATEN_STATIC_CUDA})
    list(APPEND ATen_CUDA_DEPENDENCY_LIBS
      ${CUDA_LIBRARIES}
      CUDA::cusparse_static
      CUDA::cufft_static_nocallback
    )
    if(NOT BUILD_LAZY_CUDA_LINALG)
      if(CUDA_VERSION_MAJOR LESS_EQUAL 11)
        list(APPEND ATen_CUDA_DEPENDENCY_LIBS
          CUDA::cusolver_static
          ${CUDAToolkit_LIBRARY_DIR}/liblapack_static.a # needed for libcusolver_static
        )
      elseif(CUDA_VERSION_MAJOR GREATER_EQUAL 12)
        list(APPEND ATen_CUDA_DEPENDENCY_LIBS
          CUDA::cusolver_static
          ${CUDAToolkit_LIBRARY_DIR}/libcusolver_lapack_static.a # needed for libcusolver_static
        )
      endif()
    endif()
  else()
    list(APPEND ATen_CUDA_DEPENDENCY_LIBS
      ${CUDA_LIBRARIES}
      CUDA::cusparse
      CUDA::cufft
    )
    if(NOT BUILD_LAZY_CUDA_LINALG)
      list(APPEND ATen_CUDA_DEPENDENCY_LIBS
        CUDA::cusolver
      )
    endif()
  endif()

  if(CAFFE2_USE_CUDNN)
    list(APPEND ATen_CUDA_DEPENDENCY_LIBS ${CUDNN_LIBRARIES})
  endif()
  if($ENV{ATEN_STATIC_CUDA})
    list(APPEND ATen_CUDA_DEPENDENCY_LIBS
      CUDA::culibos
      CUDA::cudart_static
    )
  endif()
endif()

if(USE_MAGMA)
  if(USE_CUDA AND NOT BUILD_LAZY_CUDA_LINALG)
    list(APPEND ATen_CUDA_DEPENDENCY_LIBS torch::magma)
  endif()
  if(USE_ROCM)
    list(APPEND ATen_HIP_DEPENDENCY_LIBS torch::magma)
  endif()
  if(MSVC)
    if($ENV{TH_BINARY_BUILD})
      # Do not do this on Linux: see Note [Extra MKL symbols for MAGMA in torch_cpu]
      # in caffe2/CMakeLists.txt
      list(APPEND ATen_CUDA_DEPENDENCY_LIBS
        "${BLAS_LIBRARIES};${BLAS_LIBRARIES};${BLAS_LIBRARIES}")
    endif()
  endif()
endif()

# Include CPU paths for CUDA/HIP as well
list(APPEND ATen_CUDA_INCLUDE ${ATen_CPU_INCLUDE})
list(APPEND ATen_HIP_INCLUDE ${ATen_CPU_INCLUDE})
list(APPEND ATen_VULKAN_INCLUDE ${ATen_CPU_INCLUDE})

# We have two libraries: libATen_cpu.so and libATen_cuda.so,
# with libATen_cuda.so depending on libATen_cpu.so. The CPU library
# contains CPU code only. libATen_cpu.so is invariant to the setting
# of USE_CUDA (it always builds the same way); libATen_cuda.so is only
# built when USE_CUDA=1 and CUDA is available. (libATen_hip.so works
# the same way as libATen_cuda.so)
set(ATen_CPU_SRCS ${all_cpu_cpp})
list(APPEND ATen_CPU_DEPENDENCY_LIBS ATEN_CPU_FILES_GEN_LIB)

if(USE_CUDA)
  set(ATen_NVRTC_STUB_SRCS ${cuda_nvrtc_stub_cpp})
  list(APPEND ATen_CUDA_DEPENDENCY_LIBS ATEN_CUDA_FILES_GEN_LIB)
endif()

if(USE_MPS)
  set(ATen_MPS_SRCS ${ATen_MPS_SRCS} ${mps_cpp} ${mps_mm} ${mps_h} ${native_mps_cpp} ${native_mps_mm} ${native_mps_h})
endif()

if(USE_ROCM)
  set(ATen_HIP_SRCS ${all_hip_cpp})
  # caffe2_nvrtc's stubs to driver APIs are useful for HIP.
  # See NOTE [ ATen NVRTC Stub and HIP ]
  set(ATen_NVRTC_STUB_SRCS ${hip_nvrtc_stub_cpp})
  # NB: Instead of adding it to this list, we add it by hand
  # to caffe2_hip, because it needs to be a PRIVATE dependency
  # list(APPEND ATen_HIP_DEPENDENCY_LIBS ATEN_CUDA_FILES_GEN_LIB)
endif()

set(ATEN_INCLUDE_DIR "${CMAKE_INSTALL_PREFIX}/${AT_INSTALL_INCLUDE_DIR}")
configure_file(ATenConfig.cmake.in "${CMAKE_CURRENT_BINARY_DIR}/cmake-exports/ATenConfig.cmake")
install(FILES "${CMAKE_CURRENT_BINARY_DIR}/cmake-exports/ATenConfig.cmake"
  DESTINATION "${AT_INSTALL_SHARE_DIR}/cmake/ATen")

set(INSTALL_HEADERS ${base_h} ${ATen_CORE_HEADERS} ${native_nested_h} ${ATen_TRANSFORMER_HEADERS})
if(NOT INTERN_BUILD_MOBILE)
  list(APPEND INSTALL_HEADERS ${native_h} ${native_cpu_h} ${native_ao_sparse_h} ${native_quantized_h} ${cuda_h} ${native_cuda_h} ${native_hip_h} ${cudnn_h} ${hip_h} ${xpu_h} ${mps_h} ${native_mps_h} ${native_utils_h} ${miopen_h})
  # Metal
  if(USE_PYTORCH_METAL_EXPORT)
    # Add files needed from exporting metal models(optimized_for_mobile)
    list(APPEND INSTALL_HEADERS ${metal_h} ${metal_prepack_h})
  elseif(APPLE AND USE_PYTORCH_METAL)
    # Needed by Metal kernels
    list(APPEND INSTALL_HEADERS ${metal_h} ${native_metal_h})
  else()
    list(APPEND INSTALL_HEADERS ${metal_h})
  endif()
else()
  if(IOS AND USE_PYTORCH_METAL)
    list(APPEND INSTALL_HEADERS ${metal_h} ${native_metal_h})
  else()
    list(APPEND INSTALL_HEADERS ${metal_h} ${metal_prepack_h})
  endif()
endif()

# https://stackoverflow.com/questions/11096471/how-can-i-install-a-hierarchy-of-files-using-cmake
foreach(HEADER ${INSTALL_HEADERS})
  string(REPLACE "${CMAKE_CURRENT_SOURCE_DIR}/" "ATen/" HEADER_SUB ${HEADER})
  string(REPLACE "${Torch_SOURCE_DIR}/" "" HEADER_SUB ${HEADER_SUB})
  get_filename_component(DIR ${HEADER_SUB} DIRECTORY)
  install(FILES ${HEADER} DESTINATION "${AT_INSTALL_INCLUDE_DIR}/${DIR}")
endforeach()

# TODO: Install hip_generated_headers when we have it
foreach(HEADER ${generated_headers} ${cuda_generated_headers})
  # NB: Assumed to be flat
  install(FILES ${HEADER} DESTINATION ${AT_INSTALL_INCLUDE_DIR}/ATen)
endforeach()

message("AT_INSTALL_INCLUDE_DIR ${AT_INSTALL_INCLUDE_DIR}/ATen/core")
foreach(HEADER ${core_generated_headers})
  message("core header install: ${HEADER}")
  install(FILES ${HEADER} DESTINATION ${AT_INSTALL_INCLUDE_DIR}/ATen/core)
endforeach()

install(FILES ${ops_h} ${ops_generated_headers}
  DESTINATION ${AT_INSTALL_INCLUDE_DIR}/ATen/ops)
install(FILES ${CMAKE_BINARY_DIR}/aten/src/ATen/Declarations.yaml
  DESTINATION ${AT_INSTALL_SHARE_DIR}/ATen)

if(ATEN_NO_TEST)
  message("disable test because ATEN_NO_TEST is set")
elseif(BUILD_LITE_INTERPRETER)
  message("disable aten test when BUILD_LITE_INTERPRETER is enabled")
else()
  add_subdirectory(test)
endif()

list(APPEND ATen_MOBILE_BENCHMARK_SRCS
  ${CMAKE_CURRENT_SOURCE_DIR}/benchmarks/tensor_add.cpp)
list(APPEND ATen_MOBILE_BENCHMARK_SRCS
  ${CMAKE_CURRENT_SOURCE_DIR}/benchmarks/quantize_per_channel.cpp)
list(APPEND ATen_MOBILE_BENCHMARK_SRCS
  ${CMAKE_CURRENT_SOURCE_DIR}/benchmarks/stateful_conv1d.cpp)

# Pass source, includes, and libs to parent
set(ATen_CORE_SRCS ${ATen_CORE_SRCS} PARENT_SCOPE)
set(ATen_CPU_SRCS ${ATen_CPU_SRCS} PARENT_SCOPE)
set(ATen_XPU_SRCS ${ATen_XPU_SRCS} PARENT_SCOPE)
set(ATen_CUDA_CU_SRCS ${ATen_CUDA_CU_SRCS} PARENT_SCOPE)
set(ATen_CUDA_CPP_SRCS ${ATen_CUDA_CPP_SRCS} PARENT_SCOPE)
set(ATen_CUDA_LINALG_SRCS ${ATen_CUDA_LINALG_SRCS} PARENT_SCOPE)
set(ATen_CUDA_SRCS_W_SORT_BY_KEY ${ATen_CUDA_SRCS_W_SORT_BY_KEY} PARENT_SCOPE)
set(ATen_CUDA_CU_SRCS_W_SORT_BY_KEY ${ATen_CUDA_CU_SRCS_W_SORT_BY_KEY} PARENT_SCOPE)
set(ATen_NVRTC_STUB_SRCS ${ATen_NVRTC_STUB_SRCS} PARENT_SCOPE)
set(ATen_HIP_SRCS ${ATen_HIP_SRCS} PARENT_SCOPE)
set(ATen_MPS_SRCS ${ATen_MPS_SRCS} PARENT_SCOPE)
# NOTE(review): ATen_XPU_SRCS is also exported a few lines above; this second
# export is harmless (same value) but one of the two could be removed.
set(ATen_XPU_SRCS ${ATen_XPU_SRCS} PARENT_SCOPE)
set(ATen_QUANTIZED_SRCS ${ATen_QUANTIZED_SRCS} PARENT_SCOPE)
set(ATen_CPU_TEST_SRCS ${ATen_CPU_TEST_SRCS} PARENT_SCOPE)
set(ATen_CUDA_TEST_SRCS ${ATen_CUDA_TEST_SRCS} PARENT_SCOPE)
set(ATen_XPU_TEST_SRCS ${ATen_XPU_TEST_SRCS} PARENT_SCOPE)
set(ATen_CORE_TEST_SRCS ${ATen_CORE_TEST_SRCS} PARENT_SCOPE)
set(ATen_HIP_TEST_SRCS ${ATen_HIP_TEST_SRCS} PARENT_SCOPE)
set(ATen_VULKAN_TEST_SRCS ${ATen_VULKAN_TEST_SRCS} PARENT_SCOPE)
set(ATen_MOBILE_BENCHMARK_SRCS ${ATen_MOBILE_BENCHMARK_SRCS} PARENT_SCOPE)
set(ATen_MOBILE_TEST_SRCS ${ATen_MOBILE_TEST_SRCS} ${ATen_VULKAN_TEST_SRCS} PARENT_SCOPE)
set(ATen_VEC_TEST_SRCS ${ATen_VEC_TEST_SRCS} PARENT_SCOPE)
set(ATen_QUANTIZED_TEST_SRCS ${ATen_QUANTIZED_TEST_SRCS} PARENT_SCOPE)
set(ATen_MPS_TEST_SRCS ${ATen_MPS_TEST_SRCS} PARENT_SCOPE)
set(ATen_CPU_INCLUDE ${ATen_CPU_INCLUDE} PARENT_SCOPE)
set(ATen_THIRD_PARTY_INCLUDE ${ATen_THIRD_PARTY_INCLUDE} PARENT_SCOPE)
set(ATen_CUDA_INCLUDE ${ATen_CUDA_INCLUDE} PARENT_SCOPE)
set(ATen_HIP_INCLUDE ${ATen_HIP_INCLUDE} PARENT_SCOPE)
set(ATen_XPU_INCLUDE ${ATen_XPU_INCLUDE} PARENT_SCOPE)
set(ATen_VULKAN_INCLUDE ${ATen_VULKAN_INCLUDE} PARENT_SCOPE)
set(ATen_CPU_DEPENDENCY_LIBS ${ATen_CPU_DEPENDENCY_LIBS} PARENT_SCOPE)
set(ATen_CUDA_DEPENDENCY_LIBS ${ATen_CUDA_DEPENDENCY_LIBS} PARENT_SCOPE)
set(ATen_XPU_DEPENDENCY_LIBS ${ATen_XPU_DEPENDENCY_LIBS} PARENT_SCOPE)
set(ATen_HIP_DEPENDENCY_LIBS ${ATen_HIP_DEPENDENCY_LIBS} PARENT_SCOPE)
set(FLASH_ATTENTION_CUDA_SOURCES ${FLASH_ATTENTION_CUDA_SOURCES} PARENT_SCOPE)
set(MEM_EFF_ATTENTION_CUDA_SOURCES ${MEM_EFF_ATTENTION_CUDA_SOURCES} PARENT_SCOPE)
set(ATen_ATTENTION_KERNEL_SRCS ${ATen_ATTENTION_KERNEL_SRCS} PARENT_SCOPE)