xref: /aosp_15_r20/external/pytorch/aten/src/ATen/CMakeLists.txt (revision da0073e96a02ea20f0ac840b70461e3646d07c45)
cmake_minimum_required(VERSION 3.18 FATAL_ERROR)
set(CMAKE_MODULE_PATH ${CMAKE_CURRENT_SOURCE_DIR}/cmake ${CMAKE_MODULE_PATH})

if(NOT MSVC)
  # GCC/Clang-only warning suppressions; MSVC does not understand the
  # -Wno-* spelling, hence the guard.
  string(APPEND CMAKE_CXX_FLAGS " -Wno-ignored-qualifiers")
  string(APPEND CMAKE_C_FLAGS " -Wno-ignored-qualifiers")
  string(APPEND CMAKE_CXX_FLAGS " -Wno-absolute-value")
  string(APPEND CMAKE_C_FLAGS " -Wno-absolute-value")
endif()
10
# Can be compiled standalone
if(NOT AT_INSTALL_BIN_DIR OR NOT AT_INSTALL_LIB_DIR OR NOT AT_INSTALL_INCLUDE_DIR OR NOT AT_INSTALL_SHARE_DIR)
  set(AT_INSTALL_BIN_DIR "bin" CACHE PATH "AT install binary subdirectory")
  set(AT_INSTALL_LIB_DIR "lib" CACHE PATH "AT install library subdirectory")
  set(AT_INSTALL_INCLUDE_DIR "include" CACHE PATH "AT install include subdirectory")
  # Fixed: cache docstring previously said "include subdirectory".
  set(AT_INSTALL_SHARE_DIR "share" CACHE PATH "AT install share subdirectory")
endif()
18
# These flags are used in Config.h but are set externally. We must normalize
# them to 0/1, otherwise `#if ON` would evaluate to false in the preprocessor.
# Deliberately a macro (not a function): the result variable must be set in
# the caller's scope.  `if(${IN})` dereferences IN once, so CMake's usual
# truthiness rules apply (ON/TRUE/YES/1 -> 1; OFF/FALSE/0/empty/NOTFOUND -> 0).
macro(set_bool OUT IN)
  if(${IN})
    set(${OUT} 1)
  else()
    set(${OUT} 0)
  endif()
endmacro()
28
# Normalize externally-set feature switches to 0/1 so the configured headers
# (Config.h / CUDAConfig.h below) can test them with `#if`.
set_bool(AT_BUILD_WITH_BLAS USE_BLAS)
set_bool(AT_BUILD_WITH_LAPACK USE_LAPACK)
set_bool(AT_BLAS_F2C BLAS_F2C)
set_bool(AT_BLAS_USE_CBLAS_DOT BLAS_USE_CBLAS_DOT)
set_bool(AT_MAGMA_ENABLED USE_MAGMA)
set_bool(CAFFE2_STATIC_LINK_CUDA_INT CAFFE2_STATIC_LINK_CUDA)
set_bool(AT_CUDNN_ENABLED CAFFE2_USE_CUDNN)
set_bool(AT_CUSPARSELT_ENABLED CAFFE2_USE_CUSPARSELT)
37
# NOTE(review): these configured headers are written into the SOURCE tree,
# not the binary dir — generally an anti-pattern, but presumably required so
# in-tree includes resolve; confirm before changing.
configure_file(Config.h.in "${CMAKE_CURRENT_SOURCE_DIR}/Config.h")
# TODO: Do not generate CUDAConfig.h for ROCm BUILDS
# At the moment, `jit_macros.h` includes CUDAConfig.h for both CUDA and HIP builds
if(USE_CUDA OR USE_ROCM)
  configure_file(cuda/CUDAConfig.h.in "${CMAKE_CURRENT_SOURCE_DIR}/cuda/CUDAConfig.h")
endif()
if(USE_ROCM)
  configure_file(hip/HIPConfig.h.in "${CMAKE_CURRENT_SOURCE_DIR}/hip/HIPConfig.h")
endif()
47
# NB: If you edit these globs, you'll have to update setup.py package_data as well
file(GLOB_RECURSE ATen_CORE_HEADERS  "core/*.h")
file(GLOB_RECURSE ATen_CORE_SRCS "core/*.cpp")
file(GLOB_RECURSE ATen_TRANSFORMER_HEADERS "native/transformers/*.h")
if(NOT BUILD_LITE_INTERPRETER)
  file(GLOB_RECURSE ATen_CORE_TEST_SRCS "core/*_test.cpp")
endif()
# Keep the *_test.cpp files out of the regular core sources.
# (lowercased `exclude` to match the spelling used later in this file;
# CMake command names are case-insensitive, so behavior is unchanged)
exclude(ATen_CORE_SRCS "${ATen_CORE_SRCS}" ${ATen_CORE_TEST_SRCS})
56
# Base (CPU) headers/sources plus the CUDA, NVRTC-stub and cuDNN trees.
file(GLOB base_h "*.h" "detail/*.h" "cpu/*.h" "cpu/vec/vec512/*.h" "cpu/vec/vec256/*.h" "cpu/vec/vec256/vsx/*.h" "cpu/vec/vec256/zarch/*.h" "cpu/vec/*.h" "quantized/*.h" "functorch/*.h")
file(GLOB base_cpp "*.cpp" "detail/*.cpp" "cpu/*.cpp" "functorch/*.cpp")
file(GLOB cuda_h "cuda/*.h" "cuda/detail/*.h" "cuda/*.cuh" "cuda/detail/*.cuh" "cuda/tunable/*.cuh" "cuda/tunable/*.h")
file(GLOB cuda_cpp "cuda/*.cpp" "cuda/detail/*.cpp" "cuda/tunable/*.cpp")
file(GLOB cuda_nvrtc_stub_h "cuda/nvrtc_stub/*.h")
file(GLOB cuda_nvrtc_stub_cpp "cuda/nvrtc_stub/*.cpp")
file(GLOB cuda_cu "cuda/*.cu" "cuda/detail/*.cu" "cuda/tunable/*.cu")
file(GLOB cudnn_h "cudnn/*.h" "cudnn/*.cuh")
file(GLOB cudnn_cpp "cudnn/*.cpp")
file(GLOB ops_h "ops/*.h")
67
# XPU (Intel GPU) sources.
file(GLOB xpu_h "xpu/*.h" "xpu/detail/*.h")
file(GLOB xpu_cpp "xpu/*.cpp" "xpu/detail/*.cpp")

# HIP/ROCm sources; LazyNVRTC.cpp is CUDA-only and therefore removed.
file(GLOB hip_h "hip/*.h" "hip/detail/*.h" "hip/*.cuh" "hip/detail/*.cuh" "hip/impl/*.h" "hip/tunable/*.cuh" "hip/tunable/*.h")
file(GLOB hip_cpp "hip/*.cpp" "hip/detail/*.cpp" "hip/impl/*.cpp" "hip/tunable/*.cpp")
list(REMOVE_ITEM hip_cpp "${CMAKE_CURRENT_SOURCE_DIR}/hip/detail/LazyNVRTC.cpp")
file(GLOB hip_hip "hip/*.hip" "hip/detail/*.hip" "hip/impl/*.hip" "hip/tunable/*.hip")
file(GLOB hip_nvrtc_stub_h "hip/nvrtc_stub/*.h")
file(GLOB hip_nvrtc_stub_cpp "hip/nvrtc_stub/*.cpp")
file(GLOB miopen_h "miopen/*.h")
file(GLOB miopen_cpp "miopen/*.cpp")
79
# MKL / oneDNN (mkldnn) bindings.
file(GLOB mkl_cpp "mkl/*.cpp")
file(GLOB mkldnn_cpp "mkldnn/*.cpp")

# oneDNN kernels for the XPU backend.
file(GLOB mkldnn_xpu_cpp "native/mkldnn/xpu/*.cpp" "native/mkldnn/xpu/detail/*.cpp")

# Native (operator implementation) sources.
file(GLOB native_cpp "native/*.cpp")
file(GLOB native_mkl_cpp "native/mkl/*.cpp")
file(GLOB native_mkldnn_cpp "native/mkldnn/*.cpp")
file(GLOB vulkan_cpp "vulkan/*.cpp")
file(GLOB native_vulkan_cpp "native/vulkan/*.cpp" "native/vulkan/api/*.cpp" "native/vulkan/impl/*.cpp" "native/vulkan/ops/*.cpp")
90
# Metal
file(GLOB metal_h "metal/*.h")
file(GLOB metal_cpp "metal/*.cpp")
file(GLOB_RECURSE native_metal_h "native/metal/*.h")
file(GLOB metal_test_srcs "native/metal/mpscnn/tests/*.mm")
file(GLOB_RECURSE native_metal_srcs "native/metal/*.mm" "native/metal/*.cpp")
# Keep the mpscnn test sources out of the regular Metal build sources.
EXCLUDE(native_metal_srcs "${native_metal_srcs}" ${metal_test_srcs})
# Prepack sources are also used when only *exporting* Metal models.
file(GLOB metal_prepack_h "native/metal/MetalPrepackOpContext.h")
file(GLOB metal_prepack_cpp "native/metal/MetalPrepackOpRegister.cpp")
100
# AO (quantization) sparse kernels.
file(GLOB native_ao_sparse_cpp
            "native/ao_sparse/*.cpp"
            "native/ao_sparse/cpu/*.cpp"
            "native/ao_sparse/quantized/*.cpp"
            "native/ao_sparse/quantized/cpu/*.cpp")
# MPS
file(GLOB mps_cpp "mps/*.cpp")
file(GLOB mps_mm "mps/*.mm")
file(GLOB mps_h "mps/*.h")
file(GLOB_RECURSE native_mps_cpp "native/mps/*.cpp")
file(GLOB_RECURSE native_mps_mm "native/mps/*.mm")
file(GLOB_RECURSE native_mps_h "native/mps/*.h")
113
# Sparse, quantized, nested-tensor and transformer CPU sources.
file(GLOB native_sparse_cpp "native/sparse/*.cpp")
file(GLOB native_quantized_cpp
            "native/quantized/*.cpp"
            "native/quantized/cpu/*.cpp")
file(GLOB native_nested_cpp "native/nested/*.cpp")
file(GLOB native_transformers_cpp "native/transformers/*.cpp")

# Corresponding header globs (installed further below).
file(GLOB native_h "native/*.h")
file(GLOB native_ao_sparse_h
            "native/ao_sparse/*.h"
            "native/ao_sparse/cpu/*.h"
            "native/ao_sparse/quantized/*.h"
            "native/ao_sparse/quantized/cpu/*.h")
file(GLOB native_quantized_h "native/quantized/*.h" "native/quantized/cpu/*.h" "native/quantized/cudnn/*.h")
file(GLOB native_cpu_h "native/cpu/*.h")
file(GLOB native_utils_h "native/utils/*.h")
130
# CUDA-specific native kernels (.cu) and host-side sources (.cpp).
file(GLOB native_cuda_cu "native/cuda/*.cu")
file(GLOB native_cuda_cpp "native/cuda/*.cpp")
file(GLOB native_cuda_h "native/cuda/*.h" "native/cuda/*.cuh")
file(GLOB native_cuda_linalg_cpp "native/cuda/linalg/*.cpp")
file(GLOB native_hip_h "native/hip/*.h" "native/hip/*.cuh")
file(GLOB native_cudnn_cpp "native/cudnn/*.cpp")
file(GLOB native_sparse_cuda_cu "native/sparse/cuda/*.cu")
file(GLOB native_sparse_cuda_cpp "native/sparse/cuda/*.cpp")
file(GLOB native_quantized_cuda_cu "native/quantized/cuda/*.cu")
file(GLOB native_quantized_cuda_cpp "native/quantized/cuda/*.cpp")
file(GLOB native_quantized_cudnn_cpp "native/quantized/cudnn/*.cpp")
file(GLOB native_nested_h "native/nested/*.h")
file(GLOB native_nested_cuda_cu "native/nested/cuda/*.cu")
file(GLOB native_nested_cuda_cpp "native/nested/cuda/*.cpp")

# HIP/ROCm counterparts (.hip files are hipified kernels).
file(GLOB native_hip_hip "native/hip/*.hip")
file(GLOB native_hip_cpp "native/hip/*.cpp")
file(GLOB native_hip_linalg_cpp "native/hip/linalg/*.cpp")
file(GLOB native_miopen_cpp "native/miopen/*.cpp")
file(GLOB native_cudnn_hip_cpp "native/cudnn/hip/*.cpp")
file(GLOB native_nested_hip_hip "native/nested/hip/*.hip")
file(GLOB native_nested_hip_cpp "native/nested/hip/*.cpp")
file(GLOB native_sparse_hip_hip "native/sparse/hip/*.hip")
file(GLOB native_sparse_hip_cpp "native/sparse/hip/*.cpp")
file(GLOB native_quantized_hip_hip "native/quantized/hip/*.hip")
file(GLOB native_quantized_hip_cpp "native/quantized/hip/*.cpp")
file(GLOB native_transformers_cuda_cu "native/transformers/cuda/*.cu")
file(GLOB native_transformers_cuda_cpp "native/transformers/cuda/*.cpp")
file(GLOB native_transformers_hip_hip "native/transformers/hip/*.hip")
file(GLOB native_transformers_hip_cpp "native/transformers/hip/*.cpp")
file(GLOB native_quantized_cudnn_hip_cpp "native/quantized/cudnn/hip/*.cpp")
file(GLOB native_utils_cpp "native/utils/*.cpp")
163
# flash_attention sources (CUDA)
file(GLOB flash_attention_cuda_cu "native/transformers/cuda/flash_attn/*.cu")
file(GLOB flash_attention_cuda_kernels_cu "native/transformers/cuda/flash_attn/kernels/*.cu")
file(GLOB flash_attention_cuda_cpp "native/transformers/cuda/flash_attn/*.cpp")

# flash_attention sources (HIP/ROCm)
file(GLOB flash_attention_hip_hip "native/transformers/hip/flash_attn/*.hip")
file(GLOB flash_attention_src_hip_hip "native/transformers/hip/flash_attn/src/*.hip")

# Mem_eff attention sources
file(GLOB mem_eff_attention_cuda_cu "native/transformers/cuda/mem_eff_attention/*.cu")
file(GLOB mem_eff_attention_cuda_kernels_cu "native/transformers/cuda/mem_eff_attention/kernels/*.cu")
file(GLOB mem_eff_attention_cuda_cpp "native/transformers/cuda/mem_eff_attention/*.cpp")
177
# Fold the optional attention kernels into the transformer source lists.
if(USE_FLASH_ATTENTION)
  list(APPEND native_transformers_cuda_cu ${flash_attention_cuda_cu})
  list(APPEND native_transformers_cuda_cu ${flash_attention_cuda_kernels_cu})
  list(APPEND native_transformers_cuda_cpp ${flash_attention_cuda_cpp})
  list(APPEND FLASH_ATTENTION_CUDA_SOURCES ${flash_attention_cuda_cu} ${flash_attention_cuda_kernels_cu})
  list(APPEND ATen_ATTENTION_KERNEL_SRCS ${flash_attention_cuda_kernels_cu})

  list(APPEND native_transformers_hip_hip ${flash_attention_hip_hip})
  list(APPEND native_transformers_src_hip_hip ${flash_attention_src_hip_hip})
endif()

if(USE_MEM_EFF_ATTENTION)
  list(APPEND native_transformers_cuda_cu ${mem_eff_attention_cuda_cu})
  list(APPEND native_transformers_cuda_cu ${mem_eff_attention_cuda_kernels_cu})
  list(APPEND native_transformers_cuda_cpp ${mem_eff_attention_cuda_cpp})
  # NOTE(review): this seeds MEM_EFF_ATTENTION_CUDA_SOURCES with ALL of
  # native_transformers_cuda_cu (which, when USE_FLASH_ATTENTION is on,
  # already contains the flash-attention files appended above) — confirm
  # that is intended rather than listing only the mem_eff files.
  list(APPEND MEM_EFF_ATTENTION_CUDA_SOURCES ${native_transformers_cuda_cu} ${mem_eff_attention_cuda_cu} ${mem_eff_attention_cuda_kernels_cu})
  list(APPEND ATen_ATTENTION_KERNEL_SRCS ${mem_eff_attention_cuda_kernels_cu})
endif()
196
# XNNPACK
file(GLOB native_xnnpack "native/xnnpack/*.cpp")

# Add files needed from jit folders.
# NOTE(review): append_filelist is a helper defined outside this file —
# presumably it resolves named lists from build_variables; confirm there.
append_filelist("jit_core_headers" ATen_CORE_HEADERS)
append_filelist("jit_core_sources" ATen_CORE_SRCS)

add_subdirectory(quantized)
add_subdirectory(nnapi)
206
# Assemble the full CPU source list.  The lite interpreter uses a minimal,
# curated set; the full build aggregates every CPU-side glob plus codegen
# outputs (generated_sources / core_generated_sources / cpu_kernel_cpp are
# defined by the surrounding build).
if(BUILD_LITE_INTERPRETER)
  set(all_cpu_cpp ${generated_sources} ${core_generated_sources} ${cpu_kernel_cpp})
  append_filelist("jit_core_sources" all_cpu_cpp)
  append_filelist("aten_cpu_source_non_codegen_list" all_cpu_cpp)
  append_filelist("aten_native_source_non_codegen_list" all_cpu_cpp)
else()
  set(
    all_cpu_cpp ${base_cpp} ${ATen_CORE_SRCS} ${native_cpp}
    ${native_ao_sparse_cpp} ${native_sparse_cpp} ${native_nested_cpp}
    ${native_quantized_cpp} ${native_mkl_cpp} ${native_mkldnn_cpp}
    ${native_transformers_cpp}
    ${native_utils_cpp} ${native_xnnpack} ${generated_sources} ${core_generated_sources}
    ${ATen_CPU_SRCS} ${ATen_QUANTIZED_SRCS} ${ATen_NNAPI_SRCS} ${cpu_kernel_cpp}
  )
endif()

# Optional feature sources are appended after the base list.
if(USE_LIGHTWEIGHT_DISPATCH)
  set(all_cpu_cpp ${all_cpu_cpp} ${generated_unboxing_sources})
endif()
if(AT_MKL_ENABLED)
  set(all_cpu_cpp ${all_cpu_cpp} ${mkl_cpp})
endif()
if(AT_MKLDNN_ENABLED)
  set(all_cpu_cpp ${all_cpu_cpp} ${mkldnn_cpp})
endif()
# vulkan_cpp is always compiled; the native kernels and codegen output only
# when USE_VULKAN is on.
if(USE_VULKAN)
  set(all_cpu_cpp ${all_cpu_cpp} ${vulkan_cpp} ${native_vulkan_cpp} ${vulkan_generated_cpp})
else()
  set(all_cpu_cpp ${all_cpu_cpp} ${vulkan_cpp})
endif()
237
if(USE_XPU)
  # oneDNN (mkldnn) sources for the XPU backend, plus the libraries and
  # include paths they require (oneDNN, OpenCL and SYCL).
  list(APPEND ATen_XPU_SRCS ${mkldnn_xpu_cpp})
  list(APPEND ATen_XPU_DEPENDENCY_LIBS
    xpu_mkldnn
    ${OCL_LIBRARY}
    ${SYCL_LIBRARY})
  list(APPEND ATen_XPU_INCLUDE
    ${CMAKE_CURRENT_SOURCE_DIR}/native/mkldnn/xpu
    ${CMAKE_CURRENT_SOURCE_DIR}/native/mkldnn/xpu/detail
    ${PROJECT_SOURCE_DIR}/third_party/ideep/mkl-dnn/include
    ${XPU_MKLDNN_INCLUDE}
    ${SYCL_INCLUDE_DIR})
endif()
251
# Metal
# The generic metal_cpp sources are always compiled; which extra files join
# them depends on how Metal support is configured.
list(APPEND all_cpu_cpp ${metal_cpp})
if(USE_PYTORCH_METAL_EXPORT)
  # Add files needed from exporting metal models(optimized_for_mobile)
  list(APPEND all_cpu_cpp ${metal_prepack_cpp})
elseif(APPLE AND USE_PYTORCH_METAL)
  # Compile Metal kernels
  list(APPEND all_cpu_cpp ${native_metal_srcs})
endif()
262
# Building both GPU backends into one library is unsupported.
if(USE_CUDA AND USE_ROCM)
  # Fixed double negative ("doesn't not") in the original message.
  message(FATAL_ERROR "ATen does not currently support simultaneously building with CUDA and ROCM")
endif()
266
if(USE_CUDA)
  list(APPEND ATen_CUDA_INCLUDE ${CMAKE_CURRENT_SOURCE_DIR}/cuda)
  # Device-compiled (.cu) sources, including codegen output.
  list(APPEND ATen_CUDA_CU_SRCS
    ${cuda_cu}
    ${native_cuda_cu}
    ${native_nested_cuda_cu}
    ${native_sparse_cuda_cu}
    ${native_quantized_cuda_cu}
    ${native_transformers_cuda_cu}
    ${cuda_generated_sources}
  )
  # Host-compiled (.cpp) sources.
  list(APPEND ATen_CUDA_CPP_SRCS
    ${cuda_cpp}
    ${native_cuda_cpp}
    ${native_cudnn_cpp}
    ${native_miopen_cpp}
    ${native_nested_cuda_cpp}
    ${native_quantized_cuda_cpp}
    ${native_quantized_cudnn_cpp}
    ${native_sparse_cuda_cpp}
    ${native_transformers_cuda_cpp}
  )
  # linalg sources are kept separate so BUILD_LAZY_CUDA_LINALG can split
  # them into their own lazily-loaded library.
  set(ATen_CUDA_LINALG_SRCS ${native_cuda_linalg_cpp})
  if(NOT BUILD_LAZY_CUDA_LINALG)
    list(APPEND ATen_CUDA_CU_SRCS ${native_cuda_linalg_cpp})
  endif()
  if(CAFFE2_USE_CUDNN)
    list(APPEND ATen_CUDA_CPP_SRCS ${cudnn_cpp})
  endif()

  append_filelist("aten_cuda_cu_source_list" ATen_CUDA_CU_SRCS)
  append_filelist("aten_cuda_with_sort_by_key_source_list" ATen_CUDA_SRCS_W_SORT_BY_KEY)
  append_filelist("aten_cuda_cu_with_sort_by_key_source_list" ATen_CUDA_CU_SRCS_W_SORT_BY_KEY)

  # The sort-by-key sources are compiled separately, so remove them (and any
  # .cu files) from the other lists to avoid duplicate compilation.
  exclude(ATen_CUDA_CPP_SRCS "${ATen_CUDA_CPP_SRCS}"
      ${ATen_CUDA_CU_SRCS}
      ${ATen_CUDA_SRCS_W_SORT_BY_KEY} ${ATen_CUDA_CU_SRCS_W_SORT_BY_KEY})
  exclude(ATen_CUDA_CU_SRCS "${ATen_CUDA_CU_SRCS}"
      ${ATen_CUDA_SRCS_W_SORT_BY_KEY} ${ATen_CUDA_CU_SRCS_W_SORT_BY_KEY})
endif()
307
if(USE_ROCM)
  list(APPEND ATen_HIP_INCLUDE ${CMAKE_CURRENT_SOURCE_DIR}/hip)
  list(APPEND ATen_HIP_INCLUDE ${CMAKE_CURRENT_SOURCE_DIR}/../../../third_party/composable_kernel/include)
  list(APPEND ATen_HIP_INCLUDE ${CMAKE_CURRENT_SOURCE_DIR}/../../../third_party/composable_kernel/library/include)
  # NOTE(review): ${ATen_HIP_SRCS} inside its own APPEND re-appends the
  # list's current contents — a no-op if it was empty, but a duplication
  # otherwise; confirm it is empty at this point.
  list(APPEND ATen_HIP_SRCS
    ${ATen_HIP_SRCS}
    ${hip_hip}
    ${native_hip_hip}
    ${native_nested_hip_hip}
    ${native_sparse_hip_hip}
    ${native_quantized_hip_hip}
    ${native_transformers_hip_hip} ${native_transformers_src_hip_hip}
  )
  # TODO: Codegen separate files for HIP and use those (s/cuda_generated_sources/hip_generated_sources)
  # NOTE(review): same self-append pattern with ${all_hip_cpp} below.
  list(APPEND all_hip_cpp
    ${native_nested_hip_cpp}
    ${native_sparse_hip_cpp}
    ${native_quantized_hip_cpp}
    ${native_transformers_hip_cpp}
    ${native_quantized_cudnn_hip_cpp}
    ${hip_cpp}
    ${native_hip_cpp}
    ${native_hip_linalg_cpp}
    ${cuda_generated_sources}
    ${ATen_HIP_SRCS}
    ${native_miopen_cpp}
    ${native_cudnn_hip_cpp}
    ${miopen_cpp}
    ${all_hip_cpp}
  )
endif()
339
if(USE_XPU)
  # Backend runtime sources/includes (separate from the oneDNN XPU block above).
  list(APPEND ATen_XPU_INCLUDE ${CMAKE_CURRENT_SOURCE_DIR}/xpu)
  list(APPEND ATen_XPU_SRCS ${xpu_cpp})
endif()

# Parent of this directory, so headers resolve as <ATen/...>.
list(APPEND ATen_CPU_INCLUDE ${CMAKE_CURRENT_SOURCE_DIR}/..)
346
if(BLAS_FOUND)
  # $ENV{TH_BINARY_BUILD} is read at configure time only.
  if($ENV{TH_BINARY_BUILD})
    message(STATUS "TH_BINARY_BUILD detected. Enabling special linkage.")
    # NOTE(review): the BLAS library list is repeated three times —
    # presumably to satisfy circular symbol dependencies during a
    # single-pass static link; confirm before simplifying.
    list(APPEND ATen_CPU_DEPENDENCY_LIBS
      "${BLAS_LIBRARIES};${BLAS_LIBRARIES};${BLAS_LIBRARIES}")
  else()
    list(APPEND ATen_CPU_DEPENDENCY_LIBS ${BLAS_LIBRARIES})
  endif()
endif()
356
if(LAPACK_FOUND)
  list(APPEND ATen_CPU_DEPENDENCY_LIBS ${LAPACK_LIBRARIES})
  if(USE_CUDA AND MSVC)
    # Although Lapack provides CPU (and thus, one might expect that ATen_cuda
    # would not need this at all), some of our libraries (magma in particular)
    # backend to CPU BLAS/LAPACK implementations, and so it is very important
    # we get the *right* implementation, because even if the symbols are the
    # same, LAPACK implementions may have different calling conventions.
    # This caused https://github.com/pytorch/pytorch/issues/7353
    #
    # We do NOT do this on Linux, since we just rely on torch_cpu to
    # provide all of the symbols we need
    list(APPEND ATen_CUDA_DEPENDENCY_LIBS ${LAPACK_LIBRARIES})
  endif()
endif()
372
if(UNIX AND NOT APPLE)
  include(CheckLibraryExists)
  # On older glibc, clock_gettime lives in librt rather than libc.
  # https://github.com/libgit2/libgit2/issues/2128#issuecomment-35649830
  check_library_exists(rt clock_gettime "time.h" NEED_LIBRT)
  if(NEED_LIBRT)
    list(APPEND ATen_CPU_DEPENDENCY_LIBS rt)
    # Make subsequent check_* probes link against librt too.
    set(CMAKE_REQUIRED_LIBRARIES ${CMAKE_REQUIRED_LIBRARIES} rt)
  endif()
endif()
382
if(UNIX)
  # Probe for POSIX APIs used by the CPU allocator / file mapping code and
  # expose the results as preprocessor definitions.
  include(CheckFunctionExists)
  set(CMAKE_EXTRA_INCLUDE_FILES "sys/mman.h")
  check_function_exists(mmap HAVE_MMAP)
  if(HAVE_MMAP)
    add_definitions(-DHAVE_MMAP=1)
  endif()
  # done for lseek: https://www.gnu.org/software/libc/manual/html_node/File-Position-Primitive.html
  add_definitions(-D_FILE_OFFSET_BITS=64)
  check_function_exists(shm_open HAVE_SHM_OPEN)
  if(HAVE_SHM_OPEN)
    add_definitions(-DHAVE_SHM_OPEN=1)
  endif()
  check_function_exists(shm_unlink HAVE_SHM_UNLINK)
  if(HAVE_SHM_UNLINK)
    add_definitions(-DHAVE_SHM_UNLINK=1)
  endif()
  check_function_exists(malloc_usable_size HAVE_MALLOC_USABLE_SIZE)
  if(HAVE_MALLOC_USABLE_SIZE)
    add_definitions(-DHAVE_MALLOC_USABLE_SIZE=1)
  endif()
endif()
405
# NOTE(review): presumably tells miniz ("MZ") to use an external CRC
# implementation — confirm against the third_party miniz build.
add_definitions(-DUSE_EXTERNAL_MZCRC)

if(NOT MSVC)
  # libm must be linked explicitly on non-MSVC toolchains.
  list(APPEND ATen_CPU_DEPENDENCY_LIBS m)
endif()

if(AT_NNPACK_ENABLED)
  include_directories(${NNPACK_INCLUDE_DIRS})
  list(APPEND ATen_CPU_DEPENDENCY_LIBS nnpack) # cpuinfo is added below
endif()
416
if(MKLDNN_FOUND)
  list(APPEND ATen_CPU_DEPENDENCY_LIBS ${MKLDNN_LIBRARIES})
endif()

# cpuinfo does not support s390x/ppc64le.
if(NOT CMAKE_SYSTEM_PROCESSOR MATCHES "^(s390x|ppc64le)$")
  list(APPEND ATen_CPU_DEPENDENCY_LIBS cpuinfo)
endif()
424
# Build (or locate) sleef, the SIMD math library used by CPU vectorized kernels.
if(NOT EMSCRIPTEN AND NOT INTERN_BUILD_MOBILE)
  if(NOT MSVC)
    # Bump up optimization level for sleef to -O1, since at -O0 the compiler
    # excessively spills intermediate vector registers to the stack
    # and makes things run impossibly slowly
    # (the original flags are restored after add_subdirectory below).
    set(OLD_CMAKE_C_FLAGS_DEBUG ${CMAKE_C_FLAGS_DEBUG})
    if(${CMAKE_C_FLAGS_DEBUG} MATCHES "-O0")
      string(REGEX REPLACE "-O0" "-O1" CMAKE_C_FLAGS_DEBUG ${OLD_CMAKE_C_FLAGS_DEBUG})
    else()
      set(CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS_DEBUG} -O1")
    endif()
  endif()

  if(NOT USE_SYSTEM_SLEEF)
    # FORCE is used intentionally here: these configure the vendored sleef
    # sub-build, not user-facing options of this project.
    set(SLEEF_BUILD_SHARED_LIBS OFF CACHE BOOL "Build sleef static" FORCE)
    set(SLEEF_BUILD_DFT OFF CACHE BOOL "Don't build sleef DFT lib" FORCE)
    set(SLEEF_BUILD_GNUABI_LIBS OFF CACHE BOOL "Don't build sleef gnuabi libs" FORCE)
    set(SLEEF_BUILD_TESTS OFF CACHE BOOL "Don't build sleef tests" FORCE)
    set(SLEEF_BUILD_SCALAR_LIB OFF CACHE BOOL "libsleefscalar will be built." FORCE)
    if(CMAKE_SYSTEM_NAME STREQUAL "Darwin")
      if(CMAKE_SYSTEM_PROCESSOR STREQUAL "arm64" OR CMAKE_OSX_ARCHITECTURES MATCHES "arm64")
        set(DISABLE_SVE ON CACHE BOOL "Xcode's clang-12.5 crashes while trying to compile SVE code" FORCE)
      endif()
    endif()
    add_subdirectory("${CMAKE_CURRENT_SOURCE_DIR}/../../../third_party/sleef" ${CMAKE_BINARY_DIR}/sleef)
    set_property(TARGET sleef PROPERTY FOLDER "dependencies")
    list(APPEND ATen_THIRD_PARTY_INCLUDE ${CMAKE_BINARY_DIR}/include)
    link_directories(${CMAKE_BINARY_DIR}/sleef/lib)
  else()
    # Use a system-provided sleef wrapped in an imported target.
    add_library(sleef SHARED IMPORTED)
    find_library(SLEEF_LIBRARY sleef)
    if(NOT SLEEF_LIBRARY)
      message(FATAL_ERROR "Cannot find sleef")
    endif()
    message("Found sleef: ${SLEEF_LIBRARY}")
    set_target_properties(sleef PROPERTIES IMPORTED_LOCATION "${SLEEF_LIBRARY}")
  endif()
  list(APPEND ATen_CPU_DEPENDENCY_LIBS sleef)

  if(NOT MSVC)
    # Restore the debug C flags saved above.
    set(CMAKE_C_FLAGS_DEBUG ${OLD_CMAKE_C_FLAGS_DEBUG})
  endif()
endif()
468
if(USE_CUDA AND NOT USE_ROCM)
  # CUTLASS headers (used by the attention kernels among others).
  list(APPEND ATen_CUDA_INCLUDE ${CMAKE_CURRENT_SOURCE_DIR}/../../../third_party/cutlass/include)
  list(APPEND ATen_CUDA_INCLUDE ${CMAKE_CURRENT_SOURCE_DIR}/../../../third_party/cutlass/tools/util/include)
  # $ENV{ATEN_STATIC_CUDA} selects static vs shared CUDA runtime libraries;
  # it is read at configure time only.
  if($ENV{ATEN_STATIC_CUDA})
    list(APPEND ATen_CUDA_DEPENDENCY_LIBS
      ${CUDA_LIBRARIES}
      CUDA::cusparse_static
      CUDA::cufft_static_nocallback
    )
    if(NOT BUILD_LAZY_CUDA_LINALG)
      if(CUDA_VERSION_MAJOR LESS_EQUAL 11)
        list(APPEND ATen_CUDA_DEPENDENCY_LIBS
          CUDA::cusolver_static
          ${CUDAToolkit_LIBRARY_DIR}/liblapack_static.a     # needed for libcusolver_static
        )
      elseif(CUDA_VERSION_MAJOR GREATER_EQUAL 12)
        list(APPEND ATen_CUDA_DEPENDENCY_LIBS
          CUDA::cusolver_static
          ${CUDAToolkit_LIBRARY_DIR}/libcusolver_lapack_static.a     # needed for libcusolver_static
        )
      endif()
    endif()
  else()
    list(APPEND ATen_CUDA_DEPENDENCY_LIBS
      ${CUDA_LIBRARIES}
      CUDA::cusparse
      CUDA::cufft
    )
    if(NOT BUILD_LAZY_CUDA_LINALG)
      list(APPEND ATen_CUDA_DEPENDENCY_LIBS
        CUDA::cusolver
      )
    endif()
  endif()

  if(CAFFE2_USE_CUDNN)
    list(APPEND ATen_CUDA_DEPENDENCY_LIBS ${CUDNN_LIBRARIES})
  endif()
  if($ENV{ATEN_STATIC_CUDA})
    # Required to close the static-link set when using the static runtime.
    list(APPEND ATen_CUDA_DEPENDENCY_LIBS
      CUDA::culibos
      CUDA::cudart_static
    )
  endif()
endif()
514
515
516  if(USE_MAGMA)
517    if(USE_CUDA AND NOT BUILD_LAZY_CUDA_LINALG)
518      list(APPEND ATen_CUDA_DEPENDENCY_LIBS torch::magma)
519    endif(USE_CUDA AND NOT BUILD_LAZY_CUDA_LINALG)
520    if(USE_ROCM)
521      list(APPEND ATen_HIP_DEPENDENCY_LIBS torch::magma)
522    endif(USE_ROCM)
523    if(MSVC)
524      if($ENV{TH_BINARY_BUILD})
525        # Do not do this on Linux: see Note [Extra MKL symbols for MAGMA in torch_cpu]
526        # in caffe2/CMakeLists.txt
527        list(APPEND ATen_CUDA_DEPENDENCY_LIBS
528          "${BLAS_LIBRARIES};${BLAS_LIBRARIES};${BLAS_LIBRARIES}")
529      endif($ENV{TH_BINARY_BUILD})
530    endif(MSVC)
531  endif(USE_MAGMA)
532
# Include CPU paths for CUDA/HIP as well
list(APPEND ATen_CUDA_INCLUDE ${ATen_CPU_INCLUDE})
list(APPEND ATen_HIP_INCLUDE ${ATen_CPU_INCLUDE})
list(APPEND ATen_VULKAN_INCLUDE ${ATen_CPU_INCLUDE})

# We have two libraries: libATen_cpu.so and libATen_cuda.so,
# with libATen_cuda.so depending on libATen_cpu.so.  The CPU library
# contains CPU code only.  libATen_cpu.so is invariant to the setting
# of USE_CUDA (it always builds the same way); libATen_cuda.so is only
# built when USE_CUDA=1 and CUDA is available.  (libATen_hip.so works
# the same way as libATen_cuda.so)
set(ATen_CPU_SRCS ${all_cpu_cpp})
list(APPEND ATen_CPU_DEPENDENCY_LIBS ATEN_CPU_FILES_GEN_LIB)
546
# Per-backend source selection for NVRTC stubs, MPS and HIP.
if(USE_CUDA)
  set(ATen_NVRTC_STUB_SRCS ${cuda_nvrtc_stub_cpp})
  list(APPEND ATen_CUDA_DEPENDENCY_LIBS ATEN_CUDA_FILES_GEN_LIB)
endif()

if(USE_MPS)
    set(ATen_MPS_SRCS ${ATen_MPS_SRCS} ${mps_cpp} ${mps_mm} ${mps_h} ${native_mps_cpp} ${native_mps_mm} ${native_mps_h})
endif()

if(USE_ROCM)
  set(ATen_HIP_SRCS ${all_hip_cpp})
  # caffe2_nvrtc's stubs to driver APIs are useful for HIP.
  # See NOTE [ ATen NVRTC Stub and HIP ]
  set(ATen_NVRTC_STUB_SRCS ${hip_nvrtc_stub_cpp})
  # NB: Instead of adding it to this list, we add it by hand
  # to caffe2_hip, because it needs to be a PRIVATE dependency
  # list(APPEND ATen_HIP_DEPENDENCY_LIBS ATEN_CUDA_FILES_GEN_LIB)
endif()
565
# Generate and install the ATenConfig.cmake package file so downstream
# projects can find_package(ATen).
set(ATEN_INCLUDE_DIR "${CMAKE_INSTALL_PREFIX}/${AT_INSTALL_INCLUDE_DIR}")
configure_file(ATenConfig.cmake.in "${CMAKE_CURRENT_BINARY_DIR}/cmake-exports/ATenConfig.cmake")
install(FILES "${CMAKE_CURRENT_BINARY_DIR}/cmake-exports/ATenConfig.cmake"
  DESTINATION "${AT_INSTALL_SHARE_DIR}/cmake/ATen")
570
# Collect the headers to install; mobile builds install a reduced set.
set(INSTALL_HEADERS ${base_h} ${ATen_CORE_HEADERS} ${native_nested_h} ${ATen_TRANSFORMER_HEADERS})
if(NOT INTERN_BUILD_MOBILE)
  list(APPEND INSTALL_HEADERS ${native_h} ${native_cpu_h} ${native_ao_sparse_h} ${native_quantized_h} ${cuda_h} ${native_cuda_h} ${native_hip_h} ${cudnn_h} ${hip_h} ${xpu_h} ${mps_h} ${native_mps_h} ${native_utils_h} ${miopen_h})
  # Metal
  if(USE_PYTORCH_METAL_EXPORT)
    # Add files needed from exporting metal models(optimized_for_mobile)
    list(APPEND INSTALL_HEADERS ${metal_h} ${metal_prepack_h})
  elseif(APPLE AND USE_PYTORCH_METAL)
    # Needed by Metal kernels
    list(APPEND INSTALL_HEADERS ${metal_h} ${native_metal_h})
  else()
    list(APPEND INSTALL_HEADERS ${metal_h})
  endif()
else()
  if(IOS AND USE_PYTORCH_METAL)
      list(APPEND INSTALL_HEADERS ${metal_h} ${native_metal_h})
  else()
      list(APPEND INSTALL_HEADERS ${metal_h} ${metal_prepack_h})
  endif()
endif()
591
# Install each header under AT_INSTALL_INCLUDE_DIR, preserving its relative
# directory (rooted at "ATen/" for headers in this source tree).
# https://stackoverflow.com/questions/11096471/how-can-i-install-a-hierarchy-of-files-using-cmake
foreach(HEADER  ${INSTALL_HEADERS})
  string(REPLACE "${CMAKE_CURRENT_SOURCE_DIR}/" "ATen/" HEADER_SUB ${HEADER})
  string(REPLACE "${Torch_SOURCE_DIR}/" "" HEADER_SUB ${HEADER_SUB})
  get_filename_component(DIR ${HEADER_SUB} DIRECTORY)
  install(FILES ${HEADER} DESTINATION "${AT_INSTALL_INCLUDE_DIR}/${DIR}")
endforeach()

# TODO: Install hip_generated_headers when we have it
foreach(HEADER ${generated_headers} ${cuda_generated_headers})
  # NB: Assumed to be flat
  install(FILES ${HEADER} DESTINATION ${AT_INSTALL_INCLUDE_DIR}/ATen)
endforeach()
605
# Informational output; use STATUS so it goes to stdout as progress
# messages rather than to stderr as important notices.
message(STATUS "AT_INSTALL_INCLUDE_DIR ${AT_INSTALL_INCLUDE_DIR}/ATen/core")
foreach(HEADER ${core_generated_headers})
  message(STATUS "core header install: ${HEADER}")
  install(FILES ${HEADER} DESTINATION ${AT_INSTALL_INCLUDE_DIR}/ATen/core)
endforeach()

install(FILES ${ops_h} ${ops_generated_headers}
  DESTINATION ${AT_INSTALL_INCLUDE_DIR}/ATen/ops)
install(FILES ${CMAKE_BINARY_DIR}/aten/src/ATen/Declarations.yaml
  DESTINATION ${AT_INSTALL_SHARE_DIR}/ATen)
616
# Register the ATen test suite unless it is explicitly disabled or we are
# building the lite interpreter.
if(ATEN_NO_TEST)
  message("disable test because ATEN_NO_TEST is set")
elseif(BUILD_LITE_INTERPRETER)
  message("disable aten test when BUILD_LITE_INTERPRETER is enabled")
else()
  add_subdirectory(test)
endif()

# Mobile benchmark sources (single append instead of one per file).
list(APPEND ATen_MOBILE_BENCHMARK_SRCS
  ${CMAKE_CURRENT_SOURCE_DIR}/benchmarks/tensor_add.cpp
  ${CMAKE_CURRENT_SOURCE_DIR}/benchmarks/quantize_per_channel.cpp
  ${CMAKE_CURRENT_SOURCE_DIR}/benchmarks/stateful_conv1d.cpp)
631
# Pass source, includes, and libs to parent
set(ATen_CORE_SRCS ${ATen_CORE_SRCS} PARENT_SCOPE)
set(ATen_CPU_SRCS ${ATen_CPU_SRCS} PARENT_SCOPE)
set(ATen_XPU_SRCS ${ATen_XPU_SRCS} PARENT_SCOPE)
set(ATen_CUDA_CU_SRCS ${ATen_CUDA_CU_SRCS} PARENT_SCOPE)
set(ATen_CUDA_CPP_SRCS ${ATen_CUDA_CPP_SRCS} PARENT_SCOPE)
set(ATen_CUDA_LINALG_SRCS ${ATen_CUDA_LINALG_SRCS} PARENT_SCOPE)
set(ATen_CUDA_SRCS_W_SORT_BY_KEY ${ATen_CUDA_SRCS_W_SORT_BY_KEY} PARENT_SCOPE)
set(ATen_CUDA_CU_SRCS_W_SORT_BY_KEY ${ATen_CUDA_CU_SRCS_W_SORT_BY_KEY} PARENT_SCOPE)
set(ATen_NVRTC_STUB_SRCS ${ATen_NVRTC_STUB_SRCS} PARENT_SCOPE)
set(ATen_HIP_SRCS ${ATen_HIP_SRCS} PARENT_SCOPE)
set(ATen_MPS_SRCS ${ATen_MPS_SRCS} PARENT_SCOPE)
# (a second, identical export of ATen_XPU_SRCS was removed here — it was a
# duplicate of the one above)
set(ATen_QUANTIZED_SRCS ${ATen_QUANTIZED_SRCS} PARENT_SCOPE)
set(ATen_CPU_TEST_SRCS ${ATen_CPU_TEST_SRCS} PARENT_SCOPE)
set(ATen_CUDA_TEST_SRCS ${ATen_CUDA_TEST_SRCS} PARENT_SCOPE)
set(ATen_XPU_TEST_SRCS ${ATen_XPU_TEST_SRCS} PARENT_SCOPE)
set(ATen_CORE_TEST_SRCS ${ATen_CORE_TEST_SRCS} PARENT_SCOPE)
set(ATen_HIP_TEST_SRCS ${ATen_HIP_TEST_SRCS} PARENT_SCOPE)
set(ATen_VULKAN_TEST_SRCS ${ATen_VULKAN_TEST_SRCS} PARENT_SCOPE)
set(ATen_MOBILE_BENCHMARK_SRCS ${ATen_MOBILE_BENCHMARK_SRCS} PARENT_SCOPE)
set(ATen_MOBILE_TEST_SRCS ${ATen_MOBILE_TEST_SRCS} ${ATen_VULKAN_TEST_SRCS} PARENT_SCOPE)
set(ATen_VEC_TEST_SRCS  ${ATen_VEC_TEST_SRCS} PARENT_SCOPE)
set(ATen_QUANTIZED_TEST_SRCS ${ATen_QUANTIZED_TEST_SRCS} PARENT_SCOPE)
set(ATen_MPS_TEST_SRCS ${ATen_MPS_TEST_SRCS} PARENT_SCOPE)
set(ATen_CPU_INCLUDE ${ATen_CPU_INCLUDE} PARENT_SCOPE)
set(ATen_THIRD_PARTY_INCLUDE ${ATen_THIRD_PARTY_INCLUDE} PARENT_SCOPE)
set(ATen_CUDA_INCLUDE ${ATen_CUDA_INCLUDE} PARENT_SCOPE)
set(ATen_HIP_INCLUDE ${ATen_HIP_INCLUDE} PARENT_SCOPE)
set(ATen_XPU_INCLUDE ${ATen_XPU_INCLUDE} PARENT_SCOPE)
set(ATen_VULKAN_INCLUDE ${ATen_VULKAN_INCLUDE} PARENT_SCOPE)
set(ATen_CPU_DEPENDENCY_LIBS ${ATen_CPU_DEPENDENCY_LIBS} PARENT_SCOPE)
set(ATen_CUDA_DEPENDENCY_LIBS ${ATen_CUDA_DEPENDENCY_LIBS} PARENT_SCOPE)
set(ATen_XPU_DEPENDENCY_LIBS ${ATen_XPU_DEPENDENCY_LIBS} PARENT_SCOPE)
set(ATen_HIP_DEPENDENCY_LIBS ${ATen_HIP_DEPENDENCY_LIBS} PARENT_SCOPE)
set(FLASH_ATTENTION_CUDA_SOURCES ${FLASH_ATTENTION_CUDA_SOURCES} PARENT_SCOPE)
set(MEM_EFF_ATTENTION_CUDA_SOURCES ${MEM_EFF_ATTENTION_CUDA_SOURCES} PARENT_SCOPE)
set(ATen_ATTENTION_KERNEL_SRCS ${ATen_ATTENTION_KERNEL_SRCS} PARENT_SCOPE)
670