# ---[ cuda

# Poor man's include guard
if(TARGET torch::cudart)
  return()
endif()

# sccache is only supported in CMake master and not in the newest official
# release (3.11.3) yet. Hence we need our own Modules_CUDA_fix to enable sccache.
list(APPEND CMAKE_MODULE_PATH ${CMAKE_CURRENT_LIST_DIR}/../Modules_CUDA_fix)

# We don't want to statically link cudart, because we rely on its dynamic linkage in
# python (follow along torch/cuda/__init__.py and usage of cudaGetErrorName).
# Technically, we can link cudart here statically, and link libtorch_python.so
# to a dynamic libcudart.so, but that's just wasteful.
# However, on Windows, if this one gets switched off, the error "cuda: unknown error"
# will be raised when running the following code:
# >>> import torch
# >>> torch.cuda.is_available()
# >>> torch.cuda.current_device()
# More details can be found in the following links.
# https://github.com/pytorch/pytorch/issues/20635
# https://github.com/pytorch/pytorch/issues/17108
if(NOT MSVC)
  set(CUDA_USE_STATIC_CUDA_RUNTIME OFF CACHE INTERNAL "")
endif()

# Find CUDA.
find_package(CUDA)
if(NOT CUDA_FOUND)
  message(WARNING
    "Caffe2: CUDA cannot be found. Depending on whether you are building "
    "Caffe2 or a Caffe2 dependent library, the next warning / error will "
    "give you more info.")
  set(CAFFE2_USE_CUDA OFF)
  return()
endif()

# Enable CUDA language support
set(CUDAToolkit_ROOT "${CUDA_TOOLKIT_ROOT_DIR}")
# Pass clang as host compiler, which according to the docs
# Must be done before CUDA language is enabled, see
# https://cmake.org/cmake/help/v3.15/variable/CMAKE_CUDA_HOST_COMPILER.html
if("${CMAKE_CXX_COMPILER_ID}" MATCHES "Clang")
  set(CMAKE_CUDA_HOST_COMPILER "${CMAKE_C_COMPILER}")
endif()
enable_language(CUDA)
if("X${CMAKE_CUDA_STANDARD}" STREQUAL "X")
  set(CMAKE_CUDA_STANDARD ${CMAKE_CXX_STANDARD})
endif()
set(CMAKE_CUDA_STANDARD_REQUIRED ON)

# CMP0074 - find_package will respect <PackageName>_ROOT variables
cmake_policy(PUSH)
if(CMAKE_VERSION VERSION_GREATER_EQUAL 3.12.0)
  cmake_policy(SET CMP0074 NEW)
endif()

find_package(CUDAToolkit REQUIRED)

cmake_policy(POP)

if(NOT CMAKE_CUDA_COMPILER_VERSION VERSION_EQUAL CUDAToolkit_VERSION)
  message(FATAL_ERROR "Found two conflicting CUDA versions:\n"
                      "V${CMAKE_CUDA_COMPILER_VERSION} in '${CUDA_INCLUDE_DIRS}' and\n"
                      "V${CUDAToolkit_VERSION} in '${CUDAToolkit_INCLUDE_DIRS}'")
endif()

message(STATUS "Caffe2: CUDA detected: " ${CUDA_VERSION})
message(STATUS "Caffe2: CUDA nvcc is: " ${CUDA_NVCC_EXECUTABLE})
message(STATUS "Caffe2: CUDA toolkit directory: " ${CUDA_TOOLKIT_ROOT_DIR})
if(CUDA_VERSION VERSION_LESS 11.0)
  message(FATAL_ERROR "PyTorch requires CUDA 11.0 or above.")
endif()

if(CUDA_FOUND)
  # Sometimes, we may mismatch nvcc with the CUDA headers we are
  # compiling with, e.g., if a ccache nvcc is fed to us by CUDA_NVCC_EXECUTABLE
  # but the PATH is not consistent with CUDA_HOME. It's better safe
  # than sorry: make sure everything is consistent.
  if(MSVC AND CMAKE_GENERATOR MATCHES "Visual Studio")
    # When using Visual Studio, it attempts to lock the whole binary dir when
    # `try_run` is called, which will cause the build to fail.
    string(RANDOM BUILD_SUFFIX)
    set(PROJECT_RANDOM_BINARY_DIR "${PROJECT_BINARY_DIR}/${BUILD_SUFFIX}")
  else()
    set(PROJECT_RANDOM_BINARY_DIR "${PROJECT_BINARY_DIR}")
  endif()
  set(file "${PROJECT_BINARY_DIR}/detect_cuda_version.cc")
  file(WRITE ${file} ""
    "#include <cuda.h>\n"
    "#include <cstdio>\n"
    "int main() {\n"
    "  printf(\"%d.%d\", CUDA_VERSION / 1000, (CUDA_VERSION / 10) % 100);\n"
    "  return 0;\n"
    "}\n"
    )
  if(NOT CMAKE_CROSSCOMPILING)
    try_run(run_result compile_result ${PROJECT_RANDOM_BINARY_DIR} ${file}
      CMAKE_FLAGS "-DINCLUDE_DIRECTORIES=${CUDA_INCLUDE_DIRS}"
      LINK_LIBRARIES ${CUDA_LIBRARIES}
      RUN_OUTPUT_VARIABLE cuda_version_from_header
      COMPILE_OUTPUT_VARIABLE output_var
      )
    if(NOT compile_result)
      message(FATAL_ERROR "Caffe2: Couldn't determine version from header: " ${output_var})
    endif()
    message(STATUS "Caffe2: Header version is: " ${cuda_version_from_header})
    # Quoted comparison so empty/odd values never trigger CMP0054 re-dereference.
    if(NOT "${cuda_version_from_header}" STREQUAL "${CUDA_VERSION_STRING}")
      # Force CUDA to be processed for again next time
      # TODO: I'm not sure if this counts as an implementation detail of
      # FindCUDA
      # BUGFIX: was `set(${cuda_version_from_findcuda} ...)`, which dereferences
      # an unset variable and thus never assigned it, leaving the error message
      # below with an empty FindCUDA version.
      set(cuda_version_from_findcuda "${CUDA_VERSION_STRING}")
      unset(CUDA_TOOLKIT_ROOT_DIR_INTERNAL CACHE)
      # Not strictly necessary, but for good luck.
      unset(CUDA_VERSION CACHE)
      # Error out
      message(FATAL_ERROR "FindCUDA says CUDA version is ${cuda_version_from_findcuda} (usually determined by nvcc), "
        "but the CUDA headers say the version is ${cuda_version_from_header}. This often occurs "
        "when you set both CUDA_HOME and CUDA_NVC_EXECUTABLE to "
        "non-standard locations, without also setting PATH to point to the correct nvcc. "
        "Perhaps, try re-running this command again with PATH=${CUDA_TOOLKIT_ROOT_DIR}/bin:$PATH. "
        "See above log messages for more diagnostics, and see https://github.com/pytorch/pytorch/issues/8092 for more details.")
    endif()
  endif()
endif()

# ---[ CUDA libraries wrapper

# find libnvrtc.so
set(CUDA_NVRTC_LIB "${CUDA_nvrtc_LIBRARY}" CACHE FILEPATH "")
if(CUDA_NVRTC_LIB AND NOT CUDA_NVRTC_SHORTHASH)
  find_package(Python COMPONENTS Interpreter)
  # BUGFIX: execute_process() does not resolve imported targets such as
  # Python::Interpreter into executable paths (only add_custom_command() and
  # friends do that), so the original literally tried to run a program named
  # "Python::Interpreter" and always fell into the failure branch. Use the
  # Python_EXECUTABLE variable provided by FindPython instead.
  execute_process(
    COMMAND "${Python_EXECUTABLE}" -c
    "import hashlib;hash=hashlib.sha256();hash.update(open('${CUDA_NVRTC_LIB}','rb').read());print(hash.hexdigest()[:8])"
    RESULT_VARIABLE _retval
    OUTPUT_VARIABLE CUDA_NVRTC_SHORTHASH)
  if(NOT _retval EQUAL 0)
    message(WARNING "Failed to compute shorthash for libnvrtc.so")
    set(CUDA_NVRTC_SHORTHASH "XXXXXXXX")
  else()
    string(STRIP "${CUDA_NVRTC_SHORTHASH}" CUDA_NVRTC_SHORTHASH)
    message(STATUS "${CUDA_NVRTC_LIB} shorthash is ${CUDA_NVRTC_SHORTHASH}")
  endif()
endif()

# Create new style imported libraries.
# Several of these libraries have a hardcoded path if CAFFE2_STATIC_LINK_CUDA
# is set. This path is where sane CUDA installations have their static
# libraries installed. This flag should only be used for binary builds, so
# end-users should never have this flag set.
# cuda
# Thin interface target wrapping the CUDA driver API (libcuda).
add_library(caffe2::cuda INTERFACE IMPORTED)
set_property(
    TARGET caffe2::cuda PROPERTY INTERFACE_LINK_LIBRARIES
    CUDA::cuda_driver)

# cudart
# Static vs. dynamic runtime selected by CAFFE2_STATIC_LINK_CUDA (binary builds only).
add_library(torch::cudart INTERFACE IMPORTED)
if(CAFFE2_STATIC_LINK_CUDA)
    set_property(
        TARGET torch::cudart PROPERTY INTERFACE_LINK_LIBRARIES
        CUDA::cudart_static)
else()
    set_property(
        TARGET torch::cudart PROPERTY INTERFACE_LINK_LIBRARIES
        CUDA::cudart)
endif()

# nvToolsExt
# BUGFIX: find_package_handle_standard_args() is defined by the
# FindPackageHandleStandardArgs module; include it explicitly instead of
# relying on an earlier find module having already pulled it in.
include(FindPackageHandleStandardArgs)
find_path(nvtx3_dir NAMES nvtx3 PATHS "${PROJECT_SOURCE_DIR}/third_party/NVTX/c/include" NO_DEFAULT_PATH)
find_package_handle_standard_args(nvtx3 DEFAULT_MSG nvtx3_dir)
if(nvtx3_FOUND)
  # Header-only NVTX3 from third_party; consumers compile with TORCH_CUDA_USE_NVTX3.
  add_library(torch::nvtx3 INTERFACE IMPORTED)
  target_include_directories(torch::nvtx3 INTERFACE "${nvtx3_dir}")
  target_compile_definitions(torch::nvtx3 INTERFACE TORCH_CUDA_USE_NVTX3)
else()
  message(WARNING "Cannot find NVTX3, find old NVTX instead")
  add_library(torch::nvtoolsext INTERFACE IMPORTED)
  set_property(TARGET torch::nvtoolsext PROPERTY INTERFACE_LINK_LIBRARIES CUDA::nvToolsExt)
endif()


# cublas
add_library(caffe2::cublas INTERFACE IMPORTED)
if(CAFFE2_STATIC_LINK_CUDA AND NOT WIN32)
  set_property(
      TARGET caffe2::cublas PROPERTY INTERFACE_LINK_LIBRARIES
      # NOTE: cublas is always linked dynamically
      CUDA::cublas CUDA::cublasLt)
  set_property(
      TARGET caffe2::cublas APPEND PROPERTY INTERFACE_LINK_LIBRARIES
      CUDA::cudart_static rt)
else()
  set_property(
      TARGET caffe2::cublas PROPERTY INTERFACE_LINK_LIBRARIES
      CUDA::cublas CUDA::cublasLt)
endif()

# cudnn interface
# static linking is handled by USE_STATIC_CUDNN environment variable
if(CAFFE2_USE_CUDNN)
  if(USE_STATIC_CUDNN)
    set(CUDNN_STATIC ON CACHE BOOL "")
  else()
    set(CUDNN_STATIC OFF CACHE BOOL "")
  endif()

  find_package(CUDNN)

  if(NOT CUDNN_FOUND)
    message(WARNING
      "Cannot find cuDNN library. Turning the option off")
    set(CAFFE2_USE_CUDNN OFF)
  else()
    if(CUDNN_VERSION VERSION_LESS "8.1.0")
      message(FATAL_ERROR "PyTorch requires cuDNN 8.1 and above.")
    endif()
  endif()

  # NOTE(review): torch::cudnn is created even when cuDNN was not found above
  # (CUDNN_INCLUDE_PATH/CUDNN_LIBRARY_PATH would then be empty) — presumably so
  # downstream code can always reference the target; confirm before tightening.
  add_library(torch::cudnn INTERFACE IMPORTED)
  target_include_directories(torch::cudnn INTERFACE ${CUDNN_INCLUDE_PATH})
  if(CUDNN_STATIC AND NOT WIN32)
    target_link_options(torch::cudnn INTERFACE
        "-Wl,--exclude-libs,libcudnn_static.a")
  else()
    target_link_libraries(torch::cudnn INTERFACE ${CUDNN_LIBRARY_PATH})
  endif()
else()
  message(STATUS "USE_CUDNN is set to 0. Compiling without cuDNN support")
endif()

if(CAFFE2_USE_CUSPARSELT)
  find_package(CUSPARSELT)

  if(NOT CUSPARSELT_FOUND)
    message(WARNING
      "Cannot find cuSPARSELt library. Turning the option off")
    set(CAFFE2_USE_CUSPARSELT OFF)
  else()
    add_library(torch::cusparselt INTERFACE IMPORTED)
    target_include_directories(torch::cusparselt INTERFACE ${CUSPARSELT_INCLUDE_PATH})
    target_link_libraries(torch::cusparselt INTERFACE ${CUSPARSELT_LIBRARY_PATH})
  endif()
else()
  message(STATUS "USE_CUSPARSELT is set to 0. Compiling without cuSPARSELt support")
endif()

if(USE_CUDSS)
  find_package(CUDSS)

  if(NOT CUDSS_FOUND)
    message(WARNING
      "Cannot find CUDSS library. Turning the option off")
    set(USE_CUDSS OFF)
  else()
    add_library(torch::cudss INTERFACE IMPORTED)
    target_include_directories(torch::cudss INTERFACE ${CUDSS_INCLUDE_PATH})
    target_link_libraries(torch::cudss INTERFACE ${CUDSS_LIBRARY_PATH})
  endif()
else()
  message(STATUS "USE_CUDSS is set to 0. Compiling without cuDSS support")
endif()

# cufile
if(CAFFE2_USE_CUFILE)
  add_library(torch::cufile INTERFACE IMPORTED)
  if(CAFFE2_STATIC_LINK_CUDA AND NOT WIN32)
    set_property(
        TARGET torch::cufile PROPERTY INTERFACE_LINK_LIBRARIES
        CUDA::cuFile_static)
  else()
    set_property(
        TARGET torch::cufile PROPERTY INTERFACE_LINK_LIBRARIES
        CUDA::cuFile)
  endif()
else()
  message(STATUS "USE_CUFILE is set to 0. Compiling without cuFile support")
endif()

# curand
add_library(caffe2::curand INTERFACE IMPORTED)
if(CAFFE2_STATIC_LINK_CUDA AND NOT WIN32)
  set_property(
      TARGET caffe2::curand PROPERTY INTERFACE_LINK_LIBRARIES
      CUDA::curand_static)
else()
  set_property(
      TARGET caffe2::curand PROPERTY INTERFACE_LINK_LIBRARIES
      CUDA::curand)
endif()

# cufft
add_library(caffe2::cufft INTERFACE IMPORTED)
if(CAFFE2_STATIC_LINK_CUDA AND NOT WIN32)
  set_property(
      TARGET caffe2::cufft PROPERTY INTERFACE_LINK_LIBRARIES
      # nocallback variant avoids dragging in the cufft callback machinery when static.
      CUDA::cufft_static_nocallback)
else()
  set_property(
      TARGET caffe2::cufft PROPERTY INTERFACE_LINK_LIBRARIES
      CUDA::cufft)
endif()

# nvrtc
# nvrtc needs the driver API at runtime, hence the caffe2::cuda dependency.
add_library(caffe2::nvrtc INTERFACE IMPORTED)
set_property(
    TARGET caffe2::nvrtc PROPERTY INTERFACE_LINK_LIBRARIES
    CUDA::nvrtc caffe2::cuda)

# Add onnx namespace definition to nvcc
if(ONNX_NAMESPACE)
  list(APPEND CUDA_NVCC_FLAGS "-DONNX_NAMESPACE=${ONNX_NAMESPACE}")
else()
  list(APPEND CUDA_NVC_FLAGS_PLACEHOLDER_DO_NOT_USE "")
  list(APPEND CUDA_NVCC_FLAGS "-DONNX_NAMESPACE=onnx_c2")
endif()

# Don't activate VC env again for Ninja generators with MSVC on Windows if CUDAHOSTCXX is not defined
# by adding --use-local-env.
if(MSVC AND CMAKE_GENERATOR STREQUAL "Ninja" AND NOT DEFINED ENV{CUDAHOSTCXX})
  list(APPEND CUDA_NVCC_FLAGS "--use-local-env")
endif()

# setting nvcc arch flags
torch_cuda_get_nvcc_gencode_flag(NVCC_FLAGS_EXTRA)
# CMake 3.18 adds integrated support for architecture selection, but we can't rely on it
set(CMAKE_CUDA_ARCHITECTURES OFF)
list(APPEND CUDA_NVCC_FLAGS ${NVCC_FLAGS_EXTRA})
message(STATUS "Added CUDA NVCC flags for: ${NVCC_FLAGS_EXTRA}")

# disable some nvcc diagnostic that appears in boost, glog, gflags, opencv, etc.
foreach(diag cc_clobber_ignored
             field_without_dll_interface
             base_class_has_different_dll_interface
             dll_interface_conflict_none_assumed
             dll_interface_conflict_dllexport_assumed
             bad_friend_decl)
  list(APPEND SUPPRESS_WARNING_FLAGS --diag_suppress=${diag})
endforeach()
# -Xcudafe takes a single comma-separated argument, so join the list with ",".
string(REPLACE ";" "," SUPPRESS_WARNING_FLAGS "${SUPPRESS_WARNING_FLAGS}")
list(APPEND CUDA_NVCC_FLAGS -Xcudafe ${SUPPRESS_WARNING_FLAGS})

# Keep host -Werror out of the flags propagated to nvcc.
set(CUDA_PROPAGATE_HOST_FLAGS_BLOCKLIST "-Werror")
if(MSVC)
  list(APPEND CUDA_NVCC_FLAGS "--Werror" "cross-execution-space-call")
  list(APPEND CUDA_NVCC_FLAGS "--no-host-device-move-forward")
endif()

# Debug and Release symbol support
if(MSVC)
  # BUGFIX: was `if(${CAFFE2_USE_MSVC_STATIC_RUNTIME})` — when the variable is
  # undefined that expands to `if()`, a hard configure error. Let if()
  # auto-dereference the variable name instead.
  if(CAFFE2_USE_MSVC_STATIC_RUNTIME)
    string(APPEND CMAKE_CUDA_FLAGS_DEBUG " -Xcompiler /MTd")
    string(APPEND CMAKE_CUDA_FLAGS_MINSIZEREL " -Xcompiler /MT")
    string(APPEND CMAKE_CUDA_FLAGS_RELEASE " -Xcompiler /MT")
    string(APPEND CMAKE_CUDA_FLAGS_RELWITHDEBINFO " -Xcompiler /MT")
  else()
    string(APPEND CMAKE_CUDA_FLAGS_DEBUG " -Xcompiler /MDd")
    string(APPEND CMAKE_CUDA_FLAGS_MINSIZEREL " -Xcompiler /MD")
    string(APPEND CMAKE_CUDA_FLAGS_RELEASE " -Xcompiler /MD")
    string(APPEND CMAKE_CUDA_FLAGS_RELWITHDEBINFO " -Xcompiler /MD")
  endif()
  # /Zi without /FS makes concurrent PDB writes fail; add /FS when debug info is on.
  if(CUDA_NVCC_FLAGS MATCHES "Zi")
    list(APPEND CUDA_NVCC_FLAGS "-Xcompiler" "-FS")
  endif()
elseif(CUDA_DEVICE_DEBUG)
  list(APPEND CUDA_NVCC_FLAGS "-g" "-G")  # -G enables device code debugging symbols
endif()

# Set expt-relaxed-constexpr to suppress Eigen warnings
list(APPEND CUDA_NVCC_FLAGS "--expt-relaxed-constexpr")

# Set expt-extended-lambda to support lambda on device
list(APPEND CUDA_NVCC_FLAGS "--expt-extended-lambda")

# Fold the accumulated flags into CMAKE_CUDA_FLAGS, rejecting any entry that
# embeds a space (it would be mangled when the string is re-split).
foreach(FLAG ${CUDA_NVCC_FLAGS})
  string(FIND "${FLAG}" " " flag_space_position)
  if(NOT flag_space_position EQUAL -1)
    message(FATAL_ERROR "Found spaces in CUDA_NVCC_FLAGS entry '${FLAG}'")
  endif()
  string(APPEND CMAKE_CUDA_FLAGS " ${FLAG}")
endforeach()