xref: /aosp_15_r20/external/pytorch/cmake/Modules_CUDA_fix/upstream/FindCUDA.cmake (revision da0073e96a02ea20f0ac840b70461e3646d07c45)
1#.rst:
2# FindCUDA
3# --------
4#
5# .. note::
6#
7#   The FindCUDA module has been superseded by first-class support
8#   for the CUDA language in CMake.  It is no longer necessary to
9#   use this module or call ``find_package(CUDA)``.  This module
10#   now exists only for compatibility with projects that have not
11#   been ported.
12#
13#   Instead, list ``CUDA`` among the languages named in the top-level
14#   call to the :command:`project` command, or call the
15#   :command:`enable_language` command with ``CUDA``.
16#   Then one can add CUDA (``.cu``) sources to programs directly
17#   in calls to :command:`add_library` and :command:`add_executable`.
18#
19# Tools for building CUDA C files: libraries and build dependencies.
20#
21# This script locates the NVIDIA CUDA C tools.  It should work on Linux,
22# Windows, and macOS and should be reasonably up to date with CUDA C
23# releases.
24#
25# This script makes use of the standard :command:`find_package` arguments of
26# ``<VERSION>``, ``REQUIRED`` and ``QUIET``.  ``CUDA_FOUND`` will report if an
27# acceptable version of CUDA was found.
28#
29# The script will prompt the user to specify ``CUDA_TOOLKIT_ROOT_DIR`` if
30# the prefix cannot be determined by the location of nvcc in the system
31# path and ``REQUIRED`` is specified to :command:`find_package`.  To use
32# a different installed version of the toolkit set the environment variable
33# ``CUDA_BIN_PATH`` before running cmake (e.g.
34# ``CUDA_BIN_PATH=/usr/local/cuda1.0`` instead of the default
35# ``/usr/local/cuda``) or set ``CUDA_TOOLKIT_ROOT_DIR`` after configuring.  If
36# you change the value of ``CUDA_TOOLKIT_ROOT_DIR``, various components that
37# depend on the path will be relocated.
38#
39# It might be necessary to set ``CUDA_TOOLKIT_ROOT_DIR`` manually on certain
40# platforms, or to use a CUDA runtime not installed in the default
41# location.  In newer versions of the toolkit the CUDA library is
42# included with the graphics driver -- be sure that the driver version
43# matches what is needed by the CUDA runtime version.
44#
45# The following variables affect the behavior of the macros in the
# script (in alphabetical order).  Note that any of these flags can be
47# changed multiple times in the same directory before calling
48# ``CUDA_ADD_EXECUTABLE``, ``CUDA_ADD_LIBRARY``, ``CUDA_COMPILE``,
49# ``CUDA_COMPILE_PTX``, ``CUDA_COMPILE_FATBIN``, ``CUDA_COMPILE_CUBIN``
50# or ``CUDA_WRAP_SRCS``::
51#
52#   CUDA_64_BIT_DEVICE_CODE (Default matches host bit size)
53#   -- Set to ON to compile for 64 bit device code, OFF for 32 bit device code.
54#      Note that making this different from the host code when generating object
55#      or C files from CUDA code just won't work, because size_t gets defined by
56#      nvcc in the generated source.  If you compile to PTX and then load the
57#      file yourself, you can mix bit sizes between device and host.
58#
59#   CUDA_ATTACH_VS_BUILD_RULE_TO_CUDA_FILE (Default ON)
60#   -- Set to ON if you want the custom build rule to be attached to the source
61#      file in Visual Studio.  Turn OFF if you add the same cuda file to multiple
62#      targets.
63#
64#      This allows the user to build the target from the CUDA file; however, bad
65#      things can happen if the CUDA source file is added to multiple targets.
66#      When performing parallel builds it is possible for the custom build
67#      command to be run more than once and in parallel causing cryptic build
68#      errors.  VS runs the rules for every source file in the target, and a
69#      source can have only one rule no matter how many projects it is added to.
70#      When the rule is run from multiple targets race conditions can occur on
71#      the generated file.  Eventually everything will get built, but if the user
72#      is unaware of this behavior, there may be confusion.  It would be nice if
73#      this script could detect the reuse of source files across multiple targets
74#      and turn the option off for the user, but no good solution could be found.
75#
76#   CUDA_BUILD_CUBIN (Default OFF)
#   -- Set to ON to enable an extra compilation pass with the -cubin option in
78#      Device mode. The output is parsed and register, shared memory usage is
79#      printed during build.
80#
81#   CUDA_BUILD_EMULATION (Default OFF for device mode)
82#   -- Set to ON for Emulation mode. -D_DEVICEEMU is defined for CUDA C files
83#      when CUDA_BUILD_EMULATION is TRUE.
84#
85#   CUDA_LINK_LIBRARIES_KEYWORD (Default "")
86#    -- The <PRIVATE|PUBLIC|INTERFACE> keyword to use for internal
87#       target_link_libraries calls. The default is to use no keyword which
#       uses the old "plain" form of target_link_libraries. Note that it matters
89#       because whatever is used inside the FindCUDA module must also be used
90#       outside - the two forms of target_link_libraries cannot be mixed.
91#
92#   CUDA_GENERATED_OUTPUT_DIR (Default CMAKE_CURRENT_BINARY_DIR)
93#   -- Set to the path you wish to have the generated files placed.  If it is
94#      blank output files will be placed in CMAKE_CURRENT_BINARY_DIR.
95#      Intermediate files will always be placed in
96#      CMAKE_CURRENT_BINARY_DIR/CMakeFiles.
97#
98#   CUDA_HOST_COMPILATION_CPP (Default ON)
99#   -- Set to OFF for C compilation of host code.
100#
101#   CUDA_HOST_COMPILER (Default CMAKE_C_COMPILER)
102#   -- Set the host compiler to be used by nvcc.  Ignored if -ccbin or
103#      --compiler-bindir is already present in the CUDA_NVCC_FLAGS or
104#      CUDA_NVCC_FLAGS_<CONFIG> variables.  For Visual Studio targets,
105#      the host compiler is constructed with one or more visual studio macros
106#      such as $(VCInstallDir), that expands out to the path when
107#      the command is run from within VS.
108#      If the CUDAHOSTCXX environment variable is set it will
109#      be used as the default.
110#
111#   CUDA_NVCC_FLAGS
112#   CUDA_NVCC_FLAGS_<CONFIG>
113#   -- Additional NVCC command line arguments.  NOTE: multiple arguments must be
114#      semi-colon delimited (e.g. --compiler-options;-Wall)
115#
116#   CUDA_PROPAGATE_HOST_FLAGS (Default ON)
117#   -- Set to ON to propagate CMAKE_{C,CXX}_FLAGS and their configuration
118#      dependent counterparts (e.g. CMAKE_C_FLAGS_DEBUG) automatically to the
119#      host compiler through nvcc's -Xcompiler flag.  This helps make the
120#      generated host code match the rest of the system better.  Sometimes
121#      certain flags give nvcc problems, and this will help you turn the flag
122#      propagation off.  This does not affect the flags supplied directly to nvcc
123#      via CUDA_NVCC_FLAGS or through the OPTION flags specified through
124#      CUDA_ADD_LIBRARY, CUDA_ADD_EXECUTABLE, or CUDA_WRAP_SRCS.  Flags used for
125#      shared library compilation are not affected by this flag.
126#
127#   CUDA_PROPAGATE_HOST_FLAGS_BLACKLIST (Default "")
128#   -- A list containing the host flags that should not be propagated when
129#      CUDA_PROPAGATE_HOST_FLAGS is ON.
130#
131#   CUDA_SEPARABLE_COMPILATION (Default OFF)
132#   -- If set this will enable separable compilation for all CUDA runtime object
133#      files.  If used outside of CUDA_ADD_EXECUTABLE and CUDA_ADD_LIBRARY
134#      (e.g. calling CUDA_WRAP_SRCS directly),
135#      CUDA_COMPUTE_SEPARABLE_COMPILATION_OBJECT_FILE_NAME and
136#      CUDA_LINK_SEPARABLE_COMPILATION_OBJECTS should be called.
137#
138#   CUDA_SOURCE_PROPERTY_FORMAT
139#   -- If this source file property is set, it can override the format specified
140#      to CUDA_WRAP_SRCS (OBJ, PTX, CUBIN, or FATBIN).  If an input source file
141#      is not a .cu file, setting this file will cause it to be treated as a .cu
142#      file. See documentation for set_source_files_properties on how to set
143#      this property.
144#
145#   CUDA_USE_STATIC_CUDA_RUNTIME (Default ON)
146#   -- When enabled the static version of the CUDA runtime library will be used
147#      in CUDA_LIBRARIES.  If the version of CUDA configured doesn't support
148#      this option, then it will be silently disabled.
149#
150#   CUDA_VERBOSE_BUILD (Default OFF)
151#   -- Set to ON to see all the commands used when building the CUDA file.  When
152#      using a Makefile generator the value defaults to VERBOSE (run make
153#      VERBOSE=1 to see output), although setting CUDA_VERBOSE_BUILD to ON will
154#      always print the output.
155#
# The script creates the following macros (in alphabetical order)::
157#
158#   CUDA_ADD_CUFFT_TO_TARGET( cuda_target )
159#   -- Adds the cufft library to the target (can be any target).  Handles whether
160#      you are in emulation mode or not.
161#
162#   CUDA_ADD_CUBLAS_TO_TARGET( cuda_target )
163#   -- Adds the cublas library to the target (can be any target).  Handles
164#      whether you are in emulation mode or not.
165#
166#   CUDA_ADD_EXECUTABLE( cuda_target file0 file1 ...
167#                        [WIN32] [MACOSX_BUNDLE] [EXCLUDE_FROM_ALL] [OPTIONS ...] )
168#   -- Creates an executable "cuda_target" which is made up of the files
169#      specified.  All of the non CUDA C files are compiled using the standard
170#      build rules specified by CMAKE and the cuda files are compiled to object
171#      files using nvcc and the host compiler.  In addition CUDA_INCLUDE_DIRS is
172#      added automatically to include_directories().  Some standard CMake target
173#      calls can be used on the target after calling this macro
174#      (e.g. set_target_properties and target_link_libraries), but setting
175#      properties that adjust compilation flags will not affect code compiled by
176#      nvcc.  Such flags should be modified before calling CUDA_ADD_EXECUTABLE,
177#      CUDA_ADD_LIBRARY or CUDA_WRAP_SRCS.
178#
179#   CUDA_ADD_LIBRARY( cuda_target file0 file1 ...
180#                     [STATIC | SHARED | MODULE] [EXCLUDE_FROM_ALL] [OPTIONS ...] )
181#   -- Same as CUDA_ADD_EXECUTABLE except that a library is created.
182#
183#   CUDA_BUILD_CLEAN_TARGET()
184#   -- Creates a convenience target that deletes all the dependency files
185#      generated.  You should make clean after running this target to ensure the
186#      dependency files get regenerated.
187#
188#   CUDA_COMPILE( generated_files file0 file1 ... [STATIC | SHARED | MODULE]
189#                 [OPTIONS ...] )
190#   -- Returns a list of generated files from the input source files to be used
191#      with ADD_LIBRARY or ADD_EXECUTABLE.
192#
193#   CUDA_COMPILE_PTX( generated_files file0 file1 ... [OPTIONS ...] )
194#   -- Returns a list of PTX files generated from the input source files.
195#
196#   CUDA_COMPILE_FATBIN( generated_files file0 file1 ... [OPTIONS ...] )
197#   -- Returns a list of FATBIN files generated from the input source files.
198#
199#   CUDA_COMPILE_CUBIN( generated_files file0 file1 ... [OPTIONS ...] )
200#   -- Returns a list of CUBIN files generated from the input source files.
201#
202#   CUDA_COMPUTE_SEPARABLE_COMPILATION_OBJECT_FILE_NAME( output_file_var
203#                                                        cuda_target
204#                                                        object_files )
205#   -- Compute the name of the intermediate link file used for separable
206#      compilation.  This file name is typically passed into
#      CUDA_LINK_SEPARABLE_COMPILATION_OBJECTS.  output_file_var is produced
#      based on cuda_target and the list of object files that need separable
209#      compilation as specified by object_files.  If the object_files list is
210#      empty, then output_file_var will be empty.  This function is called
211#      automatically for CUDA_ADD_LIBRARY and CUDA_ADD_EXECUTABLE.  Note that
212#      this is a function and not a macro.
213#
214#   CUDA_INCLUDE_DIRECTORIES( path0 path1 ... )
215#   -- Sets the directories that should be passed to nvcc
216#      (e.g. nvcc -Ipath0 -Ipath1 ... ). These paths usually contain other .cu
217#      files.
218#
219#
220#   CUDA_LINK_SEPARABLE_COMPILATION_OBJECTS( output_file_var cuda_target
221#                                            nvcc_flags object_files)
222#   -- Generates the link object required by separable compilation from the given
223#      object files.  This is called automatically for CUDA_ADD_EXECUTABLE and
224#      CUDA_ADD_LIBRARY, but can be called manually when using CUDA_WRAP_SRCS
225#      directly.  When called from CUDA_ADD_LIBRARY or CUDA_ADD_EXECUTABLE the
226#      nvcc_flags passed in are the same as the flags passed in via the OPTIONS
227#      argument.  The only nvcc flag added automatically is the bitness flag as
228#      specified by CUDA_64_BIT_DEVICE_CODE.  Note that this is a function
229#      instead of a macro.
230#
231#   CUDA_SELECT_NVCC_ARCH_FLAGS(out_variable [target_CUDA_architectures])
232#   -- Selects GPU arch flags for nvcc based on target_CUDA_architectures
233#      target_CUDA_architectures : Auto | Common | All | LIST(ARCH_AND_PTX ...)
234#       - "Auto" detects local machine GPU compute arch at runtime.
235#       - "Common" and "All" cover common and entire subsets of architectures
236#      ARCH_AND_PTX : NAME | NUM.NUM | NUM.NUM(NUM.NUM) | NUM.NUM+PTX
237#      NAME: Kepler Maxwell Kepler+Tesla Maxwell+Tegra Pascal Volta Turing
238#      NUM: Any number. Only those pairs are currently accepted by NVCC though:
239#            3.5 3.7 5.0 5.2 5.3 6.0 6.1 6.2 7.0 7.2 7.5
240#      Returns LIST of flags to be added to CUDA_NVCC_FLAGS in ${out_variable}
241#      Additionally, sets ${out_variable}_readable to the resulting numeric list
242#      Example:
243#       CUDA_SELECT_NVCC_ARCH_FLAGS(ARCH_FLAGS 3.0 3.5+PTX 5.2(5.0) Maxwell)
244#        LIST(APPEND CUDA_NVCC_FLAGS ${ARCH_FLAGS})
245#
246#      More info on CUDA architectures: https://en.wikipedia.org/wiki/CUDA
247#      Note that this is a function instead of a macro.
248#
249#   CUDA_WRAP_SRCS ( cuda_target format generated_files file0 file1 ...
250#                    [STATIC | SHARED | MODULE] [OPTIONS ...] )
251#   -- This is where all the magic happens.  CUDA_ADD_EXECUTABLE,
252#      CUDA_ADD_LIBRARY, CUDA_COMPILE, and CUDA_COMPILE_PTX all call this
253#      function under the hood.
254#
255#      Given the list of files (file0 file1 ... fileN) this macro generates
256#      custom commands that generate either PTX or linkable objects (use "PTX" or
257#      "OBJ" for the format argument to switch).  Files that don't end with .cu
258#      or have the HEADER_FILE_ONLY property are ignored.
259#
260#      The arguments passed in after OPTIONS are extra command line options to
261#      give to nvcc.  You can also specify per configuration options by
262#      specifying the name of the configuration followed by the options.  General
263#      options must precede configuration specific options.  Not all
264#      configurations need to be specified, only the ones provided will be used.
265#
266#         OPTIONS -DFLAG=2 "-DFLAG_OTHER=space in flag"
267#         DEBUG -g
268#         RELEASE --use_fast_math
269#         RELWITHDEBINFO --use_fast_math;-g
270#         MINSIZEREL --use_fast_math
271#
272#      For certain configurations (namely VS generating object files with
273#      CUDA_ATTACH_VS_BUILD_RULE_TO_CUDA_FILE set to ON), no generated file will
274#      be produced for the given cuda file.  This is because when you add the
275#      cuda file to Visual Studio it knows that this file produces an object file
276#      and will link in the resulting object file automatically.
277#
278#      This script will also generate a separate cmake script that is used at
279#      build time to invoke nvcc.  This is for several reasons.
280#
281#        1. nvcc can return negative numbers as return values which confuses
282#        Visual Studio into thinking that the command succeeded.  The script now
283#        checks the error codes and produces errors when there was a problem.
284#
285#        2. nvcc has been known to not delete incomplete results when it
286#        encounters problems.  This confuses build systems into thinking the
287#        target was generated when in fact an unusable file exists.  The script
288#        now deletes the output files if there was an error.
289#
290#        3. By putting all the options that affect the build into a file and then
291#        make the build rule dependent on the file, the output files will be
292#        regenerated when the options change.
293#
294#      This script also looks at optional arguments STATIC, SHARED, or MODULE to
295#      determine when to target the object compilation for a shared library.
296#      BUILD_SHARED_LIBS is ignored in CUDA_WRAP_SRCS, but it is respected in
297#      CUDA_ADD_LIBRARY.  On some systems special flags are added for building
298#      objects intended for shared libraries.  A preprocessor macro,
299#      <target_name>_EXPORTS is defined when a shared library compilation is
300#      detected.
301#
302#      Flags passed into add_definitions with -D or /D are passed along to nvcc.
303#
304#
305#
306# The script defines the following variables::
307#
308#   CUDA_VERSION_MAJOR    -- The major version of cuda as reported by nvcc.
309#   CUDA_VERSION_MINOR    -- The minor version.
310#   CUDA_VERSION
311#   CUDA_VERSION_STRING   -- CUDA_VERSION_MAJOR.CUDA_VERSION_MINOR
312#   CUDA_HAS_FP16         -- Whether a short float (float16,fp16) is supported.
313#
314#   CUDA_TOOLKIT_ROOT_DIR -- Path to the CUDA Toolkit (defined if not set).
315#   CUDA_SDK_ROOT_DIR     -- Path to the CUDA SDK.  Use this to find files in the
316#                            SDK.  This script will not directly support finding
317#                            specific libraries or headers, as that isn't
318#                            supported by NVIDIA.  If you want to change
319#                            libraries when the path changes see the
320#                            FindCUDA.cmake script for an example of how to clear
321#                            these variables.  There are also examples of how to
322#                            use the CUDA_SDK_ROOT_DIR to locate headers or
323#                            libraries, if you so choose (at your own risk).
324#   CUDA_INCLUDE_DIRS     -- Include directory for cuda headers.  Added automatically
325#                            for CUDA_ADD_EXECUTABLE and CUDA_ADD_LIBRARY.
326#   CUDA_LIBRARIES        -- Cuda RT library.
327#   CUDA_CUFFT_LIBRARIES  -- Device or emulation library for the Cuda FFT
328#                            implementation (alternative to:
329#                            CUDA_ADD_CUFFT_TO_TARGET macro)
330#   CUDA_CUBLAS_LIBRARIES -- Device or emulation library for the Cuda BLAS
331#                            implementation (alternative to:
332#                            CUDA_ADD_CUBLAS_TO_TARGET macro).
333#   CUDA_cudart_static_LIBRARY -- Statically linkable cuda runtime library.
334#                                 Only available for CUDA version 5.5+
335#   CUDA_cudadevrt_LIBRARY -- Device runtime library.
336#                             Required for separable compilation.
337#   CUDA_cupti_LIBRARY    -- CUDA Profiling Tools Interface library.
338#                            Only available for CUDA version 4.0+.
339#   CUDA_curand_LIBRARY   -- CUDA Random Number Generation library.
340#                            Only available for CUDA version 3.2+.
341#   CUDA_cusolver_LIBRARY -- CUDA Direct Solver library.
342#                            Only available for CUDA version 7.0+.
343#   CUDA_cusparse_LIBRARY -- CUDA Sparse Matrix library.
344#                            Only available for CUDA version 3.2+.
345#   CUDA_npp_LIBRARY      -- NVIDIA Performance Primitives lib.
346#                            Only available for CUDA version 4.0+.
347#   CUDA_nppc_LIBRARY     -- NVIDIA Performance Primitives lib (core).
348#                            Only available for CUDA version 5.5+.
349#   CUDA_nppi_LIBRARY     -- NVIDIA Performance Primitives lib (image processing).
350#                            Only available for CUDA version 5.5 - 8.0.
351#   CUDA_nppial_LIBRARY   -- NVIDIA Performance Primitives lib (image processing).
352#                            Only available for CUDA version 9.0.
353#   CUDA_nppicc_LIBRARY   -- NVIDIA Performance Primitives lib (image processing).
354#                            Only available for CUDA version 9.0.
355#   CUDA_nppicom_LIBRARY  -- NVIDIA Performance Primitives lib (image processing).
356#                            Only available for CUDA version 9.0.
357#   CUDA_nppidei_LIBRARY  -- NVIDIA Performance Primitives lib (image processing).
358#                            Only available for CUDA version 9.0.
359#   CUDA_nppif_LIBRARY    -- NVIDIA Performance Primitives lib (image processing).
360#                            Only available for CUDA version 9.0.
361#   CUDA_nppig_LIBRARY    -- NVIDIA Performance Primitives lib (image processing).
362#                            Only available for CUDA version 9.0.
363#   CUDA_nppim_LIBRARY    -- NVIDIA Performance Primitives lib (image processing).
364#                            Only available for CUDA version 9.0.
365#   CUDA_nppist_LIBRARY   -- NVIDIA Performance Primitives lib (image processing).
366#                            Only available for CUDA version 9.0.
367#   CUDA_nppisu_LIBRARY   -- NVIDIA Performance Primitives lib (image processing).
368#                            Only available for CUDA version 9.0.
369#   CUDA_nppitc_LIBRARY   -- NVIDIA Performance Primitives lib (image processing).
370#                            Only available for CUDA version 9.0.
371#   CUDA_npps_LIBRARY     -- NVIDIA Performance Primitives lib (signal processing).
372#                            Only available for CUDA version 5.5+.
373#   CUDA_nvcuvenc_LIBRARY -- CUDA Video Encoder library.
374#                            Only available for CUDA version 3.2+.
375#                            Windows only.
376#   CUDA_nvcuvid_LIBRARY  -- CUDA Video Decoder library.
377#                            Only available for CUDA version 3.2+.
378#                            Windows only.
379#
380
381#   James Bigler, NVIDIA Corp (nvidia.com - jbigler)
382#   Abe Stephens, SCI Institute -- http://www.sci.utah.edu/~abe/FindCuda.html
383#
384#   Copyright (c) 2008 - 2009 NVIDIA Corporation.  All rights reserved.
385#
386#   Copyright (c) 2007-2009
387#   Scientific Computing and Imaging Institute, University of Utah
388#
389#   This code is licensed under the MIT License.  See the FindCUDA.cmake script
390#   for the text of the license.
391
392# The MIT License
393#
394# License for the specific language governing rights and limitations under
395# Permission is hereby granted, free of charge, to any person obtaining a
396# copy of this software and associated documentation files (the "Software"),
397# to deal in the Software without restriction, including without limitation
398# the rights to use, copy, modify, merge, publish, distribute, sublicense,
399# and/or sell copies of the Software, and to permit persons to whom the
400# Software is furnished to do so, subject to the following conditions:
401#
402# The above copyright notice and this permission notice shall be included
403# in all copies or substantial portions of the Software.
404#
405# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
406# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
407# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
408# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
409# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
410# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
411# DEALINGS IN THE SOFTWARE.
412#
413###############################################################################
414
415# FindCUDA.cmake
416
# CUDA_FIND_HELPER_FILE(<name> <extension>)
#
# Computes the full path of the helper file "<name>.<extension>", which is
# expected in the FindCUDA/ directory next to the file currently being
# processed, and records that path in the internal cache entry CUDA_<name>.
#
# If the file does not exist the macro stops with a fatal error when
# CUDA_FIND_REQUIRED is set; otherwise it prints a status message unless
# CUDA_FIND_QUIETLY is set.  The cache entry is written in either case.
#
# Because this is a macro, _full_name, error_message and
# CMAKE_CURRENT_LIST_DIR are set in the caller's scope.
macro(CUDA_FIND_HELPER_FILE _name _extension)
  set(_full_name "${_name}.${_extension}")
  # Derive the directory of the file being processed from
  # CMAKE_CURRENT_LIST_FILE, so the helper files can be addressed relative to
  # this module.
  get_filename_component(CMAKE_CURRENT_LIST_DIR "${CMAKE_CURRENT_LIST_FILE}" PATH)
  set(CUDA_${_name} "${CMAKE_CURRENT_LIST_DIR}/FindCUDA/${_full_name}")
  if(NOT EXISTS "${CUDA_${_name}}")
    set(error_message "${_full_name} not found in ${CMAKE_CURRENT_LIST_DIR}/FindCUDA")
    if(CUDA_FIND_REQUIRED)
      message(FATAL_ERROR "${error_message}")
    elseif(NOT CUDA_FIND_QUIETLY)
      message(STATUS "${error_message}")
    endif()
  endif()
  # Store the location as INTERNAL so it does not clutter the user-visible
  # cache.
  set(CUDA_${_name} ${CUDA_${_name}}
    CACHE INTERNAL "Location of ${_full_name}" FORCE)
endmacro()
438
#####################################################################
## CUDA_INCLUDE_NVCC_DEPENDENCIES
##
# CUDA_INCLUDE_NVCC_DEPENDENCIES(<dependency_file>)
#
# Includes <dependency_file>, which is expected to set CUDA_NVCC_DEPEND to the
# list of files a generated CUDA output depends on.  If the file does not exist
# yet, an empty one is created first so that it can be included.
#
# After the include, every listed dependency is checked for existence.  When
# the list is empty, or any listed file is missing (e.g. a header was deleted
# or moved), the dependency file is reset to an empty stub so it gets
# regenerated later.
#
# Variables set in the caller's scope (this is a macro):
#   CUDA_NVCC_DEPEND            -- the dependency list read from the file, or
#                                  <dependency_file> itself when the list has
#                                  to be regenerated.
#   CUDA_NVCC_DEPEND_REGENERATE -- TRUE when the dependency file was reset.
macro(CUDA_INCLUDE_NVCC_DEPENDENCIES dependency_file)
  set(CUDA_NVCC_DEPEND)
  set(CUDA_NVCC_DEPEND_REGENERATE FALSE)

  # Include the dependency file, creating it first if it doesn't exist.  The
  # include() puts a dependency on the file that will force CMake to rerun and
  # bring in the new info when it changes.  DO NOT REMOVE THIS (as I did, and
  # then spent a few hours figuring out why it didn't work).
  #
  # Paths are quoted so that a path containing ';' is not split into several
  # arguments.
  if(NOT EXISTS "${dependency_file}")
    file(WRITE "${dependency_file}" "#FindCUDA.cmake generated file.  Do not edit.\n")
  endif()
  include("${dependency_file}")

  # Verify that every recorded dependency still exists.  One missing file is
  # enough to invalidate the whole list, so stop at the first miss.
  if(CUDA_NVCC_DEPEND)
    foreach(_cuda_nvcc_dep ${CUDA_NVCC_DEPEND})
      if(NOT EXISTS "${_cuda_nvcc_dep}")
        set(CUDA_NVCC_DEPEND_REGENERATE TRUE)
        break()
      endif()
    endforeach()
  else()
    # No dependencies recorded, so the file has to be regenerated.
    set(CUDA_NVCC_DEPEND_REGENERATE TRUE)
  endif()

  # No usable dependencies: make the output depend on the dependency file
  # itself, which should cause the rule to re-run, and reset the file to an
  # empty stub.
  if(CUDA_NVCC_DEPEND_REGENERATE)
    set(CUDA_NVCC_DEPEND "${dependency_file}")
    file(WRITE "${dependency_file}" "#FindCUDA.cmake generated file.  Do not edit.\n")
  endif()

endmacro()
503
504###############################################################################
505###############################################################################
506# Setup variables' defaults
507###############################################################################
508###############################################################################
509
# Device code bitness.  The default is ON when the host pointer size
# (CMAKE_SIZEOF_VOID_P) is 8 bytes and OFF otherwise; the user may override it.
if(NOT CMAKE_SIZEOF_VOID_P EQUAL 8)
  set(CUDA_64_BIT_DEVICE_CODE_DEFAULT OFF)
else()
  set(CUDA_64_BIT_DEVICE_CODE_DEFAULT ON)
endif()
option(CUDA_64_BIT_DEVICE_CODE
  "Compile device code in 64 bit mode"
  ${CUDA_64_BIT_DEVICE_CODE_DEFAULT})

# Attach the build rule to the source file in VS.  This should only stay
# enabled while each CUDA source file belongs to at most one target.
option(CUDA_ATTACH_VS_BUILD_RULE_TO_CUDA_FILE
  "Attach the build rule to the CUDA source file.  Enable only when the CUDA source file is added to at most one target."
  ON)

# Print extra information about the cuda file during compilation.
option(CUDA_BUILD_CUBIN
  "Generate and parse .cubin files in Device mode."
  OFF)

# Select emulation mode instead of device mode.
option(CUDA_BUILD_EMULATION
  "Build in Emulation mode"
  OFF)

# Destination of the generated output files.
set(CUDA_GENERATED_OUTPUT_DIR ""
  CACHE PATH "Directory to put all the output files.  If blank it will default to the CMAKE_CURRENT_BINARY_DIR")

# Host compilation mode (C++ when ON).
option(CUDA_HOST_COMPILATION_CPP
  "Generated file extension"
  ON)
532
# User-settable extra nvcc flags: CUDA_NVCC_FLAGS is initialised through
# CMake's per-configuration variable helper.
cmake_initialize_per_config_variable(
  CUDA_NVCC_FLAGS
  "Semi-colon delimit multiple arguments."
)
535
# Pick the default value of the CUDA_HOST_COMPILER cache entry.  The first
# matching rule wins:
#   1. the CUDAHOSTCXX environment variable, when defined;
#   2. for Visual Studio generators, a path built from VS macros that are only
#      expanded when the command is run from within VS;
#   3. on Apple with a Clang C compiler invoked as ".../cc", the real path
#      behind that name (see the comments in that branch);
#   4. with MSVC behind clcache/sccache, plain "cl.exe";
#   5. otherwise CMAKE_C_COMPILER.
if(DEFINED ENV{CUDAHOSTCXX})
  set(CUDA_HOST_COMPILER "$ENV{CUDAHOSTCXX}"
    CACHE FILEPATH "Host side compiler used by NVCC")
elseif(CMAKE_GENERATOR MATCHES "Visual Studio")
  # MSVC_VERSION below 1910 uses the flat "bin" directory; anything else uses
  # the Tools/MSVC/<version> layout.
  if(MSVC_VERSION LESS 1910)
    set(_CUDA_MSVC_HOST_COMPILER "$(VCInstallDir)bin")
  else()
    set(_CUDA_MSVC_HOST_COMPILER "$(VCInstallDir)Tools/MSVC/$(VCToolsVersion)/bin/Host$(Platform)/$(PlatformTarget)")
  endif()

  set(CUDA_HOST_COMPILER "${_CUDA_MSVC_HOST_COMPILER}"
    CACHE FILEPATH "Host side compiler used by NVCC")
elseif(APPLE
    AND "${CMAKE_C_COMPILER_ID}" MATCHES "Clang"
    AND "${CMAKE_C_COMPILER}" MATCHES "/cc$")
  # Using cc which is symlink to clang may let NVCC think it is GCC and issue
  # unhandled -dumpspecs option to clang. Also in case neither
  # CMAKE_C_COMPILER is defined (project does not use C language) nor
  # CUDA_HOST_COMPILER is specified manually we should skip -ccbin and let
  # nvcc use its own default C compiler.
  # Only care about this on APPLE with clang to avoid
  # following symlinks to things like ccache
  set(c_compiler_realpath "")
  if(DEFINED CMAKE_C_COMPILER AND NOT DEFINED CUDA_HOST_COMPILER)
    get_filename_component(c_compiler_realpath "${CMAKE_C_COMPILER}" REALPATH)
    # If the resolved path does not end in clang, fall back to
    # CMAKE_C_COMPILER as given.
    if(NOT "${c_compiler_realpath}" MATCHES "/clang$")
      set(c_compiler_realpath "${CMAKE_C_COMPILER}")
    endif()
  endif()
  set(CUDA_HOST_COMPILER "${c_compiler_realpath}"
    CACHE FILEPATH "Host side compiler used by NVCC")
elseif(MSVC AND "${CMAKE_C_COMPILER}" MATCHES "clcache|sccache")
  # NVCC does not think it will work if it is passed clcache.exe or sccache.exe
  # as the host compiler, which means that builds with CC=cl.exe won't work.
  # Best to just feed it whatever the actual cl.exe is as the host compiler.
  set(CUDA_HOST_COMPILER "cl.exe"
    CACHE FILEPATH "Host side compiler used by NVCC")
else()
  set(CUDA_HOST_COMPILER "${CMAKE_C_COMPILER}"
    CACHE FILEPATH "Host side compiler used by NVCC")
endif()
578
# Forward the host compiler flags to the host compiler through nvcc.
option(CUDA_PROPAGATE_HOST_FLAGS
  "Propagate C/CXX_FLAGS and friends to the host compiler via -Xcompile"
  ON)

# Host flags that must not be forwarded.
set(CUDA_PROPAGATE_HOST_FLAGS_BLACKLIST ""
  CACHE STRING "Blacklisted flags to prevent propagation")

# Separable compilation of CUDA objects.
option(CUDA_SEPARABLE_COMPILATION
  "Compile CUDA objects with separable compilation enabled.  Requires CUDA 5.0+"
  OFF)

# Whether the commands used to compile the .cu file are echoed.
option(CUDA_VERBOSE_BUILD
  "Print out the commands run while compiling the CUDA source file.  With the Makefile generator this defaults to VERBOSE variable specified on the command line, but can be forced on with this option."
  OFF)

# Mark the tuning knobs of this module as advanced cache entries.
mark_as_advanced(
  CUDA_64_BIT_DEVICE_CODE
  CUDA_ATTACH_VS_BUILD_RULE_TO_CUDA_FILE
  CUDA_BUILD_CUBIN
  CUDA_BUILD_EMULATION
  CUDA_GENERATED_OUTPUT_DIR
  CUDA_HOST_COMPILATION_CPP
  CUDA_NVCC_FLAGS
  CUDA_PROPAGATE_HOST_FLAGS
  CUDA_PROPAGATE_HOST_FLAGS_BLACKLIST
  CUDA_SEPARABLE_COMPILATION
  CUDA_VERBOSE_BUILD
  )
604
605# Single config generators like Makefiles or Ninja don't usually have
606# CMAKE_CONFIGURATION_TYPES defined (but note that it can be defined if set by
607# projects or developers). Even CMAKE_BUILD_TYPE might not be defined for
608# single config generators (and should not be defined for multi-config
609# generators). To ensure we get a complete superset of all possible
610# configurations, we combine CMAKE_CONFIGURATION_TYPES, CMAKE_BUILD_TYPE and
611# all of the standard configurations, then weed out duplicates with
612# list(REMOVE_DUPLICATES). Looping over the unique set then ensures we have
613# each configuration-specific set of nvcc flags defined and marked as advanced.
614set(CUDA_configuration_types ${CMAKE_CONFIGURATION_TYPES} ${CMAKE_BUILD_TYPE} Debug MinSizeRel Release RelWithDebInfo)
615list(REMOVE_DUPLICATES CUDA_configuration_types)
616
617###############################################################################
618###############################################################################
619# Locate CUDA, Set Build Type, etc.
620###############################################################################
621###############################################################################
622
# Remove every cached toolkit header/library location so that the find_path()
# and find_library() calls later in this module search again.  This must cover
# each CUDA_<name>_LIBRARY cache entry the module creates; an entry that is
# left behind keeps pointing at the previous toolkit after
# CUDA_TOOLKIT_ROOT_DIR or CUDA_TOOLKIT_TARGET_DIR changes.
#
# Takes no arguments.  It is a macro, so it runs in the caller's scope.
macro(cuda_unset_include_and_libraries)
  unset(CUDA_TOOLKIT_INCLUDE CACHE)
  unset(CUDA_CUDART_LIBRARY CACHE)
  unset(CUDA_CUDA_LIBRARY CACHE)
  # Callers run this before they unset CUDA_VERSION.
  unset(CUDA_cudart_static_LIBRARY CACHE)
  unset(CUDA_cudadevrt_LIBRARY CACHE)
  unset(CUDA_cublas_LIBRARY CACHE)
  unset(CUDA_cublas_device_LIBRARY CACHE)
  unset(CUDA_cublasemu_LIBRARY CACHE)
  unset(CUDA_cublasLt_LIBRARY CACHE)
  unset(CUDA_cufft_LIBRARY CACHE)
  unset(CUDA_cufftemu_LIBRARY CACHE)
  unset(CUDA_cupti_LIBRARY CACHE)
  unset(CUDA_curand_LIBRARY CACHE)
  unset(CUDA_cusolver_LIBRARY CACHE)
  unset(CUDA_cusparse_LIBRARY CACHE)
  unset(CUDA_npp_LIBRARY CACHE)
  unset(CUDA_nppc_LIBRARY CACHE)
  unset(CUDA_nppi_LIBRARY CACHE)
  # The split NPP image libraries are located by find_cuda_helper_libs() and
  # therefore live in the cache as well.
  unset(CUDA_nppial_LIBRARY CACHE)
  unset(CUDA_nppicc_LIBRARY CACHE)
  unset(CUDA_nppicom_LIBRARY CACHE)
  unset(CUDA_nppidei_LIBRARY CACHE)
  unset(CUDA_nppif_LIBRARY CACHE)
  unset(CUDA_nppig_LIBRARY CACHE)
  unset(CUDA_nppim_LIBRARY CACHE)
  unset(CUDA_nppist_LIBRARY CACHE)
  unset(CUDA_nppisu_LIBRARY CACHE)
  unset(CUDA_nppitc_LIBRARY CACHE)
  unset(CUDA_npps_LIBRARY CACHE)
  unset(CUDA_nvcuvenc_LIBRARY CACHE)
  unset(CUDA_nvcuvid_LIBRARY CACHE)
  unset(CUDA_GPU_DETECT_OUTPUT CACHE)
endmacro()
648
# Compare CUDA_TOOLKIT_ROOT_DIR and CUDA_TOOLKIT_TARGET_DIR with the values
# remembered from the previous run.  When one of them differs, drop the
# dependent cache entries so they are detected again below.
if(NOT "${CUDA_TOOLKIT_ROOT_DIR}" STREQUAL "${CUDA_TOOLKIT_ROOT_DIR_INTERNAL}")
  # The toolkit root moved: forget headers and libraries first, then the
  # compiler, the target directory and the version derived from the compiler.
  cuda_unset_include_and_libraries()
  unset(CUDA_NVCC_EXECUTABLE CACHE)
  unset(CUDA_TOOLKIT_TARGET_DIR CACHE)
  unset(CUDA_VERSION CACHE)
endif()

if(NOT "${CUDA_TOOLKIT_TARGET_DIR}" STREQUAL "${CUDA_TOOLKIT_TARGET_DIR_INTERNAL}")
  # Only the target directory moved: headers and libraries are re-detected.
  cuda_unset_include_and_libraries()
endif()
661
662#
663#  End of unset()
664#
665
666#
667#  Start looking for things
668#
669
# Search for the cuda distribution.  This only runs for native builds and only
# when CUDA_TOOLKIT_ROOT_DIR has not been provided already.
if(NOT CUDA_TOOLKIT_ROOT_DIR AND NOT CMAKE_CROSSCOMPILING)
  # Look at the locations named by the CUDA_TOOLKIT_ROOT, CUDA_PATH and
  # CUDA_BIN_PATH environment variables first.
  find_program(CUDA_TOOLKIT_ROOT_DIR_NVCC
    NAMES nvcc nvcc.exe
    PATHS
      ENV CUDA_TOOLKIT_ROOT
      ENV CUDA_PATH
      ENV CUDA_BIN_PATH
    PATH_SUFFIXES bin bin64
    DOC "Toolkit location."
    NO_DEFAULT_PATH
    )

  # Now search default paths
  find_program(CUDA_TOOLKIT_ROOT_DIR_NVCC
    NAMES nvcc nvcc.exe
    PATHS /opt/cuda/bin
    PATH_SUFFIXES cuda/bin
    DOC "Toolkit location."
    )

  if (CUDA_TOOLKIT_ROOT_DIR_NVCC)
    # nvcc lives in <root>/bin (or bin64): take the directory of the
    # executable, then its parent, as the toolkit root.
    get_filename_component(CUDA_TOOLKIT_ROOT_DIR_NVCC_PAR "${CUDA_TOOLKIT_ROOT_DIR_NVCC}" DIRECTORY)
    get_filename_component(CUDA_TOOLKIT_ROOT_DIR "${CUDA_TOOLKIT_ROOT_DIR_NVCC_PAR}" DIRECTORY CACHE)
    # Strip a trailing bin/bin64 component should one remain.  The input is
    # quoted so that an empty value does not leave the command short of
    # arguments.
    string(REGEX REPLACE "[/\\\\]?bin[64]*[/\\\\]?$" "" CUDA_TOOLKIT_ROOT_DIR "${CUDA_TOOLKIT_ROOT_DIR}")
    # We need to force this back into the cache.
    set(CUDA_TOOLKIT_ROOT_DIR "${CUDA_TOOLKIT_ROOT_DIR}" CACHE PATH "Toolkit location." FORCE)
    set(CUDA_TOOLKIT_TARGET_DIR "${CUDA_TOOLKIT_ROOT_DIR}")
  endif()
  # The helper cache entry was only needed to derive the root directory.
  unset(CUDA_TOOLKIT_ROOT_DIR_NVCC CACHE)

  # Quoted so that an empty root is tested as an (non-existent) empty path
  # instead of relying on how if() parses a dangling EXISTS keyword.
  if (NOT EXISTS "${CUDA_TOOLKIT_ROOT_DIR}")
    if(CUDA_FIND_REQUIRED)
      message(FATAL_ERROR "Specify CUDA_TOOLKIT_ROOT_DIR")
    elseif(NOT CUDA_FIND_QUIETLY)
      message("CUDA_TOOLKIT_ROOT_DIR not found or specified")
    endif()
  endif ()
endif ()
710
if(CMAKE_CROSSCOMPILING)
  # When cross compiling the toolkit root is read from the environment.
  set(CUDA_TOOLKIT_ROOT $ENV{CUDA_TOOLKIT_ROOT})

  # Choose the candidate <root>/targets/<name> directories for the target
  # processor.
  if(CMAKE_SYSTEM_PROCESSOR STREQUAL "armv7-a")
    # Support for NVPACK
    set(CUDA_TOOLKIT_TARGET_NAMES "armv7-linux-androideabi")
  elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "arm")
    # Support for arm cross compilation
    set(CUDA_TOOLKIT_TARGET_NAMES "armv7-linux-gnueabihf")
  elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64")
    # Support for aarch64 cross compilation
    if(ANDROID_ARCH_NAME STREQUAL "arm64")
      set(CUDA_TOOLKIT_TARGET_NAMES "aarch64-linux-androideabi")
    else()
      set(CUDA_TOOLKIT_TARGET_NAMES "aarch64-linux" "sbsa-linux")
    endif()
  endif()

  # The first candidate that exists on disk becomes the target directory.
  foreach(CUDA_TOOLKIT_TARGET_NAME IN LISTS CUDA_TOOLKIT_TARGET_NAMES)
    if(EXISTS "${CUDA_TOOLKIT_ROOT}/targets/${CUDA_TOOLKIT_TARGET_NAME}")
      set(CUDA_TOOLKIT_TARGET_DIR "${CUDA_TOOLKIT_ROOT}/targets/${CUDA_TOOLKIT_TARGET_NAME}" CACHE PATH "CUDA Toolkit target location.")
      set(CUDA_TOOLKIT_ROOT_DIR ${CUDA_TOOLKIT_ROOT} CACHE PATH "Toolkit location." FORCE)
      mark_as_advanced(CUDA_TOOLKIT_TARGET_DIR)
      break()
    endif()
  endforeach()

  # Add the known CUDA target root path to the set of directories searched for
  # programs, libraries and headers.
  set(CMAKE_FIND_ROOT_PATH "${CUDA_TOOLKIT_TARGET_DIR};${CMAKE_FIND_ROOT_PATH}")

  # Locate a program that runs on the build host: use find_host_program()
  # when such a command is defined, otherwise plain find_program().
  macro(cuda_find_host_program)
    if(COMMAND find_host_program)
      find_host_program(${ARGN})
    else()
      find_program(${ARGN})
    endif()
  endmacro()
else()
  # Native build: find_host_program == find_program and
  # CUDA_TOOLKIT_TARGET_DIR == CUDA_TOOLKIT_ROOT_DIR.
  macro(cuda_find_host_program)
    find_program(${ARGN})
  endmacro()
  set(CUDA_TOOLKIT_TARGET_DIR ${CUDA_TOOLKIT_ROOT_DIR})
endif()
753
754
# CUDA_NVCC_EXECUTABLE
# A CUDA_NVCC_EXECUTABLE environment variable takes precedence; otherwise the
# compiler is searched for under the toolkit root and the CUDA_PATH /
# CUDA_BIN_PATH environment locations, then in the default search paths.
if(DEFINED ENV{CUDA_NVCC_EXECUTABLE})
  set(CUDA_NVCC_EXECUTABLE "$ENV{CUDA_NVCC_EXECUTABLE}" CACHE FILEPATH "The CUDA compiler")
else()
  cuda_find_host_program(CUDA_NVCC_EXECUTABLE
    NAMES nvcc
    PATHS "${CUDA_TOOLKIT_ROOT_DIR}"
    ENV CUDA_PATH
    ENV CUDA_BIN_PATH
    PATH_SUFFIXES bin bin64
    NO_DEFAULT_PATH
    )
  # Search default search paths, after we search our own set of paths.
  cuda_find_host_program(CUDA_NVCC_EXECUTABLE nvcc)
endif()

if(CUDA_NVCC_EXECUTABLE AND NOT CUDA_VERSION)
  # Compute the version.
  execute_process(COMMAND ${CUDA_NVCC_EXECUTABLE} "--version"
    OUTPUT_VARIABLE NVCC_OUT
    RESULT_VARIABLE NVCC_RC)
  # RESULT_VARIABLE holds either an exit code or a textual error description
  # (for example when the executable cannot be started).  Testing the variable
  # by name keeps a multi-word description from being expanded into several
  # if() arguments.
  if(NOT NVCC_RC EQUAL 0)
    message(WARNING "Failed to execute '${CUDA_NVCC_EXECUTABLE} --version'")
    set(CUDA_FOUND FALSE)
    return()
  endif()
  # The output is quoted so it is always passed as exactly one input string,
  # even when it is empty.
  string(REGEX REPLACE ".*release ([0-9]+)\\.([0-9]+).*" "\\1" CUDA_VERSION_MAJOR "${NVCC_OUT}")
  string(REGEX REPLACE ".*release ([0-9]+)\\.([0-9]+).*" "\\2" CUDA_VERSION_MINOR "${NVCC_OUT}")
  set(CUDA_VERSION "${CUDA_VERSION_MAJOR}.${CUDA_VERSION_MINOR}" CACHE STRING "Version of CUDA as computed from nvcc.")
  mark_as_advanced(CUDA_VERSION)
else()
  # Need to set these based off of the cached value
  string(REGEX REPLACE "([0-9]+)\\.([0-9]+).*" "\\1" CUDA_VERSION_MAJOR "${CUDA_VERSION}")
  string(REGEX REPLACE "([0-9]+)\\.([0-9]+).*" "\\2" CUDA_VERSION_MINOR "${CUDA_VERSION}")
endif()

# Always set this convenience variable
set(CUDA_VERSION_STRING "${CUDA_VERSION}")
793
# CUDA_TOOLKIT_INCLUDE
# Look for a header shipped with the toolkit, first in the toolkit target
# directory and the CUDA_PATH / CUDA_INC_PATH environment locations only.
find_path(CUDA_TOOLKIT_INCLUDE
  NAMES device_functions.h
  PATHS
    ${CUDA_TOOLKIT_TARGET_DIR}
    ENV CUDA_PATH
    ENV CUDA_INC_PATH
  PATH_SUFFIXES include
  NO_DEFAULT_PATH
  )
# Fall back to the default search paths when the restricted search found
# nothing.
find_path(CUDA_TOOLKIT_INCLUDE NAMES device_functions.h)
mark_as_advanced(CUDA_TOOLKIT_INCLUDE)

set(CUDA_HAS_FP16 TRUE)

# Start with an empty user include list; the toolkit include directory is the
# only entry of CUDA_INCLUDE_DIRS.
set(CUDA_NVCC_INCLUDE_DIRS_USER "")
set(CUDA_INCLUDE_DIRS ${CUDA_TOOLKIT_INCLUDE})
812
# Find a toolkit library, preferring the toolkit's own directories.
#   _var      - cache variable that receives the library location
#   _names    - library name(s) handed to find_library(NAMES ...)
#   _doc      - documentation string for the cache entry
#   _path_ext - prefix placed in front of every library sub-directory suffix
macro(cuda_find_library_local_first_with_path_ext _var _names _doc _path_ext )
  if(CMAKE_SIZEOF_VOID_P EQUAL 8)
    # CUDA 3.2+ on Windows moved the library directories, so both the new and
    # the old 64-bit locations are listed.
    set(_cuda_64bit_lib_dir
      "${_path_ext}lib/x64"
      "${_path_ext}lib64"
      "${_path_ext}libx64"
      )
  endif()
  # CUDA 3.2+ on Windows moved the library directories, so we need the new
  # (lib/Win32) and the old path (lib).
  find_library(${_var}
    NAMES ${_names}
    PATHS
      "${CUDA_TOOLKIT_TARGET_DIR}"
      ENV CUDA_PATH
      ENV CUDA_LIB_PATH
    PATH_SUFFIXES
      ${_cuda_64bit_lib_dir}
      "${_path_ext}lib/Win32"
      "${_path_ext}lib"
      "${_path_ext}libWin32"
    DOC ${_doc}
    NO_DEFAULT_PATH
    )
  if(NOT CMAKE_CROSSCOMPILING)
    # Native builds may fall back to the default search paths afterwards.
    find_library(${_var}
      NAMES ${_names}
      PATHS "/usr/lib/nvidia-current"
      DOC ${_doc}
      )
  endif()
endmacro()
839
# Find a toolkit library without any extra prefix on the library
# sub-directories.
#   _var   - cache variable that receives the library location
#   _names - library name(s) to look for
#   _doc   - documentation string for the cache entry
macro(cuda_find_library_local_first _var _names _doc)
  cuda_find_library_local_first_with_path_ext( "${_var}" "${_names}" "${_doc}" "" )
endmacro()

# Un-prefixed alias of cuda_find_library_local_first() with the same three
# parameters.  It forwards exactly those three arguments; the callee declares
# no fourth parameter.
macro(find_library_local_first _var _names _doc )
  cuda_find_library_local_first( "${_var}" "${_names}" "${_doc}" )
endmacro()
847
848
# CUDA_LIBRARIES
# Locate the shared and the static flavour of the CUDA runtime.
cuda_find_library_local_first(CUDA_CUDART_LIBRARY cudart "\"cudart\" library")

cuda_find_library_local_first(CUDA_cudart_static_LIBRARY cudart_static "static CUDA runtime library")
mark_as_advanced(CUDA_cudart_static_LIBRARY)


if(NOT CUDA_cudart_static_LIBRARY)
  # No static runtime available: silently switch the option off.
  set(CUDA_USE_STATIC_CUDA_RUNTIME OFF CACHE INTERNAL "")
else()
  # A static runtime exists: use it by default, but let the user opt out.
  option(CUDA_USE_STATIC_CUDA_RUNTIME "Use the static version of the CUDA runtime library if available" ON)
endif()

# Remember which variable names the runtime library that is actually used.
if(NOT CUDA_USE_STATIC_CUDA_RUNTIME)
  set(CUDA_CUDART_LIBRARY_VAR CUDA_CUDART_LIBRARY)
else()
  set(CUDA_CUDART_LIBRARY_VAR CUDA_cudart_static_LIBRARY)
endif()

cuda_find_library_local_first(CUDA_cudadevrt_LIBRARY cudadevrt "\"cudadevrt\" library")
mark_as_advanced(CUDA_cudadevrt_LIBRARY)
872
# The static runtime has extra link dependencies on UNIX platforms.
if(CUDA_USE_STATIC_CUDA_RUNTIME AND UNIX)
  # Check for the dependent libraries.  Here we look for pthreads.  A value
  # of CMAKE_THREAD_PREFER_PTHREAD set by the project is saved and put back
  # once the Threads package has been found.
  if(DEFINED CMAKE_THREAD_PREFER_PTHREAD)
    set(_cuda_cmake_thread_prefer_pthread ${CMAKE_THREAD_PREFER_PTHREAD})
  endif()
  set(CMAKE_THREAD_PREFER_PTHREAD 1)

  # Many of the find modules shipped with CMake call try_compile on
  # "int main(){return 0;}".  That source warns under -Wstrict-prototypes,
  # and together with -Werror the try_compile fails, so the C flags are
  # swapped out for the duration of the find_package call.
  set(_cuda_cmake_c_flags ${CMAKE_C_FLAGS})
  set(CMAKE_C_FLAGS "-fPIC")
  find_package(Threads REQUIRED)
  set(CMAKE_C_FLAGS ${_cuda_cmake_c_flags})

  # Put CMAKE_THREAD_PREFER_PTHREAD back the way it was found.
  if(DEFINED _cuda_cmake_thread_prefer_pthread)
    set(CMAKE_THREAD_PREFER_PTHREAD ${_cuda_cmake_thread_prefer_pthread})
    unset(_cuda_cmake_thread_prefer_pthread)
  else()
    unset(CMAKE_THREAD_PREFER_PTHREAD)
  endif()

  if(NOT APPLE)
    # On Linux, you must link against librt when using the static cuda runtime.
    find_library(CUDA_rt_LIBRARY rt)
    if(NOT CUDA_rt_LIBRARY)
      message(WARNING "Expecting to find librt for libcudart_static, but didn't find it.")
    endif()
  endif()
endif()
906
cuda_find_library_local_first_with_path_ext(CUDA_cupti_LIBRARY cupti "\"cupti\" library" "extras/CUPTI/")
mark_as_advanced(CUDA_cupti_LIBRARY)

# CUDA_LIBRARIES is the set of things to link against when using the CUDA
# runtime.  With the dynamic runtime most dependencies are brought in
# automatically; the static runtime needs additional libraries and linker
# options.
# Initialize to empty
set(CUDA_LIBRARIES)

if(CUDA_BUILD_EMULATION AND CUDA_CUDARTEMU_LIBRARY)
  # Emulation mode with a cudartemu library available: use it in place of
  # cudart.
  list(APPEND CUDA_LIBRARIES ${CUDA_CUDARTEMU_LIBRARY})
elseif(CUDA_USE_STATIC_CUDA_RUNTIME AND CUDA_cudart_static_LIBRARY)
  # Static runtime plus the thread and dl libraries.
  list(APPEND CUDA_LIBRARIES
    ${CUDA_cudart_static_LIBRARY}
    ${CMAKE_THREAD_LIBS_INIT}
    ${CMAKE_DL_LIBS}
    )
  if(CUDA_rt_LIBRARY)
    list(APPEND CUDA_LIBRARIES ${CUDA_rt_LIBRARY})
  endif()
  if(APPLE)
    # We need to add the default path to the driver (libcuda.dylib) as an rpath, so that
    # the static cuda runtime can find it at runtime.
    list(APPEND CUDA_LIBRARIES -Wl,-rpath,/usr/local/cuda/lib)
  endif()
else()
  # Shared runtime.
  list(APPEND CUDA_LIBRARIES ${CUDA_CUDART_LIBRARY})
endif()

# 1.1 toolkit on linux doesn't appear to have a separate library on
# some platforms.
cuda_find_library_local_first(CUDA_CUDA_LIBRARY cuda "\"cuda\" library (older versions only).")

mark_as_advanced(
  CUDA_CUDART_LIBRARY
  CUDA_CUDA_LIBRARY
  )
943
#######################
# Look for some of the toolkit helper libraries

# Find the toolkit library called _name, store its location in the cache
# variable CUDA_<_name>_LIBRARY and mark that variable as advanced.
macro(FIND_CUDA_HELPER_LIBS _name)
  cuda_find_library_local_first(CUDA_${_name}_LIBRARY ${_name} "\"${_name}\" library")
  mark_as_advanced(CUDA_${_name}_LIBRARY)
endmacro()

if(CUDA_BUILD_EMULATION)
  message(FATAL_ERROR "CUDA_BUILD_EMULATION is not supported in version 3.1 and onwards.  You must disable it to proceed.  You have version ${CUDA_VERSION}.")
endif()

# Core math libraries (cusparse showed up in version 3.2).
foreach(_cuda_helper_lib cufft cublas cublasLt cusparse curand)
  find_cuda_helper_libs(${_cuda_helper_lib})
endforeach()

# Video encode/decode libraries are only looked for on Windows.
if(WIN32)
  foreach(_cuda_helper_lib nvcuvenc nvcuvid)
    find_cuda_helper_libs(${_cuda_helper_lib})
  endforeach()
endif()

# In CUDA 9.0 the monolithic nppi library was removed; NPP is looked up as
# its individual component libraries.
foreach(_cuda_helper_lib
    nppc nppial nppicc nppicom nppidei nppif
    nppig nppim nppist nppisu nppitc npps)
  find_cuda_helper_libs(${_cuda_helper_lib})
endforeach()
set(CUDA_npp_LIBRARY "${CUDA_nppc_LIBRARY};${CUDA_nppial_LIBRARY};${CUDA_nppicc_LIBRARY};${CUDA_nppicom_LIBRARY};${CUDA_nppidei_LIBRARY};${CUDA_nppif_LIBRARY};${CUDA_nppig_LIBRARY};${CUDA_nppim_LIBRARY};${CUDA_nppist_LIBRARY};${CUDA_nppisu_LIBRARY};${CUDA_nppitc_LIBRARY};${CUDA_npps_LIBRARY}")

# cusolver showed up in version 7.0
find_cuda_helper_libs(cusolver)

# Convenience lists for cuFFT and cuBLAS.
if(NOT CUDA_BUILD_EMULATION)
  set(CUDA_CUFFT_LIBRARIES ${CUDA_cufft_LIBRARY})
  set(CUDA_CUBLAS_LIBRARIES ${CUDA_cublas_LIBRARY} ${CUDA_cublas_device_LIBRARY} ${CUDA_cublasLt_LIBRARY})
else()
  set(CUDA_CUFFT_LIBRARIES ${CUDA_cufftemu_LIBRARY})
  set(CUDA_CUBLAS_LIBRARIES ${CUDA_cublasemu_LIBRARY})
endif()
990
########################
# Look for the SDK stuff.  As of CUDA 3.0 NVSDKCUDA_ROOT has been replaced with
# NVSDKCOMPUTE_ROOT with the old CUDA C contents moved into the C subdirectory
find_path(CUDA_SDK_ROOT_DIR
  NAMES common/inc/cutil.h
  HINTS
    "$ENV{NVSDKCOMPUTE_ROOT}/C"
    ENV NVSDKCUDA_ROOT
    "[HKEY_LOCAL_MACHINE\\SOFTWARE\\NVIDIA Corporation\\Installed Products\\NVIDIA SDK 10\\Compute;InstallDir]"
  PATHS
    "/Developer/GPU\ Computing/C"
  )

# CUDA_SDK_ROOT_DIR stays first in the search path so that it takes precedence
# over the locations derived from the toolkit root and the environment.
set(CUDA_SDK_SEARCH_PATH
  "${CUDA_SDK_ROOT_DIR}"
  "${CUDA_TOOLKIT_ROOT_DIR}/local/NVSDK0.2"
  "${CUDA_TOOLKIT_ROOT_DIR}/NVSDK0.2"
  "${CUDA_TOOLKIT_ROOT_DIR}/NV_CUDA_SDK"
  "$ENV{HOME}/NVIDIA_CUDA_SDK"
  "$ENV{HOME}/NVIDIA_CUDA_SDK_MACOSX"
  "/Developer/CUDA"
  )
1014
1015# Example of how to find an include file from the CUDA_SDK_ROOT_DIR
1016
1017# find_path(CUDA_CUT_INCLUDE_DIR
1018#   cutil.h
1019#   PATHS ${CUDA_SDK_SEARCH_PATH}
1020#   PATH_SUFFIXES "common/inc"
1021#   DOC "Location of cutil.h"
1022#   NO_DEFAULT_PATH
1023#   )
1024# # Now search system paths
1025# find_path(CUDA_CUT_INCLUDE_DIR cutil.h DOC "Location of cutil.h")
1026
1027# mark_as_advanced(CUDA_CUT_INCLUDE_DIR)
1028
1029
1030# Example of how to find a library in the CUDA_SDK_ROOT_DIR
1031
1032# # cutil library is called cutil64 for 64 bit builds on windows.  We don't want
1033# # to get these confused, so we are setting the name based on the word size of
1034# # the build.
1035
1036# if(CMAKE_SIZEOF_VOID_P EQUAL 8)
1037#   set(cuda_cutil_name cutil64)
1038# else()
1039#   set(cuda_cutil_name cutil32)
1040# endif()
1041
1042# find_library(CUDA_CUT_LIBRARY
1043#   NAMES cutil ${cuda_cutil_name}
1044#   PATHS ${CUDA_SDK_SEARCH_PATH}
1045#   # The new version of the sdk shows up in common/lib, but the old one is in lib
1046#   PATH_SUFFIXES "common/lib" "lib"
1047#   DOC "Location of cutil library"
1048#   NO_DEFAULT_PATH
1049#   )
1050# # Now search system paths
1051# find_library(CUDA_CUT_LIBRARY NAMES cutil ${cuda_cutil_name} DOC "Location of cutil library")
1052# mark_as_advanced(CUDA_CUT_LIBRARY)
1053# set(CUDA_CUT_LIBRARIES ${CUDA_CUT_LIBRARY})
1054
1055
1056
#############################
# Check for required components

# Remember the directories this run worked with, so that the next run can
# tell whether the user changed any of them.
set(CUDA_SDK_ROOT_DIR_INTERNAL "${CUDA_SDK_ROOT_DIR}" CACHE INTERNAL
  "This is the value of the last time CUDA_SDK_ROOT_DIR was set successfully." FORCE)
set(CUDA_TOOLKIT_TARGET_DIR_INTERNAL "${CUDA_TOOLKIT_TARGET_DIR}" CACHE INTERNAL
  "This is the value of the last time CUDA_TOOLKIT_TARGET_DIR was set successfully." FORCE)
set(CUDA_TOOLKIT_ROOT_DIR_INTERNAL "${CUDA_TOOLKIT_ROOT_DIR}" CACHE INTERNAL
  "This is the value of the last time CUDA_TOOLKIT_ROOT_DIR was set successfully." FORCE)

# Start out as found; the required-variable check below has the final word.
set(CUDA_FOUND TRUE)

include(${CMAKE_CURRENT_LIST_DIR}/FindPackageHandleStandardArgs.cmake)

# The runtime library that is required is whichever one
# CUDA_CUDART_LIBRARY_VAR names.
find_package_handle_standard_args(CUDA
  REQUIRED_VARS
    CUDA_TOOLKIT_ROOT_DIR
    CUDA_NVCC_EXECUTABLE
    CUDA_INCLUDE_DIRS
    ${CUDA_CUDART_LIBRARY_VAR}
  VERSION_VAR
    CUDA_VERSION
  )
1079
1080
1081
1082###############################################################################
1083###############################################################################
1084# Macros
1085###############################################################################
1086###############################################################################
1087
1088###############################################################################
# Append every given directory to CUDA_NVCC_INCLUDE_DIRS_USER, the list of
# user include directories handed to nvcc.
macro(CUDA_INCLUDE_DIRECTORIES)
  list(APPEND CUDA_NVCC_INCLUDE_DIRS_USER ${ARGN})
endmacro()
1095
1096
1097##############################################################################
1098cuda_find_helper_file(parse_cubin cmake)
1099cuda_find_helper_file(make2cmake cmake)
1100cuda_find_helper_file(run_nvcc cmake)
1101include("${CMAKE_CURRENT_LIST_DIR}/FindCUDA/select_compute_arch.cmake")
1102
1103##############################################################################
# Split an argument list into source files, CMake target keywords and nvcc
# options.
#   _sources       - output: arguments that are neither keywords nor options
#   _cmake_options - output: WIN32, MACOSX_BUNDLE, EXCLUDE_FROM_ALL, STATIC,
#                    SHARED and MODULE, wherever they appear
#   _options       - output: remaining arguments that follow the OPTIONS
#                    keyword
# The arguments to classify are passed after the three output names.
macro(CUDA_GET_SOURCES_AND_OPTIONS _sources _cmake_options _options)
  set( ${_sources} )
  set( ${_cmake_options} )
  set( ${_options} )
  set( _found_options FALSE )
  foreach(arg ${ARGN})
    if("x${arg}" STREQUAL "xOPTIONS")
      # Everything that is not a keyword from here on is an nvcc option.
      set( _found_options TRUE )
    elseif(
        "x${arg}" STREQUAL "xSTATIC" OR
        "x${arg}" STREQUAL "xSHARED" OR
        "x${arg}" STREQUAL "xMODULE" OR
        "x${arg}" STREQUAL "xWIN32" OR
        "x${arg}" STREQUAL "xMACOSX_BUNDLE" OR
        "x${arg}" STREQUAL "xEXCLUDE_FROM_ALL"
        )
      list(APPEND ${_cmake_options} ${arg})
    elseif( _found_options )
      list(APPEND ${_options} ${arg})
    else()
      # Assume this is a file
      list(APPEND ${_sources} ${arg})
    endif()
  endforeach()
endmacro()
1133
1134##############################################################################
# Sort nvcc options into per-configuration lists.
#   _option_prefix - name of the list that receives options not tied to a
#                    configuration; options that follow a configuration
#                    keyword go to <_option_prefix>_<KEYWORD>
# The options are passed after _option_prefix.  A configuration keyword is the
# upper-case name of an entry of CUDA_configuration_types (for example DEBUG);
# it selects the list for the options after it and is not stored itself.
macro(CUDA_PARSE_NVCC_OPTIONS _option_prefix)
  set( _found_config )
  foreach(arg ${ARGN})
    # Determine if we are dealing with a per-configuration keyword.
    set( _cuda_arg_is_config FALSE )
    foreach(config ${CUDA_configuration_types})
      string(TOUPPER "${config}" config_upper)
      if ("x${arg}" STREQUAL "x${config_upper}")
        set( _found_config _${arg})
        set( _cuda_arg_is_config TRUE )
      endif()
    endforeach()

    # Every argument that is not a keyword is an option.  The decision uses an
    # explicit flag instead of the truth value of the argument, so option
    # values that CMake reads as false (0, N, OFF, ...-NOTFOUND) are kept.
    if ( NOT _cuda_arg_is_config )
      list(APPEND ${_option_prefix}${_found_config} "${arg}")
    endif()
  endforeach()
  unset( _cuda_arg_is_config )
endmacro()
1155
1156##############################################################################
# Add CUDA_INCLUDE_DIRS to the include directories of the current directory,
# unless an entry equal to it is already present.
function(CUDA_ADD_CUDA_INCLUDE_ONCE)
  get_directory_property(_dir_includes INCLUDE_DIRECTORIES)
  set(_already_listed FALSE)
  if(_dir_includes)
    foreach(_entry ${_dir_includes})
      if("${_entry}" STREQUAL "${CUDA_INCLUDE_DIRS}")
        set(_already_listed TRUE)
        # One match settles the question.
        break()
      endif()
    endforeach()
  endif()
  if(NOT _already_listed)
    include_directories(${CUDA_INCLUDE_DIRS})
  endif()
endfunction()
1172
# Work out which library type keyword has to be added to a set of
# add_library-style arguments.
#   shared_flag - name of the variable set in the caller's scope
# The remaining arguments are the ones to inspect.  When they already contain
# SHARED, MODULE or STATIC the result is empty; otherwise it is SHARED or
# STATIC according to BUILD_SHARED_LIBS.
function(CUDA_BUILD_SHARED_LIBRARY shared_flag)
  set(_given_args ${ARGN})
  set(_type_is_explicit FALSE)
  foreach(_type_keyword SHARED MODULE STATIC)
    list(FIND _given_args "${_type_keyword}" _keyword_index)
    if(NOT _keyword_index EQUAL -1)
      set(_type_is_explicit TRUE)
    endif()
  endforeach()

  if(_type_is_explicit)
    # The caller chose a type already; nothing has to be added.
    set(_library_type)
  elseif(BUILD_SHARED_LIBS)
    set(_library_type SHARED)
  else()
    set(_library_type STATIC)
  endif()
  set(${shared_flag} ${_library_type} PARENT_SCOPE)
endfunction()
1193
1194##############################################################################
1195# Helper to avoid clashes of files with the same basename but different paths.
1196# This doesn't attempt to do exactly what CMake internals do, which is to only
1197# add this path when there is a conflict, since by the time a second collision
1198# in names is detected it's already too late to fix the first one.  For
1199# consistency sake the relative path will be added to all files.
function(CUDA_COMPUTE_BUILD_PATH path build_path)
  # Inputs:  path       - source file path, relative or absolute
  # Outputs: build_path - name of the variable set in the caller's scope to a
  #                       sanitized, relative directory for that file

  # Work on a CMake style path from here on.
  file(TO_CMAKE_PATH "${path}" _relative)
  if(IS_ABSOLUTE "${_relative}")
    # Absolute paths are generally unnecessary, especially if something like
    # file(GLOB_RECURSE) is used to pick up the files.  Make the path relative
    # to the current binary directory when it starts with that directory, and
    # to the current source directory otherwise.
    string(FIND "${_relative}" "${CMAKE_CURRENT_BINARY_DIR}" _binary_prefix_pos)
    if(_binary_prefix_pos EQUAL 0)
      set(_anchor_dir "${CMAKE_CURRENT_BINARY_DIR}")
    else()
      set(_anchor_dir "${CMAKE_CURRENT_SOURCE_DIR}")
    endif()
    file(RELATIVE_PATH _relative "${_anchor_dir}" "${_relative}")
  endif()

  # This recipe is from cmLocalGenerator::CreateSafeUniqueObjectFileName in the
  # CMake source:
  #   - drop leading slashes
  #   - replace ':' so that no absolute path remains
  #   - replace "../" so that the path cannot climb up the tree
  #   - replace spaces
  string(REGEX REPLACE "^[/]+" "" _relative "${_relative}")
  string(REPLACE ":" "_" _relative "${_relative}")
  string(REPLACE "../" "__/" _relative "${_relative}")
  string(REPLACE " " "_" _relative "${_relative}")

  # The file name is stripped last: removing the basename earlier could turn
  # path/../basename into path/.., which the "../" replacement above would no
  # longer catch.
  get_filename_component(_relative "${_relative}" PATH)

  set(${build_path} "${_relative}" PARENT_SCOPE)
endfunction()
1236
1237##############################################################################
1238# This helper macro populates the following variables and setups up custom
1239# commands and targets to invoke the nvcc compiler to generate C or PTX source
1240# dependent upon the format parameter.  The compiler is invoked once with -M
1241# to generate a dependency file and a second time with -cuda or -ptx to generate
1242# a .cpp or .ptx file.
1243# INPUT:
1244#   cuda_target         - Target name
1245#   format              - PTX, CUBIN, FATBIN or OBJ
1246#   FILE1 .. FILEN      - The remaining arguments are the sources to be wrapped.
1247#   OPTIONS             - Extra options to NVCC
1248# OUTPUT:
1249#   generated_files     - List of generated files
1250##############################################################################
1251##############################################################################
1252
1253macro(CUDA_WRAP_SRCS cuda_target format generated_files)
1254
1255  # Put optional arguments in list.
1256  set(_argn_list "${ARGN}")
1257  # If one of the given optional arguments is "PHONY", make a note of it, then
1258  # remove it from the list.
1259  list(FIND _argn_list "PHONY" _phony_idx)
1260  if("${_phony_idx}" GREATER "-1")
1261    set(_target_is_phony true)
1262    list(REMOVE_AT _argn_list ${_phony_idx})
1263  else()
1264    set(_target_is_phony false)
1265  endif()
1266
1267  # If CMake doesn't support separable compilation, complain
1268  if(CUDA_SEPARABLE_COMPILATION AND CMAKE_VERSION VERSION_LESS "2.8.10.1")
1269    message(SEND_ERROR "CUDA_SEPARABLE_COMPILATION isn't supported for CMake versions less than 2.8.10.1")
1270  endif()
1271
1272  # Set up all the command line flags here, so that they can be overridden on a per target basis.
1273
1274  set(nvcc_flags "")
1275
1276  # Emulation if the card isn't present.
1277  if (CUDA_BUILD_EMULATION)
1278    # Emulation.
1279    set(nvcc_flags ${nvcc_flags} --device-emulation -D_DEVICEEMU -g)
1280  else()
1281    # Device mode.  No flags necessary.
1282  endif()
1283
1284  if(CUDA_HOST_COMPILATION_CPP)
1285    set(CUDA_C_OR_CXX CXX)
1286  else()
1287    message(WARNING "--host-compilation flag is deprecated in CUDA version >= 3.0.  Removing --host-compilation C flag" )
1288    set(CUDA_C_OR_CXX C)
1289  endif()
1290
1291  set(generated_extension ${CMAKE_${CUDA_C_OR_CXX}_OUTPUT_EXTENSION})
1292
1293  if(CUDA_64_BIT_DEVICE_CODE)
1294    set(nvcc_flags ${nvcc_flags} -m64)
1295  else()
1296    set(nvcc_flags ${nvcc_flags} -m32)
1297  endif()
1298
1299  if(CUDA_TARGET_CPU_ARCH)
1300    set(nvcc_flags ${nvcc_flags} "--target-cpu-architecture=${CUDA_TARGET_CPU_ARCH}")
1301  endif()
1302
1303  # This needs to be passed in at this stage, because VS needs to fill out the
1304  # various macros from within VS.  Note that CCBIN is only used if
1305  # -ccbin or --compiler-bindir isn't used and CUDA_HOST_COMPILER matches
1306  # _CUDA_MSVC_HOST_COMPILER
1307  if(CMAKE_GENERATOR MATCHES "Visual Studio")
1308    set(ccbin_flags -D "\"CCBIN:PATH=${_CUDA_MSVC_HOST_COMPILER}\"" )
1309  else()
1310    set(ccbin_flags)
1311  endif()
1312
1313  # Figure out which configure we will use and pass that in as an argument to
1314  # the script.  We need to defer the decision until compilation time, because
1315  # for VS projects we won't know if we are making a debug or release build
1316  # until build time.
1317  if(CMAKE_GENERATOR MATCHES "Visual Studio")
1318    set( CUDA_build_configuration "$(ConfigurationName)" )
1319  else()
1320    set( CUDA_build_configuration "${CMAKE_BUILD_TYPE}")
1321  endif()
1322
1323  # Initialize our list of includes with the user ones followed by the CUDA system ones.
1324  set(CUDA_NVCC_INCLUDE_DIRS ${CUDA_NVCC_INCLUDE_DIRS_USER} "${CUDA_INCLUDE_DIRS}")
1325  if(_target_is_phony)
1326    # If the passed in target name isn't a real target (i.e., this is from a call to one of the
1327    # cuda_compile_* functions), need to query directory properties to get include directories
1328    # and compile definitions.
1329    get_directory_property(_dir_include_dirs INCLUDE_DIRECTORIES)
1330    get_directory_property(_dir_compile_defs COMPILE_DEFINITIONS)
1331
1332    list(APPEND CUDA_NVCC_INCLUDE_DIRS "${_dir_include_dirs}")
1333    set(CUDA_NVCC_COMPILE_DEFINITIONS "${_dir_compile_defs}")
1334  else()
1335    # Append the include directories for this target via generator expression, which is
1336    # expanded by the FILE(GENERATE) call below.  This generator expression captures all
1337    # include dirs set by the user, whether via directory properties or target properties
1338    list(APPEND CUDA_NVCC_INCLUDE_DIRS "$<TARGET_PROPERTY:${cuda_target},INCLUDE_DIRECTORIES>")
1339
1340    # Do the same thing with compile definitions
1341    set(CUDA_NVCC_COMPILE_DEFINITIONS "$<TARGET_PROPERTY:${cuda_target},COMPILE_DEFINITIONS>")
1342  endif()
1343
1344
1345  # Reset these variables
1346  set(CUDA_WRAP_OPTION_NVCC_FLAGS)
1347  foreach(config ${CUDA_configuration_types})
1348    string(TOUPPER ${config} config_upper)
1349    set(CUDA_WRAP_OPTION_NVCC_FLAGS_${config_upper})
1350  endforeach()
1351
1352  CUDA_GET_SOURCES_AND_OPTIONS(_cuda_wrap_sources _cuda_wrap_cmake_options _cuda_wrap_options ${_argn_list})
1353  CUDA_PARSE_NVCC_OPTIONS(CUDA_WRAP_OPTION_NVCC_FLAGS ${_cuda_wrap_options})
1354
1355  # Figure out if we are building a shared library.  BUILD_SHARED_LIBS is
1356  # respected in CUDA_ADD_LIBRARY.
1357  set(_cuda_build_shared_libs FALSE)
1358  # SHARED, MODULE
1359  list(FIND _cuda_wrap_cmake_options SHARED _cuda_found_SHARED)
1360  list(FIND _cuda_wrap_cmake_options MODULE _cuda_found_MODULE)
1361  if(_cuda_found_SHARED GREATER -1 OR _cuda_found_MODULE GREATER -1)
1362    set(_cuda_build_shared_libs TRUE)
1363  endif()
1364  # STATIC
1365  list(FIND _cuda_wrap_cmake_options STATIC _cuda_found_STATIC)
1366  if(_cuda_found_STATIC GREATER -1)
1367    set(_cuda_build_shared_libs FALSE)
1368  endif()
1369
1370  # CUDA_HOST_FLAGS
1371  if(_cuda_build_shared_libs)
1372    # If we are setting up code for a shared library, then we need to add extra flags for
1373    # compiling objects for shared libraries.
1374    set(CUDA_HOST_SHARED_FLAGS ${CMAKE_SHARED_LIBRARY_${CUDA_C_OR_CXX}_FLAGS})
1375  else()
1376    set(CUDA_HOST_SHARED_FLAGS)
1377  endif()
1378
1379  macro(_filter_blocklisted_host_flags CUDA_FLAGS)
1380    string(REGEX REPLACE "[ \t]+" ";" ${CUDA_FLAGS} "${${CUDA_FLAGS}}")
1381    foreach(_blacklisted ${CUDA_PROPAGATE_HOST_FLAGS_BLACKLIST})
1382      list(REMOVE_ITEM ${CUDA_FLAGS} "${_blacklisted}")
1383    endforeach()
1384    string(REPLACE ";" " " ${CUDA_FLAGS} "${${CUDA_FLAGS}}")
1385  endmacro()
1386
1387  # Only add the CMAKE_{C,CXX}_FLAGS if we are propagating host flags.  We
1388  # always need to set the SHARED_FLAGS, though.
1389  if(CUDA_PROPAGATE_HOST_FLAGS)
1390    set(_cuda_C_FLAGS "${CMAKE_${CUDA_C_OR_CXX}_FLAGS}")
1391    _filter_blocklisted_host_flags(_cuda_C_FLAGS)
1392    set(_cuda_host_flags "set(CMAKE_HOST_FLAGS ${_cuda_C_FLAGS} ${CUDA_HOST_SHARED_FLAGS})")
1393  else()
1394    set(_cuda_host_flags "set(CMAKE_HOST_FLAGS ${CUDA_HOST_SHARED_FLAGS})")
1395  endif()
1396
1397  set(_cuda_nvcc_flags_config "# Build specific configuration flags")
1398  # Loop over all the configuration types to generate appropriate flags for run_nvcc.cmake
1399  foreach(config ${CUDA_configuration_types})
1400    string(TOUPPER ${config} config_upper)
1401    # CMAKE_FLAGS are strings and not lists.  By not putting quotes around CMAKE_FLAGS
1402    # we convert the strings to lists (like we want).
1403
1404    if(CUDA_PROPAGATE_HOST_FLAGS)
1405      # nvcc chokes on -g3 in versions previous to 3.0, so replace it with -g
1406      set(_cuda_fix_g3 FALSE)
1407
1408      set(_cuda_C_FLAGS "${CMAKE_${CUDA_C_OR_CXX}_FLAGS_${config_upper}}")
1409      _filter_blocklisted_host_flags(_cuda_C_FLAGS)
1410      if(_cuda_fix_g3)
1411        string(REPLACE "-g3" "-g" _cuda_C_FLAGS "${_cuda_C_FLAGS}")
1412      endif()
1413
1414      string(APPEND _cuda_host_flags "\nset(CMAKE_HOST_FLAGS_${config_upper} ${_cuda_C_FLAGS})")
1415    endif()
1416
1417    # Note that if we ever want CUDA_NVCC_FLAGS_<CONFIG> to be string (instead of a list
1418    # like it is currently), we can remove the quotes around the
1419    # ${CUDA_NVCC_FLAGS_${config_upper}} variable like the CMAKE_HOST_FLAGS_<CONFIG> variable.
1420    string(APPEND _cuda_nvcc_flags_config "\nset(CUDA_NVCC_FLAGS_${config_upper} ${CUDA_NVCC_FLAGS_${config_upper}} ;; ${CUDA_WRAP_OPTION_NVCC_FLAGS_${config_upper}})")
1421  endforeach()
1422
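  # Process the C++14 flag.  If the host sets the flag, we need to add it to nvcc and
  # remove it from the host flags.  This is because forwarding it through -Xcompiler
  # (-Xcompiler -std=c++14) will choke nvcc (it uses the C preprocessor).  In order to
  # get this to work correctly, we need to use nvcc's specific c++14 flag.
  # NOTE(review): the check below looks for -std=c++11 in the host flags, while the flag
  # that is appended to nvcc and stripped from the host flags is c++14 -- confirm which
  # standard the check is meant to detect.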
1427  if( "${_cuda_host_flags}" MATCHES "-std=c\\+\\+11")
1428    # Add the c++14 flag to nvcc if it isn't already present.  Note that we only look at
1429    # the main flag instead of the configuration specific flags.
1430    if( NOT "${CUDA_NVCC_FLAGS}" MATCHES "-std=c\\+\\+14" )
1431      list(APPEND nvcc_flags --std c++14)
1432    endif()
1433    string(REGEX REPLACE "[-]+std=c\\+\\+14" "" _cuda_host_flags "${_cuda_host_flags}")
1434  endif()
1435
1436  if(_cuda_build_shared_libs)
1437    list(APPEND nvcc_flags "-D${cuda_target}_EXPORTS")
1438  endif()
1439
1440  # Reset the output variable
1441  set(_cuda_wrap_generated_files "")
1442
1443  # Iterate over the macro arguments and create custom
1444  # commands for all the .cu files.
1445  foreach(file ${_argn_list})
1446    # Ignore any file marked as a HEADER_FILE_ONLY
1447    get_source_file_property(_is_header ${file} HEADER_FILE_ONLY)
1448    # Allow per source file overrides of the format.  Also allows compiling non-.cu files.
1449    get_source_file_property(_cuda_source_format ${file} CUDA_SOURCE_PROPERTY_FORMAT)
1450    if((${file} MATCHES "\\.cu$" OR _cuda_source_format) AND NOT _is_header)
1451
1452      if(NOT _cuda_source_format)
1453        set(_cuda_source_format ${format})
1454      endif()
1455      # If file isn't a .cu file, we need to tell nvcc to treat it as such.
1456      if(NOT file MATCHES "\\.cu$")
1457        set(cuda_language_flag -x=cu)
1458      else()
1459        set(cuda_language_flag)
1460      endif()
1461
1462      if( ${_cuda_source_format} MATCHES "OBJ")
1463        set( cuda_compile_to_external_module OFF )
1464      else()
1465        set( cuda_compile_to_external_module ON )
1466        if( ${_cuda_source_format} MATCHES "PTX" )
1467          set( cuda_compile_to_external_module_type "ptx" )
1468        elseif( ${_cuda_source_format} MATCHES "CUBIN")
1469          set( cuda_compile_to_external_module_type "cubin" )
1470        elseif( ${_cuda_source_format} MATCHES "FATBIN")
1471          set( cuda_compile_to_external_module_type "fatbin" )
1472        else()
1473          message( FATAL_ERROR "Invalid format flag passed to CUDA_WRAP_SRCS or set with CUDA_SOURCE_PROPERTY_FORMAT file property for file '${file}': '${_cuda_source_format}'.  Use OBJ, PTX, CUBIN or FATBIN.")
1474        endif()
1475      endif()
1476
1477      if(cuda_compile_to_external_module)
1478        # Don't use any of the host compilation flags for PTX targets.
1479        set(CUDA_HOST_FLAGS)
1480        set(CUDA_NVCC_FLAGS_CONFIG)
1481      else()
1482        set(CUDA_HOST_FLAGS ${_cuda_host_flags})
1483        set(CUDA_NVCC_FLAGS_CONFIG ${_cuda_nvcc_flags_config})
1484      endif()
1485
1486      # Determine output directory
1487      cuda_compute_build_path("${file}" cuda_build_path)
1488      set(cuda_compile_intermediate_directory "${CMAKE_CURRENT_BINARY_DIR}/CMakeFiles/${cuda_target}.dir/${cuda_build_path}")
1489      if(CUDA_GENERATED_OUTPUT_DIR)
1490        set(cuda_compile_output_dir "${CUDA_GENERATED_OUTPUT_DIR}")
1491      else()
1492        if ( cuda_compile_to_external_module )
1493          set(cuda_compile_output_dir "${CMAKE_CURRENT_BINARY_DIR}")
1494        else()
1495          set(cuda_compile_output_dir "${cuda_compile_intermediate_directory}")
1496        endif()
1497      endif()
1498
1499      # Add a custom target to generate a c or ptx file. ######################
1500
1501      get_filename_component( basename ${file} NAME )
1502      if( cuda_compile_to_external_module )
1503        set(generated_file_path "${cuda_compile_output_dir}")
1504        set(generated_file_basename "${cuda_target}_generated_${basename}.${cuda_compile_to_external_module_type}")
1505        set(format_flag "-${cuda_compile_to_external_module_type}")
1506        file(MAKE_DIRECTORY "${cuda_compile_output_dir}")
1507      else()
1508        set(generated_file_path "${cuda_compile_output_dir}/${CMAKE_CFG_INTDIR}")
1509        set(generated_file_basename "${cuda_target}_generated_${basename}${generated_extension}")
1510        if(CUDA_SEPARABLE_COMPILATION)
1511          set(format_flag "-dc")
1512        else()
1513          set(format_flag "-c")
1514        endif()
1515      endif()
1516
1517      # Set all of our file names.  Make sure that whatever filenames that have
1518      # generated_file_path in them get passed in through as a command line
1519      # argument, so that the ${CMAKE_CFG_INTDIR} gets expanded at run time
1520      # instead of configure time.
1521      set(generated_file "${generated_file_path}/${generated_file_basename}")
1522      set(cmake_dependency_file "${cuda_compile_intermediate_directory}/${generated_file_basename}.depend")
1523      set(NVCC_generated_dependency_file "${cuda_compile_intermediate_directory}/${generated_file_basename}.NVCC-depend")
1524      set(generated_cubin_file "${generated_file_path}/${generated_file_basename}.cubin.txt")
1525      set(custom_target_script_pregen "${cuda_compile_intermediate_directory}/${generated_file_basename}.cmake.pre-gen")
1526      set(custom_target_script "${cuda_compile_intermediate_directory}/${generated_file_basename}$<$<BOOL:$<CONFIG>>:.$<CONFIG>>.cmake")
1527
1528      # Setup properties for obj files:
1529      if( NOT cuda_compile_to_external_module )
1530        set_source_files_properties("${generated_file}"
1531          PROPERTIES
1532          EXTERNAL_OBJECT true # This is an object file not to be compiled, but only be linked.
1533          )
1534      endif()
1535
1536      # Don't add CMAKE_CURRENT_SOURCE_DIR if the path is already an absolute path.
1537      get_filename_component(file_path "${file}" PATH)
1538      if(IS_ABSOLUTE "${file_path}")
1539        set(source_file "${file}")
1540      else()
1541        set(source_file "${CMAKE_CURRENT_SOURCE_DIR}/${file}")
1542      endif()
1543
1544      if( NOT cuda_compile_to_external_module AND CUDA_SEPARABLE_COMPILATION)
1545        list(APPEND ${cuda_target}_SEPARABLE_COMPILATION_OBJECTS "${generated_file}")
1546      endif()
1547
1548      # Bring in the dependencies.  Creates a variable CUDA_NVCC_DEPEND #######
1549      cuda_include_nvcc_dependencies(${cmake_dependency_file})
1550
1551      # Convenience string for output #########################################
1552      if(CUDA_BUILD_EMULATION)
1553        set(cuda_build_type "Emulation")
1554      else()
1555        set(cuda_build_type "Device")
1556      endif()
1557
1558      # Build the NVCC made dependency file ###################################
1559      set(build_cubin OFF)
1560      if ( NOT CUDA_BUILD_EMULATION AND CUDA_BUILD_CUBIN )
1561         if ( NOT cuda_compile_to_external_module )
1562           set ( build_cubin ON )
1563         endif()
1564      endif()
1565
1566      # Configure the build script
1567      configure_file("${CUDA_run_nvcc}" "${custom_target_script_pregen}" @ONLY)
1568      file(GENERATE
1569        OUTPUT "${custom_target_script}"
1570        INPUT "${custom_target_script_pregen}"
1571        )
1572
1573      # So if a user specifies the same cuda file as input more than once, you
1574      # can have bad things happen with dependencies.  Here we check an option
1575      # to see if this is the behavior they want.
1576      if(CUDA_ATTACH_VS_BUILD_RULE_TO_CUDA_FILE)
1577        set(main_dep MAIN_DEPENDENCY ${source_file})
1578      else()
1579        set(main_dep DEPENDS ${source_file})
1580      endif()
1581
1582      if(CUDA_VERBOSE_BUILD)
1583        set(verbose_output ON)
1584      elseif(CMAKE_GENERATOR MATCHES "Makefiles")
1585        set(verbose_output "$(VERBOSE)")
1586      # This condition lets us also turn on verbose output when someone
1587      # specifies CMAKE_VERBOSE_MAKEFILE, even if the generator isn't
1588      # the Makefiles generator (this is important for us, Ninja users.)
1589      elseif(CMAKE_VERBOSE_MAKEFILE)
1590        set(verbose_output ON)
1591      else()
1592        set(verbose_output OFF)
1593      endif()
1594
1595      # Create up the comment string
1596      file(RELATIVE_PATH generated_file_relative_path "${CMAKE_BINARY_DIR}" "${generated_file}")
1597      if(cuda_compile_to_external_module)
1598        set(cuda_build_comment_string "Building NVCC ${cuda_compile_to_external_module_type} file ${generated_file_relative_path}")
1599      else()
1600        set(cuda_build_comment_string "Building NVCC (${cuda_build_type}) object ${generated_file_relative_path}")
1601      endif()
1602
1603      set(_verbatim VERBATIM)
1604      if(ccbin_flags MATCHES "\\$\\(VCInstallDir\\)")
1605        set(_verbatim "")
1606      endif()
1607
1608      # Build the generated file and dependency file ##########################
1609      add_custom_command(
1610        OUTPUT ${generated_file}
1611        # These output files depend on the source_file and the contents of cmake_dependency_file
1612        ${main_dep}
1613        DEPENDS ${CUDA_NVCC_DEPEND}
1614        DEPENDS ${custom_target_script}
1615        # Make sure the output directory exists before trying to write to it.
1616        COMMAND ${CMAKE_COMMAND} -E make_directory "${generated_file_path}"
1617        COMMAND ${CMAKE_COMMAND} ARGS
1618          -D verbose:BOOL=${verbose_output}
1619          ${ccbin_flags}
1620          -D build_configuration:STRING=${CUDA_build_configuration}
1621          -D "generated_file:STRING=${generated_file}"
1622          -D "generated_cubin_file:STRING=${generated_cubin_file}"
1623          -P "${custom_target_script}"
1624        WORKING_DIRECTORY "${cuda_compile_intermediate_directory}"
1625        COMMENT "${cuda_build_comment_string}"
1626        ${_verbatim}
1627        )
1628
1629      # Make sure the build system knows the file is generated.
1630      set_source_files_properties(${generated_file} PROPERTIES GENERATED TRUE)
1631
1632      list(APPEND _cuda_wrap_generated_files ${generated_file})
1633
1634      # Add the other files that we want cmake to clean on a cleanup ##########
1635      list(APPEND CUDA_ADDITIONAL_CLEAN_FILES "${cmake_dependency_file}")
1636      list(REMOVE_DUPLICATES CUDA_ADDITIONAL_CLEAN_FILES)
1637      set(CUDA_ADDITIONAL_CLEAN_FILES ${CUDA_ADDITIONAL_CLEAN_FILES} CACHE INTERNAL "List of intermediate files that are part of the cuda dependency scanning.")
1638
1639    endif()
1640  endforeach()
1641
1642  # Set the return parameter
1643  set(${generated_files} ${_cuda_wrap_generated_files})
1644endmacro()
1645
# Pick out of a host flag string the flags that are forwarded to the nvcc
# intermediate link step.
#
#   important_flags - name of a list variable in the caller's scope; every match
#                     is appended to it
#   flag_string     - host compiler flag string to scan
#
# For Visual Studio generators the runtime library selection flags (/MD, /MT and
# their "d" variants) are collected, for every other generator -fPIC.
function(_cuda_get_important_host_flags important_flags flag_string)
  if(CMAKE_GENERATOR MATCHES "Visual Studio")
    set(_flag_pattern "/M[DT][d]?")
  else()
    set(_flag_pattern "-fPIC")
  endif()

  string(REGEX MATCHALL "${_flag_pattern}" _matched_flags "${flag_string}")

  # Hand back the caller's previous content followed by the new matches.
  set(${important_flags} ${${important_flags}} ${_matched_flags} PARENT_SCOPE)
endfunction()
1656
1657###############################################################################
1658###############################################################################
1659# Separable Compilation Link
1660###############################################################################
1661###############################################################################
1662
# Compute the name of the intermediate link file that
# CUDA_LINK_SEPARABLE_COMPILATION_OBJECTS produces for a target.
#
#   output_file_var - name of the variable (in the caller's scope) receiving the path
#   cuda_target     - target name, used for the directory and the file name
#   object_files    - separable compilation objects of the target; when this
#                     evaluates to false the result is the empty string
function(CUDA_COMPUTE_SEPARABLE_COMPILATION_OBJECT_FILE_NAME output_file_var cuda_target object_files)
  if(NOT object_files)
    # Nothing to device-link, so there is no intermediate file either.
    set(${output_file_var} "" PARENT_SCOPE)
    return()
  endif()

  # The file lives in the target's CMakeFiles directory and uses the object
  # extension of the host language.
  set(${output_file_var}
    "${CMAKE_CURRENT_BINARY_DIR}/CMakeFiles/${cuda_target}.dir/${CMAKE_CFG_INTDIR}/${cuda_target}_intermediate_link${CMAKE_${CUDA_C_OR_CXX}_OUTPUT_EXTENSION}"
    PARENT_SCOPE)
endfunction()
1674
# Set up the build rule that creates the separable compilation intermediate
# link file by running "nvcc -dlink" over a target's device objects.
#
#   output_file  - path of the intermediate link object to generate
#   cuda_target  - target name; the PRE_LINK fallback rule is attached to it
#   options      - user supplied options, handed to CUDA_PARSE_NVCC_OPTIONS
#   object_files - objects to device-link; when this evaluates to false no
#                  rule is created at all
function(CUDA_LINK_SEPARABLE_COMPILATION_OBJECTS output_file cuda_target options object_files)
  if(NOT object_files)
    return()
  endif()

  # The link file is produced during the build and must only be linked,
  # never compiled.
  set_source_files_properties("${output_file}"
    PROPERTIES
    EXTERNAL_OBJECT TRUE
    GENERATED TRUE
    )

  # Start from the parsed user options.  Only the plain nvcc_flags result is
  # used below; configuration specific options from the parse are ignored for now.
  set(nvcc_flags)
  CUDA_PARSE_NVCC_OPTIONS(nvcc_flags ${options})
  if(NOT CUDA_64_BIT_DEVICE_CODE)
    list(APPEND nvcc_flags -m32)
  else()
    list(APPEND nvcc_flags -m64)
  endif()

  # Add the host compiler unless the user already chose one with -ccbin or
  # --compiler-bindir.
  list(FIND nvcc_flags "-ccbin" _ccbin_short_index)
  list(FIND nvcc_flags "--compiler-bindir" _ccbin_long_index)
  if(CUDA_HOST_COMPILER AND _ccbin_short_index LESS 0 AND _ccbin_long_index LESS 0)
    # A $(VCInstallDir) based compiler is quoted by hand, because VERBATIM is
    # switched off for it further down.
    if(CUDA_HOST_COMPILER MATCHES "\\$\\(VCInstallDir\\)")
      list(APPEND nvcc_flags -ccbin "\"${CUDA_HOST_COMPILER}\"")
    else()
      list(APPEND nvcc_flags -ccbin "${CUDA_HOST_COMPILER}")
    endif()
  endif()

  # Collect CUDA_NVCC_FLAGS_<CONFIG> and the important flags found in
  # CMAKE_<LANG>_FLAGS_<CONFIG>, each wrapped in a generator expression so it
  # only applies to its own configuration.
  set(_per_config_nvcc_flags)
  set(_host_passthrough_flags)
  foreach(_config ${CUDA_configuration_types})
    string(TOUPPER ${_config} _config_upper)

    foreach(_flag ${CUDA_NVCC_FLAGS_${_config_upper}})
      list(APPEND _per_config_nvcc_flags $<$<CONFIG:${_config}>:${_flag}>)
    endforeach()

    set(_kept_host_flags)
    _cuda_get_important_host_flags(_kept_host_flags "${CMAKE_${CUDA_C_OR_CXX}_FLAGS_${_config_upper}}")
    foreach(_flag ${_kept_host_flags})
      list(APPEND _host_passthrough_flags $<$<CONFIG:${_config}>:-Xcompiler> $<$<CONFIG:${_config}>:${_flag}>)
    endforeach()
  endforeach()

  # Same for the configuration independent CMAKE_<LANG>_FLAGS.
  set(_kept_host_flags)
  _cuda_get_important_host_flags(_kept_host_flags "${CMAKE_${CUDA_C_OR_CXX}_FLAGS}")
  foreach(_flag ${_kept_host_flags})
    list(APPEND _host_passthrough_flags -Xcompiler ${_flag})
  endforeach()

  # General CUDA_NVCC_FLAGS first, then the configuration specific flags, then
  # the flags assembled above.
  set(nvcc_flags ${CUDA_NVCC_FLAGS} ${_per_config_nvcc_flags} ${nvcc_flags})

  file(RELATIVE_PATH _link_file_display_path "${CMAKE_BINARY_DIR}" "${output_file}")

  # Some generators don't handle multiple levels of custom command
  # dependencies correctly (obj1 depends on file1, obj2 depends on obj1).
  # VS 2010 and 2012 have this problem, so for them the intermediate link
  # object is built by a pre-link custom command instead.
  set(_needs_pre_link_rule FALSE)
  if(MSVC_VERSION GREATER 1599 AND MSVC_VERSION LESS 1800)
    set(_needs_pre_link_rule TRUE)
  endif()

  # VERBATIM is dropped when the flags contain $(VCInstallDir).
  if(nvcc_flags MATCHES "\\$\\(VCInstallDir\\)")
    set(_verbatim "")
  else()
    set(_verbatim VERBATIM)
  endif()

  if(_needs_pre_link_rule)
    get_filename_component(_link_file_dir "${output_file}" DIRECTORY)
    add_custom_command(
      TARGET ${cuda_target}
      PRE_LINK
      COMMAND ${CMAKE_COMMAND} -E echo "Building NVCC intermediate link file ${_link_file_display_path}"
      COMMAND ${CMAKE_COMMAND} -E make_directory "${_link_file_dir}"
      COMMAND ${CUDA_NVCC_EXECUTABLE} ${nvcc_flags} ${_host_passthrough_flags} -dlink ${object_files} -o "${output_file}"
      COMMAND_EXPAND_LISTS
      ${_verbatim}
      )
  else()
    add_custom_command(
      OUTPUT ${output_file}
      DEPENDS ${object_files}
      COMMAND ${CUDA_NVCC_EXECUTABLE} ${nvcc_flags} -dlink ${object_files} -o ${output_file}
      ${_host_passthrough_flags}
      COMMENT "Building NVCC intermediate link file ${_link_file_display_path}"
      COMMAND_EXPAND_LISTS
      ${_verbatim}
      )
  endif()
endfunction()
1772
1773###############################################################################
1774###############################################################################
1775# ADD LIBRARY
1776###############################################################################
1777###############################################################################
# Create a library target whose CUDA sources are compiled through nvcc.
#
#   cuda_target - name of the library target to create
#   ARGN        - source files, add_library() options and, after OPTIONS,
#                 nvcc options (split by CUDA_GET_SOURCES_AND_OPTIONS)
#
# This is a macro, so the helper variables it sets (_sources, _cmake_options,
# _options, _cuda_shared_flag, _generated_files, link_file) end up in the
# caller's scope.
macro(CUDA_ADD_LIBRARY cuda_target)

  CUDA_ADD_CUDA_INCLUDE_ONCE()

  # Split the arguments into sources, CMake options and nvcc options.
  CUDA_GET_SOURCES_AND_OPTIONS(_sources _cmake_options _options ${ARGN})
  CUDA_BUILD_SHARED_LIBRARY(_cuda_shared_flag ${ARGN})

  # One custom command per CUDA source; the resulting objects are returned
  # in _generated_files.
  CUDA_WRAP_SRCS(${cuda_target} OBJ _generated_files
    ${_sources}
    ${_cmake_options} ${_cuda_shared_flag}
    OPTIONS ${_options})

  # Name of the intermediate link file used for separable compilation
  # (empty when there are no separable compilation objects).
  CUDA_COMPUTE_SEPARABLE_COMPILATION_OBJECT_FILE_NAME(link_file ${cuda_target} "${${cuda_target}_SEPARABLE_COMPILATION_OBJECTS}")

  # The library consists of the generated objects, the original sources and
  # the intermediate link file.
  add_library(${cuda_target} ${_cmake_options}
    ${_generated_files}
    ${_sources}
    ${link_file}
    )

  # Add the link phase for separable compilation.  The call does nothing
  # unless ${cuda_target}_SEPARABLE_COMPILATION_OBJECTS has been filled in.
  CUDA_LINK_SEPARABLE_COMPILATION_OBJECTS("${link_file}" ${cuda_target} "${_options}" "${${cuda_target}_SEPARABLE_COMPILATION_OBJECTS}")

  target_link_libraries(${cuda_target} ${CUDA_LINK_LIBRARIES_KEYWORD} ${CUDA_LIBRARIES})

  # Separable compilation additionally links the library held in
  # CUDA_cudadevrt_LIBRARY.
  if(CUDA_SEPARABLE_COMPILATION)
    target_link_libraries(${cuda_target} ${CUDA_LINK_LIBRARIES_KEYWORD} ${CUDA_cudadevrt_LIBRARY})
  endif()

  # The linker language has to match what the generated files are expected
  # to be.  CUDA_C_OR_CXX is computed based on CUDA_HOST_COMPILATION_CPP.
  set_target_properties(${cuda_target} PROPERTIES LINKER_LANGUAGE ${CUDA_C_OR_CXX})

endmacro()
1824
1825
1826###############################################################################
1827###############################################################################
1828# ADD EXECUTABLE
1829###############################################################################
1830###############################################################################
# Create an executable target whose CUDA sources are compiled through nvcc.
#
#   cuda_target - name of the executable target to create
#   ARGN        - source files, add_executable() options and, after OPTIONS,
#                 nvcc options (split by CUDA_GET_SOURCES_AND_OPTIONS)
#
# This is a macro, so the helper variables it sets (_sources, _cmake_options,
# _options, _generated_files, link_file) end up in the caller's scope.
macro(CUDA_ADD_EXECUTABLE cuda_target)

  CUDA_ADD_CUDA_INCLUDE_ONCE()

  # Split the arguments into sources, CMake options and nvcc options.
  CUDA_GET_SOURCES_AND_OPTIONS(_sources _cmake_options _options ${ARGN})

  # One custom command per CUDA source; the resulting objects are returned
  # in _generated_files.
  CUDA_WRAP_SRCS(${cuda_target} OBJ _generated_files
    ${_sources}
    OPTIONS ${_options})

  # Name of the intermediate link file used for separable compilation
  # (empty when there are no separable compilation objects).
  CUDA_COMPUTE_SEPARABLE_COMPILATION_OBJECT_FILE_NAME(link_file ${cuda_target} "${${cuda_target}_SEPARABLE_COMPILATION_OBJECTS}")

  # The executable consists of the generated objects, the original sources
  # and the intermediate link file.
  add_executable(${cuda_target} ${_cmake_options}
    ${_generated_files}
    ${_sources}
    ${link_file}
    )

  # Add the link phase for separable compilation.  The call does nothing
  # unless ${cuda_target}_SEPARABLE_COMPILATION_OBJECTS has been filled in.
  CUDA_LINK_SEPARABLE_COMPILATION_OBJECTS("${link_file}" ${cuda_target} "${_options}" "${${cuda_target}_SEPARABLE_COMPILATION_OBJECTS}")

  # NOTE(review): unlike CUDA_ADD_LIBRARY, CUDA_cudadevrt_LIBRARY is not linked
  # here when CUDA_SEPARABLE_COMPILATION is on -- confirm that this is intended.
  target_link_libraries(${cuda_target} ${CUDA_LINK_LIBRARIES_KEYWORD} ${CUDA_LIBRARIES})

  # The linker language has to match what the generated files are expected
  # to be.  CUDA_C_OR_CXX is computed based on CUDA_HOST_COMPILATION_CPP.
  set_target_properties(${cuda_target} PROPERTIES LINKER_LANGUAGE ${CUDA_C_OR_CXX})

endmacro()
1868
1869
1870###############################################################################
1871###############################################################################
1872# (Internal) helper for manually added cuda source files with specific targets
1873###############################################################################
1874###############################################################################
# Shared implementation of the cuda_compile_* macros.
#
#   cuda_target     - base name for the phony target name handed to CUDA_WRAP_SRCS
#   format          - output format passed on to CUDA_WRAP_SRCS (the callers in
#                     this file use OBJ, PTX, FATBIN and CUBIN)
#   generated_files - name of the variable receiving the generated file list
#   ARGN            - sources plus options, split by CUDA_GET_SOURCES_AND_OPTIONS
macro(cuda_compile_base cuda_target format generated_files)
  # Every call in a directory gets its own number, stored in a directory
  # property, so the phony target names stay unique.
  get_property(_counter DIRECTORY PROPERTY _cuda_internal_phony_counter)
  if(NOT _counter)
    set(_counter 0)
  endif()
  math(EXPR _counter "${_counter} + 1")
  set_property(DIRECTORY PROPERTY _cuda_internal_phony_counter ${_counter})
  set(_cuda_target "${cuda_target}_${_counter}")

  # Split the arguments into sources, CMake options and nvcc options.
  CUDA_GET_SOURCES_AND_OPTIONS(_sources _cmake_options _options ${ARGN})

  # Create the custom commands for each file.  PHONY marks the target name as
  # one that does not belong to a real target.
  CUDA_WRAP_SRCS(${_cuda_target} ${format} _generated_files
    ${_sources}
    ${_cmake_options}
    OPTIONS ${_options}
    PHONY)

  set(${generated_files} ${_generated_files})

endmacro()
1897
1898###############################################################################
1899###############################################################################
1900# CUDA COMPILE
1901###############################################################################
1902###############################################################################
# Set up the compilation of CUDA sources to object files without creating a
# library or executable target.
#
#   generated_files - name of the variable receiving the generated file list
#   ARGN            - sources and options, forwarded to cuda_compile_base
macro(CUDA_COMPILE generated_files)
  cuda_compile_base(
    cuda_compile
    OBJ
    ${generated_files}
    ${ARGN}
    )
endmacro()
1906
1907###############################################################################
1908###############################################################################
1909# CUDA COMPILE PTX
1910###############################################################################
1911###############################################################################
# Set up the compilation of CUDA sources to PTX files.
#
#   generated_files - name of the variable receiving the generated file list
#   ARGN            - sources and options, forwarded to cuda_compile_base
macro(CUDA_COMPILE_PTX generated_files)
  cuda_compile_base(
    cuda_compile_ptx
    PTX
    ${generated_files}
    ${ARGN}
    )
endmacro()
1915
1916###############################################################################
1917###############################################################################
1918# CUDA COMPILE FATBIN
1919###############################################################################
1920###############################################################################
# Set up the compilation of CUDA sources to FATBIN files.
#
#   generated_files - name of the variable receiving the generated file list
#   ARGN            - sources and options, forwarded to cuda_compile_base
macro(CUDA_COMPILE_FATBIN generated_files)
  cuda_compile_base(
    cuda_compile_fatbin
    FATBIN
    ${generated_files}
    ${ARGN}
    )
endmacro()
1924
1925###############################################################################
1926###############################################################################
1927# CUDA COMPILE CUBIN
1928###############################################################################
1929###############################################################################
# Set up the compilation of CUDA sources to CUBIN files.
#
#   generated_files - name of the variable receiving the generated file list
#   ARGN            - sources and options, forwarded to cuda_compile_base
macro(CUDA_COMPILE_CUBIN generated_files)
  cuda_compile_base(
    cuda_compile_cubin
    CUBIN
    ${generated_files}
    ${ARGN}
    )
endmacro()
1933
1934
1935###############################################################################
1936###############################################################################
1937# CUDA ADD CUFFT TO TARGET
1938###############################################################################
1939###############################################################################
# Link the CUFFT library to a target.
#
#   target - name of the target to link against CUFFT
#
# CUDA_cufft_LIBRARY is used for regular builds and CUDA_cufftemu_LIBRARY when
# CUDA_BUILD_EMULATION is enabled.
macro(CUDA_ADD_CUFFT_TO_TARGET target)
  if(NOT CUDA_BUILD_EMULATION)
    target_link_libraries(${target} ${CUDA_LINK_LIBRARIES_KEYWORD} ${CUDA_cufft_LIBRARY})
  else()
    target_link_libraries(${target} ${CUDA_LINK_LIBRARIES_KEYWORD} ${CUDA_cufftemu_LIBRARY})
  endif()
endmacro()
1947
1948###############################################################################
1949###############################################################################
1950# CUDA ADD CUBLAS TO TARGET
1951###############################################################################
1952###############################################################################
# Link the CUBLAS libraries to a target.
#
#   target - name of the target to link against CUBLAS
#
# Regular builds link CUDA_cublas_LIBRARY, CUDA_cublas_device_LIBRARY and
# CUDA_cublasLt_LIBRARY; with CUDA_BUILD_EMULATION enabled only
# CUDA_cublasemu_LIBRARY is linked.
macro(CUDA_ADD_CUBLAS_TO_TARGET target)
  if(NOT CUDA_BUILD_EMULATION)
    target_link_libraries(${target} ${CUDA_LINK_LIBRARIES_KEYWORD}
      ${CUDA_cublas_LIBRARY}
      ${CUDA_cublas_device_LIBRARY}
      ${CUDA_cublasLt_LIBRARY}
      )
  else()
    target_link_libraries(${target} ${CUDA_LINK_LIBRARIES_KEYWORD} ${CUDA_cublasemu_LIBRARY})
  endif()
endmacro()
1960
1961###############################################################################
1962###############################################################################
1963# CUDA BUILD CLEAN TARGET
1964###############################################################################
1965###############################################################################
# Create a convenience target that deletes the intermediate files recorded in
# CUDA_ADDITIONAL_CLEAN_FILES (the CUDA dependency scanning files).
#
# Call this after you add all your CUDA targets.  You should also make clean
# after running the target to get the build system to generate all the code
# again.
#
# The target is named clean_cuda_depends (CLEAN_CUDA_DEPENDS for Visual Studio
# generators).  Being a macro, this leaves cuda_clean_target_name set in the
# caller's scope.
macro(CUDA_BUILD_CLEAN_TARGET)
  set(cuda_clean_target_name clean_cuda_depends)
  if (CMAKE_GENERATOR MATCHES "Visual Studio")
    string(TOUPPER ${cuda_clean_target_name} cuda_clean_target_name)
  endif()

  # "-f" keeps the target usable when some (or all) of the files are already
  # gone, e.g. when the target is run twice in a row or the list is empty:
  # without it "cmake -E remove" reports failure for missing files.
  add_custom_target(${cuda_clean_target_name}
    COMMAND ${CMAKE_COMMAND} -E remove -f ${CUDA_ADDITIONAL_CLEAN_FILES}
    VERBATIM)

  # Clear out the variable, so the next time we configure it will be empty.
  # This is useful so that the files won't persist in the list after targets
  # have been removed.
  set(CUDA_ADDITIONAL_CLEAN_FILES "" CACHE INTERNAL "List of intermediate files that are part of the cuda dependency scanning.")
endmacro()
1983