1 #pragma once 2 3 #include <c10/cuda/CUDADeviceAssertionHost.h> 4 #include <c10/cuda/CUDAMacros.h> 5 #include <c10/cuda/CUDAMiscFunctions.h> 6 #include <c10/macros/Macros.h> 7 #include <c10/util/Exception.h> 8 #include <c10/util/irange.h> 9 #include <cuda.h> 10 11 // Note [CHECK macro] 12 // ~~~~~~~~~~~~~~~~~~ 13 // This is a macro so that AT_ERROR can get accurate __LINE__ 14 // and __FILE__ information. We could split this into a short 15 // macro and a function implementation if we pass along __LINE__ 16 // and __FILE__, but no one has found this worth doing. 17 18 // Used to denote errors from CUDA framework. 19 // This needs to be declared here instead util/Exception.h for proper conversion 20 // during hipify. 21 namespace c10 { 22 class C10_CUDA_API CUDAError : public c10::Error { 23 using Error::Error; 24 }; 25 } // namespace c10 26 27 #define C10_CUDA_CHECK(EXPR) \ 28 do { \ 29 const cudaError_t __err = EXPR; \ 30 c10::cuda::c10_cuda_check_implementation( \ 31 static_cast<int32_t>(__err), \ 32 __FILE__, \ 33 __func__, /* Line number data type not well-defined between \ 34 compilers, so we perform an explicit cast */ \ 35 static_cast<uint32_t>(__LINE__), \ 36 true); \ 37 } while (0) 38 39 #define C10_CUDA_CHECK_WARN(EXPR) \ 40 do { \ 41 const cudaError_t __err = EXPR; \ 42 if (C10_UNLIKELY(__err != cudaSuccess)) { \ 43 auto error_unused C10_UNUSED = cudaGetLastError(); \ 44 (void)error_unused; \ 45 TORCH_WARN("CUDA warning: ", cudaGetErrorString(__err)); \ 46 } \ 47 } while (0) 48 49 // Indicates that a CUDA error is handled in a non-standard way 50 #define C10_CUDA_ERROR_HANDLED(EXPR) EXPR 51 52 // Intentionally ignore a CUDA error 53 #define C10_CUDA_IGNORE_ERROR(EXPR) \ 54 do { \ 55 const cudaError_t __err = EXPR; \ 56 if (C10_UNLIKELY(__err != cudaSuccess)) { \ 57 cudaError_t error_unused C10_UNUSED = cudaGetLastError(); \ 58 (void)error_unused; \ 59 } \ 60 } while (0) 61 62 // Clear the last CUDA error 63 #define C10_CUDA_CLEAR_ERROR() \ 64 do { \ 65 cudaError_t error_unused C10_UNUSED = cudaGetLastError(); \ 66 (void)error_unused; \ 67 } while (0) 68 69 // This should be used directly after every kernel launch to ensure 70 // the launch happened correctly and provide an early, close-to-source 71 // diagnostic if it didn't. 72 #define C10_CUDA_KERNEL_LAUNCH_CHECK() C10_CUDA_CHECK(cudaGetLastError()) 73 74 /// Launches a CUDA kernel appending to it all the information need to handle 75 /// device-side assertion failures. Checks that the launch was successful. 76 #define TORCH_DSA_KERNEL_LAUNCH( \ 77 kernel, blocks, threads, shared_mem, stream, ...) \ 78 do { \ 79 auto& launch_registry = \ 80 c10::cuda::CUDAKernelLaunchRegistry::get_singleton_ref(); \ 81 kernel<<<blocks, threads, shared_mem, stream>>>( \ 82 __VA_ARGS__, \ 83 launch_registry.get_uvm_assertions_ptr_for_current_device(), \ 84 launch_registry.insert( \ 85 __FILE__, __FUNCTION__, __LINE__, #kernel, stream.id())); \ 86 C10_CUDA_KERNEL_LAUNCH_CHECK(); \ 87 } while (0) 88 89 namespace c10::cuda { 90 91 /// In the event of a CUDA failure, formats a nice error message about that 92 /// failure and also checks for device-side assertion failures 93 C10_CUDA_API void c10_cuda_check_implementation( 94 const int32_t err, 95 const char* filename, 96 const char* function_name, 97 const int line_number, 98 const bool include_device_assertions); 99 100 } // namespace c10::cuda 101