xref: /aosp_15_r20/external/pytorch/c10/cuda/CUDAException.h (revision da0073e96a02ea20f0ac840b70461e3646d07c45)
1 #pragma once
2 
3 #include <c10/cuda/CUDADeviceAssertionHost.h>
4 #include <c10/cuda/CUDAMacros.h>
5 #include <c10/cuda/CUDAMiscFunctions.h>
6 #include <c10/macros/Macros.h>
7 #include <c10/util/Exception.h>
8 #include <c10/util/irange.h>
9 #include <cuda.h>
10 
// Note [CHECK macro]
// ~~~~~~~~~~~~~~~~~~
// This is a macro so that the error-reporting machinery can get accurate
// __LINE__ and __FILE__ information.  We could split this into a short
// macro and a function implementation if we pass along __LINE__
// and __FILE__, but no one has found this worth doing.
// Used to denote errors from the CUDA framework.
// This needs to be declared here instead of in util/Exception.h for proper
// conversion during hipify.
namespace c10 {
// Exception type used to denote errors from the CUDA framework. Inherits
// all constructors from c10::Error and adds no state of its own; it exists
// as a distinct type so callers can catch CUDA failures specifically.
class C10_CUDA_API CUDAError : public c10::Error {
  using Error::Error;
};
} // namespace c10
26 
// Evaluates EXPR (which must yield a cudaError_t) and hands the result,
// together with call-site information (__FILE__, __func__, __LINE__), to
// c10::cuda::c10_cuda_check_implementation, which on failure formats an
// error message; the trailing `true` asks it to also check for device-side
// assertion failures (see the declaration at the bottom of this file).
#define C10_CUDA_CHECK(EXPR)                                        \
  do {                                                              \
    const cudaError_t __err = EXPR;                                 \
    c10::cuda::c10_cuda_check_implementation(                       \
        static_cast<int32_t>(__err),                                \
        __FILE__,                                                   \
        __func__, /* Line number data type not well-defined between \
                      compilers, so we perform an explicit cast */  \
        static_cast<uint32_t>(__LINE__),                            \
        true);                                                      \
  } while (0)
38 
// Like C10_CUDA_CHECK, but on failure only emits TORCH_WARN with the CUDA
// error string instead of erroring. cudaGetLastError() is called first (and
// its result discarded) so the sticky CUDA error state is cleared and does
// not poison subsequent, unrelated CUDA calls.
#define C10_CUDA_CHECK_WARN(EXPR)                              \
  do {                                                         \
    const cudaError_t __err = EXPR;                            \
    if (C10_UNLIKELY(__err != cudaSuccess)) {                  \
      auto error_unused C10_UNUSED = cudaGetLastError();       \
      (void)error_unused;                                      \
      TORCH_WARN("CUDA warning: ", cudaGetErrorString(__err)); \
    }                                                          \
  } while (0)
48 
// Indicates that a CUDA error is handled in a non-standard way. Expands to
// EXPR unchanged; it exists purely to annotate call sites where the return
// value is deliberately dealt with by the surrounding code rather than by
// C10_CUDA_CHECK.
#define C10_CUDA_ERROR_HANDLED(EXPR) EXPR
51 
// Intentionally ignore a CUDA error. When EXPR fails, the sticky error
// state is consumed via cudaGetLastError() (result discarded) so that
// later CUDA calls do not observe a stale failure from this one.
#define C10_CUDA_IGNORE_ERROR(EXPR)                            \
  do {                                                         \
    const cudaError_t ignored_err = EXPR;                      \
    if (C10_UNLIKELY(ignored_err != cudaSuccess)) {            \
      cudaError_t cleared_err C10_UNUSED = cudaGetLastError(); \
      (void)cleared_err;                                       \
    }                                                          \
  } while (0)
61 
// Clear the last CUDA error by reading it with cudaGetLastError() and
// discarding the result.
#define C10_CUDA_CLEAR_ERROR()                                 \
  do {                                                         \
    cudaError_t stale_err C10_UNUSED = cudaGetLastError();     \
    (void)stale_err;                                           \
  } while (0)
68 
// This should be used directly after every kernel launch to ensure
// the launch happened correctly and provide an early, close-to-source
// diagnostic if it didn't. (A kernel launch itself returns no error code;
// launch failures are only observable through cudaGetLastError(), which is
// routed through C10_CUDA_CHECK here.)
#define C10_CUDA_KERNEL_LAUNCH_CHECK() C10_CUDA_CHECK(cudaGetLastError())
73 
/// Launches a CUDA kernel appending to it all the information needed to handle
/// device-side assertion failures, then checks that the launch was successful
/// via C10_CUDA_KERNEL_LAUNCH_CHECK.
///
/// The kernel is invoked with two extra trailing arguments after __VA_ARGS__:
/// the UVM assertions pointer for the current device and the value returned
/// by the launch registry's insert() recording this call site — so `kernel`
/// must declare matching trailing parameters. `stream` must expose an id()
/// method (presumably c10::cuda::CUDAStream — confirm at call sites).
#define TORCH_DSA_KERNEL_LAUNCH(                                      \
    kernel, blocks, threads, shared_mem, stream, ...)                 \
  do {                                                                \
    auto& launch_registry =                                           \
        c10::cuda::CUDAKernelLaunchRegistry::get_singleton_ref();     \
    kernel<<<blocks, threads, shared_mem, stream>>>(                  \
        __VA_ARGS__,                                                  \
        launch_registry.get_uvm_assertions_ptr_for_current_device(),  \
        launch_registry.insert(                                       \
            __FILE__, __FUNCTION__, __LINE__, #kernel, stream.id())); \
    C10_CUDA_KERNEL_LAUNCH_CHECK();                                   \
  } while (0)
88 
namespace c10::cuda {

/// In the event of a CUDA failure, formats a nice error message about that
/// failure and also checks for device-side assertion failures.
///
/// \param err CUDA error code produced by the runtime (a cudaError_t cast
///        to int32_t; see C10_CUDA_CHECK above).
/// \param filename value of __FILE__ at the call site.
/// \param function_name value of __func__ at the call site.
/// \param line_number value of __LINE__ at the call site. NOTE(review):
///        C10_CUDA_CHECK passes a uint32_t here, which converts implicitly
///        to int — harmless for real line numbers, but the types disagree.
/// \param include_device_assertions when true, also inspect device-side
///        assertion state (see CUDADeviceAssertionHost.h).
C10_CUDA_API void c10_cuda_check_implementation(
    const int32_t err,
    const char* filename,
    const char* function_name,
    const int line_number,
    const bool include_device_assertions);

} // namespace c10::cuda
101