xref: /aosp_15_r20/external/pytorch/torch/csrc/cuda/shared/cudart.cpp (revision da0073e96a02ea20f0ac840b70461e3646d07c45)
1 #include <cuda.h>
2 #include <cuda_runtime.h>
3 #include <torch/csrc/utils/pybind.h>
4 #if !defined(USE_ROCM)
5 #include <cuda_profiler_api.h>
6 #else
7 #include <hip/hip_runtime_api.h>
8 #endif
9 
10 #include <c10/cuda/CUDAException.h>
11 #include <c10/cuda/CUDAGuard.h>
12 
13 namespace torch::cuda::shared {
14 
#ifdef USE_ROCM
namespace {
// Stub bound in place of cudaProfilerStart/Stop when building for ROCm:
// unconditionally reports success without doing any work.
auto hipReturnSuccess() -> hipError_t {
  constexpr hipError_t kOk = hipSuccess;
  return kOk;
}
} // namespace
#endif
22 
// Registers the `_cudart` Python submodule on `module`: thin pybind11
// bindings over a small subset of the CUDA runtime (libcudart) API, with
// ROCm substitutions where noted. NOTE: this file is processed by the HIP
// rewrite (hipify) rules when building for ROCm, which is why several API
// names below are assembled from split string literals.
void initCudartBindings(PyObject* module) {
  auto m = py::handle(module).cast<py::module>();

  auto cudart = m.def_submodule("_cudart", "libcudart.so bindings");

  // By splitting the names of these objects into two literals we prevent the
  // HIP rewrite rules from changing these names when building with HIP.

#if !defined(USE_ROCM) && defined(CUDA_VERSION) && CUDA_VERSION < 12000
  // cudaOutputMode_t is used in cudaProfilerInitialize only. The latter is gone
  // in CUDA 12.
  py::enum_<cudaOutputMode_t>(
      cudart,
      "cuda"
      "OutputMode")
      .value("KeyValuePair", cudaKeyValuePair)
      .value("CSV", cudaCSV);
#endif

  // Only cudaSuccess is exposed; Python callers compare results against it.
  py::enum_<cudaError_t>(
      cudart,
      "cuda"
      "Error")
      .value("success", cudaSuccess);

  cudart.def(
      "cuda"
      "GetErrorString",
      cudaGetErrorString);
  // Profiler start/stop. Under ROCm these bind the hipReturnSuccess stub
  // defined above, i.e. they are success-reporting no-ops.
  cudart.def(
      "cuda"
      "ProfilerStart",
#ifdef USE_ROCM
      hipReturnSuccess
#else
      cudaProfilerStart
#endif
  );
  cudart.def(
      "cuda"
      "ProfilerStop",
#ifdef USE_ROCM
      hipReturnSuccess
#else
      cudaProfilerStop
#endif
  );
  // Page-lock (pin) `size` bytes of host memory starting at `ptr`.
  // `ptr` arrives from Python as an integer address; the GIL is dropped
  // for the duration of the runtime call, which may block.
  cudart.def(
      "cuda"
      "HostRegister",
      [](uintptr_t ptr, size_t size, unsigned int flags) -> cudaError_t {
        py::gil_scoped_release no_gil;
        return C10_CUDA_ERROR_HANDLED(
            cudaHostRegister((void*)ptr, size, flags));
      });
  // Undo a prior HostRegister on the same host address.
  cudart.def(
      "cuda"
      "HostUnregister",
      [](uintptr_t ptr) -> cudaError_t {
        py::gil_scoped_release no_gil;
        return C10_CUDA_ERROR_HANDLED(cudaHostUnregister((void*)ptr));
      });
  // `ptr` is interpreted as a cudaStream_t* — the address where the newly
  // created stream handle is written (caller owns that storage).
  cudart.def(
      "cuda"
      "StreamCreate",
      [](uintptr_t ptr) -> cudaError_t {
        py::gil_scoped_release no_gil;
        return C10_CUDA_ERROR_HANDLED(cudaStreamCreate((cudaStream_t*)ptr));
      });
  // Here `ptr` is the stream handle itself (cudaStream_t), not a pointer to
  // one — note the asymmetry with StreamCreate above.
  cudart.def(
      "cuda"
      "StreamDestroy",
      [](uintptr_t ptr) -> cudaError_t {
        py::gil_scoped_release no_gil;
        return C10_CUDA_ERROR_HANDLED(cudaStreamDestroy((cudaStream_t)ptr));
      });
#if !defined(USE_ROCM) && defined(CUDA_VERSION) && CUDA_VERSION < 12000
  // cudaProfilerInitialize is no longer needed after CUDA 12:
  // https://forums.developer.nvidia.com/t/cudaprofilerinitialize-is-deprecated-alternative/200776/3
  cudart.def(
      "cuda"
      "ProfilerInitialize",
      cudaProfilerInitialize,
      py::call_guard<py::gil_scoped_release>());
#endif
  // Returns (free_bytes, total_bytes) for `device`. CUDAGuard switches the
  // current device before the query and restores it on scope exit; the GIL
  // is released only after the guard is set.
  cudart.def(
      "cuda"
      "MemGetInfo",
      [](c10::DeviceIndex device) -> std::pair<size_t, size_t> {
        c10::cuda::CUDAGuard guard(device);
        size_t device_free = 0;
        size_t device_total = 0;
        py::gil_scoped_release no_gil;
        C10_CUDA_CHECK(cudaMemGetInfo(&device_free, &device_total));
        return {device_free, device_total};
      });
}
120 
121 } // namespace torch::cuda::shared
122