xref: /aosp_15_r20/external/pytorch/aten/src/ATen/cuda/CUDAContext.cpp (revision da0073e96a02ea20f0ac840b70461e3646d07c45)
1 #include <ATen/cuda/CUDAContext.h>
2 #include <c10/cuda/CUDACachingAllocator.h>
3 #include <c10/util/CallOnce.h>
4 
5 #include <ATen/cuda/CUDAConfig.h>
6 #include <deque>
7 #include <vector>
8 
9 namespace at::cuda {
10 
11 namespace {
12 
13 DeviceIndex num_gpus = -1;
14 c10::once_flag init_flag;
15 std::deque<c10::once_flag> device_flags;
16 std::vector<cudaDeviceProp> device_properties;
17 
initCUDAContextVectors()18 void initCUDAContextVectors() {
19   num_gpus = c10::cuda::device_count();
20   device_flags.resize(num_gpus);
21   device_properties.resize(num_gpus);
22 }
23 
initDeviceProperty(DeviceIndex device_index)24 void initDeviceProperty(DeviceIndex device_index) {
25   cudaDeviceProp device_prop{};
26   AT_CUDA_CHECK(cudaGetDeviceProperties(&device_prop, device_index));
27   device_properties[device_index] = device_prop;
28 }
29 
30 } // anonymous namespace
31 
32 // We need this function to force the linking against torch_cuda(_cpp) on Windows.
33 // If you need to modify this function, please specify a new function and apply
34 // the changes according to https://github.com/pytorch/pytorch/pull/34288.
35 // Related issue: https://github.com/pytorch/pytorch/issues/31611.
36 /* Device info */
warp_size()37 int warp_size() {
38   return getCurrentDeviceProperties()->warpSize;
39 }
40 
getCurrentDeviceProperties()41 cudaDeviceProp* getCurrentDeviceProperties() {
42   auto device = c10::cuda::current_device();
43   return getDeviceProperties(device);
44 }
45 
getDeviceProperties(c10::DeviceIndex device)46 cudaDeviceProp* getDeviceProperties(c10::DeviceIndex device) {
47   c10::call_once(init_flag, initCUDAContextVectors);
48   if (device == -1) device = c10::cuda::current_device();
49   AT_ASSERT(device >= 0 && device < num_gpus, "device=", static_cast<int>(device), ", num_gpus=", num_gpus);
50   c10::call_once(device_flags[device], initDeviceProperty, device);
51   return &device_properties[device];
52 }
53 
canDeviceAccessPeer(c10::DeviceIndex device,c10::DeviceIndex peer_device)54 bool canDeviceAccessPeer(c10::DeviceIndex device, c10::DeviceIndex peer_device) {
55   c10::call_once(init_flag, initCUDAContextVectors);
56   if (device == -1) device = c10::cuda::current_device();
57   AT_ASSERT(device >= 0 && device < num_gpus, "device=", static_cast<int>(device), ", num_gpus=", num_gpus);
58   AT_ASSERT(peer_device >= 0 && peer_device < num_gpus, "peer_device=", static_cast<int>(peer_device), ", num_gpus=", num_gpus);
59   int can_access = 0;
60   AT_CUDA_CHECK(cudaDeviceCanAccessPeer(&can_access, device, peer_device));
61   return can_access != 0;
62 }
63 
getCUDADeviceAllocator()64 Allocator* getCUDADeviceAllocator() {
65   return c10::cuda::CUDACachingAllocator::get();
66 }
67 
68 } // namespace at::cuda
69