xref: /aosp_15_r20/external/pytorch/c10/cuda/CUDAGraphsC10Utils.h (revision da0073e96a02ea20f0ac840b70461e3646d07c45)
1*da0073e9SAndroid Build Coastguard Worker #pragma once
2*da0073e9SAndroid Build Coastguard Worker 
3*da0073e9SAndroid Build Coastguard Worker #include <c10/cuda/CUDAStream.h>
4*da0073e9SAndroid Build Coastguard Worker #include <iostream>
5*da0073e9SAndroid Build Coastguard Worker #include <utility>
6*da0073e9SAndroid Build Coastguard Worker 
7*da0073e9SAndroid Build Coastguard Worker // CUDA Graphs utils used by c10 and aten.
8*da0073e9SAndroid Build Coastguard Worker // aten/cuda/CUDAGraphsUtils.cuh adds utils used by aten only.
9*da0073e9SAndroid Build Coastguard Worker 
10*da0073e9SAndroid Build Coastguard Worker namespace c10::cuda {
11*da0073e9SAndroid Build Coastguard Worker 
12*da0073e9SAndroid Build Coastguard Worker using CaptureId_t = unsigned long long;
13*da0073e9SAndroid Build Coastguard Worker 
14*da0073e9SAndroid Build Coastguard Worker // first is set if the instance is created by CUDAGraph::capture_begin.
15*da0073e9SAndroid Build Coastguard Worker // second is set if the instance is created by at::cuda::graph_pool_handle.
16*da0073e9SAndroid Build Coastguard Worker using MempoolId_t = std::pair<CaptureId_t, CaptureId_t>;
17*da0073e9SAndroid Build Coastguard Worker 
18*da0073e9SAndroid Build Coastguard Worker // RAII guard for "cudaStreamCaptureMode", a thread-local value
19*da0073e9SAndroid Build Coastguard Worker // that controls the error-checking strictness of a capture.
20*da0073e9SAndroid Build Coastguard Worker struct C10_CUDA_API CUDAStreamCaptureModeGuard {
CUDAStreamCaptureModeGuardCUDAStreamCaptureModeGuard21*da0073e9SAndroid Build Coastguard Worker   CUDAStreamCaptureModeGuard(cudaStreamCaptureMode desired)
22*da0073e9SAndroid Build Coastguard Worker       : strictness_(desired) {
23*da0073e9SAndroid Build Coastguard Worker     C10_CUDA_CHECK(cudaThreadExchangeStreamCaptureMode(&strictness_));
24*da0073e9SAndroid Build Coastguard Worker   }
~CUDAStreamCaptureModeGuardCUDAStreamCaptureModeGuard25*da0073e9SAndroid Build Coastguard Worker   ~CUDAStreamCaptureModeGuard() {
26*da0073e9SAndroid Build Coastguard Worker     C10_CUDA_CHECK_WARN(cudaThreadExchangeStreamCaptureMode(&strictness_));
27*da0073e9SAndroid Build Coastguard Worker   }
28*da0073e9SAndroid Build Coastguard Worker 
29*da0073e9SAndroid Build Coastguard Worker  private:
30*da0073e9SAndroid Build Coastguard Worker   cudaStreamCaptureMode strictness_;
31*da0073e9SAndroid Build Coastguard Worker };
32*da0073e9SAndroid Build Coastguard Worker 
33*da0073e9SAndroid Build Coastguard Worker // Protects against enum cudaStreamCaptureStatus implementation changes.
34*da0073e9SAndroid Build Coastguard Worker // Some compilers seem not to like static_assert without the messages.
35*da0073e9SAndroid Build Coastguard Worker static_assert(
36*da0073e9SAndroid Build Coastguard Worker     int(cudaStreamCaptureStatus::cudaStreamCaptureStatusNone) == 0,
37*da0073e9SAndroid Build Coastguard Worker     "unexpected int(cudaStreamCaptureStatusNone) value");
38*da0073e9SAndroid Build Coastguard Worker static_assert(
39*da0073e9SAndroid Build Coastguard Worker     int(cudaStreamCaptureStatus::cudaStreamCaptureStatusActive) == 1,
40*da0073e9SAndroid Build Coastguard Worker     "unexpected int(cudaStreamCaptureStatusActive) value");
41*da0073e9SAndroid Build Coastguard Worker static_assert(
42*da0073e9SAndroid Build Coastguard Worker     int(cudaStreamCaptureStatus::cudaStreamCaptureStatusInvalidated) == 2,
43*da0073e9SAndroid Build Coastguard Worker     "unexpected int(cudaStreamCaptureStatusInvalidated) value");
44*da0073e9SAndroid Build Coastguard Worker 
45*da0073e9SAndroid Build Coastguard Worker enum class CaptureStatus : int {
46*da0073e9SAndroid Build Coastguard Worker   None = int(cudaStreamCaptureStatus::cudaStreamCaptureStatusNone),
47*da0073e9SAndroid Build Coastguard Worker   Active = int(cudaStreamCaptureStatus::cudaStreamCaptureStatusActive),
48*da0073e9SAndroid Build Coastguard Worker   Invalidated = int(cudaStreamCaptureStatus::cudaStreamCaptureStatusInvalidated)
49*da0073e9SAndroid Build Coastguard Worker };
50*da0073e9SAndroid Build Coastguard Worker 
51*da0073e9SAndroid Build Coastguard Worker inline std::ostream& operator<<(std::ostream& os, CaptureStatus status) {
52*da0073e9SAndroid Build Coastguard Worker   switch (status) {
53*da0073e9SAndroid Build Coastguard Worker     case CaptureStatus::None:
54*da0073e9SAndroid Build Coastguard Worker       os << "cudaStreamCaptureStatusNone";
55*da0073e9SAndroid Build Coastguard Worker       break;
56*da0073e9SAndroid Build Coastguard Worker     case CaptureStatus::Active:
57*da0073e9SAndroid Build Coastguard Worker       os << "cudaStreamCaptureStatusActive";
58*da0073e9SAndroid Build Coastguard Worker       break;
59*da0073e9SAndroid Build Coastguard Worker     case CaptureStatus::Invalidated:
60*da0073e9SAndroid Build Coastguard Worker       os << "cudaStreamCaptureStatusInvalidated";
61*da0073e9SAndroid Build Coastguard Worker       break;
62*da0073e9SAndroid Build Coastguard Worker     default:
63*da0073e9SAndroid Build Coastguard Worker       TORCH_INTERNAL_ASSERT(
64*da0073e9SAndroid Build Coastguard Worker           false, "Unknown CUDA graph CaptureStatus", int(status));
65*da0073e9SAndroid Build Coastguard Worker   }
66*da0073e9SAndroid Build Coastguard Worker   return os;
67*da0073e9SAndroid Build Coastguard Worker }
68*da0073e9SAndroid Build Coastguard Worker 
69*da0073e9SAndroid Build Coastguard Worker // Use this version where you're sure a CUDA context exists already.
currentStreamCaptureStatusMayInitCtx()70*da0073e9SAndroid Build Coastguard Worker inline CaptureStatus currentStreamCaptureStatusMayInitCtx() {
71*da0073e9SAndroid Build Coastguard Worker   cudaStreamCaptureStatus is_capturing{cudaStreamCaptureStatusNone};
72*da0073e9SAndroid Build Coastguard Worker   C10_CUDA_CHECK(
73*da0073e9SAndroid Build Coastguard Worker       cudaStreamIsCapturing(c10::cuda::getCurrentCUDAStream(), &is_capturing));
74*da0073e9SAndroid Build Coastguard Worker   return CaptureStatus(is_capturing);
75*da0073e9SAndroid Build Coastguard Worker }
76*da0073e9SAndroid Build Coastguard Worker 
77*da0073e9SAndroid Build Coastguard Worker } // namespace c10::cuda
78