xref: /aosp_15_r20/external/pytorch/c10/core/CopyBytes.h (revision da0073e96a02ea20f0ac840b70461e3646d07c45)
1 #pragma once
2 
3 #include <c10/core/Device.h>
4 #include <c10/core/DeviceType.h>
5 #include <c10/macros/Export.h>
6 #include <c10/macros/Macros.h>
7 #include <cstddef>
8 
9 namespace c10 {
10 
// Pointer-to-function type for a byte-copy routine. A function of this type
// returns nothing and receives, in order: a byte count, a read-only source
// pointer with its Device, and a writable destination pointer with its Device.
// This is the type accepted by _CopyBytesFunctionRegisterer for both its
// func_sync and func_async parameters.
// NOTE(review): presumably `src`/`dst` must each reference at least `nbytes`
// bytes on the respective device -- confirm against the implementations.
11 using CopyBytesFunction = void (*)(
12     size_t nbytes,
13     const void* src,
14     Device src_device,
15     void* dst,
16     Device dst_device);
17 
// Helper type whose only declared member is a constructor taking a
// (from, to) DeviceType pair and up to two CopyBytesFunction pointers.
// REGISTER_COPY_BYTES_FUNCTION instantiates it as a static object.
//
//   from       - DeviceType of the copy source.
//   to         - DeviceType of the copy destination.
//   func_sync  - copy routine; required.
//   func_async - second copy routine; optional, defaults to nullptr.
//
// NOTE(review): the constructor is only declared here; presumably it records
// the functions so that CopyBytes() can look them up by device-type pair --
// confirm in the corresponding .cpp. What a nullptr func_async means for
// async copies is likewise decided by that definition.
18 struct C10_API _CopyBytesFunctionRegisterer {
19   _CopyBytesFunctionRegisterer(
20       DeviceType from,
21       DeviceType to,
22       CopyBytesFunction func_sync,
23       CopyBytesFunction func_async = nullptr);
24 };
25 
// Expands to an anonymous namespace containing one static
// _CopyBytesFunctionRegisterer object, constructed with
// (from, to, __VA_ARGS__). The variadic part supplies the constructor's
// func_sync argument and, optionally, func_async.
// The object's name is produced by C10_ANONYMOUS_VARIABLE(g_copy_function).
// NOTE(review): presumably that yields a unique identifier per expansion so
// the macro can be used several times in one translation unit -- confirm.
// NOTE(review): `_CopyBytesFunctionRegisterer` is spelled unqualified, so the
// expansion site must be able to resolve that name (for example by being
// inside namespace c10 or having it visible through a using-directive).
26 #define REGISTER_COPY_BYTES_FUNCTION(from, to, ...)           \
27   namespace {                                                 \
28   static _CopyBytesFunctionRegisterer C10_ANONYMOUS_VARIABLE( \
29       g_copy_function)(from, to, __VA_ARGS__);                \
30   }
31 
// Exported entry point for copying bytes between devices. Only the
// declaration lives in this header.
//
//   nbytes     - number of bytes to copy.
//   src        - read-only source pointer.
//   src_device - Device associated with `src`.
//   dst        - writable destination pointer.
//   dst_device - Device associated with `dst`.
//   async      - NOTE(review): presumably selects the func_async routine
//                registered for the device pair instead of func_sync --
//                confirm in the definition.
32 /*
33  * WARNING: Implementations for this function are currently registered from
34  * ATen and caffe2, not yet from c10. Don't use this unless ATen or caffe2
35  * is present as well.
36  * We can't move them yet, because the CUDA implementations aren't unified yet
37  * between ATen and caffe2.
38  * We're planning to move the implementations into c10/backend/xxx
39  * to make c10 self contained again.
40  */
41 C10_API void CopyBytes(
42     size_t nbytes,
43     const void* src,
44     Device src_device,
45     void* dst,
46     Device dst_device,
47     bool async);
48 } // namespace c10
49