1 #pragma once 2 3 #include <c10/core/Device.h> 4 #include <c10/core/DeviceType.h> 5 #include <c10/macros/Export.h> 6 #include <c10/macros/Macros.h> 7 #include <cstddef> 8 9 namespace c10 { 10 11 using CopyBytesFunction = void (*)( 12 size_t nbytes, 13 const void* src, 14 Device src_device, 15 void* dst, 16 Device dst_device); 17 18 struct C10_API _CopyBytesFunctionRegisterer { 19 _CopyBytesFunctionRegisterer( 20 DeviceType from, 21 DeviceType to, 22 CopyBytesFunction func_sync, 23 CopyBytesFunction func_async = nullptr); 24 }; 25 26 #define REGISTER_COPY_BYTES_FUNCTION(from, to, ...) \ 27 namespace { \ 28 static _CopyBytesFunctionRegisterer C10_ANONYMOUS_VARIABLE( \ 29 g_copy_function)(from, to, __VA_ARGS__); \ 30 } 31 32 /* 33 * WARNING: Implementations for this function are currently registered from 34 * ATen and caffe2, not yet from c10. Don't use this if not either ATen 35 * or caffe2 is present as well. 36 * We can't move them yet, because the CUDA implementations aren't unified yet 37 * between ATen and caffe2. 38 * We're planning to move the implementations into c10/backend/xxx 39 * to make c10 self contained again. 40 */ 41 C10_API void CopyBytes( 42 size_t nbytes, 43 const void* src, 44 Device src_device, 45 void* dst, 46 Device dst_device, 47 bool async); 48 } // namespace c10 49