#define TORCH_ASSERT_ONLY_METHOD_OPERATORS
#include <ATen/NamedTensorUtils.h>
#include <ATen/native/utils/Factory.h>
#include <c10/core/CPUAllocator.h>
#include <c10/util/accumulate.h>

namespace at {
namespace native {
namespace mobile {

Tensor empty_with_tail_padding(
    const IntArrayRef size,
    const caffe2::TypeMeta dtype,
    const c10::MemoryFormat memory_format,
    std::optional<DimnameList> maybe_names) {
  auto* const allocator_ptr = c10::GetDefaultMobileCPUAllocator();
  const int64_t nelements = c10::multiply_integers(size);
  size_t size_bytes = nelements * dtype.itemsize();

  // The mobile CPU allocator pads allocations with guard bytes, so kernels
  // that read slightly past the end of a buffer can do so safely.
  Tensor tensor(c10::make_intrusive<c10::TensorImpl>(
      c10::Storage{
          c10::Storage::use_byte_size_t(),
          size_bytes,
          allocator_ptr->allocate(size_bytes),
          allocator_ptr,
          /*resizable=*/true,
      },
      DispatchKeySet{DispatchKey::CPU},
      dtype));

  return namedinference::propagate_names_if_present_and_nonempty(
      tensor.resize_(size, memory_format),
      maybe_names);
}
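
// A minimal usage sketch (illustrative only; the call site below is
// hypothetical and not part of this file). It allocates a channels-last
// float tensor whose storage carries the mobile allocator's guard-byte
// padding:
//
//   at::Tensor t = at::native::mobile::empty_with_tail_padding(
//       /*size=*/{1, 3, 224, 224},
//       /*dtype=*/caffe2::TypeMeta::Make<float>(),
//       /*memory_format=*/c10::MemoryFormat::ChannelsLast,
//       /*maybe_names=*/std::nullopt);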

Tensor allocate_padded_contiguous_if_needed(
    const Tensor& input,
    const c10::MemoryFormat memory_format) {
  const auto* const allocator = input.storage().allocator();
  const auto* const mobile_allocator = c10::GetDefaultMobileCPUAllocator();

  // If the allocators are the same and the memory is contiguous in the
  // requested format, there is no need to reallocate the tensor.

  if ((allocator == mobile_allocator) && input.is_contiguous(memory_format)) {
    return input;
  }

  // Otherwise the tensor must be reallocated, either because the allocators
  // differ, or because they match but the input is not contiguous in the
  // requested format. In that case, allocate a padded destination and copy
  // the input into it directly; there is no need to allocate a temporary
  // contiguous buffer only to use it as the source of the copy onto the
  // final destination.

  Tensor padded_input = empty_with_tail_padding(
      input.sizes(),
      input.options().dtype(),
      memory_format,
      input.opt_names());

  return padded_input.copy_(input);
}
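
// A minimal usage sketch (illustrative only; `src` is a hypothetical input).
// When `src` already lives in mobile-allocator memory and is contiguous in
// the requested format, the call returns `src` unchanged; otherwise it
// returns a freshly allocated, padded copy:
//
//   at::Tensor src = at::rand({8, 8});
//   at::Tensor padded =
//       at::native::mobile::allocate_padded_contiguous_if_needed(
//           src, c10::MemoryFormat::Contiguous);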

} // namespace mobile
} // namespace native
} // namespace at