xref: /aosp_15_r20/external/pytorch/aten/src/ATen/native/utils/Factory.cpp (revision da0073e96a02ea20f0ac840b70461e3646d07c45)
1 #define TORCH_ASSERT_ONLY_METHOD_OPERATORS
2 #include <ATen/NamedTensorUtils.h>
3 #include <ATen/native/utils/Factory.h>
4 #include <c10/core/CPUAllocator.h>
5 #include <c10/util/accumulate.h>
6 
7 namespace at {
8 namespace native {
9 namespace mobile {
10 
empty_with_tail_padding(const IntArrayRef size,const caffe2::TypeMeta dtype,const c10::MemoryFormat memory_format,std::optional<DimnameList> maybe_names)11 Tensor empty_with_tail_padding(
12     const IntArrayRef size,
13     const caffe2::TypeMeta dtype,
14     const c10::MemoryFormat memory_format,
15     std::optional<DimnameList> maybe_names) {
16   auto* const allocator_ptr = c10::GetDefaultMobileCPUAllocator();
17   const int64_t nelements = c10::multiply_integers(size);
18   size_t size_bytes = nelements * dtype.itemsize();
19 
20   Tensor tensor(c10::make_intrusive<c10::TensorImpl>(
21       c10::Storage{
22           c10::Storage::use_byte_size_t(),
23           size_bytes,
24           allocator_ptr->allocate(size_bytes),
25           allocator_ptr,
26           /*resizable=*/true,
27       },
28       DispatchKeySet{DispatchKey::CPU},
29       dtype));
30 
31   return namedinference::propagate_names_if_present_and_nonempty(
32       tensor.resize_(size, memory_format),
33       maybe_names);
34 }
35 
// Returns `input` unchanged when it already satisfies both requirements of
// mobile kernels — allocated by the default mobile CPU allocator (i.e. with
// tail padding) and contiguous in `memory_format`. Otherwise allocates a
// padded tensor of the same shape/dtype and copies `input` into it.
//
// input         - source tensor (never modified).
// memory_format - contiguity requirement for the result.
//
// Returns either `input` itself (fast path) or a newly allocated padded copy.
Tensor allocate_padded_contiguous_if_needed(
    const Tensor& input,
    const c10::MemoryFormat memory_format) {
  const auto* const allocator = input.storage().allocator();
  const auto* const mobile_allocator = c10::GetDefaultMobileCPUAllocator();

  // If the allocators are the same and the memory is contiguous in the
  // requested format, then there is no need to reallocate the tensor.
  if ((allocator == mobile_allocator) && input.is_contiguous(memory_format)) {
    return input;
  }

  // If there is a need to reallocate the tensor on the other hand, either
  // because the allocators are not the same, or the allocators are the same
  // but the input is not contiguous in the requested format, then reallocate
  // and directly copy into destination.  There is no need to allocate a
  // temporary contiguous memory only to use it as the source of the copy
  // operation onto our final destination.
  Tensor padded_input = empty_with_tail_padding(
      input.sizes(),
      input.options().dtype(),
      memory_format,
      input.opt_names());

  return padded_input.copy_(input);
}
63 
64 } // namespace mobile
65 } // namespace native
66 } // namespace at
67