xref: /aosp_15_r20/external/armnn/src/backends/backendsCommon/WorkloadUtils.hpp (revision 89c4ff92f2867872bb9e2354d150bf0c8c502810)
1 //
2 // Copyright © 2017 Arm Ltd. All rights reserved.
3 // SPDX-License-Identifier: MIT
4 //
5 
6 #pragma once
7 
8 #include <armnn/backends/ITensorHandle.hpp>
9 #include <armnn/backends/TensorHandle.hpp>
10 #include <armnn/Tensor.hpp>
11 #include <armnn/utility/PolymorphicDowncast.hpp>
12 #include <armnnUtils/Permute.hpp>
13 
14 #include <Half.hpp>
15 #include <Profiling.hpp>
16 
17 
18 namespace armnn
19 {
namespace
{

/// Assigns one element of \p array, counted from its back, to \p arg.
///
/// If \p idx is still below \p num, \p arg receives array[(num - 1) - idx] and \p idx is
/// incremented. Otherwise both \p arg and \p idx are left untouched, so an assignee keeps
/// whatever default the caller gave it.
///
/// \param num   - Number of valid elements in \p array
/// \param idx   - Running count of elements consumed so far; updated in place
/// \param array - Indexable container to read from
/// \param arg   - Destination for the selected element
template <typename ArrayType, typename Arg>
void AssignValues(unsigned int num, unsigned int& idx, const ArrayType& array, Arg& arg)
{
    if (idx >= num)
    {
        return;
    }

    arg = array[(num - 1) - idx];
    idx++;
}

/// Ends the variadic recursion below once no assignees remain.
///
/// Without this overload a call that passes a single assignee together with an rvalue start
/// index (for example the literal 0) selects the variadic overload with an empty pack, whose
/// tail call then has no matching function.
template <typename ArrayType>
void AssignValues(unsigned int, unsigned int, const ArrayType&)
{
}

/// Assigns the trailing elements of \p array to the given assignees, last element first.
///
/// The first assignee receives array[num - 1 - idx], the next one array[num - 2 - idx] and
/// so on. Assignees for which no element is left are not modified.
///
/// \param num      - Number of valid elements in \p array
/// \param idx      - Number of trailing elements to skip; taken by value so literals are accepted
/// \param array    - Indexable container to read from
/// \param assignee - First destination
/// \param args     - Remaining destinations
template <typename T, typename ArrayType, typename... Args>
void AssignValues(unsigned int num, unsigned int idx, const ArrayType& array, T& assignee, Args&... args)
{
    // Resolves to the single-value overload, which advances the local idx by reference.
    AssignValues(num, idx, array, assignee);

    // Hands the advanced idx on to the remaining assignees (or to the terminator).
    AssignValues(num, idx, array, args...);
}

}    // anonymous namespace
44 
45 template <typename CopyFunc>
CopyTensorContentsGeneric(const ITensorHandle * srcTensor,ITensorHandle * dstTensor,CopyFunc copy)46 void CopyTensorContentsGeneric(const ITensorHandle* srcTensor, ITensorHandle* dstTensor, CopyFunc copy)
47 {
48     // For ease of understanding, names are assigned to the dimensions
49     // of the tensor as if NHWC, however this routine works with any 5D tensor
50     static_assert(MaxNumOfTensorDimensions == 5, "Please update CopyTensorContents");
51 
52     TensorShape srcStrides      = srcTensor->GetStrides();
53     const TensorShape& srcShape = srcTensor->GetShape();
54     const auto srcSize          = srcTensor->GetStrides()[0] * srcShape[0];
55     IgnoreUnused(srcSize);  // Only used for asserts
56     TensorShape dstStrides      = dstTensor->GetStrides();
57     const TensorShape& dstShape = dstTensor->GetShape();
58     const auto dstSize          = dstTensor->GetStrides()[0] * dstShape[0];
59     IgnoreUnused(dstSize);  // Only used for asserts
60 
61     size_t srcDepth    = 1;
62     size_t srcBatches  = 1;
63     size_t srcHeight   = 1;
64     size_t srcWidth    = 1;
65     size_t srcChannels = 1;
66     AssignValues(srcShape.GetNumDimensions(),
67                  0,
68                  srcShape,
69                  srcChannels,
70                  srcWidth,
71                  srcHeight,
72                  srcBatches,
73                  srcDepth);
74 
75     size_t srcDepthStride   = 0;
76     size_t srcBatchStride   = 0;
77     size_t srcHeightStride  = 0;
78     size_t srcWidthStride   = 0;
79     size_t srcChannelStride = 0;
80     AssignValues(srcStrides.GetNumDimensions(),
81                  0,
82                  srcStrides,
83                  srcChannelStride,
84                  srcWidthStride,
85                  srcHeightStride,
86                  srcBatchStride,
87                  srcDepthStride);
88 
89     size_t dstDepth    = 1;
90     size_t dstBatches  = 1;
91     size_t dstHeight   = 1;
92     size_t dstWidth    = 1;
93     size_t dstChannels = 1;
94     AssignValues(dstShape.GetNumDimensions(),
95                  0,
96                  dstShape,
97                  dstChannels,
98                  dstWidth,
99                  dstHeight,
100                  dstBatches,
101                  dstDepth);
102 
103     size_t dstDepthStride   = 0;
104     size_t dstBatchStride   = 0;
105     size_t dstHeightStride  = 0;
106     size_t dstWidthStride   = 0;
107     size_t dstChannelStride = 0;
108     AssignValues(dstStrides.GetNumDimensions(),
109                  0,
110                  dstStrides,
111                  dstChannelStride,
112                  dstWidthStride,
113                  dstHeightStride,
114                  dstBatchStride,
115                  dstDepthStride);
116 
117     const unsigned char* srcDataStart;
118     unsigned char* dstDataStart;
119     {
120         ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "Synchronize buffers");
121         srcDataStart = static_cast<const uint8_t*>(srcTensor->Map());
122         dstDataStart = static_cast<uint8_t*>(dstTensor->Map());
123     }
124 
125     size_t copyLength  = std::min(srcChannels * srcChannelStride, dstChannels * dstChannelStride);
126     size_t copyWidth   = std::min(srcWidth, dstWidth);
127     size_t copyHeight  = std::min(srcHeight, dstHeight);
128     size_t copyBatches = std::min(srcBatches, dstBatches);
129     size_t copyDepth   = std::min(srcDepth, dstDepth);
130 
131     // Coalesce inner dimensions where possible
132     // to reduce overheard calling copy() and to
133     // allow for memory bandwidth optimisations
134     if (copyLength == srcWidthStride &&
135         copyLength == dstWidthStride)
136     {
137         // There is no special padding between rows,
138         // and sizes are compatible, so copy whole rows
139         copyLength *= copyWidth;
140         copyWidth = 1;
141 
142         if (copyLength == srcHeightStride &&
143             copyLength == dstHeightStride)
144         {
145             // There is no special padding between batches
146             // and sizes are compatible so copy whole batches
147             copyLength *= copyHeight;
148             copyHeight = 1;
149         }
150     }
151 
152     const unsigned char* srcData = srcDataStart;
153     unsigned char* dstData = dstDataStart;
154     for (unsigned int d = 0; d < copyDepth; ++d)
155     {
156         auto srcPtrDepth = srcData;
157         auto dstPtrDepth = dstData;
158         for (unsigned int b = 0; b < copyBatches; ++b)
159         {
160             auto srcPtrBatch = srcData;
161             auto dstPtrBatch = dstData;
162             for (unsigned int h = 0; h < copyHeight; ++h)
163             {
164                 auto srcPtrChannel = srcData;
165                 auto dstPtrChannel = dstData;
166                 for (unsigned int w = 0; w < copyWidth; ++w)
167                 {
168                     ARMNN_ASSERT(srcData >= srcDataStart && srcData + copyLength <= srcDataStart + srcSize);
169                     ARMNN_ASSERT(dstData >= dstDataStart && dstData + copyLength <= dstDataStart + dstSize);
170                     copy(dstData, srcData, copyLength);
171                     dstData += dstWidthStride;
172                     srcData += srcWidthStride;
173                 }
174                 dstData += (static_cast<long>(dstHeightStride) - (dstData - dstPtrChannel));
175                 srcData += (static_cast<long>(srcHeightStride) - (srcData - srcPtrChannel));
176             }
177             dstData += (static_cast<long>(dstBatchStride) - (dstData - dstPtrBatch));
178             srcData += (static_cast<long>(srcBatchStride) - (srcData - srcPtrBatch));
179         }
180         dstData += (static_cast<long>(dstDepthStride) - (dstData - dstPtrDepth));
181         srcData += (static_cast<long>(srcDepthStride) - (srcData - srcPtrDepth));
182     }
183 
184     srcTensor->Unmap();
185     dstTensor->Unmap();
186 }
187 
188 template <typename SrcTensorHandleType, typename DstTensorHandleType, typename DescriptorType>
GatherTensorHandlePairs(const DescriptorType & descriptor,std::vector<std::pair<SrcTensorHandleType *,DstTensorHandleType * >> & tensorHandlePairs)189 void GatherTensorHandlePairs(const DescriptorType& descriptor,
190                              std::vector<std::pair<SrcTensorHandleType*, DstTensorHandleType*>>& tensorHandlePairs)
191 {
192     const unsigned int numInputs = static_cast<unsigned int>(descriptor.m_Inputs.size());
193     tensorHandlePairs.reserve(numInputs);
194 
195     for (unsigned int i = 0; i < numInputs; ++i)
196     {
197         SrcTensorHandleType* const srcTensorHandle =
198             PolymorphicDowncast<SrcTensorHandleType*>(descriptor.m_Inputs[i]);
199         DstTensorHandleType* const dstTensorHandle =
200             PolymorphicDowncast<DstTensorHandleType*>(descriptor.m_Outputs[i]);
201 
202         tensorHandlePairs.emplace_back(srcTensorHandle, dstTensorHandle);
203     }
204 }
205 
206 int32_t ConvertMaskToACLFormat(int32_t mask, int32_t numDim);
207 
208 armnn::ConstTensor PermuteTensor(const ConstTensorHandle* tensor,
209                                  const PermutationVector& permutationVector,
210                                  void* permuteBuffer);
211 
212 void ReshapeWeightsForAcl(TensorInfo& weightInfo, DataLayout dataLayout);
213 
214 TensorInfo ConvertWeightTensorInfoFromArmnnToAcl(const TensorInfo& weightInfo, DataLayout dataLayout);
215 
216 /// Weights for depthwise have a datalayout of [1,H,W,O] = [1,H,W,I*M]
217 /// This function coverts a TensorInfo from [1,H,W,I*M] to [1,I*M,H,W] (if NCHW) or keeps it at [1,H,W,I*M] (if NHWC)
218 /// as required by the compute library
219 /// Returns a tuple of converted weights tensor info and depth multiplier
220 std::tuple<TensorInfo, unsigned int> Convert1HWOTensorInfoToAcl(const TensorInfo& weightInfo,
221                                                                 const TensorInfo& inputInfo,
222                                                                 const DataLayout dataLayout);
223 
224 armnn::ConstTensor ConvertWeightTensorFromArmnnToAcl(const ConstTensorHandle* weightTensor,
225                                                      DataLayout dataLayout,
226                                                      void* permuteBuffer);
227 
228 /// Weights for depthwise have a datalayout of [1,H,W,O] = [1,H,W,I*M]
229 /// This function coverts a ConstCpuTensorHandle from [1,H,W,I*M] to [1,I*M,H,W] (if NCHW) or
230 /// keeps it at [1,H,W,I*M] (if NHWC) as required by the compute library
231 ///
232 /// \param weightTensor - ConstTensorHandle of weights tensor
233 /// \param inputInfo - TensorInfo of input tensor
234 /// \param dataLayout - DataLayout of the input tensor
235 /// \param permuteBuffer - Pointer to memory with the size of tensor. Used for the permutation
236 /// \return tuple of transformed weights-ConstTensor and depthwise multiplier
237 std::tuple<ConstTensor, unsigned int> Convert1HWOTensorToAcl(const ConstTensorHandle* weightTensor,
238                                                              const TensorInfo& inputInfo,
239                                                              const DataLayout dataLayout,
240                                                              void* permuteBuffer);
241 
242 /// Converts a (weights) tensor from [1, H, W, I*M] = [1, H, W, O] to [M, I, H, W]
243 ///
244 /// \param weightTensor - ConstTensorHandle of the weight tensor that should be converted
245 /// \param inputInfo - TensorInfo of the corresponding input tensor
246 /// \param dataLayout - DataLayout of the input tensor e.g. NHWC or NCHW
247 /// \param permuteBuffer - Memory location with the same size as the weight tensor to write converted data to
248 /// \return - A tuple of ConstTensor and unsigned int which is the converted weightTensor and the depthMultiplier
249 std::tuple<ConstTensor, unsigned int> Convert1HWOtoMIHW(const ConstTensorHandle* weightTensor,
250                                                         const TensorInfo& inputInfo,
251                                                         const DataLayout& dataLayout,
252                                                         void* permuteBuffer);
253 
254 /// Calculates the key index values needed for GatherNd: N, ND, K, W, C (N is always 1)
255 ///
256 /// \param inputInfo0 - TensorInfo of the corresponding input tensor: params
257 /// \param inputInfo1 - TensorInfo of the corresponding input tensor: indices
258 /// \return - A map with names and values for  N, ND, K, W, C
259 std::map<std::string, unsigned int> CalculateGatherNdKeyIndices(TensorInfo inputInfo0, TensorInfo inputInfo1);
260 
261 /// Generates a permutation vector of size rank that permutes the 2 most right dimensions
262 ///
263 /// \param rank - Tensor rank, i.e. number of dimensions in the tensors
264 /// \return - A permutation vector that permutes the 2 last dimensions
265 armnn::PermutationVector GeneratePermutationVectorOnLastTwoDimensions(unsigned int rank);
266 
267 }  //namespace armnn
268