//
// Copyright © 2017 Arm Ltd. All rights reserved.
// SPDX-License-Identifier: MIT
//

#include <backendsCommon/WorkloadUtils.hpp>

#include <armnn/Utils.hpp>
#include <armnn/utility/NumericCast.hpp>
#include <armnnUtils/DataLayoutIndexed.hpp>

#include <fmt/format.h>
#include <numeric>

namespace armnn
{

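// Copies the data of a constant tensor into permuteBuffer, rearranged according to permutationVector
// (or byte-for-byte if the vector is empty), and returns a ConstTensor wrapping permuteBuffer with the
// permuted TensorInfo marked as constant. The returned tensor aliases permuteBuffer, so the buffer must
// stay alive for as long as the tensor is used.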
armnn::ConstTensor PermuteTensor(const ConstTensorHandle* tensor,
                                 const PermutationVector& permutationVector, void* permuteBuffer)
{
    ARMNN_ASSERT_MSG(tensor, "Invalid input tensor");
    ARMNN_ASSERT_MSG(permuteBuffer, "Invalid permute buffer");

    TensorInfo tensorInfo = tensor->GetTensorInfo();

    if (permutationVector.GetSize() > 0)
    {
        tensorInfo = armnnUtils::Permuted(tensorInfo, permutationVector);
        armnnUtils::Permute(tensorInfo.GetShape(), permutationVector,
                            tensor->GetConstTensor<void>(), permuteBuffer,
                            GetDataTypeSize(tensorInfo.GetDataType()));
    }
    else
    {
        ::memcpy(permuteBuffer, tensor->GetConstTensor<void>(), tensorInfo.GetNumBytes());
    }
    tensorInfo.SetConstant(true);
    return ConstTensor(tensorInfo, permuteBuffer);
}

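// Reshapes depthwise convolution weights in-place to the 4D layout expected by the compute library:
// [ 1, H, W, I * M ] for NHWC, or [ 1, I * M, H, W ] for NCHW. E.g. a weight shape of
// [ H, W, I, M ] = [ 3, 3, 16, 2 ] becomes [ 1, 3, 3, 32 ] under NHWC.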
void ReshapeWeightsForAcl(TensorInfo& weightInfo, DataLayout dataLayout)
{
    // Reshape the weights in-place
    const TensorShape& weightShape = weightInfo.GetShape();
    switch (dataLayout)
    {
        case DataLayout::NHWC:
            // The data layout is NHWC, reshape from [ H, W, I, M ] to [ 1, H, W, I * M ]
            weightInfo.SetShape({ 1,
                                  weightShape[0],
                                  weightShape[1],
                                  weightShape[2] * weightShape[3] });
            // Note: weightShape is a reference to the tensor's shape, so this second call reads the
            // already-updated values and leaves the shape as [ 1, H, W, I * M ]
            weightInfo.SetShape({ 1,
                                  weightShape[0] * weightShape[1],
                                  weightShape[2],
                                  weightShape[3] });
            break;
        case DataLayout::NCHW:
        default:
            // The data layout is NCHW, reshape from [ M, I, H, W ] to [ 1, I * M, H, W ]
            weightInfo.SetShape({ 1, weightShape[0] * weightShape[1], weightShape[2], weightShape[3] });
            break;
    }
}

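// Shuffles depthwise weight data in permuteBuffer from ArmNN's multiplier-major channel order to the
// input-channel-major order needed by ACL: for [ M, I, H, W ] data, the H * W plane of origin channel
// m * I + i is copied to destination channel i * M + m. E.g. with M = 2 and I = 3, origin channels
// [0 1 2 3 4 5] land in destination slots [0 2 4 1 3 5].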
template <typename DataType>
ConstTensor ReorderWeightChannelsForAcl(const ConstTensor& weightHandle, DataLayout dataLayout, void* permuteBuffer)
{
    DataType* weight = static_cast<DataType*>(permuteBuffer);
    const TensorShape& weightShape = weightHandle.GetShape();
    unsigned int multiplier;
    unsigned int height;
    unsigned int width;
    unsigned int inputChannels;
    switch (dataLayout)
    {
        case DataLayout::NHWC:    // It actually is [ H, W, I, M ]
            height        = weightShape[0];
            width         = weightShape[1];
            inputChannels = weightShape[2];
            multiplier    = weightShape[3];
            break;
        case DataLayout::NCHW:    // It actually is [ M, I, H, W ]
        default:
            height        = weightShape[2];
            width         = weightShape[3];
            inputChannels = weightShape[1];
            multiplier    = weightShape[0];
            break;
    }

    std::vector<DataType> weightAclOrder(height*width*inputChannels*multiplier);
    unsigned int destinationWeightsChannel;
    unsigned int totalChannels = inputChannels * multiplier;
    unsigned int channelSize   = height * width;
    unsigned int inputChannel  = 0;

    for (unsigned int originWeightsChannel = 0; originWeightsChannel < totalChannels; originWeightsChannel++)
    {
        inputChannel = originWeightsChannel % inputChannels;
        destinationWeightsChannel = (originWeightsChannel - inputChannel) / inputChannels + multiplier * inputChannel;

        for (unsigned int i = 0; i < channelSize; i++)
        {
            weightAclOrder[i + destinationWeightsChannel * channelSize] =
                    weight[i + originWeightsChannel * channelSize];
        }
    }

    ::memcpy(permuteBuffer, weightAclOrder.data(), weightHandle.GetInfo().GetNumBytes());
    return ConstTensor(weightHandle.GetInfo(), permuteBuffer);
}


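// Shape-only counterpart of ConvertWeightTensorFromArmnnToAcl: computes the TensorInfo that results from
// converting ArmNN depthwise weights [ M, I, H, W ] to the ACL layout. E.g. [ 2, 3, 5, 5 ] becomes
// [ 1, 5, 5, 6 ] for NHWC and [ 1, 6, 5, 5 ] for NCHW.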
TensorInfo ConvertWeightTensorInfoFromArmnnToAcl(const TensorInfo& weightInfo, DataLayout dataLayout)
{
    // Convert the weight format from ArmNN's [ M, I, H, W ] (does NOT depend on the data layout) to either
    // [ 1, H, W, I * M ] (if NHWC) or [ 1, I * M, H, W ] (if NCHW), as required by the compute library

    // 1. Permute the weights if necessary
    // If the data layout is NCHW no permutation is necessary, as a reshape to [ 1, I * M, H, W ] can be better done
    // starting from the current shape of [ M, I, H, W ]
    TensorInfo weightPermutedInfo(weightInfo);
    if (dataLayout == DataLayout::NHWC)
    {
        // The data layout is NHWC, so permute the weights from [ M, I, H, W ] to [ H, W, I, M ]
        PermutationVector permutationVector{ 3, 2, 0, 1 };
        weightPermutedInfo = armnnUtils::Permuted(weightInfo, permutationVector);
    }

    // 2. Reshape the weights
    ReshapeWeightsForAcl(weightPermutedInfo, dataLayout);

    // 3. Return the permuted weight info
    return weightPermutedInfo;
}


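// Converts depthwise weights that are already stored as [ 1, H, W, I * M ] into the ACL layout and derives
// the depth multiplier from the weight and input shapes. For NHWC input the data is copied as-is; for NCHW
// input it is permuted to [ 1, I * M, H, W ]. E.g. weights [ 1, 3, 3, 6 ] with an NHWC input of
// [ 1, 32, 32, 3 ] give a depth multiplier of 6 / 3 = 2.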
std::tuple<ConstTensor, unsigned int> Convert1HWOTensorToAcl(const ConstTensorHandle* weightTensor,
                                                             const TensorInfo& inputInfo,
                                                             const DataLayout dataLayout,
                                                             void* permuteBuffer)
{
    TensorInfo weightsInfo = weightTensor->GetTensorInfo();
    unsigned int depthMultiplier = 1;
    PermutationVector permutationVector{};
    if (dataLayout == armnn::DataLayout::NHWC)
    {
        // No permutation required. Data layouts are the same.
        depthMultiplier = weightsInfo.GetShape()[3] / inputInfo.GetShape()[3];
    }
    else if (dataLayout == armnn::DataLayout::NCHW)
    {
        // [ 1, H, W, I * M ] --> [ 1, I * M, H, W ]
        depthMultiplier = weightsInfo.GetShape()[3] / inputInfo.GetShape()[1];
        permutationVector = { 0, 2, 3, 1 };
    }
    else
    {
        throw InvalidArgumentException(fmt::format("Unknown data layout for tensor conversion: {}",
                                                   GetDataLayoutName(dataLayout)));
    }

    ConstTensor weightsPermuted = PermuteTensor(weightTensor, permutationVector, permuteBuffer);

    return std::make_tuple(weightsPermuted, depthMultiplier);
}

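// Shape-only counterpart of Convert1HWOTensorToAcl: returns the TensorInfo of the [ 1, H, W, I * M ]
// weights after any layout permutation, together with the depth multiplier, without touching the data.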
std::tuple<TensorInfo, unsigned int> Convert1HWOTensorInfoToAcl(const TensorInfo& weightInfo,
                                                                const TensorInfo& inputInfo,
                                                                const DataLayout dataLayout)
{
    unsigned int aclDepthMultiplier = 1;
    TensorInfo weightsPermuted;
    if (dataLayout == armnn::DataLayout::NHWC)
    {
        // No permutation required. Input and weights data layouts are the same.
        aclDepthMultiplier = weightInfo.GetShape()[3] / inputInfo.GetShape()[3];
        weightsPermuted = weightInfo;
    }
    else if (dataLayout == armnn::DataLayout::NCHW)
    {
        // Weights permutation required. Weights [N,H,W,C] and input [N,C,H,W] data layouts are different.
        // [ 1, H, W, I * M ] --> [ 1, I * M, H, W ]
        aclDepthMultiplier = weightInfo.GetShape()[3] / inputInfo.GetShape()[1];
        PermutationVector permutationVector{ 0, 2, 3, 1 };
        weightsPermuted = armnnUtils::Permuted(weightInfo, permutationVector);
    }
    else
    {
        throw InvalidArgumentException(fmt::format("Unknown data layout for tensor info conversion: {}",
                                                   GetDataLayoutName(dataLayout)));
    }

    return std::make_tuple(weightsPermuted, aclDepthMultiplier);
}


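// Converts depthwise weights stored as [ 1, H, W, I * M ] to ArmNN's [ M, I, H, W ] layout by reshaping
// to [ H, W, I, M ] and permuting with { 2, 3, 1, 0 }, and returns the resulting tensor together with the
// depth multiplier. Weights with per-channel quantization are rejected.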
std::tuple<ConstTensor, unsigned int> Convert1HWOtoMIHW(const ConstTensorHandle* weightTensor,
                                                        const TensorInfo& inputInfo,
                                                        const DataLayout& dataLayout,
                                                        void* permuteBuffer)
{
    TensorInfo weightsInfo = weightTensor->GetTensorInfo();

    if (weightsInfo.HasPerAxisQuantization())
    {
        throw InvalidArgumentException("Can't convert tensor from [1,H,W,Cout] to [M,Cin,H,W] when per channel "
                                       "quantization is applied.");
    }

    // Reshape weights [ 1, H, W, I * M ] --> [ H, W, I, M ]
    auto weightsShape = weightsInfo.GetShape();
    auto channelIndex = armnnUtils::DataLayoutIndexed(dataLayout).GetChannelsIndex();
    unsigned int depthMultiplier = weightsShape[3] / inputInfo.GetShape()[channelIndex];
    weightsInfo.SetShape({ weightsShape[1],
                           weightsShape[2],
                           inputInfo.GetShape()[channelIndex],
                           depthMultiplier });

    // Permute [ H, W, I, M ] --> [ M, I, H, W ]
    PermutationVector permutationVector = { 2, 3, 1, 0 };
    ConstTensor weightsPermuted = PermuteTensor(weightTensor, permutationVector, permuteBuffer);

    return std::make_tuple(weightsPermuted, depthMultiplier);
}

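// Full data conversion of ArmNN depthwise weights [ M, I, H, W ] into the ACL layout: permute (NHWC only),
// shuffle the channel order when both M > 1 and I > 1 under NCHW, then reshape. The result aliases
// permuteBuffer, so the buffer must outlive the returned tensor.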
armnn::ConstTensor ConvertWeightTensorFromArmnnToAcl(const ConstTensorHandle* weightTensor,
                                                     DataLayout dataLayout,
                                                     void* permuteBuffer)
{
    ARMNN_ASSERT_MSG(weightTensor, "Invalid input tensor");
    ARMNN_ASSERT_MSG(permuteBuffer, "Invalid permute buffer");

    auto multiplier    = weightTensor->GetTensorInfo().GetShape()[0];
    auto inputChannels = weightTensor->GetTensorInfo().GetShape()[1];

    // Convert the weight format from ArmNN's [ M, I, H, W ] (does NOT depend on the data layout) to either
    // [ 1, H, W, I * M ] (if NHWC) or [ 1, I * M, H, W ] (if NCHW), as required by the compute library

    // 1. Permute the weights if necessary
    // If the data layout is NCHW no permutation is necessary, as a reshape to [ 1, I * M, H, W ] can be better done
    // starting from the current shape of [ M, I, H, W ]
    // If no permutation is necessary, leave the permutation vector empty
    PermutationVector permutationVector{};
    if (dataLayout == DataLayout::NHWC)
    {
        // The data layout is NHWC, so permute the weights from [ M, I, H, W ] to [ H, W, I, M ]
        permutationVector = { 3, 2, 0, 1 };
    }
    ConstTensor weightPermuted = PermuteTensor(weightTensor, permutationVector, permuteBuffer);

    // Shuffle the weights data to obtain the channel order needed by ACL
    if (multiplier > 1 && inputChannels > 1 && dataLayout == DataLayout::NCHW)
    {
        switch (weightPermuted.GetDataType())
        {
            case DataType::Float32:
                weightPermuted = ReorderWeightChannelsForAcl<float>(weightPermuted, dataLayout, permuteBuffer);
                break;
            case DataType::Float16:
                weightPermuted =
                    ReorderWeightChannelsForAcl<half_float::half>(weightPermuted, dataLayout, permuteBuffer);
                break;
            case DataType::QAsymmS8:
            case DataType::QAsymmU8:
                weightPermuted = ReorderWeightChannelsForAcl<uint8_t>(weightPermuted, dataLayout, permuteBuffer);
                break;
            case DataType::QSymmS8:
                weightPermuted = ReorderWeightChannelsForAcl<int8_t>(weightPermuted, dataLayout, permuteBuffer);
                break;
            default:
                break;
        }
    }

    // 2. Reshape the weights
    ReshapeWeightsForAcl(weightPermuted.GetInfo(), dataLayout);

    // 3. Return both the tensor and the allocated storage to ensure that the data stays alive
    return weightPermuted;
}

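// Reverses the bit order of a dimension mask across numDim dimensions: bit i of the input becomes
// bit (numDim - 1 - i) of the result. E.g. mask = 0b0011 with numDim = 4 becomes 0b1100.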
int32_t ConvertMaskToACLFormat(int32_t mask, int32_t numDim)
{
    int32_t reversedMask = 0;
    for (unsigned int i = 0; i < armnn::numeric_cast<unsigned int>(numDim); ++i)
    {
        // Check if bit set in mask for each dimension
        int32_t bit = (mask & 1 << i) != 0;
        // Increment the new mask with the bits reversed
        reversedMask += (bit << std::max(numDim-(armnn::numeric_cast<int>(i)+1), 0));
    }

    return reversedMask;
}

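// Derives the GatherNd decomposition sizes from the params (inputInfo0) and indices (inputInfo1) shapes:
// N (batches), ND (index depth, the last indices dimension), W (number of index rows), K (flattened range
// covered by the first ND params dimensions) and C (remaining channels per gathered element).
// E.g. params [ 3, 4, 5 ] and indices [ 2, 2 ] give N = 1, ND = 2, W = 2, K = 12, C = 5.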
std::map<std::string, unsigned int> CalculateGatherNdKeyIndices(TensorInfo inputInfo0, TensorInfo inputInfo1)
{
    std::vector<unsigned int> paramsShape;
    for (unsigned int i = 0; i < inputInfo0.GetNumDimensions(); ++i)
    {
        paramsShape.push_back(inputInfo0.GetShape()[i]);
    }

    std::vector<unsigned int> indicesShape;
    for (unsigned int i = 0; i < inputInfo1.GetNumDimensions(); ++i)
    {
        indicesShape.push_back(inputInfo1.GetShape()[i]);
    }

    std::map<std::string, unsigned int> keyIndices;

    // N: number of batches
    keyIndices["N"] = 1;

    // ND: number of dimensions that are sliced from params
    keyIndices["ND"] = indicesShape.back();

    // W: number of indices in each batch (all but the last dimension)
    keyIndices["W"] =
        static_cast<unsigned int>(std::accumulate(std::begin(indicesShape),
                                                  std::end(indicesShape) - 1,
                                                  1,
                                                  std::multiplies<>()));
    // K: range of each index
    keyIndices["K"] =
        static_cast<unsigned int>(std::accumulate(std::begin(paramsShape),
                                                  std::begin(paramsShape) + static_cast<int>(keyIndices["ND"]),
                                                  1,
                                                  std::multiplies<>()));
    // C: number of channels for each index
    keyIndices["C"] =
        static_cast<unsigned int>(std::accumulate(std::begin(paramsShape) + static_cast<int>(keyIndices["ND"]),
                                                  std::end(paramsShape),
                                                  1,
                                                  std::multiplies<>()));

    return keyIndices;
}

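// Builds a permutation vector that swaps the last two dimensions of a tensor of rank 2, 3 or 4
// (e.g. { 0U, 1U, 3U, 2U } for rank 4); any other rank throws.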
armnn::PermutationVector GeneratePermutationVectorOnLastTwoDimensions(unsigned int rank)
{
    armnn::PermutationVector permutationVector{};
    switch (rank)
    {
        case 2:
            permutationVector = {1U, 0U};
            break;
        case 3:
            permutationVector = {0U, 2U, 1U};
            break;
        case 4:
            permutationVector = {0U, 1U, 3U, 2U};
            break;
        default:
            throw Exception("Invalid number of dimensions.");
    }
    return permutationVector;
}

} // namespace armnn