//
// Copyright © 2017,2022 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//

#include "Conv2dTestImpl.hpp"

#include <armnnUtils/QuantizeHelper.hpp>
#include <armnnUtils/TensorUtils.hpp>

#include <armnn/utility/IgnoreUnused.hpp>
#include <armnn/utility/NumericCast.hpp>
#include <armnnUtils/DataLayoutIndexed.hpp>
#include <armnnUtils/Permute.hpp>

#include <armnn/backends/TensorHandle.hpp>

#include <armnnTestUtils/DataLayoutUtils.hpp>
#include <armnnTestUtils/TensorCopyUtils.hpp>
#include <armnnTestUtils/WorkloadTestUtils.hpp>

#include <armnnTestUtils/TensorHelpers.hpp>

#include <string>

//
// Static data
//

// 2-channel bias used by a number of Conv2d tests.
static std::vector<float> Bias2({0, 2});

static std::vector<float> Bias4({1, 2, 3, 4});

static std::vector<float> Bias8({1, 2, 3, 4, 1, 2, 3, 4});

// 3-channel 16x8 image used as common input data for a number of Conv2d tests.
static std::vector<float> ConvInput3x8x16({
    0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f,
    0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
    0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f,
    0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f,
    0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f,
    0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f,
    0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f,
    0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f,
    0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1
});

using namespace armnnUtils;

//
// Helper templates
//

// Helper template that returns either Bias2 or an empty vector depending on whether bias is enabled.
template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
std::vector<T> GetBias2(bool biasEnabled, float qScale)
{
    if(biasEnabled)
    {
        return QuantizedVector<T>(Bias2, qScale, 0);
    }
    else
    {
        return std::vector<T>();
    }
}

// Helper template that returns either Bias4 or an empty vector depending on whether bias is enabled.
template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
std::vector<T> GetBias4(bool biasEnabled, float qScale)
{
    if(biasEnabled)
    {
        return QuantizedVector<T>(Bias4, qScale, 0);
    }
    else
    {
        return std::vector<T>();
    }
}

// Helper template that returns either Bias8 or an empty vector depending on whether bias is enabled.
template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
std::vector<T> GetBias8(bool biasEnabled, float qScale)
{
    if(biasEnabled)
    {
        return QuantizedVector<T>(Bias8, qScale, 0);
    }
    else
    {
        return std::vector<T>();
    }
}

// Helper template that returns Bias2, Bias4 or Bias8 (selected by the number of
// output channels), or an empty vector if bias is not enabled.
template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
std::vector<T> GetBias(bool biasEnabled, float qScale, armnn::TensorInfo outputInfo, armnn::DataLayout layout)
{
    const armnnUtils::DataLayoutIndexed dataLayoutIndexed(layout);
    const unsigned int channelsIndex = dataLayoutIndexed.GetChannelsIndex();
    const unsigned int outputChannels = outputInfo.GetShape()[channelsIndex];

    switch (outputChannels)
    {
        case 2:
        default:
        {
            return GetBias2<ArmnnType>(biasEnabled, qScale);
        }
        case 4:
        {
            return GetBias4<ArmnnType>(biasEnabled, qScale);
        }
        case 8:
        {
            return GetBias8<ArmnnType>(biasEnabled, qScale);
        }
    }
}
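
// Usage sketch (illustrative only, not used by the tests): for a hypothetical
// NCHW output of shape {1, 4, 6, 14} the channels index is 1, so GetBias
// selects the 4-element Bias4 vector:
//
//     armnn::TensorInfo outputInfo({1, 4, 6, 14}, armnn::DataType::Float32);
//     auto bias = GetBias<armnn::DataType::Float32>(
//         true, /*qScale=*/0.1f, outputInfo, armnn::DataLayout::NCHW);
//     // bias.size() == 4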

//
// Implementation templates
//

// Mapping from input type to bias type for fully connected layers.
// float => float, uint8_t => int32_t
template<typename T>
struct FullyConnectedBiasTypeForInputType;

template<>
struct FullyConnectedBiasTypeForInputType<float>
{
    using Type = float;
};

template<>
struct FullyConnectedBiasTypeForInputType<uint8_t>
{
    using Type = int32_t;
};
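
// Compile-time check of the mapping above (illustrative only):
//
//     static_assert(std::is_same<
//         FullyConnectedBiasTypeForInputType<uint8_t>::Type, int32_t>::value,
//         "uint8_t inputs use int32_t biases");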

// Modifies a std::vector in-place using a specified bias.
template<typename T, typename B>
void ApplyBias(std::vector<T>& v, float vScale, int32_t vOffset,
    const std::vector<B>& bias, float bScale, int32_t bOffset, uint32_t w, uint32_t h)
{
    ARMNN_ASSERT_MSG((armnn::IsQuantizedType<T>() && vScale != 0.0f) || (!armnn::IsQuantizedType<T>()),
                     "Invalid type and parameter combination.");
    ARMNN_ASSERT_MSG((armnn::IsQuantizedType<B>() && bScale != 0.0f) || (!armnn::IsQuantizedType<B>()),
                     "Invalid type and parameter combination.");

    // Note we need to dequantize and re-quantize the image value and the bias.
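    // Worked example (assumed values, for illustration only): with vScale = 0.5,
    // vOffset = 0, bScale = 0.25 and bOffset = 0, a stored output value of 6
    // dequantizes to 6 * 0.5 = 3.0 and a stored bias of 4 dequantizes to
    // 4 * 0.25 = 1.0; the sum 4.0 is re-quantized as round(4.0 / 0.5) + 0 = 8.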
    for (uint32_t i = 0; i < bias.size(); ++i)
    {
        float dBias = SelectiveDequantize(bias[i], bScale, bOffset);
        for (uint32_t y = 0; y < h; ++y)
        {
            for (uint32_t x = 0; x < w; ++x)
            {
                uint32_t offset = (i * h + y) * w + x;
                ARMNN_ASSERT(offset < v.size());
                T& outRef = v[offset];
                float dOutput = SelectiveDequantize(outRef, vScale, vOffset);
                outRef = SelectiveQuantize<T>(dOutput + dBias, vScale, vOffset);
            }
        }
    }
}

//
// Convolution2d implementations
//

template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType,
         typename T = armnn::ResolveType<ArmnnType>, typename B = armnn::ResolveType<ArmnnBType>>
LayerTestResult<T, 4> SimpleConvolution2dTestImpl(
    armnn::IWorkloadFactory& workloadFactory,
    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    const armnn::ITensorHandleFactory& tensorHandleFactory,
    const std::vector<T>& originalInput,
    const std::vector<T>& originalKernel,
    const std::vector<B>& bias,
    const std::vector<T>& originalOutputExpected,
    const armnn::TensorShape& originalInputShape,
    const armnn::TensorShape& originalKernelShape,
    const armnn::TensorShape& originalOutputExpectedShape,
    float qScale,
    int32_t qOffset,
    const armnn::DataLayout layout = armnn::DataLayout::NCHW,
    uint32_t padLeft = 0,
    uint32_t padTop = 0,
    uint32_t padRight = 0,
    uint32_t padBottom = 0,
    uint32_t strideX = 1,
    uint32_t strideY = 1,
    uint32_t dilationX = 1,
    uint32_t dilationY = 1)
{
    armnn::IgnoreUnused(memoryManager);
    unsigned int inputHeight    = armnn::numeric_cast<unsigned int>(originalInputShape[2]);
    unsigned int inputWidth     = armnn::numeric_cast<unsigned int>(originalInputShape[3]);
    unsigned int inputChannels  = armnn::numeric_cast<unsigned int>(originalInputShape[1]);
    unsigned int inputNum       = armnn::numeric_cast<unsigned int>(originalInputShape[0]);

    unsigned int outputHeight   = armnn::numeric_cast<unsigned int>(originalOutputExpectedShape[2]);
    unsigned int outputWidth    = armnn::numeric_cast<unsigned int>(originalOutputExpectedShape[3]);
    unsigned int outputChannels = armnn::numeric_cast<unsigned int>(originalOutputExpectedShape[1]);
    unsigned int outputNum      = armnn::numeric_cast<unsigned int>(originalOutputExpectedShape[0]);

    unsigned int kernelHeight   = armnn::numeric_cast<unsigned int>(originalKernelShape[2]);
    unsigned int kernelWidth    = armnn::numeric_cast<unsigned int>(originalKernelShape[3]);
    unsigned int kernelChannels = armnn::numeric_cast<unsigned int>(originalKernelShape[1]);
    unsigned int kernelDepthMul = armnn::numeric_cast<unsigned int>(originalKernelShape[0]);

    bool biasEnabled = bias.size() > 0;

    // This function currently assumes 1 batch of input/output (and duplicates this into 2 batches).
    ARMNN_ASSERT(inputNum == 1);
    ARMNN_ASSERT(outputNum == 1);

    // If a bias is used, its size must equal the number of output channels.
    ARMNN_ASSERT(!biasEnabled || bias.size() == outputChannels);

    // Note these tensors will use two (identical) batches.
    armnn::TensorInfo inputTensorInfo =
            armnnUtils::GetTensorInfo(2*inputNum, inputChannels, inputHeight, inputWidth, layout, ArmnnType);
    armnn::TensorInfo outputTensorInfo =
            armnnUtils::GetTensorInfo(2*outputNum, outputChannels, outputHeight, outputWidth, layout, ArmnnType);
    armnn::TensorInfo kernelDesc =
            armnnUtils::GetTensorInfo(kernelDepthMul, kernelChannels, kernelHeight, kernelWidth, layout, ArmnnType);
    armnn::TensorInfo biasDesc({static_cast<unsigned int>(bias.size())}, ArmnnBType);

    // Set quantization parameters if the requested type is a quantized type.
    if(armnn::IsQuantizedType<T>())
    {
        inputTensorInfo.SetQuantizationScale(qScale);
        inputTensorInfo.SetQuantizationOffset(qOffset);
        outputTensorInfo.SetQuantizationScale(qScale);
        outputTensorInfo.SetQuantizationOffset(qOffset);
        kernelDesc.SetQuantizationScale(qScale);
        kernelDesc.SetQuantizationOffset(qOffset);
        biasDesc.SetQuantizationScale(qScale*qScale);
        biasDesc.SetQuantizationOffset(0);
    }

    // Construct input data - two batches of the same input image.
    std::vector<T> inputImage;
    inputImage.assign(originalInput.data(), originalInput.data() + 1*inputChannels*inputHeight*inputWidth);
    std::vector<T> inputData;
    inputData.insert(inputData.end(), inputImage.begin(), inputImage.end());
    inputData.insert(inputData.end(), inputImage.begin(), inputImage.end());

    // If the NHWC layout is required, permute the input data at this point.
    const armnn::PermutationVector NCHWToNHWC = { 0, 3, 1, 2 };
    if (layout == armnn::DataLayout::NHWC)
    {
        std::vector<T> tmp(inputData.size());
        armnnUtils::Permute(inputTensorInfo.GetShape(), NCHWToNHWC, inputData.data(), tmp.data(), sizeof(T));
        inputData = tmp;
    }
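
    // For reference: armnn::PermutationVector maps source dimension i to
    // destination dimension NCHWToNHWC[i], so { 0, 3, 1, 2 } sends N->0, C->3,
    // H->1, W->2; e.g. an NCHW shape {2, 3, 8, 16} becomes the NHWC shape
    // {2, 8, 16, 3}.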

    std::vector<T> outputImage;
    outputImage.assign(originalOutputExpected.data(),
            originalOutputExpected.data() + outputChannels*outputHeight*outputWidth);

    // Apply bias to output image if it is enabled.
    if(biasEnabled)
    {
        std::vector<T> biasV;
        biasV.assign(bias.data(), bias.data() + outputChannels);
        ApplyBias(outputImage, outputTensorInfo.GetQuantizationScale(), outputTensorInfo.GetQuantizationOffset(),
            biasV, biasDesc.GetQuantizationScale(), biasDesc.GetQuantizationOffset(),
            outputWidth, outputHeight);
    }

    // Data will be copied from outputHandle
    std::vector<T> actualOutput(outputTensorInfo.GetNumElements());

    // Construct expected output data - two identical images.
    std::vector<T> expectedOutput;
    expectedOutput.insert(expectedOutput.end(), outputImage.begin(), outputImage.end());
    expectedOutput.insert(expectedOutput.end(), outputImage.begin(), outputImage.end());

    // Likewise, permute the expected output if the NHWC layout is required.
    if (layout == armnn::DataLayout::NHWC)
    {
        std::vector<T> tmp(expectedOutput.size());
        armnnUtils::Permute(outputTensorInfo.GetShape(), NCHWToNHWC, expectedOutput.data(), tmp.data(), sizeof(T));
        expectedOutput = tmp;
    }

    std::unique_ptr<armnn::ITensorHandle> inputHandle = tensorHandleFactory.CreateTensorHandle(inputTensorInfo);
    std::unique_ptr<armnn::ITensorHandle> outputHandle = tensorHandleFactory.CreateTensorHandle(outputTensorInfo);
    std::unique_ptr<armnn::ITensorHandle> weightsHandle = tensorHandleFactory.CreateTensorHandle(kernelDesc);

    armnn::Convolution2dQueueDescriptor data;
    armnn::WorkloadInfo info;

    // Permute the kernel if necessary
    std::vector<T> kernel = originalKernel;
    if (layout == armnn::DataLayout::NHWC)
    {
        armnnUtils::Permute(kernelDesc.GetShape(), NCHWToNHWC, originalKernel.data(), kernel.data(), sizeof(T));
    }

    AddInputToWorkload(data, info, inputTensorInfo, inputHandle.get());
    AddInputToWorkload(data, info, kernelDesc, weightsHandle.get());
    AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get());

    std::unique_ptr<armnn::ITensorHandle> biasHandle = nullptr;
    if (biasEnabled)
    {
        biasHandle = tensorHandleFactory.CreateTensorHandle(biasDesc);
        AddInputToWorkload(data, info, biasDesc, biasHandle.get());
    }
    data.m_Parameters.m_StrideX = strideX;
    data.m_Parameters.m_StrideY = strideY;
    data.m_Parameters.m_PadLeft = padLeft;
    data.m_Parameters.m_PadRight = padRight;
    data.m_Parameters.m_PadTop = padTop;
    data.m_Parameters.m_PadBottom = padBottom;
    data.m_Parameters.m_BiasEnabled = biasEnabled;
    data.m_Parameters.m_DataLayout = layout;
    data.m_Parameters.m_DilationX = dilationX;
    data.m_Parameters.m_DilationY = dilationY;

    std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateWorkload(armnn::LayerType::Convolution2d,
                                                                                data,
                                                                                info);
    inputHandle->Allocate();
    outputHandle->Allocate();
    weightsHandle->Allocate();

    if (biasEnabled)
    {
        biasHandle->Allocate();
        CopyDataToITensorHandle(biasHandle.get(), bias.data());
    }

    CopyDataToITensorHandle(inputHandle.get(), inputData.data());
    CopyDataToITensorHandle(weightsHandle.get(), kernel.data());

    ExecuteWorkload(*workload, memoryManager);

    CopyDataFromITensorHandle(actualOutput.data(), outputHandle.get());

    return LayerTestResult<T, 4>(actualOutput,
                                 expectedOutput,
                                 outputHandle->GetShape(),
                                 outputTensorInfo.GetShape());
}
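
// Note for callers: input, kernel and expected-output data are always supplied
// to SimpleConvolution2dTestImpl in NCHW order; when layout is NHWC the helper
// permutes the data internally, and the input/expected output are duplicated
// into two identical batches before the workload runs.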

template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType,
         typename T = armnn::ResolveType<ArmnnType>, typename B = armnn::ResolveType<ArmnnBType>,
         armnn::DataType OutType = ArmnnType, typename O = armnn::ResolveType<OutType>>
LayerTestResult<O, 4> SimpleConvolution2dNhwcTestImpl(
    armnn::IWorkloadFactory& workloadFactory,
    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    const armnn::ITensorHandleFactory& tensorHandleFactory,
    const std::vector<T>& input,
    const std::vector<T>& kernel,
    const std::vector<B>& bias,
    const std::vector<O>& outputExpected,
    const armnn::TensorShape& inputShape,
    const armnn::TensorShape& kernelShape,
    const armnn::TensorShape& outputExpectedShape,
    const armnn::DataLayout dataLayout,
    float qScale,
    int32_t qOffset,
    uint32_t padLeft = 1,
    uint32_t padTop = 1,
    uint32_t padRight = 1,
    uint32_t padBottom = 1,
    uint32_t strideX  = 1,
    uint32_t strideY  = 1)
{
    armnn::IgnoreUnused(qScale, qOffset);
    unsigned int inputNum       = armnn::numeric_cast<unsigned int>(inputShape[0]);
    unsigned int inputChannels  = armnn::numeric_cast<unsigned int>(inputShape[3]);
    unsigned int inputHeight    = armnn::numeric_cast<unsigned int>(inputShape[1]);
    unsigned int inputWidth     = armnn::numeric_cast<unsigned int>(inputShape[2]);

    unsigned int kernelChanMul  = armnn::numeric_cast<unsigned int>(kernelShape[0]);
    unsigned int kernelChannels = armnn::numeric_cast<unsigned int>(kernelShape[3]);
    unsigned int kernelHeight   = armnn::numeric_cast<unsigned int>(kernelShape[1]);
    unsigned int kernelWidth    = armnn::numeric_cast<unsigned int>(kernelShape[2]);

    unsigned int outputNum      = armnn::numeric_cast<unsigned int>(outputExpectedShape[0]);
    unsigned int outputChannels = armnn::numeric_cast<unsigned int>(outputExpectedShape[3]);
    unsigned int outputHeight   = armnn::numeric_cast<unsigned int>(outputExpectedShape[1]);
    unsigned int outputWidth    = armnn::numeric_cast<unsigned int>(outputExpectedShape[2]);

    bool biasEnabled = bias.size() > 0;

    // Creates the tensors.
    armnn::TensorInfo inputTensorInfo({inputNum, inputHeight, inputWidth, inputChannels}, ArmnnType);
    armnn::TensorInfo outputTensorInfo({outputNum, outputHeight, outputWidth, outputChannels},
                                       OutType);
    armnn::TensorInfo kernelDesc({kernelChanMul, kernelHeight, kernelWidth, kernelChannels}, ArmnnType);
    armnn::TensorInfo biasDesc({static_cast<unsigned int>(bias.size())}, ArmnnBType);

    // Construct the input data.
    std::vector<T> inputData;
    inputData.assign(input.data(), input.data() + inputHeight*inputWidth*inputChannels);

    // Construct the output data, with bias applied, as appropriate.
    std::vector<O> outputData;
    outputData.assign(outputExpected.data(), outputExpected.data() + outputHeight*outputWidth*outputChannels);

    std::vector<O> actualOutput(outputTensorInfo.GetNumElements());

    std::unique_ptr<armnn::ITensorHandle> inputHandle = tensorHandleFactory.CreateTensorHandle(inputTensorInfo);
    std::unique_ptr<armnn::ITensorHandle> outputHandle = tensorHandleFactory.CreateTensorHandle(outputTensorInfo);
    std::unique_ptr<armnn::ITensorHandle> weightsHandle = tensorHandleFactory.CreateTensorHandle(kernelDesc);
    std::unique_ptr<armnn::ITensorHandle> biasHandle = nullptr;

//    armnn::ScopedTensorHandle weightsTensor(kernelDesc);
//    AllocateAndCopyDataToITensorHandle(&weightsTensor, kernel.data());

//    armnn::ScopedTensorHandle biasTensor(biasDesc);

    armnn::Convolution2dQueueDescriptor data;

    data.m_Parameters.m_StrideX = strideX;
    data.m_Parameters.m_StrideY = strideY;
    data.m_Parameters.m_PadLeft = padLeft;
    data.m_Parameters.m_PadRight = padRight;
    data.m_Parameters.m_PadTop = padTop;
    data.m_Parameters.m_PadBottom = padBottom;
    data.m_Parameters.m_BiasEnabled = biasEnabled;
    data.m_Parameters.m_DataLayout = dataLayout;

    armnn::WorkloadInfo info;
    AddInputToWorkload(data, info, inputTensorInfo, inputHandle.get());
    AddInputToWorkload(data, info, kernelDesc, weightsHandle.get());
    AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get());

    if (biasEnabled)
    {
        biasHandle = tensorHandleFactory.CreateTensorHandle(biasDesc);
        AddInputToWorkload(data, info, biasDesc, biasHandle.get());
    }

    std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateWorkload(armnn::LayerType::Convolution2d,
                                                                                data,
                                                                                info);
    inputHandle->Allocate();
    outputHandle->Allocate();
    weightsHandle->Allocate();

    if (biasEnabled)
    {
        biasHandle->Allocate();
        CopyDataToITensorHandle(biasHandle.get(), bias.data());
    }

    CopyDataToITensorHandle(inputHandle.get(), inputData.data());
    CopyDataToITensorHandle(weightsHandle.get(), kernel.data());

    ExecuteWorkload(*workload, memoryManager);

    CopyDataFromITensorHandle(actualOutput.data(), outputHandle.get());

    return LayerTestResult<O, 4>(actualOutput,
                                 outputData,
                                 outputHandle->GetShape(),
                                 outputTensorInfo.GetShape());
}
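
// Unlike SimpleConvolution2dTestImpl above, this NHWC variant expects data
// already laid out in NHWC order and runs a single batch; it performs no
// permutation or batch duplication.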

template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType, typename T = armnn::ResolveType<ArmnnType>>
LayerTestResult<T,4> Convolution1dTestImpl(
    armnn::IWorkloadFactory& workloadFactory,
    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    const armnn::ITensorHandleFactory& tensorHandleFactory,
    float qScale,
    int32_t qOffset,
    bool biasEnabled)
{
    using B = armnn::ResolveType<ArmnnBType>;
    // Until we have a specialist 1D convolution layer, we can fake one using
    // 2D convolution with the final dimension set to 1.
    // I don't anticipate this being particularly slow, given that convolution is implemented
    // as a matrix multiplication, at which point dimension doesn't matter.

    unsigned int batchSize      = 1;
    unsigned int inputChannels  = 2;
    unsigned int outputChannels = 3;
    unsigned int inputSize      = 5; // The 1D size (could view as 'width' or 'height').
    unsigned int kernelSize     = 3;
    unsigned int padSize        = 2;
    unsigned int stride         = 1;
    unsigned int outputSize     = 7; // (inputSize + 2 * padSize - kernelSize) / stride + 1 = (5 + 4 - 3) / 1 + 1.

    armnn::TensorInfo inputInfo({batchSize, inputChannels, inputSize, 1}, ArmnnType);
    armnn::TensorInfo outputInfo({batchSize, outputChannels, outputSize, 1}, ArmnnType);
    armnn::TensorInfo kernelInfo({outputChannels, inputChannels, kernelSize, 1}, ArmnnType);
    armnn::TensorInfo biasInfo({outputChannels}, ArmnnBType);

    // Set quantization parameters if the requested type is a quantized type.
    if(armnn::IsQuantizedType<T>())
    {
        inputInfo.SetQuantizationScale(qScale);
        inputInfo.SetQuantizationOffset(qOffset);
        outputInfo.SetQuantizationScale(qScale);
        outputInfo.SetQuantizationOffset(qOffset);
        kernelInfo.SetQuantizationScale(qScale);
        kernelInfo.SetQuantizationOffset(qOffset);
        biasInfo.SetQuantizationScale(inputInfo.GetQuantizationScale()*kernelInfo.GetQuantizationScale());
        biasInfo.SetQuantizationOffset(0);
    }

    std::vector<T> inputData = QuantizedVector<T>(
        {
             5.0f, -2.0f, 2.5f, 0.0f, 1.0f,
            -3.0f,  3.2f, 5.0f, 2.0f, 3.0f,
        },
        inputInfo.GetQuantizationScale(),
        inputInfo.GetQuantizationOffset());

    std::vector<T> kernelData = QuantizedVector<T>(
        {
            1.0f,  0.0f,  0.0f,
            0.0f,  2.0f, -1.5f,

            0.0f,  0.0f,  0.0f,
            0.2f,  0.2f,  0.2f,

            0.5f,  0.0f,  0.5f,
            0.0f, -1.0f,  0.0f
        },
        kernelInfo.GetQuantizationScale(),
        kernelInfo.GetQuantizationOffset());

    std::vector<B> biasData =
        QuantizedVector<B>({ 1.0f, 0.0f, 0.0f }, biasInfo.GetQuantizationScale(), biasInfo.GetQuantizationOffset());

    std::vector<T> outputData = QuantizedVector<T>(
        {
             4.5f, -10.8f, 5.0f + 6.4f - 7.5f, -2.0f + 10.0f -3.0f, 2.5f + 4.0f - 4.5f, 6.0f, 1.0f,
            -0.6f, -0.6f + 0.64f, -0.6f + 0.64f + 1.0f, 0.64f + 1.0f + 0.4f, 1.0f + 0.4f + 0.6f, 0.4f + 0.6f, 0.6f,
             2.5f, -1.0f + 3.0f, 1.25f - 3.2f + 2.5f, -1.0f - 5.0f, 1.25f + 0.5f - 2.0f, -3.0f, 0.5f
        },
        outputInfo.GetQuantizationScale(),
        outputInfo.GetQuantizationOffset());

    std::vector<T> actualOutput(outputInfo.GetNumElements());

    // Optionally apply bias to output image.
    if(biasEnabled)
    {
        ApplyBias(outputData, outputInfo.GetQuantizationScale(), outputInfo.GetQuantizationOffset(),
            biasData, biasInfo.GetQuantizationScale(), biasInfo.GetQuantizationOffset(),
            1, outputSize);
    }

    std::unique_ptr<armnn::ITensorHandle> inputHandle  = tensorHandleFactory.CreateTensorHandle(inputInfo);
    std::unique_ptr<armnn::ITensorHandle> outputHandle = tensorHandleFactory.CreateTensorHandle(outputInfo);
    std::unique_ptr<armnn::ITensorHandle> weightsHandle = tensorHandleFactory.CreateTensorHandle(kernelInfo);
    std::unique_ptr<armnn::ITensorHandle> biasHandle = nullptr;

    armnn::Convolution2dQueueDescriptor data;
    armnn::WorkloadInfo info;
//    armnn::ScopedTensorHandle weightsTensor(kernelInfo);
//    armnn::ScopedTensorHandle biasTensor(biasInfo);
//
//    AllocateAndCopyDataToITensorHandle(&weightsTensor, kernelData.data());
//    AllocateAndCopyDataToITensorHandle(&biasTensor, biasData.data());

    AddInputToWorkload(data, info, inputInfo, inputHandle.get());
    AddInputToWorkload(data, info, kernelInfo, weightsHandle.get());
    AddOutputToWorkload(data, info, outputInfo, outputHandle.get());

    data.m_Parameters.m_StrideX = 1;
    data.m_Parameters.m_StrideY = stride;
    data.m_Parameters.m_PadLeft = 0;
    data.m_Parameters.m_PadRight = 0;
    data.m_Parameters.m_PadTop = padSize;
    data.m_Parameters.m_PadBottom = padSize;
    data.m_Parameters.m_BiasEnabled = biasEnabled;

    if (biasEnabled)
    {
        biasHandle = tensorHandleFactory.CreateTensorHandle(biasInfo);
        AddInputToWorkload(data, info, biasInfo, biasHandle.get());
    }

    std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateWorkload(armnn::LayerType::Convolution2d,
                                                                                data,
                                                                                info);
    inputHandle->Allocate();
    outputHandle->Allocate();
    weightsHandle->Allocate();

    if (biasEnabled)
    {
        biasHandle->Allocate();
        CopyDataToITensorHandle(biasHandle.get(), biasData.data());
    }

    CopyDataToITensorHandle(inputHandle.get(), inputData.data());
    CopyDataToITensorHandle(weightsHandle.get(), kernelData.data());

    ExecuteWorkload(*workload, memoryManager);

    CopyDataFromITensorHandle(actualOutput.data(), outputHandle.get());

    return LayerTestResult<T, 4>(actualOutput,
                                 outputData,
                                 outputHandle->GetShape(),
                                 outputInfo.GetShape());
}
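
// Example invocation (sketch only; the factories are assumed to be supplied by
// a backend-specific test fixture):
//
//     Convolution1dTestImpl<armnn::DataType::Float32, armnn::DataType::Float32>(
//         workloadFactory, memoryManager, tensorHandleFactory,
//         /*qScale=*/0.f, /*qOffset=*/0, /*biasEnabled=*/true);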

template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
LayerTestResult<T, 4> SimpleConvolution2d3x3NhwcTestCommon(
    armnn::IWorkloadFactory& workloadFactory,
    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    const armnn::ITensorHandleFactory& tensorHandleFactory,
    float qScale,
    int32_t qOffset,
    bool biasEnabled,
    armnn::DataLayout dataLayout)
{
    armnn::IgnoreUnused(biasEnabled);

    // Use a single-batch 1-channel 4x3 image.
    armnn::TensorInfo inputDesc({ 1, 3, 4, 1 }, ArmnnType);
    std::vector<T> input =
    {
        1, 5, 2, 3,
        8, 7, 3, 6,
        3, 3, 9, 1
    };

    // Use 1 batch of a 1-channel 3x3 kernel.
    armnn::TensorInfo kernelDesc({ 1, 3, 3, 1 }, ArmnnType);
    std::vector<T> kernel =
    {
        4, 5, 6,
        0, 0, 0,
        3, 2, 1
    };

    // Expected output is 1 batch of a 1-channel 4x3 image.
    armnn::TensorInfo outputDesc({ 1, 3, 4, 1 }, ArmnnType);
    const std::vector<float> outputData =
    {
        23, 41, 33, 21,
        44, 65, 76, 52,
        82, 85, 79, 42
    };

    return SimpleConvolution2dNhwcTestImpl<ArmnnType, ArmnnType>(
        workloadFactory,
        memoryManager,
        tensorHandleFactory,
        input,
        kernel,
        std::vector<T>(),
        outputData,
        inputDesc.GetShape(),
        kernelDesc.GetShape(),
        outputDesc.GetShape(),
        dataLayout,
        qScale,
        qOffset);
}

template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
LayerTestResult<T, 4> SimpleConvolution2d3x3Stride2x2TestCommon(
        armnn::IWorkloadFactory& workloadFactory,
        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
        const armnn::ITensorHandleFactory& tensorHandleFactory,
        float qScale,
        int32_t qOffset,
        bool biasEnabled,
        const armnn::DataLayout& dataLayout)
{
    armnn::IgnoreUnused(biasEnabled);

    // Input is a single-batch, 1 channel, 5x5 image.
    armnn::TensorInfo inputDesc({ 1, 5, 5, 1 }, ArmnnType);
    std::vector<T> input =
    {
        1, 5, 2, 3, 5,
        8, 7, 3, 6, 3,
        3, 3, 9, 1, 9,
        4, 1, 8, 1, 3,
        6, 8, 1, 9, 2
    };

    // Use a 3x3 kernel.
    armnn::TensorInfo kernelDesc({ 1, 3, 3, 1 }, ArmnnType);
    std::vector<T> kernel =
    {
        4, 5, 6,
        0, 0, 0,
        3, 2, 1
    };

    // Expected output is a single-batch, 1 channel, 3x3 image.
    armnn::TensorInfo outputDesc({ 1, 3, 3, 1 }, ArmnnType);
    std::vector<T> outputData =
    {
        23, 33, 24,
        91, 99, 48,
        26, 50, 19
    };

    uint32_t padLeft = 1;
    uint32_t padTop = 1;
    uint32_t padRight = 1;
    uint32_t padBottom = 1;
    uint32_t strideX  = 2;
    uint32_t strideY  = 2;

    return SimpleConvolution2dNhwcTestImpl<ArmnnType, ArmnnType>(
        workloadFactory,
        memoryManager,
        tensorHandleFactory,
        input,
        kernel,
        std::vector<T>(),
        outputData,
        inputDesc.GetShape(),
        kernelDesc.GetShape(),
        outputDesc.GetShape(),
        dataLayout,
        qScale,
        qOffset,
        padLeft,
        padTop,
        padRight,
        padBottom,
        strideX,
        strideY);
}

template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType, typename T = armnn::ResolveType<ArmnnType>>
LayerTestResult<T, 4> SimpleConvolution2d3x5TestCommon(
    armnn::IWorkloadFactory& workloadFactory,
    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    const armnn::ITensorHandleFactory& tensorHandleFactory,
    float qScale,
    int32_t qOffset,
    bool biasEnabled,
    const armnn::DataLayout layout)
{
    // Use common single-batch 3-channel 16x8 image.
    armnn::TensorInfo inputDesc({ 1, 3, 8, 16 }, ArmnnType);
    std::vector<T> input = QuantizedVector<T>(ConvInput3x8x16, qScale, qOffset);

    // Use a 2-element batch with 3-channel 3x5 kernels.
    armnn::TensorInfo kernelDesc({ 2, 3, 5, 3 }, ArmnnType);
    std::vector<T> kernel = QuantizedVector<T>({
            1,  1, 1,
            1, -1, 1,
            1,  1, 1,
            1,  1, 1,
            1,  1, 1,

            0,  0, 0,
            0,  0, 0,
            0,  0, 0,
            0,  0, 0,
            0,  0, 0,

            2,  2, 2,
            2,  2, 2,
            2,  2, 2,
            2,  2, 2,
            2,  2, 2,


            0,  0, 0,
            0,  0, 0,
            0,  0, 0,
            0,  0, 0,
            0,  0, 0,

            1,  1, 1,
            1,  1, 1,
            1,  1, 1,
            1,  1, 1,
            1,  1, 1,

            0,  0, 0,
            0,  0, 0,
            0,  0, 0,
            0,  0, 0,
            0,  0, 0
        },
        qScale, qOffset);

    // Expected output is 1 batch of a 2-channel 14x4 image.
    armnn::TensorInfo outputDesc({ 1, 2, 4, 14 }, ArmnnType);
    std::vector<T> expectedOutput = QuantizedVector<T>({
            -24, -24, -24, -24, -24, -24, -24, -24, -24, -24, -24, -24, -24, -24,
            -25, -25, -25, -25, -25, -25, -25, -25, -25, -25, -25, -25, -25, -25,
            -23.5f, -23.5f, -23.5f, -23.5f, -23.5f, -23.5f, -23.5f, -23.5f, -23.5f, -23.5f, -23.5f,
            -23.5f, -23.5f, -23.5f,
            -23.5f, -23.5f, -23.5f, -23.5f, -23.5f, -23.5f, -23.5f, -23.5f, -23.5f, -23.5f, -23.5f,
            -23.5f, -23.5f, -23.5f,

            5, 5, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            5, 5, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            5, 5, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            5, 5, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
        },
        qScale, qOffset);

    return SimpleConvolution2dTestImpl<ArmnnType, ArmnnBType>(
        workloadFactory,
        memoryManager,
        tensorHandleFactory,
        input,
        kernel,
        GetBias2<ArmnnBType>(biasEnabled, qScale * qScale),
        expectedOutput,
        inputDesc.GetShape(),
        kernelDesc.GetShape(),
        outputDesc.GetShape(),
        qScale,
        qOffset,
        layout);
}

template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType,
         typename T = armnn::ResolveType<ArmnnType>>
LayerTestResult<T, 4> SimpleConvolution2d3x3TestCommon(
    armnn::IWorkloadFactory& workloadFactory,
    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    const armnn::ITensorHandleFactory& tensorHandleFactory,
    float qScale,
    int32_t qOffset,
    bool biasEnabled,
    const armnn::DataLayout layout)
{
    // Use a 3x3 kernel, which exercises ArmCompute's direct convolution path.

    // Use common single-batch 3-channel 16x8 image.
    armnn::TensorInfo inputDesc({ 1, 3, 8, 16 }, ArmnnType);
    std::vector<unsigned int> inputShape = { 1, 3, 8, 16 };
    std::vector<T> input = QuantizedVector<T>(ConvInput3x8x16, qScale, qOffset);

    // Use a 2-element batch of 3-channel 3x3 kernels.
    armnn::TensorInfo kernelDesc({ 2, 3, 3, 3 }, ArmnnType);
    std::vector<T> kernel = QuantizedVector<T>({
            1,  1, 1,
            1, -1, 1,
            1,  1, 1,

            0,  0, 0,
            0,  0, 0,
            0,  0, 0,

            2,  2, 2,
            2,  2, 2,
            2,  2, 2,


            0,  0, 0,
            0,  0, 0,
            0,  0, 0,

            1,  1, 1,
            1,  1, 1,
            1,  1, 1,

            0,  0, 0,
            0,  0, 0,
            0,  0, 0
        },
        qScale, qOffset);

    // Expected output is 1 batch of a 2-channel 14x6 image.
    armnn::TensorInfo outputDesc({ 1, 2, 6, 14 }, ArmnnType);
    std::vector<T> expectedOutput = QuantizedVector<T>({
            -15, -15, -15, -15, -15, -15, -15, -15, -15, -15, -15, -15, -15, -15,
            -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16,
            -14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,
            -14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,
            -14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,
            -14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,

            3, 3, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            3, 3, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            3, 3, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            3, 3, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            3, 3, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            3, 3, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
        },
        qScale, qOffset);

    return SimpleConvolution2dTestImpl<ArmnnType, ArmnnBType>(
        workloadFactory,
        memoryManager,
        tensorHandleFactory,
        input,
        kernel,
        GetBias2<ArmnnBType>(biasEnabled, qScale * qScale),
        expectedOutput,
        inputDesc.GetShape(),
        kernelDesc.GetShape(),
        outputDesc.GetShape(),
        qScale,
        qOffset,
        layout);
}

template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType,
         typename T = armnn::ResolveType<ArmnnType>>
LayerTestResult<T, 4> Convolution2dAsymmetricPaddingLargerThanHalfKernelSizeTestCommon(
    armnn::IWorkloadFactory& workloadFactory,
    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    const armnn::ITensorHandleFactory& tensorHandleFactory,
    const armnn::DataLayout layout,
    float qScale,
    int32_t qOffset)
{
    // Use a single-batch 1-channel 3x3 image as input.
    armnn::TensorInfo inputDesc({ 1, 1, 3, 3 }, ArmnnType);
    std::vector<T> input =
        QuantizedVector<T>({
            11,21,31,
            12,22,32,
            13,23,33
        },
        qScale, qOffset);

    // Use 1 batch of a 1-channel 2x2 kernel.
    armnn::TensorInfo kernelDesc({ 1, 1, 2, 2 }, ArmnnType);
    std::vector<T> kernel =
        QuantizedVector<T>({
            -11,-21,
            -12,-22,
        },
        qScale, qOffset);

    // Expected output is 1 batch of a 1-channel 6x8 image.
    // Manually calculated like this:
    //[-11*0 -21*0  -12*0 -22*0  ; -11*0  -21*0  -12*0  -22*0  ; -11*0  -21*0  -12*0  -22*0  ; -11*0  -21*0 -12*0  -22*0 ..]
    //[-11*0 -21*0  -12*0 -22*11 ; -11*0  -21*0  -12*11 -22*21 ; -11*0  -21*0  -12*21 -22*31 ; -11*0  -21*0 -12*31 -22*0 ..]
    //[-11*0 -21*11 -12*0 -22*12 ; -11*11 -21*21 -12*12 -22*22 ; -11*21 -21*31 -12*22 -22*32 ; -11*31 -21*0 -12*32 -22*0 ..]
    //[-11*0 -21*12 -12*0 -22*13 ; -11*12 -21*22 -12*13 -22*23 ; -11*22 -21*32 -12*23 -22*33 ; -11*32 -21*0 -12*33 -22*0 ..]
    //[-11*0 -21*13 -12*0 -22*0  ; -11*13 -21*23 -12*0  -22*0  ; -11*23 -21*33 -12*0  -22*0  ; -11*33 -21*0 -12*0  -22*0 ..]
    //[-11*0 -21*0  -12*0 -22*0  ; -11*0  -21*0  -12*0  -22*0  ; -11*0  -21*0  -12*0  -22*0  ; -11*0  -21*0 -12*0  -22*0 ..]
    //[..... .....  ..... .....  ; .....  .....  .....  .....  ; .....  .....  .....  .....  ; .....  ..... .....  ..... ..]
    armnn::TensorInfo outputDesc({ 1, 1, 8, 6 }, ArmnnType);
    std::vector<T> expectedOutput =
        QuantizedVector<T>({
               0,    0,      0,    0,    0,    0,
            -242,  -594,  -934, -372,    0,    0,
            -495, -1190, -1850, -725,    0,    0,
            -538, -1256, -1916, -748,    0,    0,
            -273, -626,  -946,  -363,    0,    0,
               0,    0,     0,     0,    0,    0,
               0,    0,     0,     0,    0,    0,
               0,    0,     0,     0,    0,    0
        },
        qScale, qOffset);

    return SimpleConvolution2dTestImpl<ArmnnType, ArmnnBType>(
        workloadFactory,
        memoryManager,
        tensorHandleFactory,
        input,
        kernel,
        GetBias2<ArmnnBType>(false, qScale * qScale),
        expectedOutput,
        inputDesc.GetShape(),
        kernelDesc.GetShape(),
        outputDesc.GetShape(),
        qScale,
        qOffset,
        layout,
        1,  // Padding left.
        2,  // Padding top.
        3,  // Padding right.
        4); // Padding bottom.
}

template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType,
         typename T = armnn::ResolveType<ArmnnType>>
LayerTestResult<T, 4> SimpleConvolution2dAsymmetricPaddingTestCommon(
    armnn::IWorkloadFactory& workloadFactory,
    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    const armnn::ITensorHandleFactory& tensorHandleFactory,
    const armnn::DataLayout layout,
    float qScale,
    int32_t qOffset)
{
    // Use a single-batch 1-channel 5x5 image as input.
    armnn::TensorInfo inputDesc({ 1, 1, 5, 5 }, ArmnnType);
    std::vector<T> input =
        QuantizedVector<T>({
            11,21,31,41,51,
            12,22,32,42,52,
            13,23,33,43,53,
            14,24,34,44,54,
            15,25,35,45,55,
        }, qScale, qOffset);

    // Use 1 batch of a 1-channel 4x4 kernel.
    armnn::TensorInfo kernelDesc({ 1, 1, 4, 4 }, ArmnnType);
    std::vector<T> kernel =
        QuantizedVector<T>({
            -11,-21,-31,-41,
            -12,-22,-32,-42,
            -13,-23,-33,-43,
            -14,-24,-34,-44,
        },
        qScale, qOffset);

    // Expected output is 1 batch of a 1-channel 5x5 image.
    armnn::TensorInfo outputDesc({ 1, 1, 5, 5 }, ArmnnType);
    std::vector<T> expectedOutput =
        QuantizedVector<T>({
            -7140, -10580, -13940,  -9300, -5230,
            -9590, -14120, -18520, -12290, -6860,
            -9980, -14560, -18960, -12560, -7000,
            -7518, -10904, -14144,  -9318, -5152,
            -5032,  -7256,  -9376,  -6142, -3368,
        },
        qScale, qOffset);

    return SimpleConvolution2dTestImpl<ArmnnType, ArmnnBType>(
        workloadFactory,
        memoryManager,
        tensorHandleFactory,
        input,
        kernel,
        GetBias2<ArmnnBType>(false, qScale * qScale),
        expectedOutput,
        inputDesc.GetShape(),
        kernelDesc.GetShape(),
        outputDesc.GetShape(),
        qScale,
        qOffset,
        layout,
        1,  // Padding left.
        1,  // Padding top.
        2,  // Padding right.
        2); // Padding bottom.
}

template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType, typename T = armnn::ResolveType<ArmnnType>>
LayerTestResult<T, 4> Convolution2d3x3DilationTestCommon(
    armnn::IWorkloadFactory& workloadFactory,
    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    const armnn::ITensorHandleFactory& tensorHandleFactory,
    const std::vector<float>& inputNoQuantizedValues,
    armnn::TensorInfo& inputTensorInfo,
    const std::vector<float>& kernelNoQuantizedValues,
    armnn::TensorInfo& kernelTensorInfo,
    const std::vector<float>& outputExpectedNoQuantizedValues,
    armnn::TensorInfo& outputTensorInfo,
    uint32_t dilationX,
    uint32_t dilationY,
    armnn::DataLayout layout = armnn::DataLayout::NCHW,
    uint32_t padLeft = 0,
    uint32_t padTop = 0,
    uint32_t padRight = 0,
    uint32_t padBottom = 0,
    uint32_t strideX  = 1,
    uint32_t strideY  = 1,
    bool biasEnabled = false)
{
    float qScale;
    int32_t qOffset;
    switch (ArmnnType)
    {
        case armnn::DataType::QAsymmU8:
        case armnn::DataType::QAsymmS8:
        {
            qScale = 0.1f;
            qOffset = 128;
            break;
        }
        case armnn::DataType::QSymmS16:
        {
            qScale = 0.1f;
            qOffset = 0;
            break;
        }
        case armnn::DataType::Float32:
        default:
        {
            qScale = 0.f;
            qOffset = 0;
            break;
        }
    }

    inputTensorInfo.SetQuantizationScale(qScale);
    inputTensorInfo.SetQuantizationOffset(qOffset);
    kernelTensorInfo.SetQuantizationScale(qScale);
    kernelTensorInfo.SetQuantizationOffset(qOffset);
    outputTensorInfo.SetQuantizationScale(qScale);
    outputTensorInfo.SetQuantizationOffset(qOffset);

    auto input = QuantizedVector<T>(inputNoQuantizedValues,
                                    inputTensorInfo.GetQuantizationScale(),
                                    inputTensorInfo.GetQuantizationOffset());
    auto kernel = QuantizedVector<T>(kernelNoQuantizedValues,
                                     kernelTensorInfo.GetQuantizationScale(),
                                     kernelTensorInfo.GetQuantizationOffset());
    auto expectedOutput = QuantizedVector<T>(outputExpectedNoQuantizedValues,
                                             outputTensorInfo.GetQuantizationScale(),
                                             outputTensorInfo.GetQuantizationOffset());

    return SimpleConvolution2dTestImpl<ArmnnType, ArmnnBType>(
            workloadFactory,
            memoryManager,
            tensorHandleFactory,
            input,
            kernel,
            GetBias2<ArmnnBType>(biasEnabled, qScale * qScale),
            expectedOutput,
            inputTensorInfo.GetShape(),
            kernelTensorInfo.GetShape(),
            outputTensorInfo.GetShape(),
            qScale,
            qOffset,
            layout,
            padLeft,
            padTop,
            padRight,
            padBottom,
            strideX,
            strideY,
            dilationX,
            dilationY);
}

template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType, typename T>
LayerTestResult<T, 4> Convolution2d3x3Dilation3x3Test(
    armnn::IWorkloadFactory& workloadFactory,
    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    const armnn::ITensorHandleFactory& tensorHandleFactory,
    bool biasEnabled,
    const armnn::DataLayout layout)
{
    armnn::TensorInfo inputTensorInfo({ 1, 1, 10, 10 }, ArmnnType);
    std::vector<float> inputNoQuantizedValues =
    {
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 1, 1, 1, 0, 0, 0,
        0, 0, 0, 0, 1, 1, 1, 0, 0, 0,
        0, 0, 0, 0, 1, 1, 1, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0
    };

    armnn::TensorInfo kernelTensorInfo({ 1, 1, 3, 3}, ArmnnType);
    std::vector<float> kernelNoQuantizedValues =
    {
        1, 2, 3,
        4, 5, 6,
        7, 8, 9
    };

    // With a dilation rate of 3 the effective kernel size is d*(K-1)+1 = 3*(3-1)+1 = 7,
    // so the output is 4x4: (I - K_eff + 2P)/S + 1 => (10 - 7 + 0)/1 + 1 = 4.
    armnn::TensorInfo outputTensorInfo({ 1, 1, 4, 4}, ArmnnType);
    std::vector<float> outputExpectedNoQuantizedValues =
    {
        6., 5., 5., 5.,
        6., 5., 5., 5.,
        6., 5., 5., 5.,
        3., 2., 2., 2.
    };

    // The padding/stride defaults are spelled out here so that biasEnabled
    // binds to the trailing bool parameter rather than converting to padLeft.
    return Convolution2d3x3DilationTestCommon<ArmnnType, ArmnnBType>(
            workloadFactory,
            memoryManager,
            tensorHandleFactory,
            inputNoQuantizedValues,
            inputTensorInfo,
            kernelNoQuantizedValues,
            kernelTensorInfo,
            outputExpectedNoQuantizedValues,
            outputTensorInfo,
            3,  // dilationX
            3,  // dilationY
            layout,
            0,  // padLeft
            0,  // padTop
            0,  // padRight
            0,  // padBottom
            1,  // strideX
            1,  // strideY
            biasEnabled);
}

template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType, typename T>
LayerTestResult<T, 4> Convolution2d2x3x3Dilation3x3Test(
    armnn::IWorkloadFactory& workloadFactory,
    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    const armnn::ITensorHandleFactory& tensorHandleFactory,
    bool biasEnabled,
    const armnn::DataLayout layout)
{
    armnn::TensorInfo inputTensorInfo({ 1, 2, 10, 10 }, ArmnnType);
    std::vector<float> inputNoQuantizedValues =
    {
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 1, 1, 1, 0, 0, 0,
        0, 0, 0, 0, 1, 1, 1, 0, 0, 0,
        0, 0, 0, 0, 1, 1, 1, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0,

        0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 1, 1, 1, 0, 0, 0,
        0, 0, 0, 0, 1, 1, 1, 0, 0, 0,
        0, 0, 0, 0, 1, 1, 1, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0
    };

    armnn::TensorInfo kernelTensorInfo({ 1, 2, 3, 3 }, ArmnnType);
    std::vector<float> kernelNoQuantizedValues =
    {
        1, 2, 3,
        4, 5, 6,
        7, 8, 9,

        1, 2, 3,
        4, 5, 6,
        7, 8, 9
    };

1258     // With a dilation rate of 3, the effective kernel size is d(K-1)+1 = 3x(3-1)+1 = 7,
1259     // so the output is 4x4: (I - K_eff + 2P)/S + 1 => (10 - 7 + 0)/1 + 1 = 4
1260     armnn::TensorInfo outputTensorInfo({ 1, 1, 4, 4 }, ArmnnType);
1261     std::vector<float> outputExpectedNoQuantizedValues =
1262     {
1263         12., 10., 10., 10.,
1264         12., 10., 10., 10.,
1265         12., 10., 10., 10.,
1266          6.,  4.,  4.,  4.
1267     };
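    // Both input channels and both kernel slices are identical, so every expected value is
    // exactly twice the corresponding single-channel result above (e.g. 12 = 2 x 6).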
1268 
1269     return Convolution2d3x3DilationTestCommon<ArmnnType, ArmnnBType>(
1270             workloadFactory,
1271             memoryManager,
1272             tensorHandleFactory,
1273             inputNoQuantizedValues,
1274             inputTensorInfo,
1275             kernelNoQuantizedValues,
1276             kernelTensorInfo,
1277             outputExpectedNoQuantizedValues,
1278             outputTensorInfo,
1279             3,
1280             3,
1281             layout,
1282             biasEnabled);
1283 }
1284 
1285 template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType, typename T>
1286 LayerTestResult<T, 4> Convolution2d2x2Dilation2x2Padding2x2Stride3x3Test(
1287         armnn::IWorkloadFactory& workloadFactory,
1288         const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
1289         const armnn::ITensorHandleFactory& tensorHandleFactory,
1290         bool biasEnabled,
1291         const armnn::DataLayout layout)
1292 {
1293     armnn::TensorInfo inputTensorInfo({ 1, 1, 10, 10 }, ArmnnType);
1294     std::vector<float> inputNoQuantizedValues =
1295     {
1296         1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1297         1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1298         1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1299         1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1300         1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1301         1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1302         1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1303         1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1304         1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1305         1, 1, 1, 1, 1, 1, 1, 1, 1, 1
1306     };
1307 
1308     armnn::TensorInfo kernelTensorInfo({ 1, 1, 2, 2 }, ArmnnType);
1309     std::vector<float> kernelNoQuantizedValues =
1310     {
1311         1, 2,
1312         3, 4
1313     };
1314 
1315     // With a dilation rate of 2, the effective kernel size is d(K-1)+1 = 2x(2-1)+1 = 3,
1316     // so the output is 4x4: floor((I - K_eff + 2P)/S) + 1 => floor((10 - 3 + 2x1)/3) + 1 = 4,
1317     // where dilation d = 2, kernel K = 2, input I = 10, padding per side P = 1, stride S = 3.
1318     armnn::TensorInfo outputTensorInfo({ 1, 1, 4, 4}, ArmnnType);
1319     std::vector<float> outputExpectedNoQuantizedValues =
1320     {
1321         4,  7,  7, 3,
1322         6, 10, 10, 4,
1323         6, 10, 10, 4,
1324         2,  3,  3, 1
1325     };
1326     uint32_t padLeft = 1;
1327     uint32_t padTop = 1;
1328     uint32_t padRight = 1;
1329     uint32_t padBottom = 1;
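    // Sanity check for the corner values: at output (0,0) the dilated kernel taps input
    // offsets (-1,-1), (-1,1), (1,-1) and (1,1); only (1,1) lies inside the all-ones image,
    // so the value is 4. One stride (3) to the right, taps (1,2) and (1,4) are inside,
    // giving 3 + 4 = 7.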
1330 
1331     return Convolution2d3x3DilationTestCommon<ArmnnType, ArmnnBType>(
1332             workloadFactory,
1333             memoryManager,
1334             tensorHandleFactory,
1335             inputNoQuantizedValues,
1336             inputTensorInfo,
1337             kernelNoQuantizedValues,
1338             kernelTensorInfo,
1339             outputExpectedNoQuantizedValues,
1340             outputTensorInfo,
1341             2,
1342             2,
1343             layout,
1344             padLeft,
1345             padTop,
1346             padRight,
1347             padBottom,
1348             3,
1349             3,
1350             biasEnabled
1351             );
1352 }
1353 
1354 template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
1355 LayerTestResult<T,4> CompareConvolution2dTestImpl(
1356     armnn::IWorkloadFactory& workloadFactory,
1357     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
1358     armnn::IWorkloadFactory& refWorkloadFactory,
1359     const armnn::ITensorHandleFactory& tensorHandleFactory,
1360     const armnn::ITensorHandleFactory& refTensorHandleFactory)
1361 {
1362     unsigned int inputHeight   = 8;
1363     unsigned int inputWidth    = 16;
1364     unsigned int inputChannels = 3;
1365     unsigned int inputNum      = 5;
1366 
1367     unsigned int kernelHeight = 3;
1368     unsigned int kernelWidth  = 3;
1369 
1370     unsigned int strideX = 2;
1371     unsigned int strideY = 3;
1372     unsigned int padX    = 1;
1373     unsigned int padY    = 1;
1374 
1375     unsigned int outputNum      = inputNum;
1376     unsigned int outputChannels = 2;
1377     unsigned int outputHeight   = (inputHeight + 2 * padY - kernelHeight + strideY) / strideY;
1378     unsigned int outputWidth    = (inputWidth + 2 * padX - kernelWidth + strideX) / strideX;
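    // With these values: outputHeight = (8 + 2 - 3 + 3) / 3 = 3 and outputWidth = (16 + 2 - 3 + 2) / 2 = 8;
    // adding the stride before the integer division is equivalent to floor((I + 2P - K) / S) + 1.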
1379 
1380     armnn::TensorInfo inputTensorInfo;
1381     armnn::TensorInfo outputTensorInfo;
1382     armnn::TensorInfo kernelDesc;
1383     armnn::TensorInfo biasDesc;
1384 
1385     unsigned int inputShape[]  = {inputNum, inputChannels, inputHeight, inputWidth};
1386     unsigned int outputShape[] = {outputNum, outputChannels, outputHeight, outputWidth};
1387     unsigned int kernelShape[] = {outputChannels, inputChannels, kernelHeight, kernelWidth};
1388     unsigned int biasShape[]   = {outputChannels};
1389 
1390     inputTensorInfo = armnn::TensorInfo(4, inputShape, ArmnnType);
1391     outputTensorInfo = armnn::TensorInfo(4, outputShape, ArmnnType);
1392     kernelDesc = armnn::TensorInfo(4, kernelShape, ArmnnType);
1393     biasDesc = armnn::TensorInfo(1, biasShape, ArmnnType);
1394 
1395     auto input  = MakeRandomTensor<T>(inputTensorInfo, 124908);
1396     auto kernel = MakeRandomTensor<T>(kernelDesc, 891234);
1397     auto bias   = MakeRandomTensor<T>(biasDesc, 1028);
1398 
1399     std::vector<T> actualOutput(outputTensorInfo.GetNumElements());
1400     std::vector<T> expectedOutput(outputTensorInfo.GetNumElements());
1401 
1402     std::unique_ptr<armnn::ITensorHandle> inputHandle = tensorHandleFactory.CreateTensorHandle(inputTensorInfo);
1403     std::unique_ptr<armnn::ITensorHandle> biasHandle = tensorHandleFactory.CreateTensorHandle(biasDesc);
1404     std::unique_ptr<armnn::ITensorHandle> weightsHandle = tensorHandleFactory.CreateTensorHandle(kernelDesc);
1405     std::unique_ptr<armnn::ITensorHandle> outputHandle = tensorHandleFactory.CreateTensorHandle(outputTensorInfo);
1406 
1407     armnn::Convolution2dQueueDescriptor data;
1408     armnn::WorkloadInfo info;
1409 
1410     AddInputToWorkload(data, info, inputTensorInfo, inputHandle.get());
1411     AddInputToWorkload(data, info, kernelDesc, weightsHandle.get());
1412     AddInputToWorkload(data, info, biasDesc, biasHandle.get());
1413     AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get());
1414 
1415     AllocateAndCopyDataToITensorHandle(weightsHandle.get(), kernel.data());
1416     AllocateAndCopyDataToITensorHandle(biasHandle.get(), bias.data());
1417 
1418     data.m_Parameters.m_StrideX = strideX;
1419     data.m_Parameters.m_StrideY = strideY;
1420     data.m_Parameters.m_PadLeft = padX;
1421     data.m_Parameters.m_PadRight = padX;
1422     data.m_Parameters.m_PadTop = padY;
1423     data.m_Parameters.m_PadBottom = padY;
1424     data.m_Parameters.m_BiasEnabled = true;
1425 
1426     std::unique_ptr<armnn::ITensorHandle> outputHandleRef = refTensorHandleFactory.CreateTensorHandle(outputTensorInfo);
1427     std::unique_ptr<armnn::ITensorHandle> weightsHandleRef = refTensorHandleFactory.CreateTensorHandle(kernelDesc);
1428     std::unique_ptr<armnn::ITensorHandle> biasHandleRef = refTensorHandleFactory.CreateTensorHandle(biasDesc);
1429     std::unique_ptr<armnn::ITensorHandle> inputHandleRef = refTensorHandleFactory.CreateTensorHandle(inputTensorInfo);
1430 
1431     armnn::Convolution2dQueueDescriptor refData = data;
1432     armnn::WorkloadInfo                 refInfo = info;
1433     SetWorkloadInput(refData, refInfo, 0, inputTensorInfo, inputHandleRef.get());
1434     SetWorkloadInput(refData, refInfo, 1, kernelDesc, weightsHandleRef.get());
1435     SetWorkloadInput(refData, refInfo, 2, biasDesc, biasHandleRef.get());
1436     SetWorkloadOutput(refData, refInfo, 0, outputTensorInfo, outputHandleRef.get());
1437 
1438     std::unique_ptr<armnn::IWorkload> workload
1439             = workloadFactory.CreateWorkload(armnn::LayerType::Convolution2d, data, info);
1440     std::unique_ptr<armnn::IWorkload> workloadRef
1441             = refWorkloadFactory.CreateWorkload(armnn::LayerType::Convolution2d, refData, refInfo);
1442 
1443     outputHandleRef->Allocate();
1444     inputHandleRef->Allocate();
1445     weightsHandleRef->Allocate();
1446     biasHandleRef->Allocate();
1447 
1448     inputHandle->Allocate();
1449     outputHandle->Allocate();
1450 
1451     CopyDataToITensorHandle(inputHandle.get(), input.data());
1452     CopyDataToITensorHandle(inputHandleRef.get(), input.data());
1453     CopyDataToITensorHandle(weightsHandleRef.get(), kernel.data());
1454     CopyDataToITensorHandle(biasHandleRef.get(), bias.data());
1455 
1456     ExecuteWorkload(*workload, memoryManager);
1457 
1458     workloadRef->PostAllocationConfigure();
1459     workloadRef->Execute();
1460 
1461     CopyDataFromITensorHandle(actualOutput.data(), outputHandle.get());
1462     CopyDataFromITensorHandle(expectedOutput.data(), outputHandleRef.get());
1463 
1464     return LayerTestResult<T, 4>(actualOutput,
1465                                  expectedOutput,
1466                                  outputHandle->GetShape(),
1467                                  outputTensorInfo.GetShape());
1468 }
1469 
1470 LayerTestResult<float, 4> Convolution2d3x3Stride2x2BFloat16Test(
1471         armnn::IWorkloadFactory& workloadFactory,
1472         const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
1473         const armnn::ITensorHandleFactory& tensorHandleFactory,
1474         bool biasEnabled,
1475         const armnn::DataLayout& dataLayout)
1476 {
1477     // BFloat16 input and weight, Float32 output
1478     armnn::IgnoreUnused(biasEnabled);
1479 
1480     // Input is a single-batch, 1 channel, 5x5 image.
1481     armnn::TensorInfo inputDesc({ 1, 5, 5, 1 }, armnn::DataType::BFloat16);
1482 
1483     std::vector<armnn::BFloat16> inputValues = armnnUtils::QuantizedVector<armnn::BFloat16>(
1484         {
1485             10.0367984f,  // 10.0625
1486              2.0380895f,  // 2.03125
1487             15.0420157f,  // 15.0625
1488             22.0675631f,  // 22.125
1489              8.0938920f,  // 8.125
1490              5.0476106f,  // 5.0625
1491             80.1035490f,  // 80
1492            100.1260370f,  // 100
1493             55.0461647f,  // 55
1494            120.0883828f,  // 120
1495              9.1159540f,  // 9.125
1496             90.0498519f,  // 90
1497            200.0104630f,  // 200
1498             30.0154114f,  // 30
1499             75.00137681f, // 75
1500             30.0344238f,  // 30
1501             25.0356445f,  // 25
1502            130.0495605f,  // 130
1503             60.0683594f,  // 60
1504             35.0991211f,  // 35
1505              8.0461426f,  // 8.0625
1506             12.0996094f,  // 12.125
1507             98.1269530f,  // 98
1508            125.0393066f,  // 125
1509              5.103516f    // 5.0937
1510        },
1511         1.0f, 0);
1512 
1513     // Use a 3x3 kernel.
1514     armnn::TensorInfo kernelDesc({1, 3, 3, 1}, armnn::DataType::BFloat16);
1515 
1516     std::vector<armnn::BFloat16> kernelValues = armnnUtils::QuantizedVector<armnn::BFloat16>(
1517         {
1518             -0.126184f, // -0.125977
1519             -0.150468f, // -0.150391
1520             -0.101412f, // -0.101562
1521             -0.0586369f,// -0.0585938
1522             -0.0865864f,// -0.0864258
1523             -0.0435089f,// -0.043457
1524             0.0347555f, // 0.034668
1525             0.0323111f, // 0.0322266
1526             0.0385381f  // 0.0385742
1527          },
1528         1.0f, 0);
1529 
1530     // Expected output is a single-batch, 1 channel, 3x3 image.
1531     armnn::TensorInfo outputDesc({ 1, 3, 3, 1 }, armnn::DataType::Float32);
1532 
1533     // Expected output values (the full FP32 results are shown in the comments)
1534     const std::vector<float> outputData =
1535         {
1536             2.296875f, //  2.29240716
1537             5.75f,     //  5.75851926
1538             3.78125f,  //  3.79855026
1539             -11.625f,  // -11.65498118
1540             -47.25f,   // -47.27316893
1541             -30.0f,    // -30.04771684
1542             -8.25f,    //  -8.28126168
1543             -43.5f,    // -43.46531337
1544             -20.625f   // -20.63477281
1545         };
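    // The expected values differ slightly from the FP32 results in the comments because the
    // inputs and weights are first rounded to BFloat16 (7 fraction bits) before the
    // convolution is evaluated.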
1546 
1547     uint32_t padLeft = 1;
1548     uint32_t padTop = 1;
1549     uint32_t padRight = 1;
1550     uint32_t padBottom = 1;
1551     uint32_t strideX  = 2;
1552     uint32_t strideY  = 2;
1553 
1554     return SimpleConvolution2dNhwcTestImpl
1555         <armnn::DataType::BFloat16, armnn::DataType::Float32, armnn::BFloat16, float, armnn::DataType::Float32, float>(
1556         workloadFactory,
1557         memoryManager,
1558         tensorHandleFactory,
1559         inputValues,
1560         kernelValues,
1561         std::vector<float>(),
1562         outputData,
1563         inputDesc.GetShape(),
1564         kernelDesc.GetShape(),
1565         outputDesc.GetShape(),
1566         dataLayout,
1567         1.0f,
1568         0,
1569         padLeft,
1570         padTop,
1571         padRight,
1572         padBottom,
1573         strideX,
1574         strideY);
1575 }
1576 
1577 LayerTestResult<float, 4> Convolution2d3x3Stride2x2BFloat16SmallValueTest(
1578         armnn::IWorkloadFactory& workloadFactory,
1579         const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
1580         const armnn::ITensorHandleFactory& tensorHandleFactory,
1581         bool biasEnabled,
1582         const armnn::DataLayout& dataLayout)
1583 {
1584     // BFloat16 input and weight, Float32 output
1585     armnn::IgnoreUnused(biasEnabled);
1586 
1587     // Input is a single-batch, 1 channel, 5x5 image.
1588     armnn::TensorInfo inputDesc({1, 5, 5, 1}, armnn::DataType::BFloat16);
1589 
1590     std::vector<armnn::BFloat16> inputValues = armnnUtils::QuantizedVector<armnn::BFloat16>(
1591         {
1592             0.0367984f,  // 0.0368652
1593             0.0380895f,  // 0.0380859
1594             0.0420157f,  // 0.0419922
1595             0.0675631f,  // 0.0673828
1596             0.0938920f,  // 0.09375
1597             0.0476106f,  // 0.0476074
1598             0.1035490f,  // 0.103516
1599             0.1260370f,  // 0.125977
1600             0.0461647f,  // 0.0461426
1601             0.0883828f,  // 0.0883789
1602             0.1159540f,  // 0.115723
1603             0.0498519f,  // 0.0498047
1604             0.0104630f,  // 0.010437
1605             0.0154114f,  // 0.0154419
1606             0.00137681f, // 0.00137329
1607             0.0344238f,  // 0.0344616
1608             0.0356445f,  // 0.0355693
1609             0.0495605f,  // 0.0495018
1610             0.0683594f,  // 0.0683308
1611             0.0991211f,  // 0.0988837
1612             0.0461426f,  // 0.0461838
1613             0.0996094f,  // 0.0997546
1614             0.1269530f,  // 0.127099
1615             0.0393066f,  // 0.0392791
1616             0.103516f    // 0.103641
1617        },
1618         1.0f, 0);
1619 
1620     // Use a 3x3 kernel.
1621     armnn::TensorInfo kernelDesc({1, 3, 3, 1}, armnn::DataType::BFloat16);
1622 
1623     std::vector<armnn::BFloat16> kernelValues = armnnUtils::QuantizedVector<armnn::BFloat16>(
1624         {
1625             -0.126184f, // -0.125977
1626             -0.150468f, // -0.150391
1627             -0.101412f, // -0.101562
1628             -0.0586369f,// -0.0585938
1629             -0.0865864f,// -0.0864258
1630             -0.0435089f,// -0.043457
1631             0.0347555f, // 0.034668
1632             0.0323111f, // 0.0322266
1633             0.0385381f  // 0.0385742
1634          },
1635         1.0f, 0);
1636 
1637     // Expected output is a single-batch, 1 channel, 3x3 image.
1638     armnn::TensorInfo outputDesc({1, 3, 3, 1}, armnn::DataType::Float32);
1639 
1640     // Expected output values (the full FP32 results are shown in the comments)
1641     const std::vector<float> outputData =
1642         {
1643              0.000686645508f, // 0.000685
1644              0.000640869141f, // 0.000639
1645             -0.00759887695f,  // -0.007631
1646             -0.02734375f,     // -0.027388
1647             -0.0356445312f,   // -0.035737
1648             -0.0145874023f,   // -0.014568
1649             -0.0170898438f,   // -0.017124
1650             -0.0373535156f,   // -0.037431
1651             -0.0346679688f    // -0.034808
1652         };
1653 
1654     uint32_t padLeft = 1;
1655     uint32_t padTop = 1;
1656     uint32_t padRight = 1;
1657     uint32_t padBottom = 1;
1658     uint32_t strideX  = 2;
1659     uint32_t strideY  = 2;
1660 
1661     return SimpleConvolution2dNhwcTestImpl
1662         <armnn::DataType::BFloat16, armnn::DataType::Float32, armnn::BFloat16, float, armnn::DataType::Float32, float>(
1663         workloadFactory,
1664         memoryManager,
1665         tensorHandleFactory,
1666         inputValues,
1667         kernelValues,
1668         std::vector<float>(),
1669         outputData,
1670         inputDesc.GetShape(),
1671         kernelDesc.GetShape(),
1672         outputDesc.GetShape(),
1673         dataLayout,
1674         1.0f,
1675         0,
1676         padLeft,
1677         padTop,
1678         padRight,
1679         padBottom,
1680         strideX,
1681         strideY);
1682 }
1683 
1684 //
1685 // DepthwiseConvolution2d implementations
1686 //
1687 
1688 template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType,
1689          typename T = armnn::ResolveType<ArmnnType>, typename B = armnn::ResolveType<ArmnnBType>>
1690 LayerTestResult<T, 4> DepthwiseConvolution2dAsymmetricTestImpl(
1691     armnn::IWorkloadFactory& workloadFactory,
1692     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
1693     const armnn::ITensorHandleFactory& tensorHandleFactory,
1694     const std::vector<T>& input,
1695     const std::vector<T>& kernel,
1696     const std::vector<B>& bias,
1697     const std::vector<T>& outputExpected,
1698     const armnn::TensorShape& inputShape,
1699     const armnn::TensorShape& kernelShape,
1700     const armnn::TensorShape& outputExpectedShape,
1701     float qScale,
1702     int32_t qOffset,
1703     const armnn::DataLayout layout,
1704     uint32_t padLeft = 0,
1705     uint32_t padTop = 0,
1706     uint32_t padRight = 0,
1707     uint32_t padBottom = 0,
1708     uint32_t strideX = 1,
1709     uint32_t strideY = 1)
1710 {
1711     unsigned int inputNum       = armnn::numeric_cast<unsigned int>(inputShape[0]);
1712     unsigned int inputChannels  = armnn::numeric_cast<unsigned int>(inputShape[1]);
1713     unsigned int inputHeight    = armnn::numeric_cast<unsigned int>(inputShape[2]);
1714     unsigned int inputWidth     = armnn::numeric_cast<unsigned int>(inputShape[3]);
1715     unsigned int kernelHeight   = armnn::numeric_cast<unsigned int>(kernelShape[1]);
1716     unsigned int kernelWidth    = armnn::numeric_cast<unsigned int>(kernelShape[2]);
1717     unsigned int kernelChannels = armnn::numeric_cast<unsigned int>(kernelShape[3]);
1718     unsigned int outputNum      = armnn::numeric_cast<unsigned int>(outputExpectedShape[0]);
1719     unsigned int outputChannels = armnn::numeric_cast<unsigned int>(outputExpectedShape[1]);
1720     unsigned int outputHeight   = armnn::numeric_cast<unsigned int>(outputExpectedShape[2]);
1721     unsigned int outputWidth    = armnn::numeric_cast<unsigned int>(outputExpectedShape[3]);
1722 
1723     // If a bias is used, its size must equal the number of output channels.
1724     bool biasEnabled = bias.size() > 0;
1725     ARMNN_ASSERT(!biasEnabled || bias.size() == outputChannels);
1726 
1727     // Creates the tensors.
1728     armnn::TensorInfo inputTensorInfo =
1729             armnnUtils::GetTensorInfo(inputNum, inputChannels, inputHeight, inputWidth, layout, ArmnnType);
1730     armnn::TensorInfo outputTensorInfo =
1731             armnnUtils::GetTensorInfo(outputNum, outputChannels, outputHeight, outputWidth, layout, ArmnnType);
1732     armnn::TensorInfo kernelDesc({1, kernelHeight, kernelWidth, kernelChannels}, ArmnnType);
1733     armnn::TensorInfo biasDesc({static_cast<unsigned int>(bias.size())}, ArmnnBType);
1734 
1735     // Set quantization parameters if the requested type is a quantized type.
1736     if (armnn::IsQuantizedType<T>())
1737     {
1738         inputTensorInfo.SetQuantizationScale(qScale);
1739         inputTensorInfo.SetQuantizationOffset(qOffset);
1740         outputTensorInfo.SetQuantizationScale(qScale);
1741         outputTensorInfo.SetQuantizationOffset(qOffset);
1742         kernelDesc.SetQuantizationScale(qScale);
1743         kernelDesc.SetQuantizationOffset(qOffset);
1744         biasDesc.SetQuantizationScale(qScale*qScale);
1745         biasDesc.SetQuantizationOffset(0);
1746     }
1747 
1748     // Construct the input data.
1749     std::vector<T> inputData;
1750     inputData.assign(input.data(), input.data() + inputChannels*inputHeight*inputWidth);
1751 
1752     // At this point, permute the input data to NHWC if required.
1753     const armnn::PermutationVector NCHWToNHWC = { 0, 3, 1, 2 };
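    // PermutationVector semantics: mappings[i] is the destination dimension of source
    // dimension i, so { 0, 3, 1, 2 } sends C (dim 1) to dim 3, i.e. NCHW -> NHWC.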
1754     if (layout == armnn::DataLayout::NHWC)
1755     {
1756         std::vector<T> tmp(inputData.size());
1757         armnnUtils::Permute(inputTensorInfo.GetShape(), NCHWToNHWC, inputData.data(), tmp.data(), sizeof(T));
1758         inputData = tmp;
1759     }
1760 
1761     std::vector<T> kernelData;
1762     kernelData.assign(kernel.data(), kernel.data() + kernelHeight * kernelWidth * outputChannels);
1763     if (workloadFactory.GetBackendId() == armnn::BackendId("GpuAcc") ||
1764         workloadFactory.GetBackendId() == armnn::BackendId("CpuAcc"))
1765     {
1766         if (layout == armnn::DataLayout::NCHW)
1767         {
1768             std::vector<T> tmp(kernelData.size());
1769             kernelDesc.SetShape(armnnUtils::Permuted(kernelDesc.GetShape(), {0, 2, 3, 1}));
1770             armnnUtils::Permute(kernelDesc.GetShape(), {0, 2, 3, 1}, kernelData.data(), tmp.data(), sizeof(T));
1771             kernelData = tmp;
1772         }
1773     }
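    // The permutation above rearranges the [1, H, W, O] depthwise weights into [1, O, H, W],
    // so that on the Neon/CL backends the weights follow the channel ordering of the NCHW
    // data layout.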
1774 
1775     // Construct the output data, with bias applied, as appropriate.
1776     std::vector<T> outputData;
1777     outputData.assign(outputExpected.data(), outputExpected.data() + outputChannels*outputHeight*outputWidth);
1778     if (biasEnabled)
1779     {
1780         std::vector<T> biasV;
1781         biasV.assign(bias.data(), bias.data() + outputChannels);
1782         ApplyBias(outputData, outputTensorInfo.GetQuantizationScale(), outputTensorInfo.GetQuantizationOffset(),
1783             biasV, biasDesc.GetQuantizationScale(), biasDesc.GetQuantizationOffset(),
1784             outputWidth, outputHeight);
1785     }
1786 
1787     std::vector<T> actualOutput(outputTensorInfo.GetNumElements());
1788 
1789     // At this point, permute the expected output to NHWC if required.
1790     if (layout == armnn::DataLayout::NHWC)
1791     {
1792         std::vector<T> tmp(outputData.size());
1793         armnnUtils::Permute(outputTensorInfo.GetShape(), NCHWToNHWC, outputData.data(), tmp.data(), sizeof(T));
1794         outputData = tmp;
1795     }
1796 
1797     std::unique_ptr<armnn::ITensorHandle> inputHandle = tensorHandleFactory.CreateTensorHandle(inputTensorInfo);
1798     std::unique_ptr<armnn::ITensorHandle> weightsHandle = tensorHandleFactory.CreateTensorHandle(kernelDesc);
1799     std::unique_ptr<armnn::ITensorHandle> biasHandle = nullptr;
1800     std::unique_ptr<armnn::ITensorHandle> outputHandle = tensorHandleFactory.CreateTensorHandle(outputTensorInfo);
1801 
1802     armnn::DepthwiseConvolution2dQueueDescriptor data;
1803     armnn::WorkloadInfo info;
1804 
1805     AllocateAndCopyDataToITensorHandle(weightsHandle.get(), kernelData.data()); // required for ConstantTensor
1806 
1807     AddInputToWorkload(data, info, inputTensorInfo, inputHandle.get());
1808     AddInputToWorkload(data, info, kernelDesc, weightsHandle.get());
1809     AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get());
1810 
1811     armnn::ScopedTensorHandle biasTensor(biasDesc);
1812     if (biasEnabled)
1813     {
1814         AllocateAndCopyDataToITensorHandle(&biasTensor, bias.data());
1815 
1816         biasHandle = tensorHandleFactory.CreateTensorHandle(biasDesc);
1817         AllocateAndCopyDataToITensorHandle(biasHandle.get(), bias.data());
1818         AddInputToWorkload(data, info, biasDesc, biasHandle.get());
1819     }
1820 
1821     data.m_Parameters.m_StrideX = strideX;
1822     data.m_Parameters.m_StrideY = strideY;
1823     data.m_Parameters.m_PadLeft = padLeft;
1824     data.m_Parameters.m_PadRight = padRight;
1825     data.m_Parameters.m_PadTop = padTop;
1826     data.m_Parameters.m_PadBottom = padBottom;
1827     data.m_Parameters.m_BiasEnabled = biasEnabled;
1828     data.m_Parameters.m_DataLayout = layout;
1829 
1830     std::unique_ptr<armnn::IWorkload> workload
1831             = workloadFactory.CreateWorkload(armnn::LayerType::DepthwiseConvolution2d, data, info);
1832 
1833     inputHandle->Allocate();
1834     outputHandle->Allocate();
1835 
1836     CopyDataToITensorHandle(inputHandle.get(), inputData.data());
1837 
1838     ExecuteWorkload(*workload, memoryManager);
1839 
1840     CopyDataFromITensorHandle(actualOutput.data(), outputHandle.get());
1841 
1842     return LayerTestResult<T, 4>(actualOutput,
1843                                  outputData,
1844                                  outputHandle->GetShape(),
1845                                  outputTensorInfo.GetShape());
1846 }
1847 
1848 template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType, typename T = armnn::ResolveType<ArmnnType>>
1849 LayerTestResult<T, 4> DepthwiseConvolution2dDepthMul1TestImpl(
1850     armnn::IWorkloadFactory& workloadFactory,
1851     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
1852     const armnn::ITensorHandleFactory& tensorHandleFactory,
1853     float qScale,
1854     int32_t qOffset,
1855     bool biasEnabled,
1856     const armnn::DataLayout layout)
1857 {
1858     using B = armnn::ResolveType<ArmnnBType>;
1859 
1860     unsigned int inputHeight = 3;
1861     unsigned int inputWidth = 3;
1862     unsigned int inputChannels = 2;
1863     unsigned int inputNum = 1;
1864 
1865     unsigned int kernelHeight = 3;
1866     unsigned int kernelWidth = 3;
1867 
1868     unsigned int outputHeight = 1;
1869     unsigned int outputWidth = 1;
1870     unsigned int outputChannels = inputChannels;
1871     unsigned int outputNum = inputNum;
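    // A 3x3 kernel over a 3x3 input with no padding and stride 1 produces a single 1x1
    // output per channel.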
1872 
1873     armnn::TensorInfo inputTensorInfo =
1874             armnnUtils::GetTensorInfo(inputNum, inputChannels, inputHeight, inputWidth, layout, ArmnnType);
1875     armnn::TensorInfo outputTensorInfo =
1876             armnnUtils::GetTensorInfo(outputNum, outputChannels, outputHeight, outputWidth, layout, ArmnnType);
1877     armnn::TensorInfo kernelDesc({1, kernelHeight, kernelWidth, outputChannels},
1878                                  ArmnnType);
1879     armnn::TensorInfo biasDesc({ outputChannels }, ArmnnBType);
1880 
1881     // Set quantization parameters if the requested type is a quantized type.
1882     if(armnn::IsQuantizedType<T>())
1883     {
1884         inputTensorInfo.SetQuantizationScale(qScale);
1885         inputTensorInfo.SetQuantizationOffset(qOffset);
1886         outputTensorInfo.SetQuantizationScale(qScale);
1887         outputTensorInfo.SetQuantizationOffset(qOffset);
1888         kernelDesc.SetQuantizationScale(qScale);
1889         kernelDesc.SetQuantizationOffset(qOffset);
1890         biasDesc.SetQuantizationScale(qScale*qScale);
1891         biasDesc.SetQuantizationOffset(0);
1892     }
1893     std::vector<T> inputData = std::vector<T>(
1894             QuantizedVector<T>({
1895                 1.f, 2.f, 1.f,
1896                 2.f, 1.f, 2.f,
1897                 1.f, 2.f, 1.f,
1898 
1899                 1.f, 2.f, 1.f,
1900                 2.f, 1.f, 2.f,
1901                 1.f, 2.f, 1.f,
1902             },
1903             inputTensorInfo.GetQuantizationScale(),
1904             inputTensorInfo.GetQuantizationOffset()));
1905 
1906     // At this point, permute the input data to NHWC if required.
1907     const armnn::PermutationVector NCHWToNHWC = { 0, 3, 1, 2 };
1908     if (layout == armnn::DataLayout::NHWC)
1909     {
1910         std::vector<T> tmp(inputData.size());
1911         armnnUtils::Permute(inputTensorInfo.GetShape(), NCHWToNHWC, inputData.data(), tmp.data(), sizeof(T));
1912         inputData = tmp;
1913     }
1914 
1915     std::vector<B> biasV(QuantizedVector<B>({ 0, 2 },
1916                                             biasDesc.GetQuantizationScale(),
1917                                             biasDesc.GetQuantizationOffset()));
1918 
1919     std::vector<T> kernelData = std::vector<T>(
1920             QuantizedVector<T>({
1921                  1.f, 0.f,  1.f,
1922                  0.f, 0.f,  0.f,
1923                 -1.f, 0.f, -1.f,
1924 
1925                  1.f, 0.f,  1.f,
1926                  0.f, 0.f,  0.f,
1927                 -1.f, 0.f, -1.f,
1928             },
1929             kernelDesc.GetQuantizationScale(),
1930             kernelDesc.GetQuantizationOffset()));
1931 
1932     if (workloadFactory.GetBackendId() == armnn::BackendId("GpuAcc") ||
1933         workloadFactory.GetBackendId() == armnn::BackendId("CpuAcc"))
1934     {
1935         if (layout == armnn::DataLayout::NCHW)
1936         {
1937             std::vector<T> tmp(kernelData.size());
1938             kernelDesc.SetShape(armnnUtils::Permuted(kernelDesc.GetShape(), {0, 2, 3, 1}));
1939             armnnUtils::Permute(kernelDesc.GetShape(), {0, 2, 3, 1}, kernelData.data(), tmp.data(), sizeof(T));
1940             kernelData = tmp;
1941         }
1942     }
1943 
1944     // Manually calculated.
1945     std::vector<T> outputImage(
1946         QuantizedVector<T>({ 0.f, 0.f },
1947                            outputTensorInfo.GetQuantizationScale(),
1948                            outputTensorInfo.GetQuantizationOffset())
1949     );
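    // Worked check: the kernel adds the two top-row corners of the input (1 + 1) and
    // subtracts the two bottom-row corners (1 + 1), so each channel's single output is
    // 2 - 2 = 0 before any bias is applied.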
1950 
1951     // Optionally apply bias to output image.
1952     if(biasEnabled)
1953     {
1954         ApplyBias(outputImage, outputTensorInfo.GetQuantizationScale(), outputTensorInfo.GetQuantizationOffset(),
1955                   biasV, biasDesc.GetQuantizationScale(), biasDesc.GetQuantizationOffset(),
1956                   outputWidth, outputHeight);
1957     }
1958 
1959     if (layout == armnn::DataLayout::NHWC)
1960     {
1961         std::vector<T> tmp(outputImage.size());
1962         armnnUtils::Permute(outputTensorInfo.GetShape(), NCHWToNHWC, outputImage.data(), tmp.data(), sizeof(T));
1963         outputImage = tmp;
1964     }
1965 
1966     std::vector<T> actualOutput(outputTensorInfo.GetNumElements());
1967 
1968     std::unique_ptr<armnn::ITensorHandle> inputHandle = tensorHandleFactory.CreateTensorHandle(inputTensorInfo);
1969     std::unique_ptr<armnn::ITensorHandle> weightsHandle = tensorHandleFactory.CreateTensorHandle(kernelDesc);
1970     std::unique_ptr<armnn::ITensorHandle> biasHandle = nullptr;
1971     std::unique_ptr<armnn::ITensorHandle> outputHandle = tensorHandleFactory.CreateTensorHandle(outputTensorInfo);
1972 
1973     armnn::DepthwiseConvolution2dQueueDescriptor data;
1974     armnn::WorkloadInfo info;
1975 
1976     AllocateAndCopyDataToITensorHandle(weightsHandle.get(), kernelData.data()); // required for ConstantTensor
1977 
1978     AddInputToWorkload(data, info, inputTensorInfo, inputHandle.get());
1979     AddInputToWorkload(data, info, kernelDesc, weightsHandle.get());
1980     AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get());
1981 
1982     armnn::ScopedTensorHandle biasTensor(biasDesc);
1983     if (biasEnabled)
1984     {
1985         AllocateAndCopyDataToITensorHandle(&biasTensor, biasV.data());
1986 
1987         biasHandle = tensorHandleFactory.CreateTensorHandle(biasDesc);
1988         AllocateAndCopyDataToITensorHandle(biasHandle.get(), biasV.data());
1989         AddInputToWorkload(data, info, biasDesc, biasHandle.get());
1990     }
1991 
1992     data.m_Parameters.m_StrideX = 1;
1993     data.m_Parameters.m_StrideY = 1;
1994     data.m_Parameters.m_PadLeft = 0;
1995     data.m_Parameters.m_PadRight = 0;
1996     data.m_Parameters.m_PadTop = 0;
1997     data.m_Parameters.m_PadBottom = 0;
1998     data.m_Parameters.m_BiasEnabled = biasEnabled;
1999     data.m_Parameters.m_DataLayout = layout;
2000 
2001     std::unique_ptr<armnn::IWorkload> workload
2002             = workloadFactory.CreateWorkload(armnn::LayerType::DepthwiseConvolution2d, data, info);
2003 
2004     inputHandle->Allocate();
2005     outputHandle->Allocate();
2006 
2007     CopyDataToITensorHandle(inputHandle.get(), inputData.data());
2008 
2009     ExecuteWorkload(*workload, memoryManager);
2010 
2011     CopyDataFromITensorHandle(actualOutput.data(), outputHandle.get());
2012 
2013     return LayerTestResult<T, 4>(actualOutput,
2014                                  outputImage,
2015                                  outputHandle->GetShape(),
2016                                  outputTensorInfo.GetShape());
2017 }
2018 
2019 template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType, typename T = armnn::ResolveType<ArmnnType>>
2020 LayerTestResult<T, 4> DepthwiseConvolution2dTestImpl(
2021     armnn::IWorkloadFactory& workloadFactory,
2022     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
2023     const armnn::ITensorHandleFactory& tensorHandleFactory,
2024     float qScale,
2025     int32_t qOffset,
2026     bool biasEnabled,
2027     const armnn::DataLayout layout)
2028 {
2029     using B = armnn::ResolveType<ArmnnBType>;
2030 
2031     unsigned int depthMultiplier = 2;
2032 
2033     unsigned int inputHeight    = 8;
2034     unsigned int inputWidth     = 16;
2035     unsigned int inputChannels  = 2;
2036     unsigned int inputBatchSize = 1;
2037 
2038     unsigned int kernelHeight = 5;
2039     unsigned int kernelWidth  = 3;
2040 
2041     unsigned int outputHeight    = inputHeight - kernelHeight + 1 + 2;
2042     unsigned int outputWidth     = (inputWidth - kernelWidth + 1)/2;
2043     unsigned int outputChannels  = inputChannels * depthMultiplier;
2044     unsigned int outputBatchSize = inputBatchSize;
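    // i.e. outputHeight = 8 - 5 + 1 + 2 = 6 (stride 1, pad 1 top and bottom),
    // outputWidth = (16 - 3 + 1) / 2 = 7 (stride 2, no horizontal padding), and
    // outputChannels = 2 * 2 = 4 (inputChannels x depthMultiplier).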
2045 
2046     armnn::TensorInfo inputTensorInfo = armnnUtils::GetTensorInfo(
2047             inputBatchSize, inputChannels, inputHeight, inputWidth, layout, ArmnnType);
2048     armnn::TensorInfo outputTensorInfo = armnnUtils::GetTensorInfo(
2049             outputBatchSize, outputChannels, outputHeight, outputWidth, layout, ArmnnType);
2050     armnn::TensorInfo kernelDesc({1, kernelHeight, kernelWidth, outputChannels},
2051                                  ArmnnType);
2052     armnn::TensorInfo biasDesc({outputChannels}, ArmnnBType);
2053 
2054     // Set quantization parameters if the requested type is a quantized type.
2055     if(armnn::IsQuantizedType<T>())
2056     {
2057         inputTensorInfo.SetQuantizationScale(qScale);
2058         inputTensorInfo.SetQuantizationOffset(qOffset);
2059         outputTensorInfo.SetQuantizationScale(qScale);
2060         outputTensorInfo.SetQuantizationOffset(qOffset);
2061         kernelDesc.SetQuantizationScale(qScale);
2062         kernelDesc.SetQuantizationOffset(qOffset);
2063         biasDesc.SetQuantizationScale(qScale*qScale);
2064         biasDesc.SetQuantizationOffset(0);
2065     }
2066 
2067     // NOTE: originalInputData is in NCHW format
2068     std::vector<T> originalInputData = std::vector<T>(
2069             QuantizedVector<T>({
2070                 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f,
2071                 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
2072                 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f,
2073                 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f,
2074                 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f,
2075                 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f,
2076                 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f,
2077                 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f,
2078                 0.0f, 0.0f, 1.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
2079                 0.0f, 0.0f, 1.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
2080                 0.0f, 0.0f, 1.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
2081                 0.0f, 0.0f, 1.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
2082                 0.0f, 0.0f, 1.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
2083                 0.0f, 0.0f, 1.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
2084                 0.0f, 0.0f, 1.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
2085                 0.0f, 0.0f, 1.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f
2086             },
2087             inputTensorInfo.GetQuantizationScale(),
2088             inputTensorInfo.GetQuantizationOffset()));
2089 
2090     std::vector<T> inputData = originalInputData;
2091     // At this point, permute the input data to NHWC if required.
2092     const armnn::PermutationVector NCHWToNHWC = { 0, 3, 1, 2 };
2093     if (layout == armnn::DataLayout::NHWC)
2094     {
2095         armnnUtils::Permute(inputTensorInfo.GetShape(), NCHWToNHWC,
2096                             originalInputData.data(), inputData.data(), sizeof(T));
2097     }
2098 
2099     std::vector<B> biasV = QuantizedVector<B>({ 0, 2, 1, -1 },
2100                                               biasDesc.GetQuantizationScale(),
2101                                               biasDesc.GetQuantizationOffset());
2102 
2103     std::vector<T> kernelData = std::vector<T>(
2104             QuantizedVector<T>({
2105                 1,  1, 1,
2106                 1, -1, 1,
2107                 1,  1, 1,
2108                 1,  1, 1,
2109                 1,  1, 1,
2110 
2111                 2,  2, 2,
2112                 2,  2, 2,
2113                 2,  2, 2,
2114                 2,  2, 2,
2115                 2,  2, 2,
2116 
2117                 0,  0, 0,
2118                 0, -1, 0,
2119                 0,  0, 0,
2120                 0,  0, 0,
2121                 0,  0, 0,
2122 
2123                 0,  0, 0,
2124                 0,  0, 0,
2125                 0,  1, 0,
2126                 0,  0, 0,
2127                 0,  0, 0
2128             },
2129             kernelDesc.GetQuantizationScale(),
2130             kernelDesc.GetQuantizationOffset()));
2131 
2132     if (workloadFactory.GetBackendId() == armnn::BackendId("GpuAcc") ||
2133         workloadFactory.GetBackendId() == armnn::BackendId("CpuAcc"))
2134     {
2135         if (layout == armnn::DataLayout::NCHW)
2136         {
2137             std::vector<T> tmp(kernelData.size());
2138             kernelDesc.SetShape(armnnUtils::Permuted(kernelDesc.GetShape(), {0, 2, 3, 1}));
2139             armnnUtils::Permute(kernelDesc.GetShape(), {0, 2, 3, 1}, kernelData.data(), tmp.data(), sizeof(T));
2140             kernelData = tmp;
2141         }
2142     }
2143 
2144     // Manually calculated.
2145     std::vector<T> originalOutputImage = std::vector<T>(
2146         QuantizedVector<T>({
2147                3,   3,   3,   3,   3,   3,   3,   3,   3,   3,   3,   3,   3,   3,
2148                5,   5,   5,   5,   5,   5,   5, 5.5, 5.5, 5.5, 5.5, 5.5, 5.5, 5.5,
2149              5.5, 5.5, 5.5, 5.5, 5.5, 5.5, 5.5,   5,   5,   5,   5,   5,   5,   5,
2150              2.5, 2.5, 2.5, 2.5, 2.5, 2.5, 2.5, 3.5, 3.5, 3.5, 3.5, 3.5, 3.5, 3.5,
2151              4.5, 4.5, 4.5, 4.5, 4.5, 4.5, 4.5,   6,   6,   6,   6,   6,   6,   6,
2152                6,   6,   6,   6,   6,   6,   6,   6,   6,   6,   6,   6,   6,   6,
2153                1,   3,   0,   0,   0,   0,   0,   2,   4,   0,   0,   0,   0,   0,
2154                2,   4,   0,   0,   0,   0,   0,   2,   4,   0,   0,   0,   0,   0,
2155                2,   4,   0,   0,   0,   0,   0,   2,   4,   0,   0,   0,   0,   0,
2156                2,   4,   0,   0,   0,   0,   0,   3,   5,   0,   0,   0,   0,   0,
2157                3,   5,   0,   0,   0,   0,   0,   3,   5,   0,   0,   0,   0,   0,
2158                3,   5,   0,   0,   0,   0,   0,   3,   5,   0,   0,   0,   0,   0
2159         },
2160         outputTensorInfo.GetQuantizationScale(),
2161         outputTensorInfo.GetQuantizationOffset()));
2162 
2163     // Optionally apply bias to output image.
2164     if(biasEnabled)
2165     {
2166         ApplyBias(originalOutputImage,
2167                   outputTensorInfo.GetQuantizationScale(),
2168                   outputTensorInfo.GetQuantizationOffset(),
2169                   biasV,
2170                   biasDesc.GetQuantizationScale(),
2171                   biasDesc.GetQuantizationOffset(),
2172                   outputWidth,
2173                   outputHeight);
2174     }
2175 
2176     std::vector<T> outputImage = originalOutputImage;
2177     if (layout == armnn::DataLayout::NHWC)
2178     {
2179         armnnUtils::Permute(outputTensorInfo.GetShape(), NCHWToNHWC,
2180                             originalOutputImage.data(), outputImage.data(), sizeof(T));
2181     }
2182 
2183     std::vector<T> actualOutput(outputTensorInfo.GetNumElements());
2184 
2185     std::unique_ptr<armnn::ITensorHandle> inputHandle = tensorHandleFactory.CreateTensorHandle(inputTensorInfo);
2186     std::unique_ptr<armnn::ITensorHandle> weightsHandle = tensorHandleFactory.CreateTensorHandle(kernelDesc);
2187     std::unique_ptr<armnn::ITensorHandle> biasHandle = nullptr;
2188     std::unique_ptr<armnn::ITensorHandle> outputHandle = tensorHandleFactory.CreateTensorHandle(outputTensorInfo);
2189 
2190     armnn::DepthwiseConvolution2dQueueDescriptor data;
2191     armnn::WorkloadInfo info;
2192 
2193     AllocateAndCopyDataToITensorHandle(weightsHandle.get(), kernelData.data()); // required for ConstantTensor
2194 
2195     AddInputToWorkload(data, info, inputTensorInfo, inputHandle.get());
2196     AddInputToWorkload(data, info, kernelDesc, weightsHandle.get());
2197     AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get());
2198 
2199     armnn::ScopedTensorHandle biasTensor(biasDesc);
2200     if (biasEnabled)
2201     {
2202         AllocateAndCopyDataToITensorHandle(&biasTensor, biasV.data());
2203 
2204         biasHandle = tensorHandleFactory.CreateTensorHandle(biasDesc);
2205         AllocateAndCopyDataToITensorHandle(biasHandle.get(), biasV.data());
2206         AddInputToWorkload(data, info, biasDesc, biasHandle.get());
2207     }
2208 
2209     data.m_Parameters.m_StrideX = 2;
2210     data.m_Parameters.m_StrideY = 1;
2211     data.m_Parameters.m_PadLeft = 0;
2212     data.m_Parameters.m_PadRight = 0;
2213     data.m_Parameters.m_PadTop = 1;
2214     data.m_Parameters.m_PadBottom = 1;
2215     data.m_Parameters.m_BiasEnabled = biasEnabled;
2216     data.m_Parameters.m_DataLayout = layout;
2217 
2218     std::unique_ptr<armnn::IWorkload> workload
2219             = workloadFactory.CreateWorkload(armnn::LayerType::DepthwiseConvolution2d, data, info);
2220 
2221     inputHandle->Allocate();
2222     outputHandle->Allocate();
2223 
2224     CopyDataToITensorHandle(inputHandle.get(), inputData.data());
2225 
2226     ExecuteWorkload(*workload, memoryManager);
2227 
2228     CopyDataFromITensorHandle(actualOutput.data(), outputHandle.get());
2229 
2230     return LayerTestResult<T, 4>(actualOutput,
2231                                  outputImage,
2232                                  outputHandle->GetShape(),
2233                                  outputTensorInfo.GetShape());
2234 
2235 }
2236 
2237 template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType,
2238         typename T = armnn::ResolveType<ArmnnType>, typename B = armnn::ResolveType<ArmnnBType>>
2239 LayerTestResult<T, 4> DepthwiseConvolution2dTestImpl(
2240     armnn::IWorkloadFactory& workloadFactory,
2241     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
2242     const armnn::ITensorHandleFactory& tensorHandleFactory,
2243     const std::vector<T>& originalInput,
2244     const std::vector<T>& originalKernel,
2245     const std::vector<B>& bias,
2246     const std::vector<T>& originalOutputExpected,
2247     const armnn::TensorShape& originalInputShape,
2248     const armnn::TensorShape& originalKernelShape,
2249     const armnn::TensorShape& originalOutputExpectedShape,
2250     float qScale,
2251     int32_t qOffset,
2252     const armnn::DataLayout layout = armnn::DataLayout::NCHW,
2253     uint32_t padLeft = 0,
2254     uint32_t padTop = 0,
2255     uint32_t padRight = 0,
2256     uint32_t padBottom = 0,
2257     uint32_t strideX = 1,
2258     uint32_t strideY = 1,
2259     uint32_t dilationX = 1,
2260     uint32_t dilationY = 1)
2261 {
2262     unsigned int inputHeight    = armnn::numeric_cast<unsigned int>(originalInputShape[2]);
2263     unsigned int inputWidth     = armnn::numeric_cast<unsigned int>(originalInputShape[3]);
2264     unsigned int inputChannels  = armnn::numeric_cast<unsigned int>(originalInputShape[1]);
2265     unsigned int inputNum       = armnn::numeric_cast<unsigned int>(originalInputShape[0]);
2266 
2267     unsigned int outputHeight   = armnn::numeric_cast<unsigned int>(originalOutputExpectedShape[2]);
2268     unsigned int outputWidth    = armnn::numeric_cast<unsigned int>(originalOutputExpectedShape[3]);
2269     unsigned int outputChannels = armnn::numeric_cast<unsigned int>(originalOutputExpectedShape[1]);
2270     unsigned int outputNum      = armnn::numeric_cast<unsigned int>(originalOutputExpectedShape[0]);
2271 
2272     unsigned int kernelHeight   = armnn::numeric_cast<unsigned int>(originalKernelShape[1]);
2273     unsigned int kernelWidth    = armnn::numeric_cast<unsigned int>(originalKernelShape[2]);
2274     unsigned int kernelChannels = armnn::numeric_cast<unsigned int>(originalKernelShape[3]);
2275 
2276     bool biasEnabled = bias.size() > 0;
2277 
2278     // This function currently assumes 1 batch of input/output (and duplicates this into 2 batches).
2279     ARMNN_ASSERT(inputNum == 1);
2280     ARMNN_ASSERT(outputNum == 1);
2281 
2282     // If a bias is used, its size must equal the number of output channels.
2283     ARMNN_ASSERT(!biasEnabled || bias.size() == outputChannels);
2284 
2285 
2286     // Note these tensors will use two (identical) batches.
2287     armnn::TensorInfo inputTensorInfo =
2288             armnnUtils::GetTensorInfo(2*inputNum, inputChannels, inputHeight, inputWidth, layout, ArmnnType);
2289     armnn::TensorInfo outputTensorInfo =
2290             armnnUtils::GetTensorInfo(2*outputNum, outputChannels, outputHeight, outputWidth, layout, ArmnnType);
2291 
2292     // Depthwise weights are always supplied as [1, H, W, O] (O = I * M), regardless of the input/output data layout.
2293     armnn::TensorInfo kernelDesc({1, kernelHeight, kernelWidth, kernelChannels}, ArmnnType);
2294 
2295     armnn::TensorInfo biasDesc({static_cast<unsigned int>(bias.size())}, ArmnnBType);
2296 
2297     // Set quantization parameters if the requested type is a quantized type.
2298     if(armnn::IsQuantizedType<T>())
2299     {
2300         inputTensorInfo.SetQuantizationScale(qScale);
2301         inputTensorInfo.SetQuantizationOffset(qOffset);
2302         outputTensorInfo.SetQuantizationScale(qScale);
2303         outputTensorInfo.SetQuantizationOffset(qOffset);
2304         kernelDesc.SetQuantizationScale(qScale);
2305         kernelDesc.SetQuantizationOffset(qOffset);
2306         biasDesc.SetQuantizationScale(qScale*qScale);
2307         biasDesc.SetQuantizationOffset(0);
2308     }
2309 
2310     std::vector<T> kernelData;
2311     kernelData.assign(originalKernel.data(), originalKernel.data() + kernelHeight*kernelWidth*outputChannels);
2312     if (workloadFactory.GetBackendId() == armnn::BackendId("GpuAcc") ||
2313         workloadFactory.GetBackendId() == armnn::BackendId("CpuAcc"))
2314     {
2315         if (layout == armnn::DataLayout::NCHW)
2316         {
2317             std::vector<T> tmp(kernelData.size());
2318             kernelDesc.SetShape(armnnUtils::Permuted(kernelDesc.GetShape(), {0, 2, 3, 1}));
2319             armnnUtils::Permute(kernelDesc.GetShape(), {0, 2, 3, 1}, kernelData.data(), tmp.data(), sizeof(T));
2320             kernelData = tmp;
2321         }
2322     }
2323 
2324     // Construct input data
2325     std::vector<T> input;
2326     input.assign(originalInput.data(), originalInput.data() + 1*inputChannels*inputHeight*inputWidth);
2327     std::vector<T> inputData;
2328     inputData.insert(inputData.end(), input.begin(), input.end());
2329     inputData.insert(inputData.end(), input.begin(), input.end());
2330 
2331     // At this point, permute the input data to NHWC if required.
2332     const armnn::PermutationVector NCHWToNHWC = { 0, 3, 1, 2 };
2333     if (layout == armnn::DataLayout::NHWC)
2334     {
2335         std::vector<T> tmp(inputData.size());
2336         armnnUtils::Permute(inputTensorInfo.GetShape(), NCHWToNHWC, inputData.data(), tmp.data(), sizeof(T));
2337         inputData = tmp;
2338     }
2339 
2340     std::vector<T> output;
2341     output.assign(originalOutputExpected.data(),
2342                        originalOutputExpected.data() + outputChannels*outputHeight*outputWidth);
2343 
2344     // Apply bias to output data if it is enabled.
2345     if(biasEnabled)
2346     {
2347         std::vector<T> biasV;
2348         biasV.assign(bias.data(), bias.data() + outputChannels);
2349         ApplyBias(output, outputTensorInfo.GetQuantizationScale(), outputTensorInfo.GetQuantizationOffset(),
2350                   biasV, biasDesc.GetQuantizationScale(), biasDesc.GetQuantizationOffset(),
2351                   outputWidth, outputHeight);
2352     }
2353 
2354     std::vector<T> actualOutput(outputTensorInfo.GetNumElements());
2355 
2356     // Construct expected output data
2357     std::vector<T> outputData;
2358     outputData.insert(outputData.end(), output.begin(), output.end());
2359     outputData.insert(outputData.end(), output.begin(), output.end());
2360 
2361     // At this point, permute the expected output to NHWC if required.
2362     if (layout == armnn::DataLayout::NHWC)
2363     {
2364         std::vector<T> tmp(outputData.size());
2365         armnnUtils::Permute(outputTensorInfo.GetShape(), NCHWToNHWC, outputData.data(), tmp.data(), sizeof(T));
2366         outputData = tmp;
2367     }
2368 
2369     std::unique_ptr<armnn::ITensorHandle> inputHandle = tensorHandleFactory.CreateTensorHandle(inputTensorInfo);
2370     std::unique_ptr<armnn::ITensorHandle> weightsHandle = tensorHandleFactory.CreateTensorHandle(kernelDesc);
2371     std::unique_ptr<armnn::ITensorHandle> biasHandle = nullptr;
2372     std::unique_ptr<armnn::ITensorHandle> outputHandle = tensorHandleFactory.CreateTensorHandle(outputTensorInfo);
2373 
2374     armnn::DepthwiseConvolution2dQueueDescriptor data;
2375     armnn::WorkloadInfo info;
2376 
2377     AllocateAndCopyDataToITensorHandle(weightsHandle.get(), kernelData.data()); // required for ConstantTensor
2378 
2379     AddInputToWorkload(data, info, inputTensorInfo, inputHandle.get());
2380     AddInputToWorkload(data, info, kernelDesc, weightsHandle.get());
2381     AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get());
2382 
2383     armnn::ScopedTensorHandle biasTensor(biasDesc);
2384     if (biasEnabled)
2385     {
2386         AllocateAndCopyDataToITensorHandle(&biasTensor, bias.data());
2387 
2388         biasHandle = tensorHandleFactory.CreateTensorHandle(biasDesc);
2389         AllocateAndCopyDataToITensorHandle(biasHandle.get(), bias.data());
2390         AddInputToWorkload(data, info, biasDesc, biasHandle.get());
2391     }
2392 
2393     data.m_Parameters.m_StrideX = strideX;
2394     data.m_Parameters.m_StrideY = strideY;
2395     data.m_Parameters.m_PadLeft = padLeft;
2396     data.m_Parameters.m_PadRight = padRight;
2397     data.m_Parameters.m_PadTop = padTop;
2398     data.m_Parameters.m_PadBottom = padBottom;
2399     data.m_Parameters.m_BiasEnabled = biasEnabled;
2400     data.m_Parameters.m_DataLayout = layout;
2401     data.m_Parameters.m_DilationX = dilationX;
2402     data.m_Parameters.m_DilationY = dilationY;
2403 
2404     std::unique_ptr<armnn::IWorkload> workload
2405             = workloadFactory.CreateWorkload(armnn::LayerType::DepthwiseConvolution2d, data, info);
2406 
2407     inputHandle->Allocate();
2408     outputHandle->Allocate();
2409 
2410     CopyDataToITensorHandle(inputHandle.get(), inputData.data());
2411 
2412     ExecuteWorkload(*workload, memoryManager);
2413 
2414     CopyDataFromITensorHandle(actualOutput.data(), outputHandle.get());
2415 
2416     return LayerTestResult<T, 4>(actualOutput,
2417                                  outputData,
2418                                  outputHandle->GetShape(),
2419                                  outputTensorInfo.GetShape());
2420 }
2421 
2422 template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType,
2423          typename T = armnn::ResolveType<ArmnnType>>
2424 LayerTestResult<T, 4> DepthwiseConvolution2dAsymmetricTestCommon(
2425     armnn::IWorkloadFactory& workloadFactory,
2426     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
2427     const armnn::ITensorHandleFactory& tensorHandleFactory,
2428     float qScale,
2429     int32_t qOffset,
2430     bool biasEnabled,
2431     const armnn::DataLayout layout)
2432 {
2433     // Use a single-batch 2-channel 5x5 image as input.
2434     armnn::TensorInfo inputTensorInfo({ 1, 2, 5, 5 }, ArmnnType);
2435     auto input = QuantizedVector<T>(
2436          {
2437              0,  1,  2,  3,  4,
2438              5,  6,  7,  8,  9,
2439             10, 11, 12, 13, 14,
2440             15, 16, 17, 18, 19,
2441             20, 21, 22, 23, 24,
2442 
2443             25, 26, 27, 28, 29,
2444             30, 31, 32, 33, 34,
2445             35, 36, 37, 38, 39,
2446             40, 41, 42, 43, 44,
2447             45, 46, 47, 48, 49
2448         },
2449         inputTensorInfo.GetQuantizationScale(),
2450         inputTensorInfo.GetQuantizationOffset());
2451 
2452     // Use a depth multiplier of 1 on a 2-channel 4x4 kernel.
2453     // Weights layout for depthwise: [1,H,W,I*M]
2454     armnn::TensorInfo kernelTensorInfo({ 1, 4, 4, 2 }, ArmnnType);
2455     auto kernel = QuantizedVector<T>({
2456             32, 31, 30, 29,
2457             28, 27, 26, 25,
2458             24, 23, 22, 21,
2459             20, 19, 18, 17,
2460 
2461             16, 15, 14, 13,
2462             12, 11, 10,  9,
2463              8,  7,  6,  5,
2464              4,  3,  2,  1
2465         },
2466         kernelTensorInfo.GetQuantizationScale(),
2467         kernelTensorInfo.GetQuantizationOffset());
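    // Note: with the [1, H, W, I*M] layout the values above interleave the two channels at
    // every spatial position; the weight for channel c at (h, w) sits at flat index
    // (h * W + w) * (I*M) + c, so the first 4x4 block as written is not channel 0's kernel
    // on its own.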
2468 
2469     // Expected output is 1 batch of a 2-channel 5x5 image.
2470     // Calculated using the Python TensorFlow library with strideX=1, strideY=1.
2471     armnn::TensorInfo outputTensorInfo({ 1, 2, 5, 5 }, ArmnnType);
2472     auto expectedOutput = QuantizedVector<T>(
2473          {
2474              396,  664,  820,  756,  602, 1016, 1608, 1880, 1652, 1268, 1976, 2968, 3240,
2475             2732, 2028, 2628, 3808, 4060, 3312, 2390, 2596, 3700, 3900, 3130, 2226,
2476             2817, 4186, 4330, 3609, 2651, 5414, 7864, 8120, 6626, 4780, 6314, 9144, 9400,
2477             7646, 5500, 6759, 9610, 9850, 7875, 5579, 5935, 8348, 8540, 6757, 4742
2478         },
2479         outputTensorInfo.GetQuantizationScale(),
2480         outputTensorInfo.GetQuantizationOffset());
2481 
2482     return DepthwiseConvolution2dAsymmetricTestImpl<ArmnnType, ArmnnBType>(
2483         workloadFactory,
2484         memoryManager,
2485         tensorHandleFactory,
2486         input,
2487         kernel,
2488         GetBias2<ArmnnBType>(biasEnabled, qScale * qScale),
2489         expectedOutput,
2490         inputTensorInfo.GetShape(),
2491         kernelTensorInfo.GetShape(),
2492         outputTensorInfo.GetShape(),
2493         qScale,
2494         qOffset,
2495         layout,
2496         1,  // Padding left.
2497         1,  // Padding top.
2498         2,  // Padding right.
2499         2,  // Padding bottom.
2500         1,  // strideX
2501         1); // strideY
2502 }
2503 
2504 template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType,
2505          typename T = armnn::ResolveType<ArmnnType>>
2506 LayerTestResult<T, 4> DepthwiseConvolution2dNhwcTestCommon(
2507     armnn::IWorkloadFactory& workloadFactory,
2508     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
2509     const armnn::ITensorHandleFactory& tensorHandleFactory,
2510     float qScale,
2511     int32_t qOffset,
2512     bool biasEnabled)
2513 {
2514     auto layout = armnn::DataLayout::NHWC;
2515 
2516     armnn::TensorInfo inputTensorInfo({ 1, 2, 5, 5}, ArmnnType);
2517     auto input = QuantizedVector<T>(
2518          {
2519              0,  1,  2,  3,  4,
2520              5,  6,  7,  8,  9,
2521             10, 11, 12, 13, 14,
2522             15, 16, 17, 18, 19,
2523             20, 21, 22, 23, 24,
2524 
2525             25, 26, 27, 28, 29,
2526             30, 31, 32, 33, 34,
2527             35, 36, 37, 38, 39,
2528             40, 41, 42, 43, 44,
2529             45, 46, 47, 48, 49
2530         },
2531         inputTensorInfo.GetQuantizationScale(),
2532         inputTensorInfo.GetQuantizationOffset());
2533 
2534     armnn::TensorInfo kernelTensorInfo({ 1, 4, 4, 2 }, ArmnnType);
2535     auto kernel = QuantizedVector<T>({
2536              32, 31, 30, 29,
2537              28, 27, 26, 25,
2538              24, 23, 22, 21,
2539              20, 19, 18, 17,
2540 
2541              16, 15, 14, 13,
2542              12, 11, 10,  9,
2543               8,  7,  6,  5,
2544               4,  3,  2,  1
2545         },
2546         kernelTensorInfo.GetQuantizationScale(),
2547         kernelTensorInfo.GetQuantizationOffset());
2548 
2549     armnn::TensorInfo outputTensorInfo({ 1, 2, 5, 5}, ArmnnType);
2550     auto expectedOutput = QuantizedVector<T>(
2551          {
2552              396,  664,  820,  756,  602,
2553             1016, 1608, 1880, 1652, 1268,
2554             1976, 2968, 3240, 2732, 2028,
2555             2628, 3808, 4060, 3312, 2390,
2556             2596, 3700, 3900, 3130, 2226,
2557 
2558             2817, 4186, 4330, 3609, 2651,
2559             5414, 7864, 8120, 6626, 4780,
2560             6314, 9144, 9400, 7646, 5500,
2561             6759, 9610, 9850, 7875, 5579,
2562             5935, 8348, 8540, 6757, 4742
2563         },
2564         outputTensorInfo.GetQuantizationScale(),
2565         outputTensorInfo.GetQuantizationOffset());
2566 
2567     return DepthwiseConvolution2dTestImpl<ArmnnType, ArmnnBType>(
2568         workloadFactory,
2569         memoryManager,
2570         tensorHandleFactory,
2571         input,
2572         kernel,
2573         GetBias2<ArmnnBType>(biasEnabled, qScale * qScale),
2574         expectedOutput,
2575         inputTensorInfo.GetShape(),
2576         kernelTensorInfo.GetShape(),
2577         outputTensorInfo.GetShape(),
2578         qScale,
2579         qOffset,
2580         layout,
2581         1,  // Padding left.
2582         1,  // Padding top.
2583         2,  // Padding right.
2584         2,  // Padding bottom.
2585         1,  // strideX
2586         1);  // strideY
2587 }
2588 
2589 template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType,
2590          typename T = armnn::ResolveType<ArmnnType>>
2591 LayerTestResult<T, 4> SimpleDepthwiseConvolution2d3x3Dilation3x3NhwcTestCommon(
2592     armnn::IWorkloadFactory& workloadFactory,
2593     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
2594     const armnn::ITensorHandleFactory& tensorHandleFactory,
2595     float qScale,
2596     int32_t qOffset,
2597     bool biasEnabled)
2598 {
2599     auto layout = armnn::DataLayout::NHWC;
2600 
2601     armnn::TensorInfo inputTensorInfo({ 1, 1, 9, 9 }, ArmnnType);
2602     auto input = QuantizedVector<T>(
2603          {
2604             0, 0, 0, 0, 0, 0, 0, 0, 0,
2605             0, 0, 0, 0, 0, 0, 0, 0, 0,
2606             0, 0, 0, 0, 0, 0, 0, 0, 0,
2607             0, 0, 0, 1, 1, 1, 0, 0, 0,
2608             0, 0, 0, 1, 1, 1, 0, 0, 0,
2609             0, 0, 0, 1, 1, 1, 0, 0, 0,
2610             0, 0, 0, 0, 0, 0, 0, 0, 0,
2611             0, 0, 0, 0, 0, 0, 0, 0, 0,
2612             0, 0, 0, 0, 0, 0, 0, 0, 0
2613         },
2614         inputTensorInfo.GetQuantizationScale(),
2615         inputTensorInfo.GetQuantizationOffset());
2616 
2617     armnn::TensorInfo kernelTensorInfo({ 1, 3, 3, 1}, ArmnnType);
2618     auto kernel = QuantizedVector<T>({
2619             1, 2, 3,
2620             4, 5, 6,
2621             7, 8, 9
2622         },
2623         kernelTensorInfo.GetQuantizationScale(),
2624         kernelTensorInfo.GetQuantizationOffset());
2625 
2626     uint32_t padLeft = 0;
2627     uint32_t padTop = 0;
2628     uint32_t padRight = 0;
2629     uint32_t padBottom = 0;
2630     uint32_t strideX  = 1;
2631     uint32_t strideY  = 1;
2632     uint32_t dilationX  = 3;
2633     uint32_t dilationY  = 3;
2634 
2635     // Since the dilation rate is 3, the effective kernel grows to 7x7, reducing the 9x9 input
2635     // to a 3x3 output of all 5s.
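    // Worked through under the standard dilation arithmetic:
    //   effective kernel extent = (K - 1) * dilation + 1 = (3 - 1) * 3 + 1 = 7
    //   output size = (I - Keff + padTotal) / stride + 1 = (9 - 7 + 0) / 1 + 1 = 3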
2636     armnn::TensorInfo outputTensorInfo({ 1, 1, 3, 3 }, ArmnnType);
2637     auto expectedOutput = QuantizedVector<T>(
2638         {
2639             5, 5, 5,
2640             5, 5, 5,
2641             5, 5, 5
2642         },
2643         outputTensorInfo.GetQuantizationScale(),
2644         outputTensorInfo.GetQuantizationOffset());
2645 
2646     return DepthwiseConvolution2dTestImpl<ArmnnType, ArmnnBType>(
2647         workloadFactory,
2648         memoryManager,
2649         tensorHandleFactory,
2650         input,
2651         kernel,
2652         GetBias2<ArmnnBType>(biasEnabled, qScale * qScale),
2653         expectedOutput,
2654         inputTensorInfo.GetShape(),
2655         kernelTensorInfo.GetShape(),
2656         outputTensorInfo.GetShape(),
2657         qScale,
2658         qOffset,
2659         layout,
2660         padLeft,
2661         padTop,
2662         padRight,
2663         padBottom,
2664         strideX,
2665         strideY,
2666         dilationX,
2667         dilationY);
2668 }
2669 
2670 template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType, typename T = armnn::ResolveType<ArmnnType>>
2671 LayerTestResult<T, 4> DepthwiseConvolution2d3x3DilationTestCommon(
2672         armnn::IWorkloadFactory& workloadFactory,
2673         const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
2674         const armnn::ITensorHandleFactory& tensorHandleFactory,
2675         const std::vector<float>& inputNoQuantizedValues,
2676         armnn::TensorInfo& inputTensorInfo,
2677         const std::vector<float>& kernelNoQuantizedValues,
2678         armnn::TensorInfo& kernelTensorInfo,
2679         const std::vector<float>& outputExpectedNoQuantizedValues,
2680         armnn::TensorInfo& outputTensorInfo,
2681         uint32_t dilationX,
2682         uint32_t dilationY,
2683         armnn::DataLayout layout = armnn::DataLayout::NCHW,
2684         bool biasEnabled = false)
2685 {
2686     float qScale;
2687     int32_t qOffset;
2688     switch (ArmnnType)
2689     {
2690         case armnn::DataType::QAsymmS8:
2691         case armnn::DataType::QAsymmU8:
2692         {
2693             qScale = 0.1f;
2694             qOffset = 128;
2695             break;
2696         }
2697         case armnn::DataType::QSymmS16:
2698         {
2699             qScale = 0.1f;
2700             qOffset = 0;
2701             break;
2702         }
2703         case armnn::DataType::Float32:
2704         default:
2705         {
2706             qScale = 0.f;
2707             qOffset = 0;
2708             break;
2709         }
2710     }
2711 
2712     inputTensorInfo.SetQuantizationScale(qScale);
2713     inputTensorInfo.SetQuantizationOffset(qOffset);
2714     kernelTensorInfo.SetQuantizationScale(qScale);
2715     kernelTensorInfo.SetQuantizationOffset(qOffset);
2716     outputTensorInfo.SetQuantizationScale(qScale);
2717     outputTensorInfo.SetQuantizationOffset(qOffset);
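    // QuantizedVector below applies the usual affine quantization,
    // q = round(v / scale) + offset (clamped to the type's range); e.g. with
    // qScale = 0.1 and qOffset = 128, a real value of 1.0 maps to round(10) + 128 = 138.
    // A minimal sketch of that mapping (illustration only; armnnUtils::QuantizedVector
    // performs the equivalent element-wise):
    //   auto quantize = [](float v, float scale, int32_t offset) {
    //       return static_cast<int32_t>(std::round(v / scale)) + offset;
    //   };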
2718 
2719     auto input = QuantizedVector<T>(inputNoQuantizedValues,
2720                                     inputTensorInfo.GetQuantizationScale(),
2721                                     inputTensorInfo.GetQuantizationOffset());
2722     auto kernel = QuantizedVector<T>(kernelNoQuantizedValues,
2723                                      kernelTensorInfo.GetQuantizationScale(),
2724                                      kernelTensorInfo.GetQuantizationOffset());
2725     auto expectedOutput = QuantizedVector<T>(outputExpectedNoQuantizedValues,
2726                                              outputTensorInfo.GetQuantizationScale(),
2727                                              outputTensorInfo.GetQuantizationOffset());
2728 
2729     uint32_t padLeft = 0;
2730     uint32_t padTop = 0;
2731     uint32_t padRight = 0;
2732     uint32_t padBottom = 0;
2733     uint32_t strideX  = 1;
2734     uint32_t strideY  = 1;
2735 
2736     return DepthwiseConvolution2dTestImpl<ArmnnType, ArmnnBType>(
2737             workloadFactory,
2738             memoryManager,
2739             tensorHandleFactory,
2740             input,
2741             kernel,
2742             GetBias<ArmnnBType>(biasEnabled, qScale * qScale, outputTensorInfo, layout),
2743             expectedOutput,
2744             inputTensorInfo.GetShape(),
2745             kernelTensorInfo.GetShape(),
2746             outputTensorInfo.GetShape(),
2747             qScale,
2748             qOffset,
2749             layout,
2750             padLeft,
2751             padTop,
2752             padRight,
2753             padBottom,
2754             strideX,
2755             strideY,
2756             dilationX,
2757             dilationY);
2758 }
2759 
2760 template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType, typename T>
2761 LayerTestResult<T, 4> DepthwiseConvolution2d3x3Dilation3x3Test(
2762         armnn::IWorkloadFactory& workloadFactory,
2763         const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
2764         const armnn::ITensorHandleFactory& tensorHandleFactory,
2765         bool biasEnabled,
2766         const armnn::DataLayout layout)
2767 {
2768     armnn::TensorInfo inputTensorInfo({1, 1, 10, 10}, ArmnnType);
2769     std::vector<float> inputNoQuantizedValues =
2770             {
2771                     0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
2772                     0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
2773                     0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
2774                     0, 0, 0, 0, 1, 1, 1, 0, 0, 0,
2775                     0, 0, 0, 0, 1, 1, 1, 0, 0, 0,
2776                     0, 0, 0, 0, 1, 1, 1, 0, 0, 0,
2777                     0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
2778                     0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
2779                     0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
2780                     0, 0, 0, 0, 0, 0, 0, 0, 0, 0
2781             };
2782 
2783     armnn::TensorInfo kernelTensorInfo({ 1, 3, 3, 1}, ArmnnType);
2784     std::vector<float> kernelNoQuantizedValues =
2785             {
2786                     1, 2, 3,
2787                     4, 5, 6,
2788                     7, 8, 9
2789             };
2790 
2791     // Since the dilation rate is 3, the effective kernel grows to 7x7,
2792     // so the output will be 4x4: (I - Keff + 2P)/S + 1 => (10 - 7 + 0)/1 + 1 = 4.
2793     armnn::TensorInfo outputTensorInfo({ 1, 1, 4, 4}, ArmnnType);
2794     std::vector<float> outputExpectedNoQuantizedValues =
2795             {
2796                     6., 5., 5., 5.,
2797                     6., 5., 5., 5.,
2798                     6., 5., 5., 5.,
2799                     3., 2., 2., 2.
2800             };
2801 
2802     return DepthwiseConvolution2d3x3DilationTestCommon<ArmnnType, ArmnnBType>(
2803             workloadFactory,
2804             memoryManager,
2805             tensorHandleFactory,
2806             inputNoQuantizedValues,
2807             inputTensorInfo,
2808             kernelNoQuantizedValues,
2809             kernelTensorInfo,
2810             outputExpectedNoQuantizedValues,
2811             outputTensorInfo,
2812             3,
2813             3,
2814             layout,
2815             biasEnabled);
2816 }
2817 
2818 template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType, typename T>
2819 LayerTestResult<T, 4> DepthwiseConvolution2d2x3x3Dilation3x3Test(
2820         armnn::IWorkloadFactory& workloadFactory,
2821         const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
2822         const armnn::ITensorHandleFactory& tensorHandleFactory,
2823         bool biasEnabled,
2824         const armnn::DataLayout layout)
2825 {
2826     armnn::TensorInfo inputTensorInfo({1, 2, 10, 10}, ArmnnType);
2827     std::vector<float> inputNoQuantizedValues =
2828             {
2829                     0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
2830                     0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
2831                     0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
2832                     0, 0, 0, 0, 1, 1, 1, 0, 0, 0,
2833                     0, 0, 0, 0, 1, 1, 1, 0, 0, 0,
2834                     0, 0, 0, 0, 1, 1, 1, 0, 0, 0,
2835                     0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
2836                     0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
2837                     0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
2838                     0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
2839 
2840                     0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
2841                     0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
2842                     0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
2843                     0, 0, 0, 0, 1, 1, 1, 0, 0, 0,
2844                     0, 0, 0, 0, 1, 1, 1, 0, 0, 0,
2845                     0, 0, 0, 0, 1, 1, 1, 0, 0, 0,
2846                     0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
2847                     0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
2848                     0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
2849                     0, 0, 0, 0, 0, 0, 0, 0, 0, 0
2850             };
2851 
2852     armnn::TensorInfo kernelTensorInfo({ 1, 3, 3, 2}, ArmnnType);
2853     std::vector<float> kernelNoQuantizedValues =
2854             {
2855                     1, 2, 3,
2856                     4, 5, 6,
2857                     7, 8, 9,
2858 
2859                     1, 2, 3,
2860                     4, 5, 6,
2861                     7, 8, 9
2862             };
2863 
2864     // Since the dilation rate is 3, the effective kernel grows to 7x7,
2865     // so the output will be 2x4x4: (I - Keff + 2P)/S + 1 => (10 - 7 + 0)/1 + 1 = 4.
2866     armnn::TensorInfo outputTensorInfo({ 1, 2, 4, 4}, ArmnnType);
2867     std::vector<float> outputExpectedNoQuantizedValues =
2868             {
2869                     2, 9, 9, 9,  2, 9, 9, 9,  2, 9, 9, 9,  5, 3, 3, 3,
2870 
2871                     3, 1, 1, 1,  3, 1, 1, 1,  3, 1, 1, 1,  6, 4, 4, 4
2872             };
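    // With the [1, H, W, I*M] layout the kernel values above interleave the two channels,
    // so the effective 3x3 kernels are { 1, 3, 5, 7, 9, 2, 4, 6, 8 } for channel 0 and
    // { 2, 4, 6, 8, 1, 3, 5, 7, 9 } for channel 1, which is why the two output channels
    // differ despite identical input channels.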
2873 
2874     return DepthwiseConvolution2d3x3DilationTestCommon<ArmnnType, ArmnnBType>(
2875             workloadFactory,
2876             memoryManager,
2877             tensorHandleFactory,
2878             inputNoQuantizedValues,
2879             inputTensorInfo,
2880             kernelNoQuantizedValues,
2881             kernelTensorInfo,
2882             outputExpectedNoQuantizedValues,
2883             outputTensorInfo,
2884             3,
2885             3,
2886             layout,
2887             biasEnabled);
2888 }
2889 
2890 template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType, typename T>
2891 LayerTestResult<T, 4> DepthwiseConvolution2dMult4Test(
2892             armnn::IWorkloadFactory& workloadFactory,
2893             const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
2894             const armnn::ITensorHandleFactory& tensorHandleFactory,
2895             bool biasEnabled,
2896             const armnn::DataLayout layout)
2897 {
2898     armnn::TensorInfo inputTensorInfo({1, 2, 3, 3}, ArmnnType);
2899     std::vector<float> inputNoQuantizedValues =
2900             {
2901                     10.0, 10.0, 10.0,
2902                     10.0, 10.0, 10.0,
2903                     10.0, 10.0, 10.0,
2904 
2905                     21.0, 22.0, 23.0,
2906                     24.0, 25.0, 26.0,
2907                     27.0, 28.0, 29.0
2908             };
2909 
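    // Depth multiplier M = 4 on I = 2 input channels gives I*M = 8 output channels,
    // hence the [1, H, W, I*M] = [1, 2, 2, 8] weights shape below.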
2910     armnn::TensorInfo kernelTensorInfo({ 1, 2, 2, 8}, ArmnnType);
2911 
2912     std::vector<float> kernelNoQuantizedValues =
2913             {
2914                     0.25f, 0.25f,
2915                     0.25f, 0.25f,
2916 
2917                     0.25f, 0.25f,
2918                     0.25f, 0.25f,
2919 
2920                     0.0f , 0.0f,
2921                     0.0f , 0.1f,
2922 
2923                     0.0f , 0.0f,
2924                     0.0f , 0.1f,
2925 
2926                     0.2f , 0.0f,
2927                     0.0f , 0.0f,
2928 
2929                     0.2f , 0.0f,
2930                     0.0f , 0.0f,
2931 
2932                     0.0f , 0.3f,
2933                     0.0f , 0.0f,
2934 
2935                     0.0f , 0.3f,
2936                     0.0f , 0.0f
2937             };
2938 
2939     armnn::TensorInfo outputTensorInfo({ 1, 8, 2, 2}, ArmnnType);
2940     std::vector<float> outputExpectedNoQuantizedValues =
2941             {
2942                       4.5f,  4.5f,  4.5f,   4.5f,   5.5f,  5.5f,  5.5f,   5.5f,
2943                       2.5f,  2.5f,  2.5f,   2.5f,   3.5f,  3.5f,  3.5f,   3.5f,
2944                     10.05f, 10.5f, 11.4f, 11.85f, 12.75f, 13.3f, 14.4f, 14.95f,
2945                      5.25f,  5.5f,  6.0f,  6.25f,  7.45f,  7.8f,  8.5f,  8.85f
2946             };
2947 
2948 
2949     return DepthwiseConvolution2d3x3DilationTestCommon<ArmnnType, ArmnnBType>(
2950             workloadFactory,
2951             memoryManager,
2952             tensorHandleFactory,
2953             inputNoQuantizedValues,
2954             inputTensorInfo,
2955             kernelNoQuantizedValues,
2956             kernelTensorInfo,
2957             outputExpectedNoQuantizedValues,
2958             outputTensorInfo,
2959             1,
2960             1,
2961             layout,
2962             biasEnabled);
2963 }
2964 
2965 template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType, typename T>
2966 LayerTestResult<T, 4> DepthwiseConvolution2dMult2Test(
2967             armnn::IWorkloadFactory& workloadFactory,
2968             const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
2969             const armnn::ITensorHandleFactory& tensorHandleFactory,
2970             bool biasEnabled,
2971             const armnn::DataLayout layout)
2972 {
2973     armnn::TensorInfo inputTensorInfo({1, 2, 3, 3}, ArmnnType);
2974     std::vector<float> inputNoQuantizedValues =
2975             {
2976                     10.0, 10.0, 10.0,
2977                     10.0, 10.0, 10.0,
2978                     10.0, 10.0, 10.0,
2979 
2980                     21.0, 22.0, 23.0,
2981                     24.0, 25.0, 26.0,
2982                     27.0, 28.0, 29.0
2983             };
2984 
2985     armnn::TensorInfo kernelTensorInfo({ 1, 2, 2, 4}, ArmnnType);
2986 
2987     std::vector<float> kernelNoQuantizedValues =
2988             {
2989                     0.25f, 0.25f,
2990                     0.25f, 0.25f,
2991 
2992                     0.2f , 0.0f,
2993                     0.0f , 0.0f,
2994 
2995                     0.0f , 0.0f,
2996                     0.0f , 0.1f,
2997 
2998                     0.0f , 0.3f,
2999                     0.0f , 0.0f
3000 
3001             };
3002 
3003     armnn::TensorInfo outputTensorInfo({ 1, 4, 2, 2}, ArmnnType);
3004     std::vector<float> outputExpectedNoQuantizedValues =
3005             {
3006                      4.5f, 4.5f, 4.5f,  4.5f,
3007                      5.5f, 5.5f, 5.5f,  5.5f,
3008                     5.25f, 5.5f, 6.0f, 6.25f,
3009                     7.65f, 8.0f, 8.7f, 9.05f
3010             };
3011 
3012 
3013     return DepthwiseConvolution2d3x3DilationTestCommon<ArmnnType, ArmnnBType>(
3014             workloadFactory,
3015             memoryManager,
3016             tensorHandleFactory,
3017             inputNoQuantizedValues,
3018             inputTensorInfo,
3019             kernelNoQuantizedValues,
3020             kernelTensorInfo,
3021             outputExpectedNoQuantizedValues,
3022             outputTensorInfo,
3023             1,
3024             1,
3025             layout,
3026             biasEnabled);
3027 }
3028 
3029 template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
3030 LayerTestResult<T, 4> CompareDepthwiseConvolution2dTestImpl(
3031     armnn::IWorkloadFactory& workloadFactory,
3032     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
3033     armnn::IWorkloadFactory& refWorkloadFactory,
3034     const armnn::ITensorHandleFactory& tensorHandleFactory,
3035     const armnn::ITensorHandleFactory& refTensorHandleFactory,
3036     const armnnUtils::DataLayoutIndexed& layout)
3037 {
3038     unsigned int inputHeight = 8;
3039     unsigned int inputWidth = 16;
3040     unsigned int inputChannels = 3;
3041     unsigned int inputNum = 5;
3042 
3043     unsigned int kernelHeight = 3;
3044     unsigned int kernelWidth = 3;
3045     unsigned int channelMultiplier = 1;
3046 
3047     unsigned int strideX = 2;
3048     unsigned int strideY = 3;
3049     unsigned int padX = 1;
3050     unsigned int padY = 1;
3051 
3052     unsigned int outputNum = inputNum;
3053     unsigned int outputChannels = inputChannels * channelMultiplier;
3054     unsigned int outputHeight = (inputHeight + 2 * padY - kernelHeight + strideY) / strideY;
3055     unsigned int outputWidth = (inputWidth + 2 * padX - kernelWidth + strideX) / strideX;
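    // Worked through with the values above (integer division):
    //   outputHeight = (8 + 2*1 - 3 + 3) / 3 = 10 / 3 = 3
    //   outputWidth  = (16 + 2*1 - 3 + 2) / 2 = 17 / 2 = 8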
3056 
3057     armnn::TensorInfo inputTensorInfo;
3058     armnn::TensorInfo outputTensorInfo;
3059     armnn::TensorInfo kernelDesc;
3060     armnn::TensorInfo biasDesc;
3061 
3062     std::vector<unsigned int> inputShape;
3063     std::vector<unsigned int> outputShape;
3064     std::vector<unsigned int> kernelShape{ 1, kernelHeight, kernelWidth, outputChannels };
3065     std::vector<unsigned int> biasShape{ outputChannels };
3066     switch (layout.GetDataLayout())
3067     {
3068         case armnn::DataLayout::NCHW:
3069             inputShape =  { inputNum, inputChannels, inputHeight, inputWidth };
3070             outputShape = { outputNum, outputChannels, outputHeight, outputWidth };
3071             break;
3072         case armnn::DataLayout::NHWC:
3073             inputShape =  { inputNum, inputHeight, inputWidth, inputChannels };
3074             outputShape = { outputNum, outputHeight, outputWidth, outputChannels };
3075             break;
3076         default:
3077             throw armnn::InvalidArgumentException("unknown data layout ["
3078                                                   + std::to_string(static_cast<int>(layout.GetDataLayout())) + "]");
3079     }
3080 
3081     float inputsQScale = armnn::IsQuantizedType<T>() ? 1.0f : 0;
3082     float outputQScale = armnn::IsQuantizedType<T>() ? 2.0f : 0;
3083     int32_t qOffset = 0;
3084 
3085     inputTensorInfo = armnn::TensorInfo(4, inputShape.data(), ArmnnType, inputsQScale, qOffset);
3086     outputTensorInfo = armnn::TensorInfo(4, outputShape.data(), ArmnnType, outputQScale, qOffset);
3087     kernelDesc = armnn::TensorInfo(4, kernelShape.data(), ArmnnType, inputsQScale, qOffset);
3088     biasDesc = armnn::TensorInfo(1, biasShape.data(), armnn::GetBiasDataType(ArmnnType), inputsQScale, qOffset);
3089 
3090     auto input  = MakeRandomTensor<T>(inputTensorInfo, 124908, 0.0f, 255.0f);
3091     auto kernel = MakeRandomTensor<T>(kernelDesc, 891234, 0.0f, 255.0f);
3092     auto bias   = MakeRandomTensor<typename FullyConnectedBiasTypeForInputType<T>::Type>(biasDesc, 1028, 0.0f, 255.0f);
3093 
3094     armnn::TensorInfo aclKernelDescriptor = kernelDesc;
3095     std::vector<T> aclKernelData;
3096     aclKernelData.assign(kernel.data(), kernel.data() + kernelHeight * kernelWidth * outputChannels);
3097     if (workloadFactory.GetBackendId() == armnn::BackendId("GpuAcc") ||
3098         workloadFactory.GetBackendId() == armnn::BackendId("CpuAcc"))
3099     {
3100         if (layout == armnn::DataLayout::NCHW)
3101         {
3102             std::vector<T> tmp(kernel.size());
3103             aclKernelDescriptor.SetShape(armnnUtils::Permuted(kernelDesc.GetShape(), {0, 2, 3, 1}));
3104             armnnUtils::Permute(kernelDesc.GetShape(), {0, 2, 3, 1}, kernel.data(), tmp.data(), sizeof(T));
3105             aclKernelData = tmp;
3106         }
3107     }
3108 
3109     std::vector<T> actualOutput(outputTensorInfo.GetNumElements());
3110     std::vector<T> expectedOutput(outputTensorInfo.GetNumElements());
3111 
3112     std::unique_ptr<armnn::ITensorHandle> inputHandle = tensorHandleFactory.CreateTensorHandle(inputTensorInfo);
3113     std::unique_ptr<armnn::ITensorHandle> weightsHandle = tensorHandleFactory.CreateTensorHandle(aclKernelDescriptor);
3114     std::unique_ptr<armnn::ITensorHandle> biasHandle = tensorHandleFactory.CreateTensorHandle(biasDesc);
3115     std::unique_ptr<armnn::ITensorHandle> outputHandle = tensorHandleFactory.CreateTensorHandle(outputTensorInfo);
3116 
3117     armnn::DepthwiseConvolution2dQueueDescriptor data;
3118     armnn::WorkloadInfo info;
3119 
3120     AddInputToWorkload(data, info, inputTensorInfo, inputHandle.get());
3121     AddInputToWorkload(data, info, aclKernelDescriptor, weightsHandle.get());
3122     AddInputToWorkload(data, info, biasDesc, biasHandle.get());
3123     AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get());
3124 
3125     AllocateAndCopyDataToITensorHandle(weightsHandle.get(), aclKernelData.data());
3126     AllocateAndCopyDataToITensorHandle(biasHandle.get(), bias.data());
3127 
3128     data.m_Parameters.m_StrideX = strideX;
3129     data.m_Parameters.m_StrideY = strideY;
3130     data.m_Parameters.m_PadLeft = padX;
3131     data.m_Parameters.m_PadRight = padX;
3132     data.m_Parameters.m_PadTop = padY;
3133     data.m_Parameters.m_PadBottom = padY;
3134     data.m_Parameters.m_BiasEnabled = true;
3135     data.m_Parameters.m_DataLayout = layout.GetDataLayout();
3136 
3137     std::unique_ptr<armnn::ITensorHandle> outputHandleRef = refTensorHandleFactory.CreateTensorHandle(outputTensorInfo);
3138     std::unique_ptr<armnn::ITensorHandle> weightsHandleRef = refTensorHandleFactory.CreateTensorHandle(kernelDesc);
3139     std::unique_ptr<armnn::ITensorHandle> biasHandleRef = refTensorHandleFactory.CreateTensorHandle(biasDesc);
3140     std::unique_ptr<armnn::ITensorHandle> inputHandleRef = refTensorHandleFactory.CreateTensorHandle(inputTensorInfo);
3141 
3142     armnn::DepthwiseConvolution2dQueueDescriptor refData = data;
3143     armnn::WorkloadInfo refInfo = info;
3144     SetWorkloadInput(refData, refInfo, 0, inputTensorInfo, inputHandleRef.get());
3145     SetWorkloadInput(refData, refInfo, 1, kernelDesc, weightsHandleRef.get());
3146     SetWorkloadInput(refData, refInfo, 2, biasDesc, biasHandleRef.get());
3147     SetWorkloadOutput(refData, refInfo, 0, outputTensorInfo, outputHandleRef.get());
3148 
3149     std::unique_ptr<armnn::IWorkload> workload
3150             = workloadFactory.CreateWorkload(armnn::LayerType::DepthwiseConvolution2d, data, info);
3151     std::unique_ptr<armnn::IWorkload> workloadRef
3152             = refWorkloadFactory.CreateWorkload(armnn::LayerType::DepthwiseConvolution2d, refData, refInfo);
3153 
3154     outputHandleRef->Allocate();
3155     weightsHandleRef->Allocate();
3156     biasHandleRef->Allocate();
3157     inputHandleRef->Allocate();
3158 
3159     inputHandle->Allocate();
3160     outputHandle->Allocate();
3161 
3162     CopyDataToITensorHandle(inputHandle.get(), input.data());
3163     CopyDataToITensorHandle(inputHandleRef.get(), input.data());
3164     CopyDataToITensorHandle(weightsHandleRef.get(), kernel.data());
3165     CopyDataToITensorHandle(biasHandleRef.get(), bias.data());
3166 
3167     ExecuteWorkload(*workload, memoryManager);
3168 
3169     workloadRef->PostAllocationConfigure();
3170     workloadRef->Execute();
3171 
3172     CopyDataFromITensorHandle(actualOutput.data(), outputHandle.get());
3173     CopyDataFromITensorHandle(expectedOutput.data(), outputHandleRef.get());
3174 
3175     return LayerTestResult<T, 4>(actualOutput,
3176                                  expectedOutput,
3177                                  outputHandle->GetShape(),
3178                                  outputTensorInfo.GetShape());
3179 }
3180 
3181 //
3182 // Explicit template specializations
3183 //
3184 template LayerTestResult<armnn::ResolveType<armnn::DataType::BFloat16>, 4>
3185 Convolution2d3x3Dilation3x3Test<armnn::DataType::BFloat16, armnn::DataType::BFloat16>(
3186     armnn::IWorkloadFactory&,
3187     const armnn::IBackendInternal::IMemoryManagerSharedPtr&,
3188     const armnn::ITensorHandleFactory&,
3189     bool,
3190     armnn::DataLayout);
3191 
3192 template LayerTestResult<armnn::ResolveType<armnn::DataType::Float32>, 4>
3193 Convolution2d3x3Dilation3x3Test<armnn::DataType::Float32, armnn::DataType::Float32>(
3194     armnn::IWorkloadFactory&,
3195     const armnn::IBackendInternal::IMemoryManagerSharedPtr&,
3196     const armnn::ITensorHandleFactory&,
3197     bool,
3198     armnn::DataLayout);
3199 
3200 template LayerTestResult<armnn::ResolveType<armnn::DataType::QAsymmS8>, 4>
3201 Convolution2d3x3Dilation3x3Test<armnn::DataType::QAsymmS8, armnn::DataType::Signed32>(
3202         armnn::IWorkloadFactory&,
3203         const armnn::IBackendInternal::IMemoryManagerSharedPtr&,
3204         const armnn::ITensorHandleFactory&,
3205         bool,
3206         armnn::DataLayout);
3207 
3208 template LayerTestResult<armnn::ResolveType<armnn::DataType::QAsymmU8>, 4>
3209 Convolution2d3x3Dilation3x3Test<armnn::DataType::QAsymmU8, armnn::DataType::Signed32>(
3210     armnn::IWorkloadFactory&,
3211     const armnn::IBackendInternal::IMemoryManagerSharedPtr&,
3212     const armnn::ITensorHandleFactory&,
3213     bool,
3214     armnn::DataLayout);
3215 
3216 template LayerTestResult<armnn::ResolveType<armnn::DataType::QSymmS16>, 4>
3217 Convolution2d3x3Dilation3x3Test<armnn::DataType::QSymmS16, armnn::DataType::Signed32>(
3218     armnn::IWorkloadFactory&,
3219     const armnn::IBackendInternal::IMemoryManagerSharedPtr&,
3220     const armnn::ITensorHandleFactory&,
3221     bool,
3222     armnn::DataLayout);
3223 
3224 template LayerTestResult<armnn::ResolveType<armnn::DataType::Float32>, 4>
3225 Convolution2d2x3x3Dilation3x3Test<armnn::DataType::Float32, armnn::DataType::Float32>(
3226     armnn::IWorkloadFactory&,
3227     const armnn::IBackendInternal::IMemoryManagerSharedPtr&,
3228     const armnn::ITensorHandleFactory&,
3229     bool,
3230     armnn::DataLayout);
3231 
3232 template LayerTestResult<armnn::ResolveType<armnn::DataType::BFloat16>, 4>
3233 Convolution2d2x3x3Dilation3x3Test<armnn::DataType::BFloat16, armnn::DataType::BFloat16>(
3234     armnn::IWorkloadFactory&,
3235     const armnn::IBackendInternal::IMemoryManagerSharedPtr&,
3236     const armnn::ITensorHandleFactory&,
3237     bool,
3238     armnn::DataLayout);
3239 
3240 template LayerTestResult<armnn::ResolveType<armnn::DataType::QAsymmS8>, 4>
3241 Convolution2d2x3x3Dilation3x3Test<armnn::DataType::QAsymmS8, armnn::DataType::Signed32>(
3242         armnn::IWorkloadFactory&,
3243         const armnn::IBackendInternal::IMemoryManagerSharedPtr&,
3244         const armnn::ITensorHandleFactory&,
3245         bool,
3246         armnn::DataLayout);
3247 
3248 template LayerTestResult<armnn::ResolveType<armnn::DataType::QAsymmU8>, 4>
3249 Convolution2d2x3x3Dilation3x3Test<armnn::DataType::QAsymmU8, armnn::DataType::Signed32>(
3250     armnn::IWorkloadFactory&,
3251     const armnn::IBackendInternal::IMemoryManagerSharedPtr&,
3252     const armnn::ITensorHandleFactory&,
3253     bool,
3254     armnn::DataLayout);
3255 
3256 template LayerTestResult<armnn::ResolveType<armnn::DataType::QSymmS16>, 4>
3257 Convolution2d2x3x3Dilation3x3Test<armnn::DataType::QSymmS16, armnn::DataType::Signed32>(
3258     armnn::IWorkloadFactory&,
3259     const armnn::IBackendInternal::IMemoryManagerSharedPtr&,
3260     const armnn::ITensorHandleFactory&,
3261     bool,
3262     armnn::DataLayout);
3263 
3264 template LayerTestResult<armnn::ResolveType<armnn::DataType::BFloat16>, 4>
3265 Convolution2d2x2Dilation2x2Padding2x2Stride3x3Test<armnn::DataType::BFloat16, armnn::DataType::BFloat16>(
3266     armnn::IWorkloadFactory &workloadFactory,
3267     const armnn::IBackendInternal::IMemoryManagerSharedPtr &memoryManager,
3268     const armnn::ITensorHandleFactory& tensorHandleFactory,
3269     bool biasEnabled,
3270     const armnn::DataLayout layout);
3271 
3272 template LayerTestResult<armnn::ResolveType<armnn::DataType::Float32>, 4>
3273 Convolution2d2x2Dilation2x2Padding2x2Stride3x3Test<armnn::DataType::Float32, armnn::DataType::Float32>(
3274     armnn::IWorkloadFactory &workloadFactory,
3275     const armnn::IBackendInternal::IMemoryManagerSharedPtr &memoryManager,
3276     const armnn::ITensorHandleFactory& tensorHandleFactory,
3277     bool biasEnabled,
3278     const armnn::DataLayout layout);
3279 
3280 template LayerTestResult<armnn::ResolveType<armnn::DataType::QAsymmS8>, 4>
3281 Convolution2d2x2Dilation2x2Padding2x2Stride3x3Test<armnn::DataType::QAsymmS8, armnn::DataType::Signed32>(
3282         armnn::IWorkloadFactory &workloadFactory,
3283         const armnn::IBackendInternal::IMemoryManagerSharedPtr &memoryManager,
3284         const armnn::ITensorHandleFactory& tensorHandleFactory,
3285         bool biasEnabled,
3286         const armnn::DataLayout layout);
3287 
3288 template LayerTestResult<armnn::ResolveType<armnn::DataType::QAsymmU8>, 4>
3289 Convolution2d2x2Dilation2x2Padding2x2Stride3x3Test<armnn::DataType::QAsymmU8, armnn::DataType::Signed32>(
3290     armnn::IWorkloadFactory &workloadFactory,
3291     const armnn::IBackendInternal::IMemoryManagerSharedPtr &memoryManager,
3292     const armnn::ITensorHandleFactory& tensorHandleFactory,
3293     bool biasEnabled,
3294     const armnn::DataLayout layout);
3295 
3296 template LayerTestResult<armnn::ResolveType<armnn::DataType::QSymmS16>, 4>
3297 Convolution2d2x2Dilation2x2Padding2x2Stride3x3Test<armnn::DataType::QSymmS16, armnn::DataType::Signed32>(
3298     armnn::IWorkloadFactory &workloadFactory,
3299     const armnn::IBackendInternal::IMemoryManagerSharedPtr &memoryManager,
3300     const armnn::ITensorHandleFactory& tensorHandleFactory,
3301     bool biasEnabled,
3302     const armnn::DataLayout layout);
3303 
3304 template LayerTestResult<armnn::ResolveType<armnn::DataType::BFloat16>, 4>
3305 DepthwiseConvolution2d3x3Dilation3x3Test<armnn::DataType::BFloat16, armnn::DataType::BFloat16>(
3306         armnn::IWorkloadFactory&,
3307         const armnn::IBackendInternal::IMemoryManagerSharedPtr&,
3308         const armnn::ITensorHandleFactory&,
3309         bool,
3310         armnn::DataLayout);
3311 
3312 template LayerTestResult<armnn::ResolveType<armnn::DataType::Float32>, 4>
3313 DepthwiseConvolution2d3x3Dilation3x3Test<armnn::DataType::Float32, armnn::DataType::Float32>(
3314         armnn::IWorkloadFactory&,
3315         const armnn::IBackendInternal::IMemoryManagerSharedPtr&,
3316         const armnn::ITensorHandleFactory&,
3317         bool,
3318         armnn::DataLayout);
3319 
3320 template LayerTestResult<armnn::ResolveType<armnn::DataType::QAsymmS8>, 4>
3321 DepthwiseConvolution2d3x3Dilation3x3Test<armnn::DataType::QAsymmS8, armnn::DataType::Signed32>(
3322         armnn::IWorkloadFactory&,
3323         const armnn::IBackendInternal::IMemoryManagerSharedPtr&,
3324         const armnn::ITensorHandleFactory&,
3325         bool,
3326         armnn::DataLayout);
3327 
3328 template LayerTestResult<armnn::ResolveType<armnn::DataType::QAsymmU8>, 4>
3329 DepthwiseConvolution2d3x3Dilation3x3Test<armnn::DataType::QAsymmU8, armnn::DataType::Signed32>(
3330         armnn::IWorkloadFactory&,
3331         const armnn::IBackendInternal::IMemoryManagerSharedPtr&,
3332         const armnn::ITensorHandleFactory&,
3333         bool,
3334         armnn::DataLayout);
3335 
3336 template LayerTestResult<armnn::ResolveType<armnn::DataType::QSymmS16>, 4>
3337 DepthwiseConvolution2d3x3Dilation3x3Test<armnn::DataType::QSymmS16, armnn::DataType::Signed32>(
3338         armnn::IWorkloadFactory&,
3339         const armnn::IBackendInternal::IMemoryManagerSharedPtr&,
3340         const armnn::ITensorHandleFactory&,
3341         bool,
3342         armnn::DataLayout);
3343 
3344 template LayerTestResult<armnn::ResolveType<armnn::DataType::BFloat16>, 4>
3345 DepthwiseConvolution2d2x3x3Dilation3x3Test<armnn::DataType::BFloat16, armnn::DataType::BFloat16>(
3346         armnn::IWorkloadFactory&,
3347         const armnn::IBackendInternal::IMemoryManagerSharedPtr&,
3348         const armnn::ITensorHandleFactory&,
3349         bool,
3350         armnn::DataLayout);
3351 
3352 template LayerTestResult<armnn::ResolveType<armnn::DataType::Float32>, 4>
3353 DepthwiseConvolution2d2x3x3Dilation3x3Test<armnn::DataType::Float32, armnn::DataType::Float32>(
3354         armnn::IWorkloadFactory&,
3355         const armnn::IBackendInternal::IMemoryManagerSharedPtr&,
3356         const armnn::ITensorHandleFactory&,
3357         bool,
3358         armnn::DataLayout);
3359 
3360 template LayerTestResult<armnn::ResolveType<armnn::DataType::QAsymmS8>, 4>
3361 DepthwiseConvolution2d2x3x3Dilation3x3Test<armnn::DataType::QAsymmS8, armnn::DataType::Signed32>(
3362         armnn::IWorkloadFactory&,
3363         const armnn::IBackendInternal::IMemoryManagerSharedPtr&,
3364         const armnn::ITensorHandleFactory&,
3365         bool,
3366         armnn::DataLayout);
3367 
3368 template LayerTestResult<armnn::ResolveType<armnn::DataType::QAsymmU8>, 4>
3369 DepthwiseConvolution2d2x3x3Dilation3x3Test<armnn::DataType::QAsymmU8, armnn::DataType::Signed32>(
3370         armnn::IWorkloadFactory&,
3371         const armnn::IBackendInternal::IMemoryManagerSharedPtr&,
3372         const armnn::ITensorHandleFactory&,
3373         bool,
3374         armnn::DataLayout);
3375 
3376 template LayerTestResult<armnn::ResolveType<armnn::DataType::QSymmS16>, 4>
3377 DepthwiseConvolution2d2x3x3Dilation3x3Test<armnn::DataType::QSymmS16, armnn::DataType::Signed32>(
3378         armnn::IWorkloadFactory&,
3379         const armnn::IBackendInternal::IMemoryManagerSharedPtr&,
3380         const armnn::ITensorHandleFactory&,
3381         bool,
3382         armnn::DataLayout);
3383 
3384 template LayerTestResult<armnn::ResolveType<armnn::DataType::BFloat16>, 4>
3385 DepthwiseConvolution2dMult4Test<armnn::DataType::BFloat16, armnn::DataType::BFloat16>(
3386         armnn::IWorkloadFactory &workloadFactory,
3387         const armnn::IBackendInternal::IMemoryManagerSharedPtr &memoryManager,
3388         const armnn::ITensorHandleFactory& tensorHandleFactory,
3389         bool biasEnabled,
3390         const armnn::DataLayout layout);
3391 
3392 template LayerTestResult<armnn::ResolveType<armnn::DataType::Float32>, 4>
3393 DepthwiseConvolution2dMult4Test<armnn::DataType::Float32, armnn::DataType::Float32>(
3394         armnn::IWorkloadFactory &workloadFactory,
3395         const armnn::IBackendInternal::IMemoryManagerSharedPtr &memoryManager,
3396         const armnn::ITensorHandleFactory& tensorHandleFactory,
3397         bool biasEnabled,
3398         const armnn::DataLayout layout);
3399 
3400 template LayerTestResult<armnn::ResolveType<armnn::DataType::BFloat16>, 4>
3401 DepthwiseConvolution2dMult2Test<armnn::DataType::BFloat16, armnn::DataType::BFloat16>(
3402         armnn::IWorkloadFactory &workloadFactory,
3403         const armnn::IBackendInternal::IMemoryManagerSharedPtr &memoryManager,
3404         const armnn::ITensorHandleFactory& tensorHandleFactory,
3405         bool biasEnabled,
3406         const armnn::DataLayout layout);
3407 
3408 template LayerTestResult<armnn::ResolveType<armnn::DataType::Float32>, 4>
3409 DepthwiseConvolution2dMult2Test<armnn::DataType::Float32, armnn::DataType::Float32>(
3410         armnn::IWorkloadFactory &workloadFactory,
3411         const armnn::IBackendInternal::IMemoryManagerSharedPtr &memoryManager,
3412         const armnn::ITensorHandleFactory& tensorHandleFactory,
3413         bool biasEnabled,
3414         const armnn::DataLayout layout);
3415 
3416 //
3417 // Implementation functions
3418 //
3419 
3420 LayerTestResult<float, 4> SimpleConvolution2d3x5Test(
3421     armnn::IWorkloadFactory& workloadFactory,
3422     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
3423     const armnn::ITensorHandleFactory& tensorHandleFactory,
3424     bool biasEnabled,
3425     const armnn::DataLayout layout)
3426 {
3427     return SimpleConvolution2d3x5TestCommon<armnn::DataType::Float32, armnn::DataType::Float32>(
3428         workloadFactory, memoryManager, tensorHandleFactory, 0.f, 0, biasEnabled, layout);
3429 }
3430 
3431 LayerTestResult<uint8_t, 4> SimpleConvolution2d3x5Uint8Test(
3432     armnn::IWorkloadFactory& workloadFactory,
3433     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
3434     const armnn::ITensorHandleFactory& tensorHandleFactory,
3435     bool biasEnabled,
3436     const armnn::DataLayout layout)
3437 {
3438     return SimpleConvolution2d3x5TestCommon<armnn::DataType::QAsymmU8, armnn::DataType::Signed32>(
3439         workloadFactory, memoryManager, tensorHandleFactory, 0.5f, 50, biasEnabled, layout);
3440 }
3441 
3442 LayerTestResult<float, 4> SimpleConvolution2d3x3Test(
3443     armnn::IWorkloadFactory& workloadFactory,
3444     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
3445     const armnn::ITensorHandleFactory& tensorHandleFactory,
3446     bool biasEnabled,
3447     const armnn::DataLayout layout)
3448 {
3449     return SimpleConvolution2d3x3TestCommon<armnn::DataType::Float32, armnn::DataType::Float32>(
3450         workloadFactory, memoryManager, tensorHandleFactory, 0.f, 0, biasEnabled, layout);
3451 }
3452 
3453 LayerTestResult<float, 4> SimpleConvolution2d3x3NhwcTest(
3454     armnn::IWorkloadFactory& workloadFactory,
3455     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
3456     const armnn::ITensorHandleFactory& tensorHandleFactory,
3457     bool biasEnabled)
3458 {
3459     return SimpleConvolution2d3x3NhwcTestCommon<armnn::DataType::Float32>(
3460         workloadFactory,
3461         memoryManager,
3462         tensorHandleFactory,
3463         0.f,
3464         0,
3465         biasEnabled,
3466         armnn::DataLayout::NHWC);
3467 }
3468 
3469 LayerTestResult<float, 4> SimpleConvolution2d3x3Stride2x2Test(
3470         armnn::IWorkloadFactory& workloadFactory,
3471         const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
3472         const armnn::ITensorHandleFactory& tensorHandleFactory,
3473         bool biasEnabled,
3474         const armnn::DataLayout layout)
3475 {
3476     return SimpleConvolution2d3x3Stride2x2TestCommon<armnn::DataType::Float32>(
3477         workloadFactory,
3478         memoryManager,
3479         tensorHandleFactory,
3480         0.f,
3481         0,
3482         biasEnabled,
3483         layout);
3484 }
3485 
3486 LayerTestResult<uint8_t, 4> SimpleConvolution2d3x3Uint8Test(
3487     armnn::IWorkloadFactory& workloadFactory,
3488     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
3489     const armnn::ITensorHandleFactory& tensorHandleFactory,
3490     bool biasEnabled,
3491     const armnn::DataLayout layout)
3492 {
3493     return SimpleConvolution2d3x3TestCommon<armnn::DataType::QAsymmU8, armnn::DataType::Signed32>(
3494         workloadFactory, memoryManager, tensorHandleFactory, 0.5f, 50, biasEnabled, layout);
3495 }
3496 
3497 LayerTestResult<int16_t, 4> SimpleConvolution2d3x5QSymm16Test(
3498     armnn::IWorkloadFactory& workloadFactory,
3499     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
3500     const armnn::ITensorHandleFactory& tensorHandleFactory,
3501     bool biasEnabled,
3502     const armnn::DataLayout layout)
3503 {
3504     return SimpleConvolution2d3x5TestCommon<armnn::DataType::QSymmS16, armnn::DataType::Signed32>(
3505         workloadFactory, memoryManager, tensorHandleFactory, 0.5f, 50, biasEnabled, layout);
3506 }
3507 
3508 LayerTestResult<int16_t, 4> SimpleConvolution2d3x3QSymm16Test(
3509     armnn::IWorkloadFactory& workloadFactory,
3510     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
3511     const armnn::ITensorHandleFactory& tensorHandleFactory,
3512     bool biasEnabled,
3513     const armnn::DataLayout layout)
3514 {
3515     return SimpleConvolution2d3x3TestCommon<armnn::DataType::QSymmS16, armnn::DataType::Signed32>(
3516             workloadFactory, memoryManager, tensorHandleFactory, 0.5f, 50, biasEnabled, layout);
3517 }
3518 
3519 LayerTestResult<float, 4> Convolution2dAsymmetricPaddingTest(
3520     armnn::IWorkloadFactory& workloadFactory,
3521     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
3522     const armnn::ITensorHandleFactory& tensorHandleFactory,
3523     armnn::DataLayout layout)
3524 {
3525     return SimpleConvolution2dAsymmetricPaddingTestCommon<armnn::DataType::Float32, armnn::DataType::Float32>(
3526             workloadFactory, memoryManager, tensorHandleFactory, layout, 0.0f, 0);
3527 }
3528 
3529 LayerTestResult<float, 4> Convolution2dAsymmetricPaddingLargerThanHalfKernelSizeTest(
3530     armnn::IWorkloadFactory& workloadFactory,
3531     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
3532     const armnn::ITensorHandleFactory& tensorHandleFactory,
3533     armnn::DataLayout layout)
3534 {
3535     return Convolution2dAsymmetricPaddingLargerThanHalfKernelSizeTestCommon
3536             <armnn::DataType::Float32, armnn::DataType::Float32>(
3537             workloadFactory, memoryManager, tensorHandleFactory, layout, 0.0f, 0);
3538 }
3539 
3540 LayerTestResult<float, 4> Convolution1dTest(
3541     armnn::IWorkloadFactory& workloadFactory,
3542     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
3543     const armnn::ITensorHandleFactory& tensorHandleFactory,
3544     bool biasEnabled)
3545 {
3546     return Convolution1dTestImpl<armnn::DataType::Float32, armnn::DataType::Float32>(
3547             workloadFactory, memoryManager, tensorHandleFactory, 0.0f, 0, biasEnabled);
3548 }
3549 
3550 LayerTestResult<uint8_t, 4> Convolution1dUint8Test(
3551     armnn::IWorkloadFactory& workloadFactory,
3552     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
3553     const armnn::ITensorHandleFactory& tensorHandleFactory,
3554     bool biasEnabled)
3555 {
3556     return Convolution1dTestImpl<armnn::DataType::QAsymmU8, armnn::DataType::Signed32>(
3557             workloadFactory, memoryManager, tensorHandleFactory, 0.1f, 128, biasEnabled);
3558 }
3559 
3560 LayerTestResult<uint8_t, 4> Convolution2dPerAxisQuantTest(
3561     armnn::IWorkloadFactory& workloadFactory,
3562     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
3563     const armnn::ITensorHandleFactory& tensorHandleFactory,
3564     const armnn::DataLayout layout)
3565 {
3566     using namespace armnn;
3567 
3568     const DataType inputType  = DataType::QAsymmU8;
3569     const DataType kernelType = DataType::QSymmS8;
3570     const DataType biasType   = DataType::Signed32;
3571 
    TensorInfo inputInfo ({ 1, 3, 1, 2 }, inputType, 0.5f, 128); // N H W C
    TensorInfo outputInfo({ 1, 3, 1, 3 }, inputType, 1.0f, 128); // N H W C

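    // Per-axis quantization: one kernel scale per output channel, carried on
    // dimension 0 (the output-channel axis) of the [O, H, W, I] weights.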
    const std::vector<float> quantScales{ 0.5f, 0.75f, 1.0f };
    constexpr unsigned int quantDimension = 0;

    TensorInfo kernelInfo({ 3, 1, 1, 2 }, kernelType, quantScales, quantDimension);

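    // Each bias scale is inputScale * kernelScale for its channel:
    // 0.5 * { 0.5, 0.75, 1.0 } = { 0.25, 0.375, 0.5 }.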
    const std::vector<float> biasQuantScales{ 0.25f, 0.375f, 0.5f };
    TensorInfo biasInfo({ 3 }, biasType, biasQuantScales, quantDimension);

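    // With scale 0.5 and offset 128, each { 138, 108 } pair dequantizes to { 5.0, -10.0 }.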
    std::vector<uint8_t> inputData =
    {
        138, 108, 138, 108, 138, 108
    };

    std::vector<int8_t> kernelData =
    {
        1, 2, 1, 2, 1, 2
    };

    std::vector<int32_t> biasData =
    {
        4, 4, 4
    };

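    // Reference results: dequantize the input, convolve with each channel's
    // dequantized weights, add the dequantized bias, then requantize with the
    // output scale 1.0 and offset 128.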
    std::vector<uint8_t> expectedOutputData =
    {
        121, 118, 115, 121, 118, 115, 121, 118, 115
    };

    if (layout == DataLayout::NCHW)
    {
        PermuteTensorNhwcToNchw(inputInfo, inputData);
        PermuteTensorNhwcToNchw(kernelInfo, kernelData);
        PermuteTensorNhwcToNchw(outputInfo, expectedOutputData);
    }

    std::vector<uint8_t> actualOutput(outputInfo.GetNumElements());

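    // 1x1 kernel, stride 1, no padding: each output pixel is a per-channel dot
    // product over the two input channels.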
    Convolution2dDescriptor descriptor;
    descriptor.m_StrideX     = 1;
    descriptor.m_StrideY     = 1;
    descriptor.m_PadLeft     = 0;
    descriptor.m_PadRight    = 0;
    descriptor.m_PadTop      = 0;
    descriptor.m_PadBottom   = 0;
    descriptor.m_BiasEnabled = true;
    descriptor.m_DataLayout  = layout;

    std::unique_ptr<ITensorHandle> inputHandle  = tensorHandleFactory.CreateTensorHandle(inputInfo);
    std::unique_ptr<ITensorHandle> outputHandle = tensorHandleFactory.CreateTensorHandle(outputInfo);
    std::unique_ptr<armnn::ITensorHandle> weightsHandle = tensorHandleFactory.CreateTensorHandle(kernelInfo);
    std::unique_ptr<armnn::ITensorHandle> biasHandle = nullptr;

    WorkloadInfo workloadInfo;

    Convolution2dQueueDescriptor queueDescriptor;
    queueDescriptor.m_Parameters = descriptor;

    AddInputToWorkload(queueDescriptor, workloadInfo, inputInfo, inputHandle.get());
    AddInputToWorkload(queueDescriptor, workloadInfo, kernelInfo, weightsHandle.get());

    if (descriptor.m_BiasEnabled)
    {
        biasHandle = tensorHandleFactory.CreateTensorHandle(biasInfo);
        AddInputToWorkload(queueDescriptor, workloadInfo, biasInfo, biasHandle.get());
    }

    AddOutputToWorkload(queueDescriptor, workloadInfo, outputInfo, outputHandle.get());

    std::unique_ptr<IWorkload> workload = workloadFactory.CreateWorkload(armnn::LayerType::Convolution2d,
                                                                         queueDescriptor,
                                                                         workloadInfo);
    inputHandle->Allocate();
    outputHandle->Allocate();
    weightsHandle->Allocate();

    if (descriptor.m_BiasEnabled)
    {
        biasHandle->Allocate();
        CopyDataToITensorHandle(biasHandle.get(), biasData.data());
    }
    CopyDataToITensorHandle(inputHandle.get(), inputData.data());
    CopyDataToITensorHandle(weightsHandle.get(), kernelData.data());

    ExecuteWorkload(*workload, memoryManager);

    CopyDataFromITensorHandle(actualOutput.data(), outputHandle.get());

    return LayerTestResult<uint8_t, 4>(actualOutput,
                                       expectedOutputData,
                                       outputHandle->GetShape(),
                                       outputInfo.GetShape());
}

LayerTestResult<float,4> CompareConvolution2dTest(
    armnn::IWorkloadFactory& workloadFactory,
    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    armnn::IWorkloadFactory& refWorkloadFactory,
    const armnn::ITensorHandleFactory& tensorHandleFactory,
    const armnn::ITensorHandleFactory& refTensorHandleFactory)
{
    return CompareConvolution2dTestImpl<armnn::DataType::Float32>(
            workloadFactory, memoryManager, refWorkloadFactory, tensorHandleFactory, refTensorHandleFactory);
}

LayerTestResult<float, 4> DepthwiseConvolution2dTest(
    armnn::IWorkloadFactory& workloadFactory,
    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    const armnn::ITensorHandleFactory& tensorHandleFactory,
    bool biasEnabled,
    const armnn::DataLayout layout)
{
    return DepthwiseConvolution2dTestImpl<armnn::DataType::Float32, armnn::DataType::Float32>(
        workloadFactory, memoryManager, tensorHandleFactory, 0.0f, 0, biasEnabled, layout);
}

LayerTestResult<float, 4> DepthwiseConvolution2dDepthNhwcTest(
    armnn::IWorkloadFactory& workloadFactory,
    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    const armnn::ITensorHandleFactory& tensorHandleFactory,
    bool biasEnabled)
{
    return DepthwiseConvolution2dNhwcTestCommon<armnn::DataType::Float32, armnn::DataType::Float32>(
        workloadFactory, memoryManager, tensorHandleFactory, 0.0f, 0, biasEnabled);
}

LayerTestResult<float, 4> DepthwiseConvolution2dDepthMul1Test(
    armnn::IWorkloadFactory& workloadFactory,
    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    const armnn::ITensorHandleFactory& tensorHandleFactory,
    bool biasEnabled,
    const armnn::DataLayout layout)
{
    return DepthwiseConvolution2dDepthMul1TestImpl<armnn::DataType::Float32, armnn::DataType::Float32>(
        workloadFactory, memoryManager, tensorHandleFactory, 0.0f, 0, biasEnabled, layout);
}

LayerTestResult<float, 4> DepthwiseConvolution2dDepthMul64Test(
    armnn::IWorkloadFactory& workloadFactory,
    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    const armnn::ITensorHandleFactory& tensorHandleFactory)
{
    armnn::TensorInfo inputTensorInfo({ 1, 1, 2, 2 }, armnn::DataType::Float32);
    std::vector<float> input = { 1.f, 2.f, 3.f, 4.f };

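    // Depth multiplier of 64 on a single input channel: the same 2x2 kernel is
    // replicated once per output channel.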
    std::vector<float> kernelData;
    std::vector<float> singleDepthKernel{ 1.f, -1.f, -1.f, 1.f };
    for (unsigned int i = 0; i < 64; ++i)
    {
        kernelData.insert(kernelData.end(), singleDepthKernel.begin(), singleDepthKernel.end());
    }
    armnn::TensorInfo kernelTensorInfo({ 64, 1, 2, 2 }, armnn::DataType::Float32);

    // Permute the kernel from [O,1,H,W] to [1,H,W,O], the [1,H,W,I*M] layout
    // used for depthwise weights.
    armnn::PermutationVector permutationVector {3,0,1,2};
    kernelTensorInfo = armnnUtils::Permuted(kernelTensorInfo, permutationVector);
    std::vector<float> kernelPermuted(kernelTensorInfo.GetNumElements());
    armnnUtils::Permute(kernelTensorInfo.GetShape(), permutationVector,
                        kernelData.data(), kernelPermuted.data(),
                        GetDataTypeSize(kernelTensorInfo.GetDataType()));

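    // Each kernel { 1, -1, -1, 1 } applied to the input { 1, 2, 3, 4 } gives
    // 1 - 2 - 3 + 4 = 0, so all 64 output channels are zero.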
    std::vector<float> expectedOutputData(64, 0.f);
    armnn::TensorInfo outputTensorInfo({ 1, 64, 1, 1 }, armnn::DataType::Float32);

    return DepthwiseConvolution2dTestImpl<armnn::DataType::Float32, armnn::DataType::Float32>(
            workloadFactory,
            memoryManager,
            tensorHandleFactory,
            input,
            kernelPermuted,
            std::vector<float>(),
            expectedOutputData,
            inputTensorInfo.GetShape(),
            kernelTensorInfo.GetShape(),
            outputTensorInfo.GetShape(),
            0.f,
            0,
            armnn::DataLayout::NCHW);
}

LayerTestResult<float, 4> DepthwiseConvolution2dAsymmetricTest(
    armnn::IWorkloadFactory& workloadFactory,
    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    const armnn::ITensorHandleFactory& tensorHandleFactory,
    bool biasEnabled,
    const armnn::DataLayout layout)
{
    return DepthwiseConvolution2dAsymmetricTestCommon<armnn::DataType::Float32, armnn::DataType::Float32>(
        workloadFactory, memoryManager, tensorHandleFactory, 0.0f, 0, biasEnabled, layout);
}

LayerTestResult<uint8_t, 4> DepthwiseConvolution2dUint8Test(
    armnn::IWorkloadFactory& workloadFactory,
    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    const armnn::ITensorHandleFactory& tensorHandleFactory,
    bool biasEnabled,
    const armnn::DataLayout layout)
{
    return DepthwiseConvolution2dTestImpl<armnn::DataType::QAsymmU8, armnn::DataType::Signed32>(
        workloadFactory, memoryManager, tensorHandleFactory, 0.5f, 50, biasEnabled, layout);
}

LayerTestResult<uint8_t, 4> DepthwiseConvolution2dDepthMul1Uint8Test(
    armnn::IWorkloadFactory& workloadFactory,
    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    const armnn::ITensorHandleFactory& tensorHandleFactory,
    bool biasEnabled,
    const armnn::DataLayout layout)
{
    return DepthwiseConvolution2dDepthMul1TestImpl<armnn::DataType::QAsymmU8, armnn::DataType::Signed32>(
        workloadFactory, memoryManager, tensorHandleFactory, 0.5f, 50, biasEnabled, layout);
}

LayerTestResult<float, 4> SimpleDepthwiseConvolution2d3x3Dilation3x3NhwcTest(
        armnn::IWorkloadFactory& workloadFactory,
        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
        const armnn::ITensorHandleFactory& tensorHandleFactory)
{
    return SimpleDepthwiseConvolution2d3x3Dilation3x3NhwcTestCommon<armnn::DataType::Float32, armnn::DataType::Float32>(
            workloadFactory,
            memoryManager,
            tensorHandleFactory,
            0.f,
            0,
            false);
}

LayerTestResult<int16_t, 4> DepthwiseConvolution2dInt16Test(
        armnn::IWorkloadFactory& workloadFactory,
        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
        const armnn::ITensorHandleFactory& tensorHandleFactory,
        bool biasEnabled,
        const armnn::DataLayout layout)
{
    return DepthwiseConvolution2dTestImpl<armnn::DataType::QSymmS16, armnn::DataType::Signed32>(
        workloadFactory, memoryManager, tensorHandleFactory, 0.5f, 50, biasEnabled, layout);
}

LayerTestResult<int16_t, 4> DepthwiseConvolution2dDepthMul1Int16Test(
        armnn::IWorkloadFactory& workloadFactory,
        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
        const armnn::ITensorHandleFactory& tensorHandleFactory,
        bool biasEnabled,
        const armnn::DataLayout layout)
{
    return DepthwiseConvolution2dDepthMul1TestImpl<armnn::DataType::QSymmS16, armnn::DataType::Signed32>(
        workloadFactory, memoryManager, tensorHandleFactory, 0.5f, 50, biasEnabled, layout);
}

LayerTestResult<uint8_t, 4> DepthwiseConvolution2dPerAxisQuantTest(
        armnn::IWorkloadFactory& workloadFactory,
        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
        const armnn::ITensorHandleFactory& tensorHandleFactory,
        const armnn::DataLayout layout)
{
    using namespace armnn;

    const DataType inputType  = DataType::QAsymmU8;
    const DataType kernelType = DataType::QSymmS8;
    const DataType biasType   = DataType::Signed32;

    TensorInfo inputInfo ({ 1, 3, 3, 2 }, inputType, 0.5f, 128); // N H W C
    TensorInfo outputInfo({ 1, 2, 2, 4 }, inputType, 1.0f, 128); // N H W C

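    // Per-axis quantization along dimension 3, the channel (I*M) axis of the
    // [1, H, W, I*M] kernel: one scale per output channel.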
    const std::vector<float> quantScales{ 1.0f, 0.5f, 1.0f, 0.5f };
    const unsigned int quantDimension = 3;
    TensorInfo kernelInfo({ 1, 2, 2, 4 }, kernelType, quantScales, quantDimension); // [1, H, W, I*M]

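    // Each bias scale is inputScale * kernelScale for its channel:
    // 0.5 * { 1.0, 0.5, 1.0, 0.5 } = { 0.5, 0.25, 0.5, 0.25 }.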
    const std::vector<float> biasQuantScales{ 0.5f, 0.25f, 0.5f, 0.25f };
    constexpr unsigned int biasQuantDimension = 0;
    TensorInfo biasInfo({ 4 }, biasType, biasQuantScales, biasQuantDimension);

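    // With scale 0.5 and offset 128, every { 129, 130 } pixel dequantizes to { 0.5, 1.0 }.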
    std::vector<uint8_t> inputData =
    {
        129, 130,
        129, 130,
        129, 130,
        129, 130,
        129, 130,
        129, 130,
        129, 130,
        129, 130,
        129, 130
    };

    std::vector<int8_t> kernelData =
    {
        1, 1, 1, 1,
        1, 1, 1, 1,
        1, 1, 1, 1,
        1, 1, 1, 1
    };

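    // When running NCHW on the accelerated backends, pre-permute the kernel
    // data from [1, H, W, I*M] to [1, I*M, H, W] to match the order those
    // backends read depthwise weights in.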
    if (workloadFactory.GetBackendId() == armnn::BackendId("GpuAcc") ||
        workloadFactory.GetBackendId() == armnn::BackendId("CpuAcc"))
    {
        if (layout == armnn::DataLayout::NCHW)
        {
            std::vector<int8_t> tmp(kernelData.size());
            kernelInfo.SetShape(armnnUtils::Permuted(kernelInfo.GetShape(), {0, 2, 3, 1}));
            armnnUtils::Permute(kernelInfo.GetShape(), {0, 2, 3, 1}, kernelData.data(), tmp.data(), sizeof(int8_t));
            kernelData = tmp;
        }
    }

    std::vector<int32_t> biasData =
    {
        4, 4, 4, 4
    };

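    // Per pixel the four outputs dequantize to { 4.0, 2.0, 6.0, 3.0 }: a 2x2
    // window of all-ones weights over the constant input, scaled per channel,
    // plus the dequantized bias. Requantizing with scale 1.0 and offset 128
    // gives { 132, 130, 134, 131 }.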
    std::vector<uint8_t> expectedOutputData =
    {
        132, 130, 134, 131,
        132, 130, 134, 131,
        132, 130, 134, 131,
        132, 130, 134, 131
    };

    if (layout == DataLayout::NCHW)
    {
        PermuteTensorNhwcToNchw(inputInfo, inputData);
        PermuteTensorNhwcToNchw(outputInfo, expectedOutputData);
    }

    std::vector<uint8_t> actualOutput(outputInfo.GetNumElements());

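    // A 2x2 kernel at stride 1 with no padding over a 3x3 input yields a 2x2
    // output; depth multiplier 2 expands the 2 input channels to 4 output channels.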
    DepthwiseConvolution2dDescriptor descriptor;
    descriptor.m_StrideX     = 1;
    descriptor.m_StrideY     = 1;
    descriptor.m_PadLeft     = 0;
    descriptor.m_PadRight    = 0;
    descriptor.m_PadTop      = 0;
    descriptor.m_PadBottom   = 0;
    descriptor.m_DilationX   = 1;
    descriptor.m_DilationY   = 1;
    descriptor.m_BiasEnabled = true;
    descriptor.m_DataLayout  = layout;

    std::unique_ptr<ITensorHandle> inputHandle  = tensorHandleFactory.CreateTensorHandle(inputInfo);
    std::unique_ptr<ITensorHandle> weightsHandle = tensorHandleFactory.CreateTensorHandle(kernelInfo);
    std::unique_ptr<ITensorHandle> biasHandle = tensorHandleFactory.CreateTensorHandle(biasInfo);
    std::unique_ptr<ITensorHandle> outputHandle = tensorHandleFactory.CreateTensorHandle(outputInfo);

    DepthwiseConvolution2dQueueDescriptor queueDescriptor;
    WorkloadInfo workloadInfo;

    AddInputToWorkload(queueDescriptor, workloadInfo, inputInfo, inputHandle.get());
    AddInputToWorkload(queueDescriptor, workloadInfo, kernelInfo, weightsHandle.get());
    AddOutputToWorkload(queueDescriptor, workloadInfo, outputInfo, outputHandle.get());
    AddInputToWorkload(queueDescriptor, workloadInfo, biasInfo, biasHandle.get());

    AllocateAndCopyDataToITensorHandle(weightsHandle.get(), kernelData.data());
    AllocateAndCopyDataToITensorHandle(biasHandle.get(), biasData.data());

    queueDescriptor.m_Parameters = descriptor;

    std::unique_ptr<IWorkload> workload = workloadFactory.CreateWorkload(armnn::LayerType::DepthwiseConvolution2d,
                                                                         queueDescriptor,
                                                                         workloadInfo);
    inputHandle->Allocate();
    outputHandle->Allocate();

    CopyDataToITensorHandle(inputHandle.get(), inputData.data());

    ExecuteWorkload(*workload, memoryManager);

    CopyDataFromITensorHandle(actualOutput.data(), outputHandle.get());

    return LayerTestResult<uint8_t, 4>(actualOutput,
                                       expectedOutputData,
                                       outputHandle->GetShape(),
                                       outputInfo.GetShape());
}

LayerTestResult<float, 4> CompareDepthwiseConvolution2dFloatTest(
    armnn::IWorkloadFactory& workloadFactory,
    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    armnn::IWorkloadFactory& refWorkloadFactory,
    const armnn::ITensorHandleFactory& tensorHandleFactory,
    const armnn::ITensorHandleFactory& refTensorHandleFactory,
    const armnn::DataLayout layout)
{
    return CompareDepthwiseConvolution2dTestImpl<armnn::DataType::Float32>(
        workloadFactory, memoryManager, refWorkloadFactory, tensorHandleFactory, refTensorHandleFactory, layout);
}

LayerTestResult<uint8_t, 4> CompareDepthwiseConvolution2dUint8Test(
    armnn::IWorkloadFactory& workloadFactory,
    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    armnn::IWorkloadFactory& refWorkloadFactory,
    const armnn::ITensorHandleFactory& tensorHandleFactory,
    const armnn::ITensorHandleFactory& refTensorHandleFactory,
    const armnn::DataLayout layout)
{
    return CompareDepthwiseConvolution2dTestImpl<armnn::DataType::QAsymmU8>(
        workloadFactory, memoryManager, refWorkloadFactory, tensorHandleFactory, refTensorHandleFactory, layout);
}