//
// Copyright © 2017,2022 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//

#include "Conv2dTestImpl.hpp"

#include <armnnUtils/QuantizeHelper.hpp>
#include <armnnUtils/TensorUtils.hpp>

#include <armnn/utility/IgnoreUnused.hpp>
#include <armnn/utility/NumericCast.hpp>
#include <armnnUtils/DataLayoutIndexed.hpp>
#include <armnnUtils/Permute.hpp>

#include <armnn/backends/TensorHandle.hpp>

#include <armnnTestUtils/DataLayoutUtils.hpp>
#include <armnnTestUtils/TensorCopyUtils.hpp>
#include <armnnTestUtils/WorkloadTestUtils.hpp>

#include <armnnTestUtils/TensorHelpers.hpp>

#include <string>

//
// Static data
//

// 2-channel bias used by a number of Conv2d tests.
static std::vector<float> Bias2({0, 2});

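// 4-channel bias used by Conv2d tests that produce four output channels.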
static std::vector<float> Bias4({1, 2, 3, 4});

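// 8-channel bias used by Conv2d tests that produce eight output channels.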
static std::vector<float> Bias8({1, 2, 3, 4, 1, 2, 3, 4});

// 3-channel 16x8 image used as common input data for a number of Conv2d tests.
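// Channel 0 is 0.5 everywhere except its second row (all zeros), channel 1 has a
// single vertical line of 1s in its third column, and channel 2 is all -1s.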
static std::vector<float> ConvInput3x8x16({
    0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f,
    0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
    0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f,
    0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f,
    0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f,
    0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f,
    0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f,
    0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f,
    0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1
});

using namespace armnnUtils;

//
// Helper templates
//

// Helper template that returns either Bias2 or an empty vector depending on whether bias is enabled.
template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
std::vector<T> GetBias2(bool biasEnabled, float qScale)
{
    if(biasEnabled)
    {
        return QuantizedVector<T>(Bias2, qScale, 0);
    }
    else
    {
        return std::vector<T>();
    }
}

// Helper template that returns either Bias4 or an empty vector depending on whether bias is enabled.
template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
std::vector<T> GetBias4(bool biasEnabled, float qScale)
{
    if(biasEnabled)
    {
        return QuantizedVector<T>(Bias4, qScale, 0);
    }
    else
    {
        return std::vector<T>();
    }
}

// Helper template that returns either Bias8 or an empty vector depending on whether bias is enabled.
template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
std::vector<T> GetBias8(bool biasEnabled, float qScale)
{
    if(biasEnabled)
    {
        return QuantizedVector<T>(Bias8, qScale, 0);
    }
    else
    {
        return std::vector<T>();
    }
}

// Helper template that returns Bias2, Bias4 or Bias8 (matching the number of output channels),
// or an empty vector when bias is disabled.
template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
std::vector<T> GetBias(bool biasEnabled, float qScale, armnn::TensorInfo outputInfo, armnn::DataLayout layout)
{
    const armnnUtils::DataLayoutIndexed dataLayoutIndexed(layout);
    const unsigned int channelsIndex = dataLayoutIndexed.GetChannelsIndex();
    const unsigned int outputChannels = outputInfo.GetShape()[channelsIndex];

    switch (outputChannels)
    {
        case 2:
        default:
        {
            return GetBias2<ArmnnType>(biasEnabled, qScale);
        }
        case 4:
        {
            return GetBias4<ArmnnType>(biasEnabled, qScale);
        }
        case 8:
        {
            return GetBias8<ArmnnType>(biasEnabled, qScale);
        }
    }
}

//
// Implementation templates
//

// Mapping from input type to bias type for fully connected layers.
// float => float, uint8_t => int32_t
template<typename T>
struct FullyConnectedBiasTypeForInputType;

template<>
struct FullyConnectedBiasTypeForInputType<float>
{
    using Type = float;
};

template<>
struct FullyConnectedBiasTypeForInputType<uint8_t>
{
    using Type = int32_t;
};

// Modifies a std::vector in-place using a specified bias.
template<typename T, typename B>
void ApplyBias(std::vector<T>& v, float vScale, int32_t vOffset,
               const std::vector<B>& bias, float bScale, int32_t bOffset, uint32_t w, uint32_t h)
{
    ARMNN_ASSERT_MSG((armnn::IsQuantizedType<T>() && vScale != 0.0f) || (!armnn::IsQuantizedType<T>()),
                     "Invalid type and parameter combination.");
    ARMNN_ASSERT_MSG((armnn::IsQuantizedType<B>() && bScale != 0.0f) || (!armnn::IsQuantizedType<B>()),
                     "Invalid type and parameter combination.");

    // Note we need to dequantize and re-quantize the image value and the bias.
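    // For a quantized value q with scale s and offset o the real value is r = s * (q - o);
    // re-quantizing after adding a real-valued bias gives q' = round((r + rBias) / s) + o.
    // For example, with s = 0.5 and o = 128: q = 130 -> r = 1.0, and adding rBias = 0.5
    // re-quantizes to round(1.5 / 0.5) + 128 = 131.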
    for (uint32_t i = 0; i < bias.size(); ++i)
    {
        float dBias = SelectiveDequantize(bias[i], bScale, bOffset);
        for (uint32_t y = 0; y < h; ++y)
        {
            for (uint32_t x = 0; x < w; ++x)
            {
                uint32_t offset = (i * h + y) * w + x;
                ARMNN_ASSERT(offset < v.size());
                T& outRef = v[offset];
                float dOutput = SelectiveDequantize(outRef, vScale, vOffset);
                outRef = SelectiveQuantize<T>(dOutput + dBias, vScale, vOffset);
            }
        }
    }
}

//
// Convolution2d implementations
//

template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType,
         typename T = armnn::ResolveType<ArmnnType>, typename B = armnn::ResolveType<ArmnnBType>>
LayerTestResult<T, 4> SimpleConvolution2dTestImpl(
    armnn::IWorkloadFactory& workloadFactory,
    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    const armnn::ITensorHandleFactory& tensorHandleFactory,
    const std::vector<T>& originalInput,
    const std::vector<T>& originalKernel,
    const std::vector<B>& bias,
    const std::vector<T>& originalOutputExpected,
    const armnn::TensorShape& originalInputShape,
    const armnn::TensorShape& originalKernelShape,
    const armnn::TensorShape& originalOutputExpectedShape,
    float qScale,
    int32_t qOffset,
    const armnn::DataLayout layout = armnn::DataLayout::NCHW,
    uint32_t padLeft = 0,
    uint32_t padTop = 0,
    uint32_t padRight = 0,
    uint32_t padBottom = 0,
    uint32_t strideX = 1,
    uint32_t strideY = 1,
    uint32_t dilationX = 1,
    uint32_t dilationY = 1)
{
    armnn::IgnoreUnused(memoryManager);
    unsigned int inputHeight = armnn::numeric_cast<unsigned int>(originalInputShape[2]);
    unsigned int inputWidth = armnn::numeric_cast<unsigned int>(originalInputShape[3]);
    unsigned int inputChannels = armnn::numeric_cast<unsigned int>(originalInputShape[1]);
    unsigned int inputNum = armnn::numeric_cast<unsigned int>(originalInputShape[0]);

    unsigned int outputHeight = armnn::numeric_cast<unsigned int>(originalOutputExpectedShape[2]);
    unsigned int outputWidth = armnn::numeric_cast<unsigned int>(originalOutputExpectedShape[3]);
    unsigned int outputChannels = armnn::numeric_cast<unsigned int>(originalOutputExpectedShape[1]);
    unsigned int outputNum = armnn::numeric_cast<unsigned int>(originalOutputExpectedShape[0]);

    unsigned int kernelHeight = armnn::numeric_cast<unsigned int>(originalKernelShape[2]);
    unsigned int kernelWidth = armnn::numeric_cast<unsigned int>(originalKernelShape[3]);
    unsigned int kernelChannels = armnn::numeric_cast<unsigned int>(originalKernelShape[1]);
    unsigned int kernelDepthMul = armnn::numeric_cast<unsigned int>(originalKernelShape[0]);

    bool biasEnabled = bias.size() > 0;

    // This function currently assumes 1 batch of input/output (and duplicates this into 2 batches).
    ARMNN_ASSERT(inputNum == 1);
    ARMNN_ASSERT(outputNum == 1);

    // If a bias is used, its size must equal the number of output channels.
    ARMNN_ASSERT(!biasEnabled || bias.size() == outputChannels);

    // Note these tensors will use two (identical) batches.
    armnn::TensorInfo inputTensorInfo =
        armnnUtils::GetTensorInfo(2*inputNum, inputChannels, inputHeight, inputWidth, layout, ArmnnType);
    armnn::TensorInfo outputTensorInfo =
        armnnUtils::GetTensorInfo(2*outputNum, outputChannels, outputHeight, outputWidth, layout, ArmnnType);
    armnn::TensorInfo kernelDesc =
        armnnUtils::GetTensorInfo(kernelDepthMul, kernelChannels, kernelHeight, kernelWidth, layout, ArmnnType);
    armnn::TensorInfo biasDesc({static_cast<unsigned int>(bias.size())}, ArmnnBType);

    // Set quantization parameters if the requested type is a quantized type.
    if(armnn::IsQuantizedType<T>())
    {
        inputTensorInfo.SetQuantizationScale(qScale);
        inputTensorInfo.SetQuantizationOffset(qOffset);
        outputTensorInfo.SetQuantizationScale(qScale);
        outputTensorInfo.SetQuantizationOffset(qOffset);
        kernelDesc.SetQuantizationScale(qScale);
        kernelDesc.SetQuantizationOffset(qOffset);
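        // The bias scale follows the common convention biasScale = inputScale * weightScale
        // (input and weights share qScale here), so quantized bias values add directly into
        // the accumulator without rescaling.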
        biasDesc.SetQuantizationScale(qScale*qScale);
        biasDesc.SetQuantizationOffset(0);
    }

    // Construct input data - two batches of the same input image.
    std::vector<T> inputImage;
    inputImage.assign(originalInput.data(), originalInput.data() + 1*inputChannels*inputHeight*inputWidth);
    std::vector<T> inputData;
    inputData.insert(inputData.end(), inputImage.begin(), inputImage.end());
    inputData.insert(inputData.end(), inputImage.begin(), inputImage.end());

    // At this point, permute the input data if required.
    const armnn::PermutationVector NCHWToNHWC = { 0, 3, 1, 2 };
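    // armnn::PermutationVector maps source dimension i to destination dimension mappings[i]:
    // here N->0, C->3, H->1, W->2, i.e. NCHW -> NHWC.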
    if (layout == armnn::DataLayout::NHWC)
    {
        std::vector<T> tmp(inputData.size());
        armnnUtils::Permute(inputTensorInfo.GetShape(), NCHWToNHWC, inputData.data(), tmp.data(), sizeof(T));
        inputData = tmp;
    }

    std::vector<T> outputImage;
    outputImage.assign(originalOutputExpected.data(),
                       originalOutputExpected.data() + outputChannels*outputHeight*outputWidth);

    // Apply bias to output image if it is enabled.
    if(biasEnabled)
    {
        std::vector<T> biasV;
        biasV.assign(bias.data(), bias.data() + outputChannels);
        ApplyBias(outputImage, outputTensorInfo.GetQuantizationScale(), outputTensorInfo.GetQuantizationOffset(),
                  biasV, biasDesc.GetQuantizationScale(), biasDesc.GetQuantizationOffset(),
                  outputWidth, outputHeight);
    }

    // Data will be copied from outputHandle.
    std::vector<T> actualOutput(outputTensorInfo.GetNumElements());

    // Construct expected output data - two identical images.
    std::vector<T> expectedOutput;
    expectedOutput.insert(expectedOutput.end(), outputImage.begin(), outputImage.end());
    expectedOutput.insert(expectedOutput.end(), outputImage.begin(), outputImage.end());

    // At this point, permute the expected output if required.
    if (layout == armnn::DataLayout::NHWC)
    {
        std::vector<T> tmp(expectedOutput.size());
        armnnUtils::Permute(outputTensorInfo.GetShape(), NCHWToNHWC, expectedOutput.data(), tmp.data(), sizeof(T));
        expectedOutput = tmp;
    }

    std::unique_ptr<armnn::ITensorHandle> inputHandle = tensorHandleFactory.CreateTensorHandle(inputTensorInfo);
    std::unique_ptr<armnn::ITensorHandle> outputHandle = tensorHandleFactory.CreateTensorHandle(outputTensorInfo);
    std::unique_ptr<armnn::ITensorHandle> weightsHandle = tensorHandleFactory.CreateTensorHandle(kernelDesc);

    armnn::Convolution2dQueueDescriptor data;
    armnn::WorkloadInfo info;

    // Permute the kernel if necessary.
    std::vector<T> kernel = originalKernel;
    if (layout == armnn::DataLayout::NHWC)
    {
        armnnUtils::Permute(kernelDesc.GetShape(), NCHWToNHWC, originalKernel.data(), kernel.data(), sizeof(T));
    }

    AddInputToWorkload(data, info, inputTensorInfo, inputHandle.get());
    AddInputToWorkload(data, info, kernelDesc, weightsHandle.get());
    AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get());

    std::unique_ptr<armnn::ITensorHandle> biasHandle = nullptr;
    if (biasEnabled)
    {
        biasHandle = tensorHandleFactory.CreateTensorHandle(biasDesc);
        AddInputToWorkload(data, info, biasDesc, biasHandle.get());
    }
    data.m_Parameters.m_StrideX = strideX;
    data.m_Parameters.m_StrideY = strideY;
    data.m_Parameters.m_PadLeft = padLeft;
    data.m_Parameters.m_PadRight = padRight;
    data.m_Parameters.m_PadTop = padTop;
    data.m_Parameters.m_PadBottom = padBottom;
    data.m_Parameters.m_BiasEnabled = biasEnabled;
    data.m_Parameters.m_DataLayout = layout;
    data.m_Parameters.m_DilationX = dilationX;
    data.m_Parameters.m_DilationY = dilationY;

    std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateWorkload(armnn::LayerType::Convolution2d,
                                                                                data,
                                                                                info);
    inputHandle->Allocate();
    outputHandle->Allocate();
    weightsHandle->Allocate();

    if (biasEnabled)
    {
        biasHandle->Allocate();
        CopyDataToITensorHandle(biasHandle.get(), bias.data());
    }

    CopyDataToITensorHandle(inputHandle.get(), inputData.data());
    CopyDataToITensorHandle(weightsHandle.get(), kernel.data());

    ExecuteWorkload(*workload, memoryManager);

    CopyDataFromITensorHandle(actualOutput.data(), outputHandle.get());

    return LayerTestResult<T, 4>(actualOutput,
                                 expectedOutput,
                                 outputHandle->GetShape(),
                                 outputTensorInfo.GetShape());
}

template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType,
         typename T = armnn::ResolveType<ArmnnType>, typename B = armnn::ResolveType<ArmnnBType>,
         armnn::DataType OutType = ArmnnType, typename O = armnn::ResolveType<OutType>>
LayerTestResult<O, 4> SimpleConvolution2dNhwcTestImpl(
    armnn::IWorkloadFactory& workloadFactory,
    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    const armnn::ITensorHandleFactory& tensorHandleFactory,
    const std::vector<T>& input,
    const std::vector<T>& kernel,
    const std::vector<B>& bias,
    const std::vector<O>& outputExpected,
    const armnn::TensorShape& inputShape,
    const armnn::TensorShape& kernelShape,
    const armnn::TensorShape& outputExpectedShape,
    const armnn::DataLayout dataLayout,
    float qScale,
    int32_t qOffset,
    uint32_t padLeft = 1,
    uint32_t padTop = 1,
    uint32_t padRight = 1,
    uint32_t padBottom = 1,
    uint32_t strideX = 1,
    uint32_t strideY = 1)
{
    armnn::IgnoreUnused(qScale, qOffset);
    unsigned int inputNum = armnn::numeric_cast<unsigned int>(inputShape[0]);
    unsigned int inputChannels = armnn::numeric_cast<unsigned int>(inputShape[3]);
    unsigned int inputHeight = armnn::numeric_cast<unsigned int>(inputShape[1]);
    unsigned int inputWidth = armnn::numeric_cast<unsigned int>(inputShape[2]);

    unsigned int kernelChanMul = armnn::numeric_cast<unsigned int>(kernelShape[0]);
    unsigned int kernelChannels = armnn::numeric_cast<unsigned int>(kernelShape[3]);
    unsigned int kernelHeight = armnn::numeric_cast<unsigned int>(kernelShape[1]);
    unsigned int kernelWidth = armnn::numeric_cast<unsigned int>(kernelShape[2]);

    unsigned int outputNum = armnn::numeric_cast<unsigned int>(outputExpectedShape[0]);
    unsigned int outputChannels = armnn::numeric_cast<unsigned int>(outputExpectedShape[3]);
    unsigned int outputHeight = armnn::numeric_cast<unsigned int>(outputExpectedShape[1]);
    unsigned int outputWidth = armnn::numeric_cast<unsigned int>(outputExpectedShape[2]);

    bool biasEnabled = bias.size() > 0;

    // Creates the tensors.
    armnn::TensorInfo inputTensorInfo({inputNum, inputHeight, inputWidth, inputChannels}, ArmnnType);
    armnn::TensorInfo outputTensorInfo({outputNum, outputHeight, outputWidth, outputChannels},
                                       OutType);
    armnn::TensorInfo kernelDesc({kernelChanMul, kernelHeight, kernelWidth, kernelChannels}, ArmnnType);
    armnn::TensorInfo biasDesc({static_cast<unsigned int>(bias.size())}, ArmnnBType);

    // Construct the input data.
    std::vector<T> inputData;
    inputData.assign(input.data(), input.data() + inputHeight*inputWidth*inputChannels);

    // Construct the output data, with bias applied, as appropriate.
    std::vector<O> outputData;
    outputData.assign(outputExpected.data(), outputExpected.data() + outputHeight*outputWidth*outputChannels);

    std::vector<O> actualOutput(outputTensorInfo.GetNumElements());

    std::unique_ptr<armnn::ITensorHandle> inputHandle = tensorHandleFactory.CreateTensorHandle(inputTensorInfo);
    std::unique_ptr<armnn::ITensorHandle> outputHandle = tensorHandleFactory.CreateTensorHandle(outputTensorInfo);
    std::unique_ptr<armnn::ITensorHandle> weightsHandle = tensorHandleFactory.CreateTensorHandle(kernelDesc);
    std::unique_ptr<armnn::ITensorHandle> biasHandle = nullptr;

    armnn::Convolution2dQueueDescriptor data;

    data.m_Parameters.m_StrideX = strideX;
    data.m_Parameters.m_StrideY = strideY;
    data.m_Parameters.m_PadLeft = padLeft;
    data.m_Parameters.m_PadRight = padRight;
    data.m_Parameters.m_PadTop = padTop;
    data.m_Parameters.m_PadBottom = padBottom;
    data.m_Parameters.m_BiasEnabled = biasEnabled;
    data.m_Parameters.m_DataLayout = dataLayout;

    armnn::WorkloadInfo info;
    AddInputToWorkload(data, info, inputTensorInfo, inputHandle.get());
    AddInputToWorkload(data, info, kernelDesc, weightsHandle.get());
    AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get());

    if (biasEnabled)
    {
        biasHandle = tensorHandleFactory.CreateTensorHandle(biasDesc);
        AddInputToWorkload(data, info, biasDesc, biasHandle.get());
    }

    std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateWorkload(armnn::LayerType::Convolution2d,
                                                                                data,
                                                                                info);
    inputHandle->Allocate();
    outputHandle->Allocate();
    weightsHandle->Allocate();

    if (biasEnabled)
    {
        biasHandle->Allocate();
        CopyDataToITensorHandle(biasHandle.get(), bias.data());
    }

    CopyDataToITensorHandle(inputHandle.get(), inputData.data());
    CopyDataToITensorHandle(weightsHandle.get(), kernel.data());

    ExecuteWorkload(*workload, memoryManager);

    CopyDataFromITensorHandle(actualOutput.data(), outputHandle.get());

    return LayerTestResult<O, 4>(actualOutput,
                                 outputData,
                                 outputHandle->GetShape(),
                                 outputTensorInfo.GetShape());
}

template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType, typename T = armnn::ResolveType<ArmnnType>>
LayerTestResult<T,4> Convolution1dTestImpl(
    armnn::IWorkloadFactory& workloadFactory,
    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    const armnn::ITensorHandleFactory& tensorHandleFactory,
    float qScale,
    int32_t qOffset,
    bool biasEnabled)
{
    using B = armnn::ResolveType<ArmnnBType>;
    // Until we have a specialist 1D convolution layer, we can fake one using
    // 2D convolution with the final dimension set to 1.
    // I don't anticipate this being particularly slow, given that convolution is implemented
    // as a matrix multiplication, at which point dimension doesn't matter.
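    // For example, a 1D input of shape [N, C, L] maps to a 2D input of shape [N, C, L, 1],
    // and a 1D kernel of length K becomes a Kx1 2D kernel, so the convolution slides only
    // along the L dimension.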

    unsigned int batchSize = 1;
    unsigned int inputChannels = 2;
    unsigned int outputChannels = 3;
    unsigned int inputSize = 5; // The 1D size (could view as 'width' or 'height').
    unsigned int kernelSize = 3;
    unsigned int padSize = 2;
    unsigned int stride = 1;
    unsigned int outputSize = 7; // (inputSize + 2 * padSize - kernelSize) / stride + 1 = (5 + 4 - 3) / 1 + 1.

    armnn::TensorInfo inputInfo({batchSize, inputChannels, inputSize, 1}, ArmnnType);
    armnn::TensorInfo outputInfo({batchSize, outputChannels, outputSize, 1}, ArmnnType);
    armnn::TensorInfo kernelInfo({outputChannels, inputChannels, kernelSize, 1}, ArmnnType);
    armnn::TensorInfo biasInfo({outputChannels}, ArmnnBType);

    // Set quantization parameters if the requested type is a quantized type.
    if(armnn::IsQuantizedType<T>())
    {
        inputInfo.SetQuantizationScale(qScale);
        inputInfo.SetQuantizationOffset(qOffset);
        outputInfo.SetQuantizationScale(qScale);
        outputInfo.SetQuantizationOffset(qOffset);
        kernelInfo.SetQuantizationScale(qScale);
        kernelInfo.SetQuantizationOffset(qOffset);
        biasInfo.SetQuantizationScale(inputInfo.GetQuantizationScale()*kernelInfo.GetQuantizationScale());
        biasInfo.SetQuantizationOffset(0);
    }

    std::vector<T> inputData = QuantizedVector<T>(
        {
            5.0f, -2.0f, 2.5f, 0.0f, 1.0f,
            -3.0f, 3.2f, 5.0f, 2.0f, 3.0f,
        },
        inputInfo.GetQuantizationScale(),
        inputInfo.GetQuantizationOffset());

    std::vector<T> kernelData = QuantizedVector<T>(
        {
            1.0f, 0.0f, 0.0f,
            0.0f, 2.0f, -1.5f,

            0.0f, 0.0f, 0.0f,
            0.2f, 0.2f, 0.2f,

            0.5f, 0.0f, 0.5f,
            0.0f, -1.0f, 0.0f
        },
        kernelInfo.GetQuantizationScale(),
        kernelInfo.GetQuantizationOffset());

    std::vector<B> biasData =
        QuantizedVector<B>({ 1.0f, 0.0f, 0.0f }, biasInfo.GetQuantizationScale(), biasInfo.GetQuantizationOffset());

    std::vector<T> outputData = QuantizedVector<T>(
        {
            4.5f, -10.8f, 5.0f + 6.4f - 7.5f, -2.0f + 10.0f -3.0f, 2.5f + 4.0f - 4.5f, 6.0f, 1.0f,
            -0.6f, -0.6f + 0.64f, -0.6f + 0.64f + 1.0f, 0.64f + 1.0f + 0.4f, 1.0f + 0.4f + 0.6f, 0.4f + 0.6f, 0.6f,
            2.5f, -1.0f + 3.0f, 1.25f - 3.2f + 2.5f, -1.0f - 5.0f, 1.25f + 0.5f - 2.0f, -3.0f, 0.5f
        },
        outputInfo.GetQuantizationScale(),
        outputInfo.GetQuantizationOffset());

    std::vector<T> actualOutput(outputInfo.GetNumElements());

    // Optionally apply bias to output image.
    if(biasEnabled)
    {
        ApplyBias(outputData, outputInfo.GetQuantizationScale(), outputInfo.GetQuantizationOffset(),
                  biasData, biasInfo.GetQuantizationScale(), biasInfo.GetQuantizationOffset(),
                  1, outputSize);
    }

    std::unique_ptr<armnn::ITensorHandle> inputHandle = tensorHandleFactory.CreateTensorHandle(inputInfo);
    std::unique_ptr<armnn::ITensorHandle> outputHandle = tensorHandleFactory.CreateTensorHandle(outputInfo);
    std::unique_ptr<armnn::ITensorHandle> weightsHandle = tensorHandleFactory.CreateTensorHandle(kernelInfo);
    std::unique_ptr<armnn::ITensorHandle> biasHandle = nullptr;

    armnn::Convolution2dQueueDescriptor data;
    armnn::WorkloadInfo info;

    AddInputToWorkload(data, info, inputInfo, inputHandle.get());
    AddInputToWorkload(data, info, kernelInfo, weightsHandle.get());
    AddOutputToWorkload(data, info, outputInfo, outputHandle.get());

    data.m_Parameters.m_StrideX = 1;
    data.m_Parameters.m_StrideY = stride;
    data.m_Parameters.m_PadLeft = 0;
    data.m_Parameters.m_PadRight = 0;
    data.m_Parameters.m_PadTop = padSize;
    data.m_Parameters.m_PadBottom = padSize;
    data.m_Parameters.m_BiasEnabled = biasEnabled;

    if (biasEnabled)
    {
        biasHandle = tensorHandleFactory.CreateTensorHandle(biasInfo);
        AddInputToWorkload(data, info, biasInfo, biasHandle.get());
    }

    std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateWorkload(armnn::LayerType::Convolution2d,
                                                                                data,
                                                                                info);
    inputHandle->Allocate();
    outputHandle->Allocate();
    weightsHandle->Allocate();

    if (biasEnabled)
    {
        biasHandle->Allocate();
        CopyDataToITensorHandle(biasHandle.get(), biasData.data());
    }

    CopyDataToITensorHandle(inputHandle.get(), inputData.data());
    CopyDataToITensorHandle(weightsHandle.get(), kernelData.data());

    ExecuteWorkload(*workload, memoryManager);

    CopyDataFromITensorHandle(actualOutput.data(), outputHandle.get());

    return LayerTestResult<T, 4>(actualOutput,
                                 outputData,
                                 outputHandle->GetShape(),
                                 outputInfo.GetShape());
}

template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
LayerTestResult<T, 4> SimpleConvolution2d3x3NhwcTestCommon(
    armnn::IWorkloadFactory& workloadFactory,
    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    const armnn::ITensorHandleFactory& tensorHandleFactory,
    float qScale,
    int32_t qOffset,
    bool biasEnabled,
    armnn::DataLayout dataLayout)
{
    armnn::IgnoreUnused(biasEnabled);

    // Use a single-batch, 1-channel input image, 4 wide and 3 high (NHWC).
    armnn::TensorInfo inputDesc({ 1, 3, 4, 1 }, ArmnnType);
    std::vector<T> input =
    {
        1, 5, 2, 3,
        8, 7, 3, 6,
        3, 3, 9, 1
    };

    // Use a single 1-channel 3x3 kernel.
    armnn::TensorInfo kernelDesc({ 1, 3, 3, 1 }, ArmnnType);
    std::vector<T> kernel =
    {
        4, 5, 6,
        0, 0, 0,
        3, 2, 1
    };

    // Expected output is a single-batch, 1-channel image, 4 wide and 3 high.
    armnn::TensorInfo outputDesc({ 1, 3, 4, 1 }, ArmnnType);
    const std::vector<T> outputData =
    {
        23, 41, 33, 21,
        44, 65, 76, 52,
        82, 85, 79, 42
    };

    return SimpleConvolution2dNhwcTestImpl<ArmnnType, ArmnnType>(
        workloadFactory,
        memoryManager,
        tensorHandleFactory,
        input,
        kernel,
        std::vector<T>(),
        outputData,
        inputDesc.GetShape(),
        kernelDesc.GetShape(),
        outputDesc.GetShape(),
        dataLayout,
        qScale,
        qOffset);
}

template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
LayerTestResult<T, 4> SimpleConvolution2d3x3Stride2x2TestCommon(
    armnn::IWorkloadFactory& workloadFactory,
    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    const armnn::ITensorHandleFactory& tensorHandleFactory,
    float qScale,
    int32_t qOffset,
    bool biasEnabled,
    const armnn::DataLayout& dataLayout)
{
    armnn::IgnoreUnused(biasEnabled);

    // Input is a single-batch, 1 channel, 5x5 image.
    armnn::TensorInfo inputDesc({ 1, 5, 5, 1 }, ArmnnType);
    std::vector<T> input =
    {
        1, 5, 2, 3, 5,
        8, 7, 3, 6, 3,
        3, 3, 9, 1, 9,
        4, 1, 8, 1, 3,
        6, 8, 1, 9, 2
    };

    // Use a 3x3 kernel.
    armnn::TensorInfo kernelDesc({ 1, 3, 3, 1 }, ArmnnType);
    std::vector<T> kernel =
    {
        4, 5, 6,
        0, 0, 0,
        3, 2, 1
    };

    // Expected output is a single-batch, 1 channel, 3x3 image.
    armnn::TensorInfo outputDesc({ 1, 3, 3, 1 }, ArmnnType);
    std::vector<T> outputData =
    {
        23, 33, 24,
        91, 99, 48,
        26, 50, 19
    };

    uint32_t padLeft = 1;
    uint32_t padTop = 1;
    uint32_t padRight = 1;
    uint32_t padBottom = 1;
    uint32_t strideX = 2;
    uint32_t strideY = 2;

    return SimpleConvolution2dNhwcTestImpl<ArmnnType, ArmnnType>(
        workloadFactory,
        memoryManager,
        tensorHandleFactory,
        input,
        kernel,
        std::vector<T>(),
        outputData,
        inputDesc.GetShape(),
        kernelDesc.GetShape(),
        outputDesc.GetShape(),
        dataLayout,
        qScale,
        qOffset,
        padLeft,
        padTop,
        padRight,
        padBottom,
        strideX,
        strideY);
}

template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType, typename T = armnn::ResolveType<ArmnnType>>
LayerTestResult<T, 4> SimpleConvolution2d3x5TestCommon(
    armnn::IWorkloadFactory& workloadFactory,
    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    const armnn::ITensorHandleFactory& tensorHandleFactory,
    float qScale,
    int32_t qOffset,
    bool biasEnabled,
    const armnn::DataLayout layout)
{
    // Use common single-batch 3-channel 16x8 image.
    armnn::TensorInfo inputDesc({ 1, 3, 8, 16 }, ArmnnType);
    std::vector<T> input = QuantizedVector<T>(ConvInput3x8x16, qScale, qOffset);

    // Use a 2-element batch with 3-channel 3x5 kernels.
    armnn::TensorInfo kernelDesc({ 2, 3, 5, 3 }, ArmnnType);
    std::vector<T> kernel = QuantizedVector<T>({
            1, 1, 1,
            1, -1, 1,
            1, 1, 1,
            1, 1, 1,
            1, 1, 1,

            0, 0, 0,
            0, 0, 0,
            0, 0, 0,
            0, 0, 0,
            0, 0, 0,

            2, 2, 2,
            2, 2, 2,
            2, 2, 2,
            2, 2, 2,
            2, 2, 2,


            0, 0, 0,
            0, 0, 0,
            0, 0, 0,
            0, 0, 0,
            0, 0, 0,

            1, 1, 1,
            1, 1, 1,
            1, 1, 1,
            1, 1, 1,
            1, 1, 1,

            0, 0, 0,
            0, 0, 0,
            0, 0, 0,
            0, 0, 0,
            0, 0, 0
        },
        qScale, qOffset);

    // Expected output is 1 batch of a 2-channel 14x4 image.
    armnn::TensorInfo outputDesc({ 1, 2, 4, 14 }, ArmnnType);
    std::vector<T> expectedOutput = QuantizedVector<T>({
            -24, -24, -24, -24, -24, -24, -24, -24, -24, -24, -24, -24, -24, -24,
            -25, -25, -25, -25, -25, -25, -25, -25, -25, -25, -25, -25, -25, -25,
            -23.5f, -23.5f, -23.5f, -23.5f, -23.5f, -23.5f, -23.5f, -23.5f, -23.5f, -23.5f, -23.5f,
            -23.5f, -23.5f, -23.5f,
            -23.5f, -23.5f, -23.5f, -23.5f, -23.5f, -23.5f, -23.5f, -23.5f, -23.5f, -23.5f, -23.5f,
            -23.5f, -23.5f, -23.5f,

            5, 5, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            5, 5, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            5, 5, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            5, 5, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
        },
        qScale, qOffset);

    return SimpleConvolution2dTestImpl<ArmnnType, ArmnnBType>(
        workloadFactory,
        memoryManager,
        tensorHandleFactory,
        input,
        kernel,
        GetBias2<ArmnnBType>(biasEnabled, qScale * qScale),
        expectedOutput,
        inputDesc.GetShape(),
        kernelDesc.GetShape(),
        outputDesc.GetShape(),
        qScale,
        qOffset,
        layout);
}

template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType,
         typename T = armnn::ResolveType<ArmnnType>>
LayerTestResult<T, 4> SimpleConvolution2d3x3TestCommon(
    armnn::IWorkloadFactory& workloadFactory,
    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    const armnn::ITensorHandleFactory& tensorHandleFactory,
    float qScale,
    int32_t qOffset,
    bool biasEnabled,
    const armnn::DataLayout layout)
{
    // Use a 3x3 kernel, which exercises ArmCompute's direct convolution path.

    // Use common single-batch 3-channel 16x8 image.
    armnn::TensorInfo inputDesc({ 1, 3, 8, 16 }, ArmnnType);
    std::vector<unsigned int> inputShape = { 1, 3, 8, 16 };
    std::vector<T> input = QuantizedVector<T>(ConvInput3x8x16, qScale, qOffset);

    // Use a 2-element batch of 3-channel 3x3 kernels.
    armnn::TensorInfo kernelDesc({ 2, 3, 3, 3 }, ArmnnType);
    std::vector<T> kernel = QuantizedVector<T>({
            1, 1, 1,
            1, -1, 1,
            1, 1, 1,

            0, 0, 0,
            0, 0, 0,
            0, 0, 0,

            2, 2, 2,
            2, 2, 2,
            2, 2, 2,


            0, 0, 0,
            0, 0, 0,
            0, 0, 0,

            1, 1, 1,
            1, 1, 1,
            1, 1, 1,

            0, 0, 0,
            0, 0, 0,
            0, 0, 0
        },
        qScale, qOffset);

    // Expected output is 1 batch of a 2-channel 14x6 image.
    armnn::TensorInfo outputDesc({ 1, 2, 6, 14 }, ArmnnType);
    std::vector<T> expectedOutput = QuantizedVector<T>({
            -15, -15, -15, -15, -15, -15, -15, -15, -15, -15, -15, -15, -15, -15,
            -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16,
            -14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,
            -14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,
            -14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,
            -14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,

            3, 3, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            3, 3, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            3, 3, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            3, 3, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            3, 3, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            3, 3, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
        },
        qScale, qOffset);

    return SimpleConvolution2dTestImpl<ArmnnType, ArmnnBType>(
        workloadFactory,
        memoryManager,
        tensorHandleFactory,
        input,
        kernel,
        GetBias2<ArmnnBType>(biasEnabled, qScale * qScale),
        expectedOutput,
        inputDesc.GetShape(),
        kernelDesc.GetShape(),
        outputDesc.GetShape(),
        qScale,
        qOffset,
        layout);
}

template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType,
         typename T = armnn::ResolveType<ArmnnType>>
LayerTestResult<T, 4> Convolution2dAsymmetricPaddingLargerThanHalfKernelSizeTestCommon(
    armnn::IWorkloadFactory& workloadFactory,
    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    const armnn::ITensorHandleFactory& tensorHandleFactory,
    const armnn::DataLayout layout,
    float qScale,
    int32_t qOffset)
{
    // Use a single-batch 1-channel 3x3 image as input.
    armnn::TensorInfo inputDesc({ 1, 1, 3, 3 }, ArmnnType);
    std::vector<T> input =
        QuantizedVector<T>({
            11,21,31,
            12,22,32,
            13,23,33
        },
        qScale, qOffset);

    // Use 1 batch of a 1-channel 2x2 kernel.
    armnn::TensorInfo kernelDesc({ 1, 1, 2, 2 }, ArmnnType);
    std::vector<T> kernel =
        QuantizedVector<T>({
            -11,-21,
            -12,-22,
        },
        qScale, qOffset);

    // Expected output is 1 batch of a 1-channel 6x8 image.
    // Manually calculated like this:
    //[-11*0 -21*0 -12*0 -22*0 ; -11*0 -21*0 -12*0 -22*0 ; -11*0 -21*0 -12*0 -22*0 ; -11*0 -21*0 -12*0 -22*0 ..]
    //[-11*0 -21*0 -12*0 -22*11 ; -11*0 -21*0 -12*11 -22*21 ; -11*0 -21*0 -12*21 -22*31 ; -11*0 -21*0 -12*31 -22*0 ..]
    //[-11*0 -21*11 -12*0 -22*12 ; -11*11 -21*21 -12*12 -22*22 ; -11*21 -21*31 -12*22 -22*32 ; -11*31 -21*0 -12*32 -22*0 ..]
    //[-11*0 -21*12 -12*0 -22*13 ; -11*12 -21*22 -12*13 -22*23 ; -11*22 -21*32 -12*23 -22*33 ; -11*32 -21*0 -12*33 -22*0 ..]
    //[-11*0 -21*13 -12*0 -22*0 ; -11*13 -21*23 -12*0 -22*0 ; -11*23 -21*33 -12*0 -22*0 ; -11*33 -21*0 -12*0 -22*0 ..]
    //[-11*0 -21*0 -12*0 -22*0 ; -11*0 -21*0 -12*0 -22*0 ; -11*0 -21*0 -12*0 -22*0 ; -11*0 -21*0 -12*0 -22*0 ..]
    //[..... ..... ..... ..... ; ..... ..... ..... ..... ; ..... ..... ..... ..... ; ..... ..... ..... ..... ..]
    armnn::TensorInfo outputDesc({ 1, 1, 8, 6 }, ArmnnType);
    std::vector<T> expectedOutput =
        QuantizedVector<T>({
            0, 0, 0, 0, 0, 0,
            -242, -594, -934, -372, 0, 0,
            -495, -1190, -1850, -725, 0, 0,
            -538, -1256, -1916, -748, 0, 0,
            -273, -626, -946, -363, 0, 0,
            0, 0, 0, 0, 0, 0,
            0, 0, 0, 0, 0, 0,
            0, 0, 0, 0, 0, 0
        },
        qScale, qOffset);

    return SimpleConvolution2dTestImpl<ArmnnType, ArmnnBType>(
        workloadFactory,
        memoryManager,
        tensorHandleFactory,
        input,
        kernel,
        GetBias2<ArmnnBType>(false, qScale * qScale),
        expectedOutput,
        inputDesc.GetShape(),
        kernelDesc.GetShape(),
        outputDesc.GetShape(),
        qScale,
        qOffset,
        layout,
        1, // Padding left.
        2, // Padding top.
        3, // Padding right.
        4); // Padding bottom.
}

template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType,
         typename T = armnn::ResolveType<ArmnnType>>
LayerTestResult<T, 4> SimpleConvolution2dAsymmetricPaddingTestCommon(
    armnn::IWorkloadFactory& workloadFactory,
    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    const armnn::ITensorHandleFactory& tensorHandleFactory,
    const armnn::DataLayout layout,
    float qScale,
    int32_t qOffset)
{
    // Use a single-batch 1-channel 5x5 image as input.
    armnn::TensorInfo inputDesc({ 1, 1, 5, 5 }, ArmnnType);
    std::vector<T> input =
        QuantizedVector<T>({
            11,21,31,41,51,
            12,22,32,42,52,
            13,23,33,43,53,
            14,24,34,44,54,
            15,25,35,45,55,
        }, qScale, qOffset);

    // Use 1 batch of a 1-channel 4x4 kernel.
    armnn::TensorInfo kernelDesc({ 1, 1, 4, 4 }, ArmnnType);
    std::vector<T> kernel =
        QuantizedVector<T>({
            -11,-21,-31,-41,
            -12,-22,-32,-42,
            -13,-23,-33,-43,
            -14,-24,-34,-44,
        },
        qScale, qOffset);

    // Expected output is 1 batch of a 1-channel 5x5 image.
    armnn::TensorInfo outputDesc({ 1, 1, 5, 5 }, ArmnnType);
    std::vector<T> expectedOutput =
        QuantizedVector<T>({
            -7140, -10580, -13940, -9300, -5230,
            -9590, -14120, -18520, -12290, -6860,
            -9980, -14560, -18960, -12560, -7000,
            -7518, -10904, -14144, -9318, -5152,
            -5032, -7256, -9376, -6142, -3368,
        },
        qScale, qOffset);

    return SimpleConvolution2dTestImpl<ArmnnType, ArmnnBType>(
        workloadFactory,
        memoryManager,
        tensorHandleFactory,
        input,
        kernel,
        GetBias2<ArmnnBType>(false, qScale * qScale),
        expectedOutput,
        inputDesc.GetShape(),
        kernelDesc.GetShape(),
        outputDesc.GetShape(),
        qScale,
        qOffset,
        layout,
        1, // Padding left.
        1, // Padding top.
        2, // Padding right.
        2); // Padding bottom.
}

template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType, typename T = armnn::ResolveType<ArmnnType>>
LayerTestResult<T, 4> Convolution2d3x3DilationTestCommon(
    armnn::IWorkloadFactory& workloadFactory,
    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    const armnn::ITensorHandleFactory& tensorHandleFactory,
    const std::vector<float>& inputNoQuantizedValues,
    armnn::TensorInfo& inputTensorInfo,
    const std::vector<float>& kernelNoQuantizedValues,
    armnn::TensorInfo& kernelTensorInfo,
    const std::vector<float>& outputExpectedNoQuantizedValues,
    armnn::TensorInfo& outputTensorInfo,
    uint32_t dilationX,
    uint32_t dilationY,
    armnn::DataLayout layout = armnn::DataLayout::NCHW,
    uint32_t padLeft = 0,
    uint32_t padTop = 0,
    uint32_t padRight = 0,
    uint32_t padBottom = 0,
    uint32_t strideX = 1,
    uint32_t strideY = 1,
    bool biasEnabled = false)
{
    float qScale;
    int32_t qOffset;
    switch (ArmnnType)
    {
        case armnn::DataType::QAsymmU8:
        case armnn::DataType::QAsymmS8:
        {
            qScale = 0.1f;
            qOffset = 128;
            break;
        }
        case armnn::DataType::QSymmS16:
        {
            qScale = 0.1f;
            qOffset = 0;
            break;
        }
        case armnn::DataType::Float32:
        default:
        {
            qScale = 0.f;
            qOffset = 0;
            break;
        }
    }
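
    // For Float32 the scale and offset are unused: QuantizedVector only quantizes when T is
    // a quantized type, so 0.f here simply means "no quantization".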
    inputTensorInfo.SetQuantizationScale(qScale);
    inputTensorInfo.SetQuantizationOffset(qOffset);
    kernelTensorInfo.SetQuantizationScale(qScale);
    kernelTensorInfo.SetQuantizationOffset(qOffset);
    outputTensorInfo.SetQuantizationScale(qScale);
    outputTensorInfo.SetQuantizationOffset(qOffset);

    auto input = QuantizedVector<T>(inputNoQuantizedValues,
                                    inputTensorInfo.GetQuantizationScale(),
                                    inputTensorInfo.GetQuantizationOffset());
    auto kernel = QuantizedVector<T>(kernelNoQuantizedValues,
                                     kernelTensorInfo.GetQuantizationScale(),
                                     kernelTensorInfo.GetQuantizationOffset());
    auto expectedOutput = QuantizedVector<T>(outputExpectedNoQuantizedValues,
                                             outputTensorInfo.GetQuantizationScale(),
                                             outputTensorInfo.GetQuantizationOffset());

    return SimpleConvolution2dTestImpl<ArmnnType, ArmnnBType>(
        workloadFactory,
        memoryManager,
        tensorHandleFactory,
        input,
        kernel,
        GetBias2<ArmnnBType>(biasEnabled, qScale * qScale),
        expectedOutput,
        inputTensorInfo.GetShape(),
        kernelTensorInfo.GetShape(),
        outputTensorInfo.GetShape(),
        qScale,
        qOffset,
        layout,
        padLeft,
        padTop,
        padRight,
        padBottom,
        strideX,
        strideY,
        dilationX,
        dilationY);
}

template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType, typename T>
LayerTestResult<T, 4> Convolution2d3x3Dilation3x3Test(
    armnn::IWorkloadFactory& workloadFactory,
    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    const armnn::ITensorHandleFactory& tensorHandleFactory,
    bool biasEnabled,
    const armnn::DataLayout layout)
{
    armnn::TensorInfo inputTensorInfo({ 1, 1, 10, 10 }, ArmnnType);
    std::vector<float> inputNoQuantizedValues =
    {
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 1, 1, 1, 0, 0, 0,
        0, 0, 0, 0, 1, 1, 1, 0, 0, 0,
        0, 0, 0, 0, 1, 1, 1, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0
    };

    armnn::TensorInfo kernelTensorInfo({ 1, 1, 3, 3}, ArmnnType);
    std::vector<float> kernelNoQuantizedValues =
    {
        1, 2, 3,
        4, 5, 6,
        7, 8, 9
    };

    // With a dilation rate of 3 the effective kernel size is d * (K - 1) + 1 = 3 * (3 - 1) + 1 = 7,
    // so the output is 4x4: (I - K_eff + 2P) / S + 1 => (10 - 7 + 0) / 1 + 1 = 4.
    armnn::TensorInfo outputTensorInfo({ 1, 1, 4, 4}, ArmnnType);
    std::vector<float> outputExpectedNoQuantizedValues =
    {
        6., 5., 5., 5.,
        6., 5., 5., 5.,
        6., 5., 5., 5.,
        3., 2., 2., 2.
    };

    return Convolution2d3x3DilationTestCommon<ArmnnType, ArmnnBType>(
        workloadFactory,
        memoryManager,
        tensorHandleFactory,
        inputNoQuantizedValues,
        inputTensorInfo,
        kernelNoQuantizedValues,
        kernelTensorInfo,
        outputExpectedNoQuantizedValues,
        outputTensorInfo,
        3,
        3,
        layout,
        0, // Padding left.
        0, // Padding top.
        0, // Padding right.
        0, // Padding bottom.
        1, // Stride x.
        1, // Stride y.
        biasEnabled);
}

template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType, typename T>
LayerTestResult<T, 4> Convolution2d2x3x3Dilation3x3Test(
    armnn::IWorkloadFactory& workloadFactory,
    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    const armnn::ITensorHandleFactory& tensorHandleFactory,
    bool biasEnabled,
    const armnn::DataLayout layout)
{
    armnn::TensorInfo inputTensorInfo({ 1, 2, 10, 10 }, ArmnnType);
    std::vector<float> inputNoQuantizedValues =
    {
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 1, 1, 1, 0, 0, 0,
        0, 0, 0, 0, 1, 1, 1, 0, 0, 0,
        0, 0, 0, 0, 1, 1, 1, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0,

        0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 1, 1, 1, 0, 0, 0,
        0, 0, 0, 0, 1, 1, 1, 0, 0, 0,
        0, 0, 0, 0, 1, 1, 1, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0
    };

    armnn::TensorInfo kernelTensorInfo({ 1, 2, 3, 3 }, ArmnnType);
    std::vector<float> kernelNoQuantizedValues =
    {
        1, 2, 3,
        4, 5, 6,
        7, 8, 9,

        1, 2, 3,
        4, 5, 6,
        7, 8, 9
    };

    // With a dilation rate of 3 the effective kernel size is d * (K - 1) + 1 = 3 * (3 - 1) + 1 = 7,
    // so the output is 4x4: (I - K_eff + 2P) / S + 1 => (10 - 7 + 0) / 1 + 1 = 4.
    armnn::TensorInfo outputTensorInfo({ 1, 1, 4, 4 }, ArmnnType);
    std::vector<float> outputExpectedNoQuantizedValues =
    {
        12., 10., 10., 10.,
        12., 10., 10., 10.,
        12., 10., 10., 10.,
        6., 4., 4., 4.
    };

    return Convolution2d3x3DilationTestCommon<ArmnnType, ArmnnBType>(
        workloadFactory,
        memoryManager,
        tensorHandleFactory,
        inputNoQuantizedValues,
        inputTensorInfo,
        kernelNoQuantizedValues,
        kernelTensorInfo,
        outputExpectedNoQuantizedValues,
        outputTensorInfo,
        3,
        3,
        layout,
        0, // Padding left.
        0, // Padding top.
        0, // Padding right.
        0, // Padding bottom.
        1, // Stride x.
        1, // Stride y.
        biasEnabled);
}

template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType, typename T>
LayerTestResult<T, 4> Convolution2d2x2Dilation2x2Padding2x2Stride3x3Test(
    armnn::IWorkloadFactory& workloadFactory,
    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    const armnn::ITensorHandleFactory& tensorHandleFactory,
    bool biasEnabled,
    const armnn::DataLayout layout)
{
    armnn::TensorInfo inputTensorInfo({ 1, 1, 10, 10 }, ArmnnType);
    std::vector<float> inputNoQuantizedValues =
    {
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1
    };

    armnn::TensorInfo kernelTensorInfo({ 1, 1, 2, 2 }, ArmnnType);
    std::vector<float> kernelNoQuantizedValues =
    {
        1, 2,
        3, 4
    };

    // With a dilation rate of 2 the effective kernel size is d * (K - 1) + 1 = 2 * (2 - 1) + 1 = 3,
    // where kernel size K = 2 and input size I = 10. With per-side padding P = 1 and stride S = 3
    // the output is 4x4: floor((I - K_eff + 2P) / S) + 1 = floor((10 - 3 + 2) / 3) + 1 = 4.
    armnn::TensorInfo outputTensorInfo({ 1, 1, 4, 4}, ArmnnType);
    std::vector<float> outputExpectedNoQuantizedValues =
    {
        4, 7, 7, 3,
        6, 10, 10, 4,
        6, 10, 10, 4,
        2, 3, 3, 1
    };
    uint32_t padLeft = 1;
    uint32_t padTop = 1;
    uint32_t padRight = 1;
    uint32_t padBottom = 1;

    return Convolution2d3x3DilationTestCommon<ArmnnType, ArmnnBType>(
        workloadFactory,
        memoryManager,
        tensorHandleFactory,
        inputNoQuantizedValues,
        inputTensorInfo,
        kernelNoQuantizedValues,
        kernelTensorInfo,
        outputExpectedNoQuantizedValues,
        outputTensorInfo,
        2,
        2,
        layout,
        padLeft,
        padTop,
        padRight,
        padBottom,
        3,
        3,
        biasEnabled);
}

template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
LayerTestResult<T,4> CompareConvolution2dTestImpl(
    armnn::IWorkloadFactory& workloadFactory,
    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    armnn::IWorkloadFactory& refWorkloadFactory,
    const armnn::ITensorHandleFactory& tensorHandleFactory,
    const armnn::ITensorHandleFactory& refTensorHandleFactory)
{
    unsigned int inputHeight = 8;
    unsigned int inputWidth = 16;
    unsigned int inputChannels = 3;
    unsigned int inputNum = 5;

    unsigned int kernelHeight = 3;
    unsigned int kernelWidth = 3;

    unsigned int strideX = 2;
    unsigned int strideY = 3;
    unsigned int padX = 1;
    unsigned int padY = 1;

    unsigned int outputNum = inputNum;
    unsigned int outputChannels = 2;
    unsigned int outputHeight = (inputHeight + 2 * padY - kernelHeight + strideY) / strideY;
    unsigned int outputWidth = (inputWidth + 2 * padX - kernelWidth + strideX) / strideX;
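    // With integer division these are the usual floor((I + 2P - K) / S) + 1:
    // outputHeight = (8 + 2 - 3 + 3) / 3 = 3, outputWidth = (16 + 2 - 3 + 2) / 2 = 8.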
1379
1380 armnn::TensorInfo inputTensorInfo;
1381 armnn::TensorInfo outputTensorInfo;
1382 armnn::TensorInfo kernelDesc;
1383 armnn::TensorInfo biasDesc;
1384
1385 unsigned int inputShape[] = {inputNum, inputChannels, inputHeight, inputWidth};
1386 unsigned int outputShape[] = {outputNum, outputChannels, outputHeight, outputWidth};
1387 unsigned int kernelShape[] = {outputChannels, inputChannels, kernelHeight, kernelWidth};
1388 unsigned int biasShape[] = {outputChannels};
1389
1390 inputTensorInfo = armnn::TensorInfo(4, inputShape, ArmnnType);
1391 outputTensorInfo = armnn::TensorInfo(4, outputShape, ArmnnType);
1392 kernelDesc = armnn::TensorInfo(4, kernelShape, ArmnnType);
1393 biasDesc = armnn::TensorInfo(1, biasShape, ArmnnType);
1394
1395 auto input = MakeRandomTensor<T>(inputTensorInfo, 124908);
1396 auto kernel = MakeRandomTensor<T>(kernelDesc, 891234);
1397 auto bias = MakeRandomTensor<T>(biasDesc, 1028);
1398
1399 std::vector<T> actualOutput(outputTensorInfo.GetNumElements());
1400 std::vector<T> expectedOutput(outputTensorInfo.GetNumElements());
1401
1402 std::unique_ptr<armnn::ITensorHandle> inputHandle = tensorHandleFactory.CreateTensorHandle(inputTensorInfo);
1403 std::unique_ptr<armnn::ITensorHandle> biasHandle = tensorHandleFactory.CreateTensorHandle(biasDesc);
1404 std::unique_ptr<armnn::ITensorHandle> weightsHandle = tensorHandleFactory.CreateTensorHandle(kernelDesc);
1405 std::unique_ptr<armnn::ITensorHandle> outputHandle = tensorHandleFactory.CreateTensorHandle(outputTensorInfo);
1406
1407 armnn::Convolution2dQueueDescriptor data;
1408 armnn::WorkloadInfo info;
1409
1410 AddInputToWorkload(data, info, inputTensorInfo, inputHandle.get());
1411 AddInputToWorkload(data, info, kernelDesc, weightsHandle.get());
1412 AddInputToWorkload(data, info, biasDesc, biasHandle.get());
1413 AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get());
1414
1415 AllocateAndCopyDataToITensorHandle(weightsHandle.get(), kernel.data());
1416 AllocateAndCopyDataToITensorHandle(biasHandle.get(), bias.data());
1417
1418 data.m_Parameters.m_StrideX = strideX;
1419 data.m_Parameters.m_StrideY = strideY;
1420 data.m_Parameters.m_PadLeft = padX;
1421 data.m_Parameters.m_PadRight = padX;
1422 data.m_Parameters.m_PadTop = padY;
1423 data.m_Parameters.m_PadBottom = padY;
1424 data.m_Parameters.m_BiasEnabled = true;
1425
1426 std::unique_ptr<armnn::ITensorHandle> outputHandleRef = refTensorHandleFactory.CreateTensorHandle(outputTensorInfo);
1427 std::unique_ptr<armnn::ITensorHandle> weightsHandleRef = refTensorHandleFactory.CreateTensorHandle(kernelDesc);
1428 std::unique_ptr<armnn::ITensorHandle> biasHandleRef = refTensorHandleFactory.CreateTensorHandle(biasDesc);
1429 std::unique_ptr<armnn::ITensorHandle> inputHandleRef = refTensorHandleFactory.CreateTensorHandle(inputTensorInfo);
1430
1431 armnn::Convolution2dQueueDescriptor refData = data;
1432 armnn::WorkloadInfo refInfo = info;
1433 SetWorkloadInput(refData, refInfo, 0, inputTensorInfo, inputHandleRef.get());
1434 SetWorkloadInput(refData, refInfo, 1, kernelDesc, weightsHandleRef.get());
1435 SetWorkloadInput(refData, refInfo, 2, biasDesc, biasHandleRef.get());
1436 SetWorkloadOutput(refData, refInfo, 0, outputTensorInfo, outputHandleRef.get());
1437
1438 std::unique_ptr<armnn::IWorkload> workload
1439 = workloadFactory.CreateWorkload(armnn::LayerType::Convolution2d, data, info);
1440 std::unique_ptr<armnn::IWorkload> workloadRef
1441 = refWorkloadFactory.CreateWorkload(armnn::LayerType::Convolution2d, refData, refInfo);
1442
1443 outputHandleRef->Allocate();
1444 inputHandleRef->Allocate();
1445 weightsHandleRef->Allocate();
1446 biasHandleRef->Allocate();
1447
1448 inputHandle->Allocate();
1449 outputHandle->Allocate();
1450
1451 CopyDataToITensorHandle(inputHandle.get(), input.data());
1452 CopyDataToITensorHandle(inputHandleRef.get(), input.data());
1453 CopyDataToITensorHandle(weightsHandleRef.get(), kernel.data());
1454 CopyDataToITensorHandle(biasHandleRef.get(), bias.data());
1455
1456 ExecuteWorkload(*workload, memoryManager);
1457
1458 workloadRef->PostAllocationConfigure();
1459 workloadRef->Execute();
1460
1461 CopyDataFromITensorHandle(actualOutput.data(), outputHandle.get());
1462 CopyDataFromITensorHandle(expectedOutput.data(), outputHandleRef.get());
1463
1464 return LayerTestResult<T, 4>(actualOutput,
1465 expectedOutput,
1466 outputHandle->GetShape(),
1467 outputTensorInfo.GetShape());
1468 }
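// The function above follows the compare-against-reference pattern used by these
// tests: the same queue descriptor drives both the backend under test and the
// reference workload, and the two outputs become the actual and expected results.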
1469
1470 LayerTestResult<float, 4> Convolution2d3x3Stride2x2BFloat16Test(
1471 armnn::IWorkloadFactory& workloadFactory,
1472 const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
1473 const armnn::ITensorHandleFactory& tensorHandleFactory,
1474 bool biasEnabled,
1475 const armnn::DataLayout& dataLayout)
1476 {
1477 // BFloat16 input and weight, Float32 output
1478 armnn::IgnoreUnused(biasEnabled);
1479
1480 // Input is a single-batch, 1 channel, 5x5 image.
1481 armnn::TensorInfo inputDesc({ 1, 5, 5, 1 }, armnn::DataType::BFloat16);
1482
1483 std::vector<armnn::BFloat16> inputValues = armnnUtils::QuantizedVector<armnn::BFloat16>(
1484 {
1485 10.0367984f, // 10.0625
1486 2.0380895f, // 2.03125
1487 15.0420157f, // 15.0625
1488 22.0675631f, // 22.125
1489 8.0938920f, // 8.125
1490 5.0476106f, // 5.0625
1491 80.1035490f, // 80
1492 100.1260370f, // 100
1493 55.0461647f, // 55
1494 120.0883828f, // 120
1495 9.1159540f, // 9.125
1496 90.0498519f, // 90
1497 200.0104630f, // 200
1498 30.0154114f, // 30
1499 75.00137681f, // 75
1500 30.0344238f, // 30
1501 25.0356445f, // 25
1502 130.0495605f, // 130
1503 60.0683594f, // 60
1504 35.0991211f, // 35
1505 8.0461426f, // 8.0625
1506 12.0996094f, // 12.125
1507 98.1269530f, // 98
1508 125.0393066f, // 125
1509 5.103516f // 5.0937
1510 },
1511 1.0f, 0);
1512
1513 // Use a 3x3 kernel.
1514 armnn::TensorInfo kernelDesc({1, 3, 3, 1}, armnn::DataType::BFloat16);
1515
1516 std::vector<armnn::BFloat16> kernelValues = armnnUtils::QuantizedVector<armnn::BFloat16>(
1517 {
1518 -0.126184f, // -0.125977
1519 -0.150468f, // -0.150391
1520 -0.101412f, // -0.101562
1521 -0.0586369f,// -0.0585938
1522 -0.0865864f,// -0.0864258
1523 -0.0435089f,// -0.043457
1524 0.0347555f, // 0.034668
1525 0.0323111f, // 0.0322266
1526 0.0385381f // 0.0385742
1527 },
1528 1.0f, 0);
1529
1530 // Expected output is a single-batch, 1 channel, 3x3 image.
1531 armnn::TensorInfo outputDesc({ 1, 3, 3, 1 }, armnn::DataType::Float32);
1532
1533 // Expected output (the values a full FP32 calculation would produce are in the comments)
1534 const std::vector<float> outputData =
1535 {
1536 2.296875f, // 2.29240716
1537 5.75f, // 5.75851926
1538 3.78125f, // 3.79855026
1539 -11.625f, // -11.65498118
1540 -47.25f, // -47.27316893
1541 -30.0f, // -30.04771684
1542 -8.25f, // -8.28126168
1543 -43.5f, // -43.46531337
1544 -20.625f // -20.63477281
1545 };
1546
1547 uint32_t padLeft = 1;
1548 uint32_t padTop = 1;
1549 uint32_t padRight = 1;
1550 uint32_t padBottom = 1;
1551 uint32_t strideX = 2;
1552 uint32_t strideY = 2;
1553
1554 return SimpleConvolution2dNhwcTestImpl
1555 <armnn::DataType::BFloat16, armnn::DataType::Float32, armnn::BFloat16, float, armnn::DataType::Float32, float>(
1556 workloadFactory,
1557 memoryManager,
1558 tensorHandleFactory,
1559 inputValues,
1560 kernelValues,
1561 std::vector<float>(),
1562 outputData,
1563 inputDesc.GetShape(),
1564 kernelDesc.GetShape(),
1565 outputDesc.GetShape(),
1566 dataLayout,
1567 1.0f,
1568 0,
1569 padLeft,
1570 padTop,
1571 padRight,
1572 padBottom,
1573 strideX,
1574 strideY);
1575 }
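// The comments beside the BFloat16 literals above give each value after rounding to
// BFloat16 precision. A minimal sketch of that rounding (round-to-nearest-even on the
// 16 dropped mantissa bits), kept as a comment because it is illustrative only and the
// real conversion lives in armnn::BFloat16; it assumes <cstdint> and <cstring> are
// available and ignores NaN handling:
//
//     static float RoundFloatToBFloat16(float value)
//     {
//         uint32_t bits = 0;
//         std::memcpy(&bits, &value, sizeof(bits));
//         bits += 0x7FFFu + ((bits >> 16) & 1u); // round half to even
//         bits &= 0xFFFF0000u;                   // keep sign, exponent, top 7 mantissa bits
//         float rounded = 0.0f;
//         std::memcpy(&rounded, &bits, sizeof(rounded));
//         return rounded;
//     }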
1576
1577 LayerTestResult<float, 4> Convolution2d3x3Stride2x2BFloat16SmallValueTest(
1578 armnn::IWorkloadFactory& workloadFactory,
1579 const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
1580 const armnn::ITensorHandleFactory& tensorHandleFactory,
1581 bool biasEnabled,
1582 const armnn::DataLayout& dataLayout)
1583 {
1584 // BFloat16 input and weight, Float32 output
1585 armnn::IgnoreUnused(biasEnabled);
1586
1587 // Input is a single-batch, 1 channel, 5x5 image.
1588 armnn::TensorInfo inputDesc({1, 5, 5, 1}, armnn::DataType::BFloat16);
1589
1590 std::vector<armnn::BFloat16> inputValues = armnnUtils::QuantizedVector<armnn::BFloat16>(
1591 {
1592 0.0367984f, // 0.0368652
1593 0.0380895f, // 0.0380859
1594 0.0420157f, // 0.0419922
1595 0.0675631f, // 0.0673828
1596 0.0938920f, // 0.09375
1597 0.0476106f, // 0.0476074
1598 0.1035490f, // 0.103516
1599 0.1260370f, // 0.125977
1600 0.0461647f, // 0.0461426
1601 0.0883828f, // 0.0883789
1602 0.1159540f, // 0.115723
1603 0.0498519f, // 0.0498047
1604 0.0104630f, // 0.010437
1605 0.0154114f, // 0.0154419
1606 0.00137681f, // 0.00137329
1607 0.0344238f, // 0.0344616
1608 0.0356445f, // 0.0355693
1609 0.0495605f, // 0.0495018
1610 0.0683594f, // 0.0683308
1611 0.0991211f, // 0.0988837
1612 0.0461426f, // 0.0461838
1613 0.0996094f, // 0.0997546
1614 0.1269530f, // 0.127099
1615 0.0393066f, // 0.0392791
1616 0.103516f // 0.103641
1617 },
1618 1.0f, 0);
1619
1620 // Use a 3x3 kernel.
1621 armnn::TensorInfo kernelDesc({1, 3, 3, 1}, armnn::DataType::BFloat16);
1622
1623 std::vector<armnn::BFloat16> kernelValues = armnnUtils::QuantizedVector<armnn::BFloat16>(
1624 {
1625 -0.126184f, // -0.125977
1626 -0.150468f, // -0.150391
1627 -0.101412f, // -0.101562
1628 -0.0586369f,// -0.0585938
1629 -0.0865864f,// -0.0864258
1630 -0.0435089f,// -0.043457
1631 0.0347555f, // 0.034668
1632 0.0323111f, // 0.0322266
1633 0.0385381f // 0.0385742
1634 },
1635 1.0f, 0);
1636
1637 // Expected output is a single-batch, 1 channel, 3x3 image.
1638 armnn::TensorInfo outputDesc({1, 3, 3, 1}, armnn::DataType::Float32);
1639
1640 // Expected output (the values a full FP32 calculation would produce are in the comments)
1641 const std::vector<float> outputData =
1642 {
1643 0.000686645508f, // 0.000685
1644 0.000640869141f, // 0.000639
1645 -0.00759887695f, // -0.007631
1646 -0.02734375f, // -0.027388
1647 -0.0356445312f, // -0.035737
1648 -0.0145874023f, // -0.014568
1649 -0.0170898438f, // -0.017124
1650 -0.0373535156f, // -0.037431
1651 -0.0346679688f // -0.034808
1652 };
1653
1654 uint32_t padLeft = 1;
1655 uint32_t padTop = 1;
1656 uint32_t padRight = 1;
1657 uint32_t padBottom = 1;
1658 uint32_t strideX = 2;
1659 uint32_t strideY = 2;
1660
1661 return SimpleConvolution2dNhwcTestImpl
1662 <armnn::DataType::BFloat16, armnn::DataType::Float32, armnn::BFloat16, float, armnn::DataType::Float32, float>(
1663 workloadFactory,
1664 memoryManager,
1665 tensorHandleFactory,
1666 inputValues,
1667 kernelValues,
1668 std::vector<float>(),
1669 outputData,
1670 inputDesc.GetShape(),
1671 kernelDesc.GetShape(),
1672 outputDesc.GetShape(),
1673 dataLayout,
1674 1.0f,
1675 0,
1676 padLeft,
1677 padTop,
1678 padRight,
1679 padBottom,
1680 strideX,
1681 strideY);
1682 }
1683
1684 //
1685 // DepthwiseConvolution2d implementations
1686 //
1687
1688 template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType,
1689 typename T = armnn::ResolveType<ArmnnType>, typename B = armnn::ResolveType<ArmnnBType>>
1690 LayerTestResult<T, 4> DepthwiseConvolution2dAsymmetricTestImpl(
1691 armnn::IWorkloadFactory& workloadFactory,
1692 const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
1693 const armnn::ITensorHandleFactory& tensorHandleFactory,
1694 const std::vector<T>& input,
1695 const std::vector<T>& kernel,
1696 const std::vector<B>& bias,
1697 const std::vector<T>& outputExpected,
1698 const armnn::TensorShape& inputShape,
1699 const armnn::TensorShape& kernelShape,
1700 const armnn::TensorShape& outputExpectedShape,
1701 float qScale,
1702 int32_t qOffset,
1703 const armnn::DataLayout layout,
1704 uint32_t padLeft = 0,
1705 uint32_t padTop = 0,
1706 uint32_t padRight = 0,
1707 uint32_t padBottom = 0,
1708 uint32_t strideX = 1,
1709 uint32_t strideY = 1)
1710 {
1711 unsigned int inputNum = armnn::numeric_cast<unsigned int>(inputShape[0]);
1712 unsigned int inputChannels = armnn::numeric_cast<unsigned int>(inputShape[1]);
1713 unsigned int inputHeight = armnn::numeric_cast<unsigned int>(inputShape[2]);
1714 unsigned int inputWidth = armnn::numeric_cast<unsigned int>(inputShape[3]);
1715 unsigned int kernelHeight = armnn::numeric_cast<unsigned int>(kernelShape[1]);
1716 unsigned int kernelWidth = armnn::numeric_cast<unsigned int>(kernelShape[2]);
1717 unsigned int kernelChannels = armnn::numeric_cast<unsigned int>(kernelShape[3]);
1718 unsigned int outputNum = armnn::numeric_cast<unsigned int>(outputExpectedShape[0]);
1719 unsigned int outputChannels = armnn::numeric_cast<unsigned int>(outputExpectedShape[1]);
1720 unsigned int outputHeight = armnn::numeric_cast<unsigned int>(outputExpectedShape[2]);
1721 unsigned int outputWidth = armnn::numeric_cast<unsigned int>(outputExpectedShape[3]);
1722
1723 // If a bias is used, its size must equal the number of output channels.
1724 bool biasEnabled = bias.size() > 0;
1725 ARMNN_ASSERT(!biasEnabled || bias.size() == outputChannels);
1726
1727 // Creates the tensors.
1728 armnn::TensorInfo inputTensorInfo =
1729 armnnUtils::GetTensorInfo(inputNum, inputChannels, inputHeight, inputWidth, layout, ArmnnType);
1730 armnn::TensorInfo outputTensorInfo =
1731 armnnUtils::GetTensorInfo(outputNum, outputChannels, outputHeight, outputWidth, layout, ArmnnType);
1732 armnn::TensorInfo kernelDesc({1, kernelHeight, kernelWidth, kernelChannels}, ArmnnType);
1733 armnn::TensorInfo biasDesc({static_cast<unsigned int>(bias.size())}, ArmnnBType);
1734
1735 // Set quantization parameters if the requested type is a quantized type.
1736 if (armnn::IsQuantizedType<T>())
1737 {
1738 inputTensorInfo.SetQuantizationScale(qScale);
1739 inputTensorInfo.SetQuantizationOffset(qOffset);
1740 outputTensorInfo.SetQuantizationScale(qScale);
1741 outputTensorInfo.SetQuantizationOffset(qOffset);
1742 kernelDesc.SetQuantizationScale(qScale);
1743 kernelDesc.SetQuantizationOffset(qOffset);
1744 biasDesc.SetQuantizationScale(qScale*qScale);
1745 biasDesc.SetQuantizationOffset(0);
1746 }
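    // Note: the bias scale is inputScale * weightScale (qScale * qScale here), the
    // usual convention so that an integer bias adds directly into the input*weight
    // accumulator without any rescaling.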
1747
1748 // Construct the input data.
1749 std::vector<T> inputData;
1750 inputData.assign(input.data(), input.data() + inputChannels*inputHeight*inputWidth);
1751
1752 // At this point, permute the input data into NHWC if that layout is required.
1753 const armnn::PermutationVector NCHWToNHWC = { 0, 3, 1, 2 };
1754 if (layout == armnn::DataLayout::NHWC)
1755 {
1756 std::vector<T> tmp(inputData.size());
1757 armnnUtils::Permute(inputTensorInfo.GetShape(), NCHWToNHWC, inputData.data(), tmp.data(), sizeof(T));
1758 inputData = tmp;
1759 }
1760
1761 std::vector<T> kernelData;
1762 kernelData.assign(kernel.data(), kernel.data() + kernelHeight * kernelWidth * outputChannels);
1763 if (workloadFactory.GetBackendId() == armnn::BackendId("GpuAcc") ||
1764 workloadFactory.GetBackendId() == armnn::BackendId("CpuAcc"))
1765 {
1766 if (layout == armnn::DataLayout::NCHW)
1767 {
1768 std::vector<T> tmp(kernelData.size());
1769 kernelDesc.SetShape(armnnUtils::Permuted(kernelDesc.GetShape(), {0, 2, 3, 1}));
1770 armnnUtils::Permute(kernelDesc.GetShape(), {0, 2, 3, 1}, kernelData.data(), tmp.data(), sizeof(T));
1771 kernelData = tmp;
1772 }
1773 }
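    // Arm NN's PermutationVector maps source dimension i to destination dimension
    // mappings[i], so {0, 2, 3, 1} rearranges the [1, H, W, I*M] weights into
    // [1, I*M, H, W] for the NCHW path on the GpuAcc/CpuAcc backends.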
1774
1775 // Construct the output data, with bias applied, as appropriate.
1776 std::vector<T> outputData;
1777 outputData.assign(outputExpected.data(), outputExpected.data() + outputChannels*outputHeight*outputWidth);
1778 if (biasEnabled)
1779 {
1780 std::vector<T> biasV;
1781 biasV.assign(bias.data(), bias.data() + outputChannels);
1782 ApplyBias(outputData, outputTensorInfo.GetQuantizationScale(), outputTensorInfo.GetQuantizationOffset(),
1783 biasV, biasDesc.GetQuantizationScale(), biasDesc.GetQuantizationOffset(),
1784 outputWidth, outputHeight);
1785 }
1786
1787 std::vector<T> actualOutput(outputTensorInfo.GetNumElements());
1788
1789 // At this point, permute the expected output into NHWC if that layout is required.
1790 if (layout == armnn::DataLayout::NHWC)
1791 {
1792 std::vector<T> tmp(outputData.size());
1793 armnnUtils::Permute(outputTensorInfo.GetShape(), NCHWToNHWC, outputData.data(), tmp.data(), sizeof(T));
1794 outputData = tmp;
1795 }
1796
1797 std::unique_ptr<armnn::ITensorHandle> inputHandle = tensorHandleFactory.CreateTensorHandle(inputTensorInfo);
1798 std::unique_ptr<armnn::ITensorHandle> weightsHandle = tensorHandleFactory.CreateTensorHandle(kernelDesc);
1799 std::unique_ptr<armnn::ITensorHandle> biasHandle = nullptr;
1800 std::unique_ptr<armnn::ITensorHandle> outputHandle = tensorHandleFactory.CreateTensorHandle(outputTensorInfo);
1801
1802 armnn::DepthwiseConvolution2dQueueDescriptor data;
1803 armnn::WorkloadInfo info;
1804
1805 AllocateAndCopyDataToITensorHandle(weightsHandle.get(), kernelData.data()); // required for ConstantTensor
1806
1807 AddInputToWorkload(data, info, inputTensorInfo, inputHandle.get());
1808 AddInputToWorkload(data, info, kernelDesc, weightsHandle.get());
1809 AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get());
1810
1811 armnn::ScopedTensorHandle biasTensor(biasDesc);
1812 if (biasEnabled)
1813 {
1814 AllocateAndCopyDataToITensorHandle(&biasTensor, bias.data());
1815
1816 biasHandle = tensorHandleFactory.CreateTensorHandle(biasDesc);
1817 AllocateAndCopyDataToITensorHandle(biasHandle.get(), bias.data());
1818 AddInputToWorkload(data, info, biasDesc, biasHandle.get());
1819 }
1820
1821 data.m_Parameters.m_StrideX = strideX;
1822 data.m_Parameters.m_StrideY = strideY;
1823 data.m_Parameters.m_PadLeft = padLeft;
1824 data.m_Parameters.m_PadRight = padRight;
1825 data.m_Parameters.m_PadTop = padTop;
1826 data.m_Parameters.m_PadBottom = padBottom;
1827 data.m_Parameters.m_BiasEnabled = biasEnabled;
1828 data.m_Parameters.m_DataLayout = layout;
1829
1830 std::unique_ptr<armnn::IWorkload> workload
1831 = workloadFactory.CreateWorkload(armnn::LayerType::DepthwiseConvolution2d, data, info);
1832
1833 inputHandle->Allocate();
1834 outputHandle->Allocate();
1835
1836 CopyDataToITensorHandle(inputHandle.get(), inputData.data());
1837
1838 ExecuteWorkload(*workload, memoryManager);
1839
1840 CopyDataFromITensorHandle(actualOutput.data(), outputHandle.get());
1841
1842 return LayerTestResult<T, 4>(actualOutput,
1843 outputData,
1844 outputHandle->GetShape(),
1845 outputTensorInfo.GetShape());
1846 }
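// A minimal reference for the per-channel accumulation the depthwise tests exercise.
// This is a sketch under stated assumptions (the single input plane for the output
// channel is already selected, stride/dilation of 1, no padding, [1, kH, kW, channels]
// weights with the channel index innermost); it is not the implementation Arm NN uses.
namespace
{
template <typename TAcc>
TAcc DepthwiseDotSketch(const TAcc* inPlane, unsigned int inWidth,
                        const TAcc* weights, unsigned int kernelHeight,
                        unsigned int kernelWidth, unsigned int channels,
                        unsigned int c, unsigned int y0, unsigned int x0)
{
    TAcc sum = TAcc(0);
    for (unsigned int ky = 0; ky < kernelHeight; ++ky)
    {
        for (unsigned int kx = 0; kx < kernelWidth; ++kx)
        {
            // [1, kH, kW, channels] layout: the channel index is innermost.
            sum += inPlane[(y0 + ky) * inWidth + (x0 + kx)]
                 * weights[(ky * kernelWidth + kx) * channels + c];
        }
    }
    return sum;
}
} // anonymous namespace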
1847
1848 template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType, typename T = armnn::ResolveType<ArmnnType>>
1849 LayerTestResult<T, 4> DepthwiseConvolution2dDepthMul1TestImpl(
1850 armnn::IWorkloadFactory& workloadFactory,
1851 const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
1852 const armnn::ITensorHandleFactory& tensorHandleFactory,
1853 float qScale,
1854 int32_t qOffset,
1855 bool biasEnabled,
1856 const armnn::DataLayout layout)
1857 {
1858 using B = armnn::ResolveType<ArmnnBType>;
1859
1860 unsigned int inputHeight = 3;
1861 unsigned int inputWidth = 3;
1862 unsigned int inputChannels = 2;
1863 unsigned int inputNum = 1;
1864
1865 unsigned int kernelHeight = 3;
1866 unsigned int kernelWidth = 3;
1867
1868 unsigned int outputHeight = 1;
1869 unsigned int outputWidth = 1;
1870 unsigned int outputChannels = inputChannels;
1871 unsigned int outputNum = inputNum;
1872
1873 armnn::TensorInfo inputTensorInfo =
1874 armnnUtils::GetTensorInfo(inputNum, inputChannels, inputHeight, inputWidth, layout, ArmnnType);
1875 armnn::TensorInfo outputTensorInfo =
1876 armnnUtils::GetTensorInfo(outputNum, outputChannels, outputHeight, outputWidth, layout, ArmnnType);
1877 armnn::TensorInfo kernelDesc({1, kernelHeight, kernelWidth, outputChannels},
1878 ArmnnType);
1879 armnn::TensorInfo biasDesc({ outputChannels }, ArmnnBType);
1880
1881 // Set quantization parameters if the requested type is a quantized type.
1882 if(armnn::IsQuantizedType<T>())
1883 {
1884 inputTensorInfo.SetQuantizationScale(qScale);
1885 inputTensorInfo.SetQuantizationOffset(qOffset);
1886 outputTensorInfo.SetQuantizationScale(qScale);
1887 outputTensorInfo.SetQuantizationOffset(qOffset);
1888 kernelDesc.SetQuantizationScale(qScale);
1889 kernelDesc.SetQuantizationOffset(qOffset);
1890 biasDesc.SetQuantizationScale(qScale*qScale);
1891 biasDesc.SetQuantizationOffset(0);
1892 }
1893 std::vector<T> inputData = std::vector<T>(
1894 QuantizedVector<T>({
1895 1.f, 2.f, 1.f,
1896 2.f, 1.f, 2.f,
1897 1.f, 2.f, 1.f,
1898
1899 1.f, 2.f, 1.f,
1900 2.f, 1.f, 2.f,
1901 1.f, 2.f, 1.f,
1902 },
1903 inputTensorInfo.GetQuantizationScale(),
1904 inputTensorInfo.GetQuantizationOffset()));
1905
1906 // At this point, permute the input data into NHWC if that layout is required.
1907 const armnn::PermutationVector NCHWToNHWC = { 0, 3, 1, 2 };
1908 if (layout == armnn::DataLayout::NHWC)
1909 {
1910 std::vector<T> tmp(inputData.size());
1911 armnnUtils::Permute(inputTensorInfo.GetShape(), NCHWToNHWC, inputData.data(), tmp.data(), sizeof(T));
1912 inputData = tmp;
1913 }
1914
1915 std::vector<B> biasV(QuantizedVector<B>({ 0, 2 },
1916 biasDesc.GetQuantizationScale(),
1917 biasDesc.GetQuantizationOffset()));
1918
1919 std::vector<T> kernelData = std::vector<T>(
1920 QuantizedVector<T>({
1921 1.f, 0.f, 1.f,
1922 0.f, 0.f, 0.f,
1923 -1.f, 0.f, -1.f,
1924
1925 1.f, 0.f, 1.f,
1926 0.f, 0.f, 0.f,
1927 -1.f, 0.f, -1.f,
1928 },
1929 kernelDesc.GetQuantizationScale(),
1930 kernelDesc.GetQuantizationOffset()));
1931
1932 if (workloadFactory.GetBackendId() == armnn::BackendId("GpuAcc") ||
1933 workloadFactory.GetBackendId() == armnn::BackendId("CpuAcc"))
1934 {
1935 if (layout == armnn::DataLayout::NCHW)
1936 {
1937 std::vector<T> tmp(kernelData.size());
1938 kernelDesc.SetShape(armnnUtils::Permuted(kernelDesc.GetShape(), {0, 2, 3, 1}));
1939 armnnUtils::Permute(kernelDesc.GetShape(), {0, 2, 3, 1}, kernelData.data(), tmp.data(), sizeof(T));
1940 kernelData = tmp;
1941 }
1942 }
1943
1944 // Manually calculated.
1945 std::vector<T> outputImage(
1946 QuantizedVector<T>({ 0.f, 0.f },
1947 outputTensorInfo.GetQuantizationScale(),
1948 outputTensorInfo.GetQuantizationOffset())
1949 );
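    // Worked check: with no padding the single 3x3 window covers the whole image, so
    // per channel the dot product is (1*1 + 0*2 + 1*1) + (0*2 + 0*1 + 0*2)
    // + (-1*1 + 0*2 + -1*1) = 2 + 0 - 2 = 0, which gives the two zeros above.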
1950
1951 // Optionally apply bias to output image.
1952 if(biasEnabled)
1953 {
1954 ApplyBias(outputImage, outputTensorInfo.GetQuantizationScale(), outputTensorInfo.GetQuantizationOffset(),
1955 biasV, biasDesc.GetQuantizationScale(), biasDesc.GetQuantizationOffset(),
1956 outputWidth, outputHeight);
1957 }
1958
1959 if (layout == armnn::DataLayout::NHWC)
1960 {
1961 std::vector<T> tmp(outputImage.size());
1962 armnnUtils::Permute(outputTensorInfo.GetShape(), NCHWToNHWC, outputImage.data(), tmp.data(), sizeof(T));
1963 outputImage = tmp;
1964 }
1965
1966 std::vector<T> actualOutput(outputTensorInfo.GetNumElements());
1967
1968 std::unique_ptr<armnn::ITensorHandle> inputHandle = tensorHandleFactory.CreateTensorHandle(inputTensorInfo);
1969 std::unique_ptr<armnn::ITensorHandle> weightsHandle = tensorHandleFactory.CreateTensorHandle(kernelDesc);
1970 std::unique_ptr<armnn::ITensorHandle> biasHandle = nullptr;
1971 std::unique_ptr<armnn::ITensorHandle> outputHandle = tensorHandleFactory.CreateTensorHandle(outputTensorInfo);
1972
1973 armnn::DepthwiseConvolution2dQueueDescriptor data;
1974 armnn::WorkloadInfo info;
1975
1976 AllocateAndCopyDataToITensorHandle(weightsHandle.get(), kernelData.data()); // required for ConstantTensor
1977
1978 AddInputToWorkload(data, info, inputTensorInfo, inputHandle.get());
1979 AddInputToWorkload(data, info, kernelDesc, weightsHandle.get());
1980 AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get());
1981
1982 armnn::ScopedTensorHandle biasTensor(biasDesc);
1983 if (biasEnabled)
1984 {
1985 AllocateAndCopyDataToITensorHandle(&biasTensor, biasV.data());
1986
1987 biasHandle = tensorHandleFactory.CreateTensorHandle(biasDesc);
1988 AllocateAndCopyDataToITensorHandle(biasHandle.get(), biasV.data());
1989 AddInputToWorkload(data, info, biasDesc, biasHandle.get());
1990 }
1991
1992 data.m_Parameters.m_StrideX = 1;
1993 data.m_Parameters.m_StrideY = 1;
1994 data.m_Parameters.m_PadLeft = 0;
1995 data.m_Parameters.m_PadRight = 0;
1996 data.m_Parameters.m_PadTop = 0;
1997 data.m_Parameters.m_PadBottom = 0;
1998 data.m_Parameters.m_BiasEnabled = biasEnabled;
1999 data.m_Parameters.m_DataLayout = layout;
2000
2001 std::unique_ptr<armnn::IWorkload> workload
2002 = workloadFactory.CreateWorkload(armnn::LayerType::DepthwiseConvolution2d, data, info);
2003
2004 inputHandle->Allocate();
2005 outputHandle->Allocate();
2006
2007 CopyDataToITensorHandle(inputHandle.get(), inputData.data());
2008
2009 ExecuteWorkload(*workload, memoryManager);
2010
2011 CopyDataFromITensorHandle(actualOutput.data(), outputHandle.get());
2012
2013 return LayerTestResult<T, 4>(actualOutput,
2014 outputImage,
2015 outputHandle->GetShape(),
2016 outputTensorInfo.GetShape());
2017 }
2018
2019 template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType, typename T = armnn::ResolveType<ArmnnType>>
2020 LayerTestResult<T, 4> DepthwiseConvolution2dTestImpl(
2021 armnn::IWorkloadFactory& workloadFactory,
2022 const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
2023 const armnn::ITensorHandleFactory& tensorHandleFactory,
2024 float qScale,
2025 int32_t qOffset,
2026 bool biasEnabled,
2027 const armnn::DataLayout layout)
2028 {
2029 using B = armnn::ResolveType<ArmnnBType>;
2030
2031 unsigned int depthMultiplier = 2;
2032
2033 unsigned int inputHeight = 8;
2034 unsigned int inputWidth = 16;
2035 unsigned int inputChannels = 2;
2036 unsigned int inputBatchSize = 1;
2037
2038 unsigned int kernelHeight = 5;
2039 unsigned int kernelWidth = 3;
2040
2041 unsigned int outputHeight = inputHeight - kernelHeight + 1 + 2;
2042 unsigned int outputWidth = (inputWidth - kernelWidth + 1)/2;
2043 unsigned int outputChannels = inputChannels * depthMultiplier;
2044 unsigned int outputBatchSize = inputBatchSize;
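    // Worked sizes, using the padding (top/bottom 1) and strides (X 2, Y 1) set on the
    // descriptor below: outputHeight = 8 - 5 + 1 + 2 = 6, outputWidth = (16 - 3 + 1)/2 = 7.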
2045
2046 armnn::TensorInfo inputTensorInfo = armnnUtils::GetTensorInfo(
2047 inputBatchSize, inputChannels, inputHeight, inputWidth, layout, ArmnnType);
2048 armnn::TensorInfo outputTensorInfo = armnnUtils::GetTensorInfo(
2049 outputBatchSize, outputChannels, outputHeight, outputWidth, layout, ArmnnType);
2050 armnn::TensorInfo kernelDesc({1, kernelHeight, kernelWidth, outputChannels},
2051 ArmnnType);
2052 armnn::TensorInfo biasDesc({outputChannels}, ArmnnBType);
2053
2054 // Set quantization parameters if the requested type is a quantized type.
2055 if(armnn::IsQuantizedType<T>())
2056 {
2057 inputTensorInfo.SetQuantizationScale(qScale);
2058 inputTensorInfo.SetQuantizationOffset(qOffset);
2059 outputTensorInfo.SetQuantizationScale(qScale);
2060 outputTensorInfo.SetQuantizationOffset(qOffset);
2061 kernelDesc.SetQuantizationScale(qScale);
2062 kernelDesc.SetQuantizationOffset(qOffset);
2063 biasDesc.SetQuantizationScale(qScale*qScale);
2064 biasDesc.SetQuantizationOffset(0);
2065 }
2066
2067 // NOTE: originalInputData is in NCHW format
2068 std::vector<T> originalInputData = std::vector<T>(
2069 QuantizedVector<T>({
2070 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f,
2071 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
2072 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f,
2073 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f,
2074 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f,
2075 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f,
2076 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f,
2077 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f,
2078 0.0f, 0.0f, 1.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
2079 0.0f, 0.0f, 1.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
2080 0.0f, 0.0f, 1.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
2081 0.0f, 0.0f, 1.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
2082 0.0f, 0.0f, 1.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
2083 0.0f, 0.0f, 1.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
2084 0.0f, 0.0f, 1.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
2085 0.0f, 0.0f, 1.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f
2086 },
2087 inputTensorInfo.GetQuantizationScale(),
2088 inputTensorInfo.GetQuantizationOffset()));
2089
2090 std::vector<T> inputData = originalInputData;
2091 // At this point, permute the input data into NHWC if that layout is required.
2092 const armnn::PermutationVector NCHWToNHWC = { 0, 3, 1, 2 };
2093 if (layout == armnn::DataLayout::NHWC)
2094 {
2095 armnnUtils::Permute(inputTensorInfo.GetShape(), NCHWToNHWC,
2096 originalInputData.data(), inputData.data(), sizeof(T));
2097 }
2098
2099 std::vector<B> biasV = QuantizedVector<B>({ 0, 2, 1, -1 },
2100 biasDesc.GetQuantizationScale(),
2101 biasDesc.GetQuantizationOffset());
2102
2103 std::vector<T> kernelData = std::vector<T>(
2104 QuantizedVector<T>({
2105 1, 1, 1,
2106 1, -1, 1,
2107 1, 1, 1,
2108 1, 1, 1,
2109 1, 1, 1,
2110
2111 2, 2, 2,
2112 2, 2, 2,
2113 2, 2, 2,
2114 2, 2, 2,
2115 2, 2, 2,
2116
2117 0, 0, 0,
2118 0, -1, 0,
2119 0, 0, 0,
2120 0, 0, 0,
2121 0, 0, 0,
2122
2123 0, 0, 0,
2124 0, 0, 0,
2125 0, 1, 0,
2126 0, 0, 0,
2127 0, 0, 0
2128 },
2129 kernelDesc.GetQuantizationScale(),
2130 kernelDesc.GetQuantizationOffset()));
2131
2132 if (workloadFactory.GetBackendId() == armnn::BackendId("GpuAcc") ||
2133 workloadFactory.GetBackendId() == armnn::BackendId("CpuAcc"))
2134 {
2135 if (layout == armnn::DataLayout::NCHW)
2136 {
2137 std::vector<T> tmp(kernelData.size());
2138 kernelDesc.SetShape(armnnUtils::Permuted(kernelDesc.GetShape(), {0, 2, 3, 1}));
2139 armnnUtils::Permute(kernelDesc.GetShape(), {0, 2, 3, 1}, kernelData.data(), tmp.data(), sizeof(T));
2140 kernelData = tmp;
2141 }
2142 }
2143
2144 // Manually calculated.
2145 std::vector<T> originalOutputImage = std::vector<T>(
2146 QuantizedVector<T>({
2147 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
2148 5, 5, 5, 5, 5, 5, 5, 5.5, 5.5, 5.5, 5.5, 5.5, 5.5, 5.5,
2149 5.5, 5.5, 5.5, 5.5, 5.5, 5.5, 5.5, 5, 5, 5, 5, 5, 5, 5,
2150 2.5, 2.5, 2.5, 2.5, 2.5, 2.5, 2.5, 3.5, 3.5, 3.5, 3.5, 3.5, 3.5, 3.5,
2151 4.5, 4.5, 4.5, 4.5, 4.5, 4.5, 4.5, 6, 6, 6, 6, 6, 6, 6,
2152 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
2153 1, 3, 0, 0, 0, 0, 0, 2, 4, 0, 0, 0, 0, 0,
2154 2, 4, 0, 0, 0, 0, 0, 2, 4, 0, 0, 0, 0, 0,
2155 2, 4, 0, 0, 0, 0, 0, 2, 4, 0, 0, 0, 0, 0,
2156 2, 4, 0, 0, 0, 0, 0, 3, 5, 0, 0, 0, 0, 0,
2157 3, 5, 0, 0, 0, 0, 0, 3, 5, 0, 0, 0, 0, 0,
2158 3, 5, 0, 0, 0, 0, 0, 3, 5, 0, 0, 0, 0, 0
2159 },
2160 outputTensorInfo.GetQuantizationScale(),
2161 outputTensorInfo.GetQuantizationOffset()));
2162
2163 // Optionally apply bias to output image.
2164 if(biasEnabled)
2165 {
2166 ApplyBias(originalOutputImage,
2167 outputTensorInfo.GetQuantizationScale(),
2168 outputTensorInfo.GetQuantizationOffset(),
2169 biasV,
2170 biasDesc.GetQuantizationScale(),
2171 biasDesc.GetQuantizationOffset(),
2172 outputWidth,
2173 outputHeight);
2174 }
2175
2176 std::vector<T> outputImage = originalOutputImage;
2177 if (layout == armnn::DataLayout::NHWC)
2178 {
2179 armnnUtils::Permute(outputTensorInfo.GetShape(), NCHWToNHWC,
2180 originalOutputImage.data(), outputImage.data(), sizeof(T));
2181 }
2182
2183 std::vector<T> actualOutput(outputTensorInfo.GetNumElements());
2184
2185 std::unique_ptr<armnn::ITensorHandle> inputHandle = tensorHandleFactory.CreateTensorHandle(inputTensorInfo);
2186 std::unique_ptr<armnn::ITensorHandle> weightsHandle = tensorHandleFactory.CreateTensorHandle(kernelDesc);
2187 std::unique_ptr<armnn::ITensorHandle> biasHandle = nullptr;
2188 std::unique_ptr<armnn::ITensorHandle> outputHandle = tensorHandleFactory.CreateTensorHandle(outputTensorInfo);
2189
2190 armnn::DepthwiseConvolution2dQueueDescriptor data;
2191 armnn::WorkloadInfo info;
2192
2193 AllocateAndCopyDataToITensorHandle(weightsHandle.get(), kernelData.data()); // required for ConstantTensor
2194
2195 AddInputToWorkload(data, info, inputTensorInfo, inputHandle.get());
2196 AddInputToWorkload(data, info, kernelDesc, weightsHandle.get());
2197 AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get());
2198
2199 armnn::ScopedTensorHandle biasTensor(biasDesc);
2200 if (biasEnabled)
2201 {
2202 AllocateAndCopyDataToITensorHandle(&biasTensor, biasV.data());
2203
2204 biasHandle = tensorHandleFactory.CreateTensorHandle(biasDesc);
2205 AllocateAndCopyDataToITensorHandle(biasHandle.get(), biasV.data());
2206 AddInputToWorkload(data, info, biasDesc, biasHandle.get());
2207 }
2208
2209 data.m_Parameters.m_StrideX = 2;
2210 data.m_Parameters.m_StrideY = 1;
2211 data.m_Parameters.m_PadLeft = 0;
2212 data.m_Parameters.m_PadRight = 0;
2213 data.m_Parameters.m_PadTop = 1;
2214 data.m_Parameters.m_PadBottom = 1;
2215 data.m_Parameters.m_BiasEnabled = biasEnabled;
2216 data.m_Parameters.m_DataLayout = layout;
2217
2218 std::unique_ptr<armnn::IWorkload> workload
2219 = workloadFactory.CreateWorkload(armnn::LayerType::DepthwiseConvolution2d, data, info);
2220
2221 inputHandle->Allocate();
2222 outputHandle->Allocate();
2223
2224 CopyDataToITensorHandle(inputHandle.get(), inputData.data());
2225
2226 ExecuteWorkload(*workload, memoryManager);
2227
2228 CopyDataFromITensorHandle(actualOutput.data(), outputHandle.get());
2229
2230 return LayerTestResult<T, 4>(actualOutput,
2231 outputImage,
2232 outputHandle->GetShape(),
2233 outputTensorInfo.GetShape());
2234
2235 }
2236
2237 template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType,
2238 typename T = armnn::ResolveType<ArmnnType>, typename B = armnn::ResolveType<ArmnnBType>>
2239 LayerTestResult<T, 4> DepthwiseConvolution2dTestImpl(
2240 armnn::IWorkloadFactory& workloadFactory,
2241 const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
2242 const armnn::ITensorHandleFactory& tensorHandleFactory,
2243 const std::vector<T>& originalInput,
2244 const std::vector<T>& originalKernel,
2245 const std::vector<B>& bias,
2246 const std::vector<T>& originalOutputExpected,
2247 const armnn::TensorShape& originalInputShape,
2248 const armnn::TensorShape& originalKernelShape,
2249 const armnn::TensorShape& originalOutputExpectedShape,
2250 float qScale,
2251 int32_t qOffset,
2252 const armnn::DataLayout layout = armnn::DataLayout::NCHW,
2253 uint32_t padLeft = 0,
2254 uint32_t padTop = 0,
2255 uint32_t padRight = 0,
2256 uint32_t padBottom = 0,
2257 uint32_t strideX = 1,
2258 uint32_t strideY = 1,
2259 uint32_t dilationX = 1,
2260 uint32_t dilationY = 1)
2261 {
2262 unsigned int inputHeight = armnn::numeric_cast<unsigned int>(originalInputShape[2]);
2263 unsigned int inputWidth = armnn::numeric_cast<unsigned int>(originalInputShape[3]);
2264 unsigned int inputChannels = armnn::numeric_cast<unsigned int>(originalInputShape[1]);
2265 unsigned int inputNum = armnn::numeric_cast<unsigned int>(originalInputShape[0]);
2266
2267 unsigned int outputHeight = armnn::numeric_cast<unsigned int>(originalOutputExpectedShape[2]);
2268 unsigned int outputWidth = armnn::numeric_cast<unsigned int>(originalOutputExpectedShape[3]);
2269 unsigned int outputChannels = armnn::numeric_cast<unsigned int>(originalOutputExpectedShape[1]);
2270 unsigned int outputNum = armnn::numeric_cast<unsigned int>(originalOutputExpectedShape[0]);
2271
2272 unsigned int kernelHeight = armnn::numeric_cast<unsigned int>(originalKernelShape[1]);
2273 unsigned int kernelWidth = armnn::numeric_cast<unsigned int>(originalKernelShape[2]);
2274 unsigned int kernelChannels = armnn::numeric_cast<unsigned int>(originalKernelShape[3]);
2275
2276 bool biasEnabled = bias.size() > 0;
2277
2278 // This function currently assumes 1 batch of input/output (and duplicates this into 2 batches).
2279 ARMNN_ASSERT(inputNum == 1);
2280 ARMNN_ASSERT(outputNum == 1);
2281
2282 // If a bias is used, its size must equal the number of output channels.
2283 ARMNN_ASSERT(!biasEnabled || bias.size() == outputChannels);
2284
2285
2286 // Note these tensors will use two (identical) batches.
2287 armnn::TensorInfo inputTensorInfo =
2288 armnnUtils::GetTensorInfo(2*inputNum, inputChannels, inputHeight, inputWidth, layout, ArmnnType);
2289 armnn::TensorInfo outputTensorInfo =
2290 armnnUtils::GetTensorInfo(2*outputNum, outputChannels, outputHeight, outputWidth, layout, ArmnnType);
2291
2292 // Depthwise convolution weights are always expected in [1, H, W, I*M] layout, independently of the layout of the input and output.
2293 armnn::TensorInfo kernelDesc({1, kernelHeight, kernelWidth, kernelChannels}, ArmnnType);
2294
2295 armnn::TensorInfo biasDesc({static_cast<unsigned int>(bias.size())}, ArmnnBType);
2296
2297 // Set quantization parameters if the requested type is a quantized type.
2298 if(armnn::IsQuantizedType<T>())
2299 {
2300 inputTensorInfo.SetQuantizationScale(qScale);
2301 inputTensorInfo.SetQuantizationOffset(qOffset);
2302 outputTensorInfo.SetQuantizationScale(qScale);
2303 outputTensorInfo.SetQuantizationOffset(qOffset);
2304 kernelDesc.SetQuantizationScale(qScale);
2305 kernelDesc.SetQuantizationOffset(qOffset);
2306 biasDesc.SetQuantizationScale(qScale*qScale);
2307 biasDesc.SetQuantizationOffset(0);
2308 }
2309
2310 std::vector<T> kernelData;
2311 kernelData.assign(originalKernel.data(), originalKernel.data() + kernelHeight*kernelWidth*outputChannels);
2312 if (workloadFactory.GetBackendId() == armnn::BackendId("GpuAcc") ||
2313 workloadFactory.GetBackendId() == armnn::BackendId("CpuAcc"))
2314 {
2315 if (layout == armnn::DataLayout::NCHW)
2316 {
2317 std::vector<T> tmp(kernelData.size());
2318 kernelDesc.SetShape(armnnUtils::Permuted(kernelDesc.GetShape(), {0, 2, 3, 1}));
2319 armnnUtils::Permute(kernelDesc.GetShape(), {0, 2, 3, 1}, kernelData.data(), tmp.data(), sizeof(T));
2320 kernelData = tmp;
2321 }
2322 }
2323
2324 // Construct input data
2325 std::vector<T> input;
2326 input.assign(originalInput.data(), originalInput.data() + 1*inputChannels*inputHeight*inputWidth);
2327 std::vector<T> inputData;
2328 inputData.insert(inputData.end(), input.begin(), input.end());
2329 inputData.insert(inputData.end(), input.begin(), input.end());
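    // inputData now holds the single source batch twice, matching the 2*inputNum batch
    // dimension of inputTensorInfo above.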
2330
2331 // At this point, permute the input data into NHWC if that layout is required.
2332 const armnn::PermutationVector NCHWToNHWC = { 0, 3, 1, 2 };
2333 if (layout == armnn::DataLayout::NHWC)
2334 {
2335 std::vector<T> tmp(inputData.size());
2336 armnnUtils::Permute(inputTensorInfo.GetShape(), NCHWToNHWC, inputData.data(), tmp.data(), sizeof(T));
2337 inputData = tmp;
2338 }
2339
2340 std::vector<T> output;
2341 output.assign(originalOutputExpected.data(),
2342 originalOutputExpected.data() + outputChannels*outputHeight*outputWidth);
2343
2344 // Apply bias to output data if it is enabled.
2345 if(biasEnabled)
2346 {
2347 std::vector<T> biasV;
2348 biasV.assign(bias.data(), bias.data() + outputChannels);
2349 ApplyBias(output, outputTensorInfo.GetQuantizationScale(), outputTensorInfo.GetQuantizationOffset(),
2350 biasV, biasDesc.GetQuantizationScale(), biasDesc.GetQuantizationOffset(),
2351 outputWidth, outputHeight);
2352 }
2353
2354 std::vector<T> actualOutput(outputTensorInfo.GetNumElements());
2355
2356 // Construct expected output data
2357 std::vector<T> outputData;
2358 outputData.insert(outputData.end(), output.begin(), output.end());
2359 outputData.insert(outputData.end(), output.begin(), output.end());
2360
2361 // At this point, permute the expected output into NHWC if that layout is required.
2362 if (layout == armnn::DataLayout::NHWC)
2363 {
2364 std::vector<T> tmp(outputData.size());
2365 armnnUtils::Permute(outputTensorInfo.GetShape(), NCHWToNHWC, outputData.data(), tmp.data(), sizeof(T));
2366 outputData = tmp;
2367 }
2368
2369 std::unique_ptr<armnn::ITensorHandle> inputHandle = tensorHandleFactory.CreateTensorHandle(inputTensorInfo);
2370 std::unique_ptr<armnn::ITensorHandle> weightsHandle = tensorHandleFactory.CreateTensorHandle(kernelDesc);
2371 std::unique_ptr<armnn::ITensorHandle> biasHandle = nullptr;
2372 std::unique_ptr<armnn::ITensorHandle> outputHandle = tensorHandleFactory.CreateTensorHandle(outputTensorInfo);
2373
2374 armnn::DepthwiseConvolution2dQueueDescriptor data;
2375 armnn::WorkloadInfo info;
2376
2377 AllocateAndCopyDataToITensorHandle(weightsHandle.get(), kernelData.data()); // required for ConstantTensor
2378
2379 AddInputToWorkload(data, info, inputTensorInfo, inputHandle.get());
2380 AddInputToWorkload(data, info, kernelDesc, weightsHandle.get());
2381 AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get());
2382
2383 armnn::ScopedTensorHandle biasTensor(biasDesc);
2384 if (biasEnabled)
2385 {
2386 AllocateAndCopyDataToITensorHandle(&biasTensor, bias.data());
2387
2388 biasHandle = tensorHandleFactory.CreateTensorHandle(biasDesc);
2389 AllocateAndCopyDataToITensorHandle(biasHandle.get(), bias.data());
2390 AddInputToWorkload(data, info, biasDesc, biasHandle.get());
2391 }
2392
2393 data.m_Parameters.m_StrideX = strideX;
2394 data.m_Parameters.m_StrideY = strideY;
2395 data.m_Parameters.m_PadLeft = padLeft;
2396 data.m_Parameters.m_PadRight = padRight;
2397 data.m_Parameters.m_PadTop = padTop;
2398 data.m_Parameters.m_PadBottom = padBottom;
2399 data.m_Parameters.m_BiasEnabled = biasEnabled;
2400 data.m_Parameters.m_DataLayout = layout;
2401 data.m_Parameters.m_DilationX = dilationX;
2402 data.m_Parameters.m_DilationY = dilationY;
2403
2404 std::unique_ptr<armnn::IWorkload> workload
2405 = workloadFactory.CreateWorkload(armnn::LayerType::DepthwiseConvolution2d, data, info);
2406
2407 inputHandle->Allocate();
2408 outputHandle->Allocate();
2409
2410 CopyDataToITensorHandle(inputHandle.get(), inputData.data());
2411
2412 ExecuteWorkload(*workload, memoryManager);
2413
2414 CopyDataFromITensorHandle(actualOutput.data(), outputHandle.get());
2415
2416 return LayerTestResult<T, 4>(actualOutput,
2417 outputData,
2418 outputHandle->GetShape(),
2419 outputTensorInfo.GetShape());
2420 }
2421
2422 template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType,
2423 typename T = armnn::ResolveType<ArmnnType>>
2424 LayerTestResult<T, 4> DepthwiseConvolution2dAsymmetricTestCommon(
2425 armnn::IWorkloadFactory& workloadFactory,
2426 const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
2427 const armnn::ITensorHandleFactory& tensorHandleFactory,
2428 float qScale,
2429 int32_t qOffset,
2430 bool biasEnabled,
2431 const armnn::DataLayout layout)
2432 {
2433 // Use a single-batch 2-channel 5x5 image as input.
2434 armnn::TensorInfo inputTensorInfo({ 1, 2, 5, 5 }, ArmnnType);
2435 auto input = QuantizedVector<T>(
2436 {
2437 0, 1, 2, 3, 4,
2438 5, 6, 7, 8, 9,
2439 10, 11, 12, 13, 14,
2440 15, 16, 17, 18, 19,
2441 20, 21, 22, 23, 24,
2442
2443 25, 26, 27, 28, 29,
2444 30, 31, 32, 33, 34,
2445 35, 36, 37, 38, 39,
2446 40, 41, 42, 43, 44,
2447 45, 46, 47, 48, 49
2448 },
2449 inputTensorInfo.GetQuantizationScale(),
2450 inputTensorInfo.GetQuantizationOffset());
2451
2452 // Use a depth multiplier of 1 on a 2-channel 4x4 kernel.
2453 // Weights layout for depthwise: [1,H,W,I*M]
2454 armnn::TensorInfo kernelTensorInfo({ 1, 4, 4, 2 }, ArmnnType);
2455 auto kernel = QuantizedVector<T>({
2456 32, 31, 30, 29,
2457 28, 27, 26, 25,
2458 24, 23, 22, 21,
2459 20, 19, 18, 17,
2460
2461 16, 15, 14, 13,
2462 12, 11, 10, 9,
2463 8, 7, 6, 5,
2464 4, 3, 2, 1
2465 },
2466 kernelTensorInfo.GetQuantizationScale(),
2467 kernelTensorInfo.GetQuantizationOffset());
2468
2469 // Expected output is 1 batch of a 2-channel 5x5 image.
2470 // Calculated using the Python TensorFlow library with strideX=1, strideY=1.
2471 armnn::TensorInfo outputTensorInfo({ 1, 2, 5, 5 }, ArmnnType);
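    // Worked size: with the asymmetric padding used below (left/top 1, right/bottom 2),
    // each spatial dimension is (5 + 1 + 2 - 4)/1 + 1 = 5, so the output stays 5x5.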
2472 auto expectedOutput = QuantizedVector<T>(
2473 {
             396,  664,  820,  756,  602,
            1016, 1608, 1880, 1652, 1268,
            1976, 2968, 3240, 2732, 2028,
            2628, 3808, 4060, 3312, 2390,
            2596, 3700, 3900, 3130, 2226,

            2817, 4186, 4330, 3609, 2651,
            5414, 7864, 8120, 6626, 4780,
            6314, 9144, 9400, 7646, 5500,
            6759, 9610, 9850, 7875, 5579,
            5935, 8348, 8540, 6757, 4742
2478 },
2479 outputTensorInfo.GetQuantizationScale(),
2480 outputTensorInfo.GetQuantizationOffset());
2481
2482 return DepthwiseConvolution2dAsymmetricTestImpl<ArmnnType, ArmnnBType>(
2483 workloadFactory,
2484 memoryManager,
2485 tensorHandleFactory,
2486 input,
2487 kernel,
2488 GetBias2<ArmnnBType>(biasEnabled, qScale * qScale),
2489 expectedOutput,
2490 inputTensorInfo.GetShape(),
2491 kernelTensorInfo.GetShape(),
2492 outputTensorInfo.GetShape(),
2493 qScale,
2494 qOffset,
2495 layout,
2496 1, // Padding left.
2497 1, // Padding top.
2498 2, // Padding right.
2499 2, // Padding bottom.
2500 1, // strideX
2501 1); // strideY
2502 }
2503
2504 template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType,
2505 typename T = armnn::ResolveType<ArmnnType>>
2506 LayerTestResult<T, 4> DepthwiseConvolution2dNhwcTestCommon(
2507 armnn::IWorkloadFactory& workloadFactory,
2508 const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
2509 const armnn::ITensorHandleFactory& tensorHandleFactory,
2510 float qScale,
2511 int32_t qOffset,
2512 bool biasEnabled)
2513 {
2514 auto layout = armnn::DataLayout::NHWC;
2515
2516 armnn::TensorInfo inputTensorInfo({ 1, 2, 5, 5}, ArmnnType);
2517 auto input = QuantizedVector<T>(
2518 {
2519 0, 1, 2, 3, 4,
2520 5, 6, 7, 8, 9,
2521 10, 11, 12, 13, 14,
2522 15, 16, 17, 18, 19,
2523 20, 21, 22, 23, 24,
2524
2525 25, 26, 27, 28, 29,
2526 30, 31, 32, 33, 34,
2527 35, 36, 37, 38, 39,
2528 40, 41, 42, 43, 44,
2529 45, 46, 47, 48, 49
2530 },
2531 inputTensorInfo.GetQuantizationScale(),
2532 inputTensorInfo.GetQuantizationOffset());
2533
2534 armnn::TensorInfo kernelTensorInfo({ 1, 4, 4, 2 }, ArmnnType);
2535 auto kernel = QuantizedVector<T>({
2536 32, 31, 30, 29,
2537 28, 27, 26, 25,
2538 24, 23, 22, 21,
2539 20, 19, 18, 17,
2540
2541 16, 15, 14, 13,
2542 12, 11, 10, 9,
2543 8, 7, 6, 5,
2544 4, 3, 2, 1
2545 },
2546 kernelTensorInfo.GetQuantizationScale(),
2547 kernelTensorInfo.GetQuantizationOffset());
2548
2549 armnn::TensorInfo outputTensorInfo({ 1, 2, 5, 5}, ArmnnType);
2550 auto expectedOutput = QuantizedVector<T>(
2551 {
2552                  396,  664,  820,  756,  602,
2553                 1016, 1608, 1880, 1652, 1268,
2554                 1976, 2968, 3240, 2732, 2028,
2555                 2628, 3808, 4060, 3312, 2390,
2556                 2596, 3700, 3900, 3130, 2226,
2557
2558                 2817, 4186, 4330, 3609, 2651,
2559                 5414, 7864, 8120, 6626, 4780,
2560                 6314, 9144, 9400, 7646, 5500,
2561                 6759, 9610, 9850, 7875, 5579,
2562                 5935, 8348, 8540, 6757, 4742
2563 },
2564 outputTensorInfo.GetQuantizationScale(),
2565 outputTensorInfo.GetQuantizationOffset());
2566
2567 return DepthwiseConvolution2dTestImpl<ArmnnType, ArmnnBType>(
2568 workloadFactory,
2569 memoryManager,
2570 tensorHandleFactory,
2571 input,
2572 kernel,
2573 GetBias2<ArmnnBType>(biasEnabled, qScale * qScale),
2574 expectedOutput,
2575 inputTensorInfo.GetShape(),
2576 kernelTensorInfo.GetShape(),
2577 outputTensorInfo.GetShape(),
2578 qScale,
2579 qOffset,
2580 layout,
2581 1, // Padding left.
2582 1, // Padding top.
2583 2, // Padding right.
2584 2, // Padding bottom.
2585 1, // strideX
2586 1); // strideY
2587 }
2588
2589 template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType,
2590 typename T = armnn::ResolveType<ArmnnType>>
2591 LayerTestResult<T, 4> SimpleDepthwiseConvolution2d3x3Dilation3x3NhwcTestCommon(
2592 armnn::IWorkloadFactory& workloadFactory,
2593 const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
2594 const armnn::ITensorHandleFactory& tensorHandleFactory,
2595 float qScale,
2596 int32_t qOffset,
2597 bool biasEnabled)
2598 {
2599 auto layout = armnn::DataLayout::NHWC;
2600
2601 armnn::TensorInfo inputTensorInfo({ 1, 1, 9, 9 }, ArmnnType);
2602 auto input = QuantizedVector<T>(
2603 {
2604 0, 0, 0, 0, 0, 0, 0, 0, 0,
2605 0, 0, 0, 0, 0, 0, 0, 0, 0,
2606 0, 0, 0, 0, 0, 0, 0, 0, 0,
2607 0, 0, 0, 1, 1, 1, 0, 0, 0,
2608 0, 0, 0, 1, 1, 1, 0, 0, 0,
2609 0, 0, 0, 1, 1, 1, 0, 0, 0,
2610 0, 0, 0, 0, 0, 0, 0, 0, 0,
2611 0, 0, 0, 0, 0, 0, 0, 0, 0,
2612 0, 0, 0, 0, 0, 0, 0, 0, 0
2613 },
2614 inputTensorInfo.GetQuantizationScale(),
2615 inputTensorInfo.GetQuantizationOffset());
2616
2617 armnn::TensorInfo kernelTensorInfo({ 1, 3, 3, 1}, ArmnnType);
2618 auto kernel = QuantizedVector<T>({
2619 1, 2, 3,
2620 4, 5, 6,
2621 7, 8, 9
2622 },
2623 kernelTensorInfo.GetQuantizationScale(),
2624 kernelTensorInfo.GetQuantizationOffset());
2625
2626 uint32_t padLeft = 0;
2627 uint32_t padTop = 0;
2628 uint32_t padRight = 0;
2629 uint32_t padBottom = 0;
2630 uint32_t strideX = 1;
2631 uint32_t strideY = 1;
2632 uint32_t dilationX = 3;
2633 uint32_t dilationY = 3;
2634
2635 // With a dilation rate of 3 the output shrinks from 9x9 to a 3x3 image in which every element is 5.
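    // (Effective kernel size = (K - 1)*dilation + 1 = (3 - 1)*3 + 1 = 7, so the output
    // is (9 - 7)/1 + 1 = 3 per spatial dimension.)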
2636 armnn::TensorInfo outputTensorInfo({ 1, 1, 3, 3 }, ArmnnType);
2637 auto expectedOutput = QuantizedVector<T>(
2638 {
2639 5, 5, 5,
2640 5, 5, 5,
2641 5, 5, 5
2642 },
2643 outputTensorInfo.GetQuantizationScale(),
2644 outputTensorInfo.GetQuantizationOffset());
2645
2646 return DepthwiseConvolution2dTestImpl<ArmnnType, ArmnnBType>(
2647 workloadFactory,
2648 memoryManager,
2649 tensorHandleFactory,
2650 input,
2651 kernel,
2652 GetBias2<ArmnnBType>(biasEnabled, qScale * qScale),
2653 expectedOutput,
2654 inputTensorInfo.GetShape(),
2655 kernelTensorInfo.GetShape(),
2656 outputTensorInfo.GetShape(),
2657 qScale,
2658 qOffset,
2659 layout,
2660 padLeft,
2661 padTop,
2662 padRight,
2663 padBottom,
2664 strideX,
2665 strideY,
2666 dilationX,
2667 dilationY);
2668 }
2669
2670 template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType, typename T = armnn::ResolveType<ArmnnType>>
2671 LayerTestResult<T, 4> DepthwiseConvolution2d3x3DilationTestCommon(
2672 armnn::IWorkloadFactory& workloadFactory,
2673 const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
2674 const armnn::ITensorHandleFactory& tensorHandleFactory,
2675 const std::vector<float>& inputNoQuantizedValues,
2676 armnn::TensorInfo& inputTensorInfo,
2677 const std::vector<float>& kernelNoQuantizedValues,
2678 armnn::TensorInfo& kernelTensorInfo,
2679 const std::vector<float>& outputExpectedNoQuantizedValues,
2680 armnn::TensorInfo& outputTensorInfo,
2681 uint32_t dilationX,
2682 uint32_t dilationY,
2683 armnn::DataLayout layout = armnn::DataLayout::NCHW,
2684 bool biasEnabled = false)
2685 {
2686 float qScale;
2687 int32_t qOffset;
2688 switch (ArmnnType)
2689 {
2690 case armnn::DataType::QAsymmS8:
2691 case armnn::DataType::QAsymmU8:
2692 {
2693 qScale = 0.1f;
2694 qOffset = 128;
2695 break;
2696 }
2697 case armnn::DataType::QSymmS16:
2698 {
2699 qScale = 0.1f;
2700 qOffset = 0;
2701 break;
2702 }
2703 case armnn::DataType::Float32:
2704 default:
2705 {
2706 qScale = 0.f;
2707 qOffset = 0;
2708 break;
2709 }
2710 }
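    // For Float32 the scale/offset chosen above are effectively unused: QuantizedVector
    // only quantizes when T is a quantized type, so 0.f/0 passes the values through.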
2711
2712 inputTensorInfo.SetQuantizationScale(qScale);
2713 inputTensorInfo.SetQuantizationOffset(qOffset);
2714 kernelTensorInfo.SetQuantizationScale(qScale);
2715 kernelTensorInfo.SetQuantizationOffset(qOffset);
2716 outputTensorInfo.SetQuantizationScale(qScale);
2717 outputTensorInfo.SetQuantizationOffset(qOffset);
2718
2719 auto input = QuantizedVector<T>(inputNoQuantizedValues,
2720 inputTensorInfo.GetQuantizationScale(),
2721 inputTensorInfo.GetQuantizationOffset());
2722 auto kernel = QuantizedVector<T>(kernelNoQuantizedValues,
2723 kernelTensorInfo.GetQuantizationScale(),
2724 kernelTensorInfo.GetQuantizationOffset());
2725 auto expectedOutput = QuantizedVector<T>(outputExpectedNoQuantizedValues,
2726 outputTensorInfo.GetQuantizationScale(),
2727 outputTensorInfo.GetQuantizationOffset());
2728
2729 uint32_t padLeft = 0;
2730 uint32_t padTop = 0;
2731 uint32_t padRight = 0;
2732 uint32_t padBottom = 0;
2733 uint32_t strideX = 1;
2734 uint32_t strideY = 1;
2735
2736 return DepthwiseConvolution2dTestImpl<ArmnnType, ArmnnBType>(
2737 workloadFactory,
2738 memoryManager,
2739 tensorHandleFactory,
2740 input,
2741 kernel,
2742 GetBias<ArmnnBType>(biasEnabled, qScale * qScale, outputTensorInfo, layout),
2743 expectedOutput,
2744 inputTensorInfo.GetShape(),
2745 kernelTensorInfo.GetShape(),
2746 outputTensorInfo.GetShape(),
2747 qScale,
2748 qOffset,
2749 layout,
2750 padLeft,
2751 padTop,
2752 padRight,
2753 padBottom,
2754 strideX,
2755 strideY,
2756 dilationX,
2757 dilationY);
2758 }
2759
2760 template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType, typename T>
2761 LayerTestResult<T, 4> DepthwiseConvolution2d3x3Dilation3x3Test(
2762 armnn::IWorkloadFactory& workloadFactory,
2763 const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
2764 const armnn::ITensorHandleFactory& tensorHandleFactory,
2765 bool biasEnabled,
2766 const armnn::DataLayout layout)
2767 {
2768 armnn::TensorInfo inputTensorInfo({1, 1, 10, 10}, ArmnnType);
2769 std::vector<float> inputNoQuantizedValues =
2770 {
2771 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
2772 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
2773 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
2774 0, 0, 0, 0, 1, 1, 1, 0, 0, 0,
2775 0, 0, 0, 0, 1, 1, 1, 0, 0, 0,
2776 0, 0, 0, 0, 1, 1, 1, 0, 0, 0,
2777 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
2778 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
2779 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
2780 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
2781 };
2782
2783 armnn::TensorInfo kernelTensorInfo({ 1, 3, 3, 1}, ArmnnType);
2784 std::vector<float> kernelNoQuantizedValues =
2785 {
2786 1, 2, 3,
2787 4, 5, 6,
2788 7, 8, 9
2789 };
2790
2791 // Since the dilation rate is 3 the kernel is effectively dilated to 7x7,
2792 // so the output will be 4x4: (I - K + 2P)/S + 1 => (10 - 7 + 0)/1 + 1 = 4.
2793 armnn::TensorInfo outputTensorInfo({ 1, 1, 4, 4}, ArmnnType);
2794 std::vector<float> outputExpectedNoQuantizedValues =
2795 {
2796 6., 5., 5., 5.,
2797 6., 5., 5., 5.,
2798 6., 5., 5., 5.,
2799 3., 2., 2., 2.
2800 };
2801
2802 return DepthwiseConvolution2d3x3DilationTestCommon<ArmnnType, ArmnnBType>(
2803 workloadFactory,
2804 memoryManager,
2805 tensorHandleFactory,
2806 inputNoQuantizedValues,
2807 inputTensorInfo,
2808 kernelNoQuantizedValues,
2809 kernelTensorInfo,
2810 outputExpectedNoQuantizedValues,
2811 outputTensorInfo,
2812 3,
2813 3,
2814 layout,
2815 biasEnabled);
2816 }
2817
2818 template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType, typename T>
2819 LayerTestResult<T, 4> DepthwiseConvolution2d2x3x3Dilation3x3Test(
2820 armnn::IWorkloadFactory& workloadFactory,
2821 const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
2822 const armnn::ITensorHandleFactory& tensorHandleFactory,
2823 bool biasEnabled,
2824 const armnn::DataLayout layout)
2825 {
2826 armnn::TensorInfo inputTensorInfo({1, 2, 10, 10}, ArmnnType);
2827 std::vector<float> inputNoQuantizedValues =
2828 {
2829 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
2830 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
2831 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
2832 0, 0, 0, 0, 1, 1, 1, 0, 0, 0,
2833 0, 0, 0, 0, 1, 1, 1, 0, 0, 0,
2834 0, 0, 0, 0, 1, 1, 1, 0, 0, 0,
2835 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
2836 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
2837 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
2838 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
2839
2840 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
2841 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
2842 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
2843 0, 0, 0, 0, 1, 1, 1, 0, 0, 0,
2844 0, 0, 0, 0, 1, 1, 1, 0, 0, 0,
2845 0, 0, 0, 0, 1, 1, 1, 0, 0, 0,
2846 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
2847 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
2848 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
2849 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
2850 };
2851
2852 armnn::TensorInfo kernelTensorInfo({ 1, 3, 3, 2}, ArmnnType);
2853 std::vector<float> kernelNoQuantizedValues =
2854 {
2855 1, 2, 3,
2856 4, 5, 6,
2857 7, 8, 9,
2858
2859 1, 2, 3,
2860 4, 5, 6,
2861 7, 8, 9
2862 };
2863
2864 // Since the dilation rate is 3 the kernel is effectively dilated to 7x7,
2865 // so the output will be 2x4x4: (I - K + 2P)/S + 1 => (10 - 7 + 0)/1 + 1 = 4.
2866 armnn::TensorInfo outputTensorInfo({ 1, 2, 4, 4}, ArmnnType);
2867 std::vector<float> outputExpectedNoQuantizedValues =
2868 {
2869 6., 5., 5., 5., 6., 5., 5., 5., 6., 5., 5., 5., 3., 2., 2., 2.,
2870 
2871 6., 5., 5., 5., 6., 5., 5., 5., 6., 5., 5., 5., 3., 2., 2., 2.
2872 };
2873
2874 return DepthwiseConvolution2d3x3DilationTestCommon<ArmnnType, ArmnnBType>(
2875 workloadFactory,
2876 memoryManager,
2877 tensorHandleFactory,
2878 inputNoQuantizedValues,
2879 inputTensorInfo,
2880 kernelNoQuantizedValues,
2881 kernelTensorInfo,
2882 outputExpectedNoQuantizedValues,
2883 outputTensorInfo,
2884 3,
2885 3,
2886 layout,
2887 biasEnabled);
2888 }
2889
2890 template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType, typename T>
2891 LayerTestResult<T, 4> DepthwiseConvolution2dMult4Test(
2892 armnn::IWorkloadFactory& workloadFactory,
2893 const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
2894 const armnn::ITensorHandleFactory& tensorHandleFactory,
2895 bool biasEnabled,
2896 const armnn::DataLayout layout)
2897 {
2898 armnn::TensorInfo inputTensorInfo({1, 2, 3, 3}, ArmnnType);
2899 std::vector<float> inputNoQuantizedValues =
2900 {
2901 10.0, 10.0, 10.0,
2902 10.0, 10.0, 10.0,
2903 10.0, 10.0, 10.0,
2904
2905 21.0, 22.0, 23.0,
2906 24.0, 25.0, 26.0,
2907 27.0, 28.0, 29.0
2908 };
2909
2910 armnn::TensorInfo kernelTensorInfo({ 1, 2, 2, 8}, ArmnnType);
2911
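// The depthwise kernel layout is [1, H, W, I*M]: 2 input channels with a channel
// multiplier of 4 give 8 output channels. With this ordering, weight/output channel c
// reads input channel c / 4, so channels 0-3 below see the constant 10s and channels
// 4-7 see the 21..29 ramp; the expected output further down reflects that split.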
2912 std::vector<float> kernelNoQuantizedValues =
2913 {
2914 0.25f, 0.25f,
2915 0.25f, 0.25f,
2916
2917 0.25f, 0.25f,
2918 0.25f, 0.25f,
2919
2920 0.0f, 0.0f,
2921 0.0f, 0.1f,
2922 
2923 0.0f, 0.0f,
2924 0.0f, 0.1f,
2925 
2926 0.2f, 0.0f,
2927 0.0f, 0.0f,
2928 
2929 0.2f, 0.0f,
2930 0.0f, 0.0f,
2931 
2932 0.0f, 0.3f,
2933 0.0f, 0.0f,
2934 
2935 0.0f, 0.3f,
2936 0.0f, 0.0f
2937 };
2938
2939 armnn::TensorInfo outputTensorInfo({ 1, 8, 2, 2}, ArmnnType);
2940 std::vector<float> outputExpectedNoQuantizedValues =
2941 {
2942 4.5f, 4.5f, 4.5f, 4.5f, 5.5f, 5.5f, 5.5f, 5.5f,
2943 2.5f, 2.5f, 2.5f, 2.5f, 3.5f, 3.5f, 3.5f, 3.5f,
2944 10.05f, 10.5f, 11.4f, 11.85f, 12.75f, 13.3f, 14.4f, 14.95f,
2945 5.25f, 5.5f, 6.0f, 6.25f, 7.45f, 7.8f, 8.5f, 8.85f
2946 };
2947 
2949 return DepthwiseConvolution2d3x3DilationTestCommon<ArmnnType, ArmnnBType>(
2950 workloadFactory,
2951 memoryManager,
2952 tensorHandleFactory,
2953 inputNoQuantizedValues,
2954 inputTensorInfo,
2955 kernelNoQuantizedValues,
2956 kernelTensorInfo,
2957 outputExpectedNoQuantizedValues,
2958 outputTensorInfo,
2959 1,
2960 1,
2961 layout,
2962 biasEnabled);
2963 }
2964
2965 template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType, typename T>
2966 LayerTestResult<T, 4> DepthwiseConvolution2dMult2Test(
2967 armnn::IWorkloadFactory& workloadFactory,
2968 const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
2969 const armnn::ITensorHandleFactory& tensorHandleFactory,
2970 bool biasEnabled,
2971 const armnn::DataLayout layout)
2972 {
2973 armnn::TensorInfo inputTensorInfo({1, 2, 3, 3}, ArmnnType);
2974 std::vector<float> inputNoQuantizedValues =
2975 {
2976 10.0, 10.0, 10.0,
2977 10.0, 10.0, 10.0,
2978 10.0, 10.0, 10.0,
2979
2980 21.0, 22.0, 23.0,
2981 24.0, 25.0, 26.0,
2982 27.0, 28.0, 29.0
2983 };
2984
2985 armnn::TensorInfo kernelTensorInfo({ 1, 2, 2, 4}, ArmnnType);
2986
2987 std::vector<float> kernelNoQuantizedValues =
2988 {
2989 0.25f, 0.25f,
2990 0.25f, 0.25f,
2991
2992 0.2f, 0.0f,
2993 0.0f, 0.0f,
2994 
2995 0.0f, 0.0f,
2996 0.0f, 0.1f,
2997 
2998 0.0f, 0.3f,
2999 0.0f, 0.0f
3001 };
3002
3003 armnn::TensorInfo outputTensorInfo({ 1, 4, 2, 2}, ArmnnType);
3004 std::vector<float> outputExpectedNoQuantizedValues =
3005 {
3006 4.5f, 4.5f, 4.5f, 4.5f,
3007 5.5f, 5.5f, 5.5f, 5.5f,
3008 5.25f, 5.5f, 6.0f, 6.25f,
3009 7.65f, 8.0f, 8.7f, 9.05f
3010 };
3011 
3013 return DepthwiseConvolution2d3x3DilationTestCommon<ArmnnType, ArmnnBType>(
3014 workloadFactory,
3015 memoryManager,
3016 tensorHandleFactory,
3017 inputNoQuantizedValues,
3018 inputTensorInfo,
3019 kernelNoQuantizedValues,
3020 kernelTensorInfo,
3021 outputExpectedNoQuantizedValues,
3022 outputTensorInfo,
3023 1,
3024 1,
3025 layout,
3026 biasEnabled);
3027 }
3028
3029 template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
3030 LayerTestResult<T, 4> CompareDepthwiseConvolution2dTestImpl(
3031 armnn::IWorkloadFactory& workloadFactory,
3032 const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
3033 armnn::IWorkloadFactory& refWorkloadFactory,
3034 const armnn::ITensorHandleFactory& tensorHandleFactory,
3035 const armnn::ITensorHandleFactory& refTensorHandleFactory,
3036 const armnnUtils::DataLayoutIndexed& layout)
3037 {
3038 unsigned int inputHeight = 8;
3039 unsigned int inputWidth = 16;
3040 unsigned int inputChannels = 3;
3041 unsigned int inputNum = 5;
3042
3043 unsigned int kernelHeight = 3;
3044 unsigned int kernelWidth = 3;
3045 unsigned int channelMultiplier = 1;
3046
3047 unsigned int strideX = 2;
3048 unsigned int strideY = 3;
3049 unsigned int padX = 1;
3050 unsigned int padY = 1;
3051
3052 unsigned int outputNum = inputNum;
3053 unsigned int outputChannels = inputChannels * channelMultiplier;
3054 unsigned int outputHeight = (inputHeight + 2 * padY - kernelHeight + strideY) / strideY;
3055 unsigned int outputWidth = (inputWidth + 2 * padX - kernelWidth + strideX) / strideX;
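// With integer division these are floor((I + 2P - K) / S) + 1:
// height (8 + 2 - 3 + 3) / 3 = 3 and width (16 + 2 - 3 + 2) / 2 = 8.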
3056
3057 armnn::TensorInfo inputTensorInfo;
3058 armnn::TensorInfo outputTensorInfo;
3059 armnn::TensorInfo kernelDesc;
3060 armnn::TensorInfo biasDesc;
3061
3062 std::vector<unsigned int> inputShape;
3063 std::vector<unsigned int> outputShape;
3064 std::vector<unsigned int> kernelShape{ 1, kernelHeight, kernelWidth, outputChannels };
3065 std::vector<unsigned int> biasShape{ outputChannels };
3066 switch (layout.GetDataLayout())
3067 {
3068 case armnn::DataLayout::NCHW:
3069 inputShape = { inputNum, inputChannels, inputHeight, inputWidth };
3070 outputShape = { outputNum, outputChannels, outputHeight, outputWidth };
3071 break;
3072 case armnn::DataLayout::NHWC:
3073 inputShape = { inputNum, inputHeight, inputWidth, inputChannels };
3074 outputShape = { outputNum, outputHeight, outputWidth, outputChannels };
3075 break;
3076 default:
3077 throw armnn::InvalidArgumentException("unknown data layout ["
3078 + std::to_string(static_cast<int>(layout.GetDataLayout())) + "]");
3079 }
3080
3081 float inputsQScale = armnn::IsQuantizedType<T>() ? 1.0f : 0.0f;
3082 float outputQScale = armnn::IsQuantizedType<T>() ? 2.0f : 0.0f;
3083 int32_t qOffset = 0;
3084
3085 inputTensorInfo = armnn::TensorInfo(4, inputShape.data(), ArmnnType, inputsQScale, qOffset);
3086 outputTensorInfo = armnn::TensorInfo(4, outputShape.data(), ArmnnType, outputQScale, qOffset);
3087 kernelDesc = armnn::TensorInfo(4, kernelShape.data(), ArmnnType, inputsQScale, qOffset);
3088 biasDesc = armnn::TensorInfo(1, biasShape.data(), armnn::GetBiasDataType(ArmnnType), inputsQScale, qOffset);
3089
3090 auto input = MakeRandomTensor<T>(inputTensorInfo, 124908, 0.0f, 255.0f);
3091 auto kernel = MakeRandomTensor<T>(kernelDesc, 891234, 0.0f, 255.0f);
3092 auto bias = MakeRandomTensor<typename FullyConnectedBiasTypeForInputType<T>::Type>(biasDesc, 1028, 0.0f, 255.0f);
3093
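// When running on the GpuAcc or CpuAcc backend with NCHW data, the block below
// re-lays-out the [1, H, W, O] depthwise weights to [1, O, H, W] to match; the
// reference workload further down keeps the original kernelDesc layout.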
3094 armnn::TensorInfo aclKernelDescriptor = kernelDesc;
3095 std::vector<T> aclKernelData;
3096 aclKernelData.assign(kernel.data(), kernel.data() + kernelHeight * kernelWidth * outputChannels);
3097 if (workloadFactory.GetBackendId() == armnn::BackendId("GpuAcc") ||
3098 workloadFactory.GetBackendId() == armnn::BackendId("CpuAcc"))
3099 {
3100 if (layout == armnn::DataLayout::NCHW)
3101 {
3102 std::vector<T> tmp(kernel.size());
3103 aclKernelDescriptor.SetShape(armnnUtils::Permuted(kernelDesc.GetShape(), {0, 2, 3, 1}));
3104 armnnUtils::Permute(kernelDesc.GetShape(), {0, 2, 3, 1}, kernel.data(), tmp.data(), sizeof(T));
3105 aclKernelData = tmp;
3106 }
3107 }
3108
3109 std::vector<T> actualOutput(outputTensorInfo.GetNumElements());
3110 std::vector<T> expectedOutput(outputTensorInfo.GetNumElements());
3111
3112 std::unique_ptr<armnn::ITensorHandle> inputHandle = tensorHandleFactory.CreateTensorHandle(inputTensorInfo);
3113 std::unique_ptr<armnn::ITensorHandle> weightsHandle = tensorHandleFactory.CreateTensorHandle(aclKernelDescriptor);
3114 std::unique_ptr<armnn::ITensorHandle> biasHandle = tensorHandleFactory.CreateTensorHandle(biasDesc);
3115 std::unique_ptr<armnn::ITensorHandle> outputHandle = tensorHandleFactory.CreateTensorHandle(outputTensorInfo);
3116
3117 armnn::DepthwiseConvolution2dQueueDescriptor data;
3118 armnn::WorkloadInfo info;
3119
3120 AddInputToWorkload(data, info, inputTensorInfo, inputHandle.get());
3121 AddInputToWorkload(data, info, aclKernelDescriptor, weightsHandle.get());
3122 AddInputToWorkload(data, info, biasDesc, biasHandle.get());
3123 AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get());
3124
3125 AllocateAndCopyDataToITensorHandle(weightsHandle.get(), aclKernelData.data());
3126 AllocateAndCopyDataToITensorHandle(biasHandle.get(), bias.data());
3127
3128 data.m_Parameters.m_StrideX = strideX;
3129 data.m_Parameters.m_StrideY = strideY;
3130 data.m_Parameters.m_PadLeft = padX;
3131 data.m_Parameters.m_PadRight = padX;
3132 data.m_Parameters.m_PadTop = padY;
3133 data.m_Parameters.m_PadBottom = padY;
3134 data.m_Parameters.m_BiasEnabled = true;
3135 data.m_Parameters.m_DataLayout = layout.GetDataLayout();
3136
3137 std::unique_ptr<armnn::ITensorHandle> outputHandleRef = refTensorHandleFactory.CreateTensorHandle(outputTensorInfo);
3138 std::unique_ptr<armnn::ITensorHandle> weightsHandleRef = refTensorHandleFactory.CreateTensorHandle(kernelDesc);
3139 std::unique_ptr<armnn::ITensorHandle> biasHandleRef = refTensorHandleFactory.CreateTensorHandle(biasDesc);
3140 std::unique_ptr<armnn::ITensorHandle> inputHandleRef = refTensorHandleFactory.CreateTensorHandle(inputTensorInfo);
3141
3142 armnn::DepthwiseConvolution2dQueueDescriptor refData = data;
3143 armnn::WorkloadInfo refInfo = info;
3144 SetWorkloadInput(refData, refInfo, 0, inputTensorInfo, inputHandleRef.get());
3145 SetWorkloadInput(refData, refInfo, 1, kernelDesc, weightsHandleRef.get());
3146 SetWorkloadInput(refData, refInfo, 2, biasDesc, biasHandleRef.get());
3147 SetWorkloadOutput(refData, refInfo, 0, outputTensorInfo, outputHandleRef.get());
3148
3149 std::unique_ptr<armnn::IWorkload> workload
3150 = workloadFactory.CreateWorkload(armnn::LayerType::DepthwiseConvolution2d, data, info);
3151 std::unique_ptr<armnn::IWorkload> workloadRef
3152 = refWorkloadFactory.CreateWorkload(armnn::LayerType::DepthwiseConvolution2d, refData, refInfo);
3153
3154 outputHandleRef->Allocate();
3155 weightsHandleRef->Allocate();
3156 biasHandleRef->Allocate();
3157 inputHandleRef->Allocate();
3158
3159 inputHandle->Allocate();
3160 outputHandle->Allocate();
3161
3162 CopyDataToITensorHandle(inputHandle.get(), input.data());
3163 CopyDataToITensorHandle(inputHandleRef.get(), input.data());
3164 CopyDataToITensorHandle(weightsHandleRef.get(), kernel.data());
3165 CopyDataToITensorHandle(biasHandleRef.get(), bias.data());
3166
3167 ExecuteWorkload(*workload, memoryManager);
3168
3169 workloadRef->PostAllocationConfigure();
3170 workloadRef->Execute();
3171
3172 CopyDataFromITensorHandle(actualOutput.data(), outputHandle.get());
3173 CopyDataFromITensorHandle(expectedOutput.data(), outputHandleRef.get());
3174
3175 return LayerTestResult<T, 4>(actualOutput,
3176 expectedOutput,
3177 outputHandle->GetShape(),
3178 outputTensorInfo.GetShape());
3179 }
3180
3181 //
3182 // Explicit template instantiations
3183 //
3184 template LayerTestResult<armnn::ResolveType<armnn::DataType::BFloat16>, 4>
3185 Convolution2d3x3Dilation3x3Test<armnn::DataType::BFloat16, armnn::DataType::BFloat16>(
3186 armnn::IWorkloadFactory&,
3187 const armnn::IBackendInternal::IMemoryManagerSharedPtr&,
3188 const armnn::ITensorHandleFactory&,
3189 bool,
3190 armnn::DataLayout);
3191
3192 template LayerTestResult<armnn::ResolveType<armnn::DataType::Float32>, 4>
3193 Convolution2d3x3Dilation3x3Test<armnn::DataType::Float32, armnn::DataType::Float32>(
3194 armnn::IWorkloadFactory&,
3195 const armnn::IBackendInternal::IMemoryManagerSharedPtr&,
3196 const armnn::ITensorHandleFactory&,
3197 bool,
3198 armnn::DataLayout);
3199
3200 template LayerTestResult<armnn::ResolveType<armnn::DataType::QAsymmS8>, 4>
3201 Convolution2d3x3Dilation3x3Test<armnn::DataType::QAsymmS8, armnn::DataType::Signed32>(
3202 armnn::IWorkloadFactory&,
3203 const armnn::IBackendInternal::IMemoryManagerSharedPtr&,
3204 const armnn::ITensorHandleFactory&,
3205 bool,
3206 armnn::DataLayout);
3207
3208 template LayerTestResult<armnn::ResolveType<armnn::DataType::QAsymmU8>, 4>
3209 Convolution2d3x3Dilation3x3Test<armnn::DataType::QAsymmU8, armnn::DataType::Signed32>(
3210 armnn::IWorkloadFactory&,
3211 const armnn::IBackendInternal::IMemoryManagerSharedPtr&,
3212 const armnn::ITensorHandleFactory&,
3213 bool,
3214 armnn::DataLayout);
3215
3216 template LayerTestResult<armnn::ResolveType<armnn::DataType::QSymmS16>, 4>
3217 Convolution2d3x3Dilation3x3Test<armnn::DataType::QSymmS16, armnn::DataType::Signed32>(
3218 armnn::IWorkloadFactory&,
3219 const armnn::IBackendInternal::IMemoryManagerSharedPtr&,
3220 const armnn::ITensorHandleFactory&,
3221 bool,
3222 armnn::DataLayout);
3223
3224 template LayerTestResult<armnn::ResolveType<armnn::DataType::Float32>, 4>
3225 Convolution2d2x3x3Dilation3x3Test<armnn::DataType::Float32, armnn::DataType::Float32>(
3226 armnn::IWorkloadFactory&,
3227 const armnn::IBackendInternal::IMemoryManagerSharedPtr&,
3228 const armnn::ITensorHandleFactory&,
3229 bool,
3230 armnn::DataLayout);
3231
3232 template LayerTestResult<armnn::ResolveType<armnn::DataType::BFloat16>, 4>
3233 Convolution2d2x3x3Dilation3x3Test<armnn::DataType::BFloat16, armnn::DataType::BFloat16>(
3234 armnn::IWorkloadFactory&,
3235 const armnn::IBackendInternal::IMemoryManagerSharedPtr&,
3236 const armnn::ITensorHandleFactory&,
3237 bool,
3238 armnn::DataLayout);
3239
3240 template LayerTestResult<armnn::ResolveType<armnn::DataType::QAsymmS8>, 4>
3241 Convolution2d2x3x3Dilation3x3Test<armnn::DataType::QAsymmS8, armnn::DataType::Signed32>(
3242 armnn::IWorkloadFactory&,
3243 const armnn::IBackendInternal::IMemoryManagerSharedPtr&,
3244 const armnn::ITensorHandleFactory&,
3245 bool,
3246 armnn::DataLayout);
3247
3248 template LayerTestResult<armnn::ResolveType<armnn::DataType::QAsymmU8>, 4>
3249 Convolution2d2x3x3Dilation3x3Test<armnn::DataType::QAsymmU8, armnn::DataType::Signed32>(
3250 armnn::IWorkloadFactory&,
3251 const armnn::IBackendInternal::IMemoryManagerSharedPtr&,
3252 const armnn::ITensorHandleFactory&,
3253 bool,
3254 armnn::DataLayout);
3255
3256 template LayerTestResult<armnn::ResolveType<armnn::DataType::QSymmS16>, 4>
3257 Convolution2d2x3x3Dilation3x3Test<armnn::DataType::QSymmS16, armnn::DataType::Signed32>(
3258 armnn::IWorkloadFactory&,
3259 const armnn::IBackendInternal::IMemoryManagerSharedPtr&,
3260 const armnn::ITensorHandleFactory&,
3261 bool,
3262 armnn::DataLayout);
3263
3264 template LayerTestResult<armnn::ResolveType<armnn::DataType::BFloat16>, 4>
3265 Convolution2d2x2Dilation2x2Padding2x2Stride3x3Test<armnn::DataType::BFloat16, armnn::DataType::BFloat16>(
3266 armnn::IWorkloadFactory &workloadFactory,
3267 const armnn::IBackendInternal::IMemoryManagerSharedPtr &memoryManager,
3268 const armnn::ITensorHandleFactory& tensorHandleFactory,
3269 bool biasEnabled,
3270 const armnn::DataLayout layout);
3271
3272 template LayerTestResult<armnn::ResolveType<armnn::DataType::Float32>, 4>
3273 Convolution2d2x2Dilation2x2Padding2x2Stride3x3Test<armnn::DataType::Float32, armnn::DataType::Float32>(
3274 armnn::IWorkloadFactory &workloadFactory,
3275 const armnn::IBackendInternal::IMemoryManagerSharedPtr &memoryManager,
3276 const armnn::ITensorHandleFactory& tensorHandleFactory,
3277 bool biasEnabled,
3278 const armnn::DataLayout layout);
3279
3280 template LayerTestResult<armnn::ResolveType<armnn::DataType::QAsymmS8>, 4>
3281 Convolution2d2x2Dilation2x2Padding2x2Stride3x3Test<armnn::DataType::QAsymmS8, armnn::DataType::Signed32>(
3282 armnn::IWorkloadFactory &workloadFactory,
3283 const armnn::IBackendInternal::IMemoryManagerSharedPtr &memoryManager,
3284 const armnn::ITensorHandleFactory& tensorHandleFactory,
3285 bool biasEnabled,
3286 const armnn::DataLayout layout);
3287
3288 template LayerTestResult<armnn::ResolveType<armnn::DataType::QAsymmU8>, 4>
3289 Convolution2d2x2Dilation2x2Padding2x2Stride3x3Test<armnn::DataType::QAsymmU8, armnn::DataType::Signed32>(
3290 armnn::IWorkloadFactory &workloadFactory,
3291 const armnn::IBackendInternal::IMemoryManagerSharedPtr &memoryManager,
3292 const armnn::ITensorHandleFactory& tensorHandleFactory,
3293 bool biasEnabled,
3294 const armnn::DataLayout layout);
3295
3296 template LayerTestResult<armnn::ResolveType<armnn::DataType::QSymmS16>, 4>
3297 Convolution2d2x2Dilation2x2Padding2x2Stride3x3Test<armnn::DataType::QSymmS16, armnn::DataType::Signed32>(
3298 armnn::IWorkloadFactory &workloadFactory,
3299 const armnn::IBackendInternal::IMemoryManagerSharedPtr &memoryManager,
3300 const armnn::ITensorHandleFactory& tensorHandleFactory,
3301 bool biasEnabled,
3302 const armnn::DataLayout layout);
3303
3304 template LayerTestResult<armnn::ResolveType<armnn::DataType::BFloat16>, 4>
3305 DepthwiseConvolution2d3x3Dilation3x3Test<armnn::DataType::BFloat16, armnn::DataType::BFloat16>(
3306 armnn::IWorkloadFactory&,
3307 const armnn::IBackendInternal::IMemoryManagerSharedPtr&,
3308 const armnn::ITensorHandleFactory&,
3309 bool,
3310 armnn::DataLayout);
3311
3312 template LayerTestResult<armnn::ResolveType<armnn::DataType::Float32>, 4>
3313 DepthwiseConvolution2d3x3Dilation3x3Test<armnn::DataType::Float32, armnn::DataType::Float32>(
3314 armnn::IWorkloadFactory&,
3315 const armnn::IBackendInternal::IMemoryManagerSharedPtr&,
3316 const armnn::ITensorHandleFactory&,
3317 bool,
3318 armnn::DataLayout);
3319
3320 template LayerTestResult<armnn::ResolveType<armnn::DataType::QAsymmS8>, 4>
3321 DepthwiseConvolution2d3x3Dilation3x3Test<armnn::DataType::QAsymmS8, armnn::DataType::Signed32>(
3322 armnn::IWorkloadFactory&,
3323 const armnn::IBackendInternal::IMemoryManagerSharedPtr&,
3324 const armnn::ITensorHandleFactory&,
3325 bool,
3326 armnn::DataLayout);
3327
3328 template LayerTestResult<armnn::ResolveType<armnn::DataType::QAsymmU8>, 4>
3329 DepthwiseConvolution2d3x3Dilation3x3Test<armnn::DataType::QAsymmU8, armnn::DataType::Signed32>(
3330 armnn::IWorkloadFactory&,
3331 const armnn::IBackendInternal::IMemoryManagerSharedPtr&,
3332 const armnn::ITensorHandleFactory&,
3333 bool,
3334 armnn::DataLayout);
3335
3336 template LayerTestResult<armnn::ResolveType<armnn::DataType::QSymmS16>, 4>
3337 DepthwiseConvolution2d3x3Dilation3x3Test<armnn::DataType::QSymmS16, armnn::DataType::Signed32>(
3338 armnn::IWorkloadFactory&,
3339 const armnn::IBackendInternal::IMemoryManagerSharedPtr&,
3340 const armnn::ITensorHandleFactory&,
3341 bool,
3342 armnn::DataLayout);
3343
3344 template LayerTestResult<armnn::ResolveType<armnn::DataType::BFloat16>, 4>
3345 DepthwiseConvolution2d2x3x3Dilation3x3Test<armnn::DataType::BFloat16, armnn::DataType::BFloat16>(
3346 armnn::IWorkloadFactory&,
3347 const armnn::IBackendInternal::IMemoryManagerSharedPtr&,
3348 const armnn::ITensorHandleFactory&,
3349 bool,
3350 armnn::DataLayout);
3351
3352 template LayerTestResult<armnn::ResolveType<armnn::DataType::Float32>, 4>
3353 DepthwiseConvolution2d2x3x3Dilation3x3Test<armnn::DataType::Float32, armnn::DataType::Float32>(
3354 armnn::IWorkloadFactory&,
3355 const armnn::IBackendInternal::IMemoryManagerSharedPtr&,
3356 const armnn::ITensorHandleFactory&,
3357 bool,
3358 armnn::DataLayout);
3359
3360 template LayerTestResult<armnn::ResolveType<armnn::DataType::QAsymmS8>, 4>
3361 DepthwiseConvolution2d2x3x3Dilation3x3Test<armnn::DataType::QAsymmS8, armnn::DataType::Signed32>(
3362 armnn::IWorkloadFactory&,
3363 const armnn::IBackendInternal::IMemoryManagerSharedPtr&,
3364 const armnn::ITensorHandleFactory&,
3365 bool,
3366 armnn::DataLayout);
3367
3368 template LayerTestResult<armnn::ResolveType<armnn::DataType::QAsymmU8>, 4>
3369 DepthwiseConvolution2d2x3x3Dilation3x3Test<armnn::DataType::QAsymmU8, armnn::DataType::Signed32>(
3370 armnn::IWorkloadFactory&,
3371 const armnn::IBackendInternal::IMemoryManagerSharedPtr&,
3372 const armnn::ITensorHandleFactory&,
3373 bool,
3374 armnn::DataLayout);
3375
3376 template LayerTestResult<armnn::ResolveType<armnn::DataType::QSymmS16>, 4>
3377 DepthwiseConvolution2d2x3x3Dilation3x3Test<armnn::DataType::QSymmS16, armnn::DataType::Signed32>(
3378 armnn::IWorkloadFactory&,
3379 const armnn::IBackendInternal::IMemoryManagerSharedPtr&,
3380 const armnn::ITensorHandleFactory&,
3381 bool,
3382 armnn::DataLayout);
3383
3384 template LayerTestResult<armnn::ResolveType<armnn::DataType::BFloat16>, 4>
3385 DepthwiseConvolution2dMult4Test<armnn::DataType::BFloat16, armnn::DataType::BFloat16>(
3386 armnn::IWorkloadFactory &workloadFactory,
3387 const armnn::IBackendInternal::IMemoryManagerSharedPtr &memoryManager,
3388 const armnn::ITensorHandleFactory& tensorHandleFactory,
3389 bool biasEnabled,
3390 const armnn::DataLayout layout);
3391
3392 template LayerTestResult<armnn::ResolveType<armnn::DataType::Float32>, 4>
3393 DepthwiseConvolution2dMult4Test<armnn::DataType::Float32, armnn::DataType::Float32>(
3394 armnn::IWorkloadFactory &workloadFactory,
3395 const armnn::IBackendInternal::IMemoryManagerSharedPtr &memoryManager,
3396 const armnn::ITensorHandleFactory& tensorHandleFactory,
3397 bool biasEnabled,
3398 const armnn::DataLayout layout);
3399
3400 template LayerTestResult<armnn::ResolveType<armnn::DataType::BFloat16>, 4>
3401 DepthwiseConvolution2dMult2Test<armnn::DataType::BFloat16, armnn::DataType::BFloat16>(
3402 armnn::IWorkloadFactory &workloadFactory,
3403 const armnn::IBackendInternal::IMemoryManagerSharedPtr &memoryManager,
3404 const armnn::ITensorHandleFactory& tensorHandleFactory,
3405 bool biasEnabled,
3406 const armnn::DataLayout layout);
3407
3408 template LayerTestResult<armnn::ResolveType<armnn::DataType::Float32>, 4>
3409 DepthwiseConvolution2dMult2Test<armnn::DataType::Float32, armnn::DataType::Float32>(
3410 armnn::IWorkloadFactory &workloadFactory,
3411 const armnn::IBackendInternal::IMemoryManagerSharedPtr &memoryManager,
3412 const armnn::ITensorHandleFactory& tensorHandleFactory,
3413 bool biasEnabled,
3414 const armnn::DataLayout layout);
3415
3416 //
3417 // Implementation functions
3418 //
3419
3420 LayerTestResult<float, 4> SimpleConvolution2d3x5Test(
3421 armnn::IWorkloadFactory& workloadFactory,
3422 const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
3423 const armnn::ITensorHandleFactory& tensorHandleFactory,
3424 bool biasEnabled,
3425 const armnn::DataLayout layout)
3426 {
3427 return SimpleConvolution2d3x5TestCommon<armnn::DataType::Float32, armnn::DataType::Float32>(
3428 workloadFactory, memoryManager, tensorHandleFactory, 0.f, 0, biasEnabled, layout);
3429 }
3430
3431 LayerTestResult<uint8_t, 4> SimpleConvolution2d3x5Uint8Test(
3432 armnn::IWorkloadFactory& workloadFactory,
3433 const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
3434 const armnn::ITensorHandleFactory& tensorHandleFactory,
3435 bool biasEnabled,
3436 const armnn::DataLayout layout)
3437 {
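// 0.5f and 50 are the quantisation scale and zero point for the QAsymmU8 tensors,
// i.e. real values map onto quantised ones as q = round(r / scale) + offset.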
3438 return SimpleConvolution2d3x5TestCommon<armnn::DataType::QAsymmU8, armnn::DataType::Signed32>(
3439 workloadFactory, memoryManager, tensorHandleFactory, 0.5f, 50, biasEnabled, layout);
3440 }
3441
3442 LayerTestResult<float, 4> SimpleConvolution2d3x3Test(
3443 armnn::IWorkloadFactory& workloadFactory,
3444 const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
3445 const armnn::ITensorHandleFactory& tensorHandleFactory,
3446 bool biasEnabled,
3447 const armnn::DataLayout layout)
3448 {
3449 return SimpleConvolution2d3x3TestCommon<armnn::DataType::Float32, armnn::DataType::Float32>(
3450 workloadFactory, memoryManager, tensorHandleFactory, 0.f, 0, biasEnabled, layout);
3451 }
3452
3453 LayerTestResult<float, 4> SimpleConvolution2d3x3NhwcTest(
3454 armnn::IWorkloadFactory& workloadFactory,
3455 const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
3456 const armnn::ITensorHandleFactory& tensorHandleFactory,
3457 bool biasEnabled)
3458 {
3459 return SimpleConvolution2d3x3NhwcTestCommon<armnn::DataType::Float32>(
3460 workloadFactory,
3461 memoryManager,
3462 tensorHandleFactory,
3463 0.f,
3464 0,
3465 biasEnabled,
3466 armnn::DataLayout::NHWC);
3467 }
3468
3469 LayerTestResult<float, 4> SimpleConvolution2d3x3Stride2x2Test(
3470 armnn::IWorkloadFactory& workloadFactory,
3471 const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
3472 const armnn::ITensorHandleFactory& tensorHandleFactory,
3473 bool biasEnabled,
3474 const armnn::DataLayout layout)
3475 {
3476 return SimpleConvolution2d3x3Stride2x2TestCommon<armnn::DataType::Float32>(
3477 workloadFactory,
3478 memoryManager,
3479 tensorHandleFactory,
3480 0.f,
3481 0,
3482 biasEnabled,
3483 layout);
3484 }
3485
3486 LayerTestResult<uint8_t, 4> SimpleConvolution2d3x3Uint8Test(
3487 armnn::IWorkloadFactory& workloadFactory,
3488 const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
3489 const armnn::ITensorHandleFactory& tensorHandleFactory,
3490 bool biasEnabled,
3491 const armnn::DataLayout layout)
3492 {
3493 return SimpleConvolution2d3x3TestCommon<armnn::DataType::QAsymmU8, armnn::DataType::Signed32>(
3494 workloadFactory, memoryManager, tensorHandleFactory, 0.5f, 50, biasEnabled, layout);
3495 }
3496
3497 LayerTestResult<int16_t, 4> SimpleConvolution2d3x5QSymm16Test(
3498 armnn::IWorkloadFactory& workloadFactory,
3499 const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
3500 const armnn::ITensorHandleFactory& tensorHandleFactory,
3501 bool biasEnabled,
3502 const armnn::DataLayout layout)
3503 {
3504 return SimpleConvolution2d3x5TestCommon<armnn::DataType::QSymmS16, armnn::DataType::Signed32>(
3505 workloadFactory, memoryManager, tensorHandleFactory, 0.5f, 50, biasEnabled, layout);
3506 }
3507
3508 LayerTestResult<int16_t, 4> SimpleConvolution2d3x3QSymm16Test(
3509 armnn::IWorkloadFactory& workloadFactory,
3510 const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
3511 const armnn::ITensorHandleFactory& tensorHandleFactory,
3512 bool biasEnabled,
3513 const armnn::DataLayout layout)
3514 {
3515 return SimpleConvolution2d3x3TestCommon<armnn::DataType::QSymmS16, armnn::DataType::Signed32>(
3516 workloadFactory, memoryManager, tensorHandleFactory, 0.5f, 50, biasEnabled, layout);
3517 }
3518
3519 LayerTestResult<float, 4> Convolution2dAsymmetricPaddingTest(
3520 armnn::IWorkloadFactory& workloadFactory,
3521 const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
3522 const armnn::ITensorHandleFactory& tensorHandleFactory,
3523 armnn::DataLayout layout)
3524 {
3525 return SimpleConvolution2dAsymmetricPaddingTestCommon<armnn::DataType::Float32, armnn::DataType::Float32>(
3526 workloadFactory, memoryManager, tensorHandleFactory, layout, 0.0f, 0);
3527 }
3528
3529 LayerTestResult<float, 4> Convolution2dAsymmetricPaddingLargerThanHalfKernelSizeTest(
3530 armnn::IWorkloadFactory& workloadFactory,
3531 const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
3532 const armnn::ITensorHandleFactory& tensorHandleFactory,
3533 armnn::DataLayout layout)
3534 {
3535 return Convolution2dAsymmetricPaddingLargerThanHalfKernelSizeTestCommon
3536 <armnn::DataType::Float32, armnn::DataType::Float32>(
3537 workloadFactory, memoryManager, tensorHandleFactory, layout, 0.0f, 0);
3538 }
3539
3540 LayerTestResult<float, 4> Convolution1dTest(
3541 armnn::IWorkloadFactory& workloadFactory,
3542 const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
3543 const armnn::ITensorHandleFactory& tensorHandleFactory,
3544 bool biasEnabled)
3545 {
3546 return Convolution1dTestImpl<armnn::DataType::Float32, armnn::DataType::Float32>(
3547 workloadFactory, memoryManager, tensorHandleFactory, 0.0f, 0, biasEnabled);
3548 }
3549
3550 LayerTestResult<uint8_t, 4> Convolution1dUint8Test(
3551 armnn::IWorkloadFactory& workloadFactory,
3552 const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
3553 const armnn::ITensorHandleFactory& tensorHandleFactory,
3554 bool biasEnabled)
3555 {
3556 return Convolution1dTestImpl<armnn::DataType::QAsymmU8, armnn::DataType::Signed32>(
3557 workloadFactory, memoryManager, tensorHandleFactory, 0.1f, 128, biasEnabled);
3558 }
3559
3560 LayerTestResult<uint8_t, 4> Convolution2dPerAxisQuantTest(
3561 armnn::IWorkloadFactory& workloadFactory,
3562 const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
3563 const armnn::ITensorHandleFactory& tensorHandleFactory,
3564 const armnn::DataLayout layout)
3565 {
3566 using namespace armnn;
3567
3568 const DataType inputType = DataType::QAsymmU8;
3569 const DataType kernelType = DataType::QSymmS8;
3570 const DataType biasType = DataType::Signed32;
3571
3572 TensorInfo inputInfo ({ 1, 3, 1, 2 }, inputType, 0.5f, 128);
3573 TensorInfo outputInfo({ 1, 3, 1, 3 }, inputType, 1.0f, 128);
3574
3575 const std::vector<float> quantScales{ 0.5f, 0.75f, 1.0f };
3576 constexpr unsigned int quantDimension = 0;
3577
3578 TensorInfo kernelInfo({ 3, 1, 1, 2 }, kernelType, quantScales, quantDimension);
3579
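// Per-axis bias scales follow the convention bias_scale[c] = input_scale * weight_scale[c]:
// 0.5 * { 0.5, 0.75, 1.0 } = { 0.25, 0.375, 0.5 }.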
3580 const std::vector<float> biasQuantScales{ 0.25f, 0.375f, 0.5f };
3581 TensorInfo biasInfo({ 3 }, biasType, biasQuantScales, quantDimension);
3582
3583 std::vector<uint8_t> inputData =
3584 {
3585 138, 108, 138, 108, 138, 108
3586 };
3587
3588 std::vector<int8_t> kernelData =
3589 {
3590 1, 2, 1, 2, 1, 2
3591 };
3592
3593 std::vector<int32_t> biasData =
3594 {
3595 4, 4, 4
3596 };
3597
3598 std::vector<uint8_t> expectedOutputData =
3599 {
3600 121, 118, 115, 121, 118, 115, 121, 118, 115
3601 };
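// Worked example for output channel 0: the inputs dequantise to (138 - 128) * 0.5 = 5
// and (108 - 128) * 0.5 = -10, the channel-0 weights { 1, 2 } to { 0.5, 1.0 } and the
// bias to 4 * 0.25 = 1, giving 5 * 0.5 - 10 * 1.0 + 1 = -6.5 per output element.
// Requantised: round(-6.5 / 1.0) + 128 = -7 + 128 = 121, the first value above.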
3602
3603 if (layout == DataLayout::NCHW)
3604 {
3605 PermuteTensorNhwcToNchw(inputInfo, inputData);
3606 PermuteTensorNhwcToNchw(kernelInfo, kernelData);
3607 PermuteTensorNhwcToNchw(outputInfo, expectedOutputData);
3608 }
3609
3610 std::vector<uint8_t> actualOutput(outputInfo.GetNumElements());
3611
3612 Convolution2dDescriptor descriptor;
3613 descriptor.m_StrideX = 1;
3614 descriptor.m_StrideY = 1;
3615 descriptor.m_PadLeft = 0;
3616 descriptor.m_PadRight = 0;
3617 descriptor.m_PadTop = 0;
3618 descriptor.m_PadBottom = 0;
3619 descriptor.m_BiasEnabled = true;
3620 descriptor.m_DataLayout = layout;
3621
3622 std::unique_ptr<ITensorHandle> inputHandle = tensorHandleFactory.CreateTensorHandle(inputInfo);
3623 std::unique_ptr<ITensorHandle> outputHandle = tensorHandleFactory.CreateTensorHandle(outputInfo);
3624 std::unique_ptr<armnn::ITensorHandle> weightsHandle = tensorHandleFactory.CreateTensorHandle(kernelInfo);
3625 std::unique_ptr<armnn::ITensorHandle> biasHandle = nullptr;
3626
3627 WorkloadInfo workloadInfo;
3633
3634 Convolution2dQueueDescriptor queueDescriptor;
3635 queueDescriptor.m_Parameters = descriptor;
3636
3637 AddInputToWorkload(queueDescriptor, workloadInfo, inputInfo, inputHandle.get());
3638 AddInputToWorkload(queueDescriptor, workloadInfo, kernelInfo, weightsHandle.get());
3639
3640 if (descriptor.m_BiasEnabled)
3641 {
3642 biasHandle = tensorHandleFactory.CreateTensorHandle(biasInfo);
3643 AddInputToWorkload(queueDescriptor, workloadInfo, biasInfo, biasHandle.get());
3644 }
3645
3646 AddOutputToWorkload(queueDescriptor, workloadInfo, outputInfo, outputHandle.get());
3647
3648 std::unique_ptr<IWorkload> workload = workloadFactory.CreateWorkload(armnn::LayerType::Convolution2d,
3649 queueDescriptor,
3650 workloadInfo);
3651 inputHandle->Allocate();
3652 outputHandle->Allocate();
3653 weightsHandle->Allocate();
3654
3655 if (descriptor.m_BiasEnabled)
3656 {
3657 biasHandle->Allocate();
3658 CopyDataToITensorHandle(biasHandle.get(), biasData.data());
3659 }
3660 CopyDataToITensorHandle(inputHandle.get(), inputData.data());
3661 CopyDataToITensorHandle(weightsHandle.get(), kernelData.data());
3662 
3664 ExecuteWorkload(*workload, memoryManager);
3665
3666 CopyDataFromITensorHandle(actualOutput.data(), outputHandle.get());
3667
3668 return LayerTestResult<uint8_t, 4>(actualOutput,
3669 expectedOutputData,
3670 outputHandle->GetShape(),
3671 outputInfo.GetShape());
3672 }
3673
3674 LayerTestResult<float, 4> CompareConvolution2dTest(
3675 armnn::IWorkloadFactory& workloadFactory,
3676 const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
3677 armnn::IWorkloadFactory& refWorkloadFactory,
3678 const armnn::ITensorHandleFactory& tensorHandleFactory,
3679 const armnn::ITensorHandleFactory& refTensorHandleFactory)
3680 {
3681 return CompareConvolution2dTestImpl<armnn::DataType::Float32>(
3682 workloadFactory, memoryManager, refWorkloadFactory, tensorHandleFactory, refTensorHandleFactory);
3683 }
3684
3685 LayerTestResult<float, 4> DepthwiseConvolution2dTest(
3686 armnn::IWorkloadFactory& workloadFactory,
3687 const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
3688 const armnn::ITensorHandleFactory& tensorHandleFactory,
3689 bool biasEnabled,
3690 const armnn::DataLayout layout)
3691 {
3692 return DepthwiseConvolution2dTestImpl<armnn::DataType::Float32, armnn::DataType::Float32>(
3693 workloadFactory, memoryManager, tensorHandleFactory, 0.0f, 0, biasEnabled, layout);
3694 }
3695
3696 LayerTestResult<float, 4> DepthwiseConvolution2dDepthNhwcTest(
3697 armnn::IWorkloadFactory& workloadFactory,
3698 const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
3699 const armnn::ITensorHandleFactory& tensorHandleFactory,
3700 bool biasEnabled)
3701 {
3702 return DepthwiseConvolution2dNhwcTestCommon<armnn::DataType::Float32, armnn::DataType::Float32>(
3703 workloadFactory, memoryManager, tensorHandleFactory, 0.0f, 0, biasEnabled);
3704 }
3705
3706 LayerTestResult<float, 4> DepthwiseConvolution2dDepthMul1Test(
3707 armnn::IWorkloadFactory& workloadFactory,
3708 const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
3709 const armnn::ITensorHandleFactory& tensorHandleFactory,
3710 bool biasEnabled,
3711 const armnn::DataLayout layout)
3712 {
3713 return DepthwiseConvolution2dDepthMul1TestImpl<armnn::DataType::Float32, armnn::DataType::Float32>(
3714 workloadFactory, memoryManager, tensorHandleFactory, 0.0f, 0, biasEnabled, layout);
3715 }
3716
3717 LayerTestResult<float, 4> DepthwiseConvolution2dDepthMul64Test(
3718 armnn::IWorkloadFactory& workloadFactory,
3719 const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
3720 const armnn::ITensorHandleFactory& tensorHandleFactory)
3721 {
3722 armnn::TensorInfo inputTensorInfo({ 1, 1, 2, 2 }, armnn::DataType::Float32);
3723 std::vector<float> input = { 1.f, 2.f, 3.f, 4.f };
3724
3725 std::vector<float> kernelData;
3726 std::vector<float> singleDepthKernel{ 1.f, -1.f, -1.f, 1.f };
3727 for (unsigned int i = 0; i < 64; ++i)
3728 {
3729 kernelData.insert(kernelData.end(), singleDepthKernel.begin(), singleDepthKernel.end());
3730 }
3731 armnn::TensorInfo kernelTensorInfo({ 64, 1, 2, 2 }, armnn::DataType::Float32);
3732
3733 // permute from [O,1,H,W] --> [1,H,W,O]
3734 armnn::PermutationVector permutationVector {3,0,1,2};
3735 kernelTensorInfo = armnnUtils::Permuted(kernelTensorInfo, permutationVector);
3736 std::vector<float> kernelPermuted(kernelTensorInfo.GetNumElements());
3737 armnnUtils::Permute(kernelTensorInfo.GetShape(), permutationVector,
3738 kernelData.data(), kernelPermuted.data(),
3739 GetDataTypeSize(kernelTensorInfo.GetDataType()));
3740
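// Every depth slice applies the kernel { 1, -1, -1, 1 } across the whole 2x2 input
// { 1, 2, 3, 4 }: 1 - 2 - 3 + 4 = 0, so all 64 output channels are expected to be zero.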
3741 std::vector<float> expectedOutputData(64, 0.f);
3742 armnn::TensorInfo outputTensorInfo({ 1, 64, 1, 1 }, armnn::DataType::Float32);
3743
3744 return DepthwiseConvolution2dTestImpl<armnn::DataType::Float32, armnn::DataType::Float32>(
3745 workloadFactory,
3746 memoryManager,
3747 tensorHandleFactory,
3748 input,
3749 kernelPermuted,
3750 std::vector<float>(),
3751 expectedOutputData,
3752 inputTensorInfo.GetShape(),
3753 kernelTensorInfo.GetShape(),
3754 outputTensorInfo.GetShape(),
3755 0.f,
3756 0,
3757 armnn::DataLayout::NCHW);
3758 }
3759
3760 LayerTestResult<float, 4> DepthwiseConvolution2dAsymmetricTest(
3761 armnn::IWorkloadFactory& workloadFactory,
3762 const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
3763 const armnn::ITensorHandleFactory& tensorHandleFactory,
3764 bool biasEnabled,
3765 const armnn::DataLayout layout)
3766 {
3767 return DepthwiseConvolution2dAsymmetricTestCommon<armnn::DataType::Float32, armnn::DataType::Float32>(
3768 workloadFactory, memoryManager, tensorHandleFactory, 0.0f, 0, biasEnabled, layout);
3769 }
3770
3771 LayerTestResult<uint8_t, 4> DepthwiseConvolution2dUint8Test(
3772 armnn::IWorkloadFactory& workloadFactory,
3773 const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
3774 const armnn::ITensorHandleFactory& tensorHandleFactory,
3775 bool biasEnabled,
3776 const armnn::DataLayout layout)
3777 {
3778 return DepthwiseConvolution2dTestImpl<armnn::DataType::QAsymmU8, armnn::DataType::Signed32>(
3779 workloadFactory, memoryManager, tensorHandleFactory, 0.5f, 50, biasEnabled, layout);
3780 }
3781
3782 LayerTestResult<uint8_t, 4> DepthwiseConvolution2dDepthMul1Uint8Test(
3783 armnn::IWorkloadFactory& workloadFactory,
3784 const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
3785 const armnn::ITensorHandleFactory& tensorHandleFactory,
3786 bool biasEnabled,
3787 const armnn::DataLayout layout)
3788 {
3789 return DepthwiseConvolution2dDepthMul1TestImpl<armnn::DataType::QAsymmU8, armnn::DataType::Signed32>(
3790 workloadFactory, memoryManager, tensorHandleFactory, 0.5f, 50, biasEnabled, layout);
3791 }
3792
3793 LayerTestResult<float, 4> SimpleDepthwiseConvolution2d3x3Dilation3x3NhwcTest(
3794 armnn::IWorkloadFactory& workloadFactory,
3795 const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
3796 const armnn::ITensorHandleFactory& tensorHandleFactory)
3797 {
3798 return SimpleDepthwiseConvolution2d3x3Dilation3x3NhwcTestCommon<armnn::DataType::Float32, armnn::DataType::Float32>(
3799 workloadFactory,
3800 memoryManager,
3801 tensorHandleFactory,
3802 0.f,
3803 0,
3804 false);
3805 }
3806
3807 LayerTestResult<int16_t, 4> DepthwiseConvolution2dInt16Test(
3808 armnn::IWorkloadFactory& workloadFactory,
3809 const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
3810 const armnn::ITensorHandleFactory& tensorHandleFactory,
3811 bool biasEnabled,
3812 const armnn::DataLayout layout)
3813 {
3814 return DepthwiseConvolution2dTestImpl<armnn::DataType::QSymmS16, armnn::DataType::Signed32>(
3815 workloadFactory, memoryManager, tensorHandleFactory, 0.5f, 50, biasEnabled, layout);
3816 }
3817
3818 LayerTestResult<int16_t, 4> DepthwiseConvolution2dDepthMul1Int16Test(
3819 armnn::IWorkloadFactory& workloadFactory,
3820 const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
3821 const armnn::ITensorHandleFactory& tensorHandleFactory,
3822 bool biasEnabled,
3823 const armnn::DataLayout layout)
3824 {
3825 return DepthwiseConvolution2dDepthMul1TestImpl<armnn::DataType::QSymmS16, armnn::DataType::Signed32>(
3826 workloadFactory, memoryManager, tensorHandleFactory, 0.5f, 50, biasEnabled, layout);
3827 }
3828
3829 LayerTestResult<uint8_t, 4> DepthwiseConvolution2dPerAxisQuantTest(
3830 armnn::IWorkloadFactory& workloadFactory,
3831 const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
3832 const armnn::ITensorHandleFactory& tensorHandleFactory,
3833 const armnn::DataLayout layout)
3834 {
3835 using namespace armnn;
3836
3837 const DataType inputType = DataType::QAsymmU8;
3838 const DataType kernelType = DataType::QSymmS8;
3839 const DataType biasType = DataType::Signed32;
3840
3841 TensorInfo inputInfo ({ 1, 3, 3, 2 }, inputType, 0.5f, 128); // N H W C
3842 TensorInfo outputInfo({ 1, 2, 2, 4 }, inputType, 1.0f, 128); // N H W C
3843
3844 const std::vector<float> quantScales{ 1.0f, 0.5f, 1.0f, 0.5f };
3845 const unsigned int quantDimension = 3;
3846 TensorInfo kernelInfo({ 1, 2, 2, 4 }, kernelType, quantScales, quantDimension); // [1, H, W, I*M]
3847
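// As in Convolution2dPerAxisQuantTest, bias_scale[c] = input_scale * weight_scale[c]:
// 0.5 * { 1.0, 0.5, 1.0, 0.5 } = { 0.5, 0.25, 0.5, 0.25 }.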
3848 const std::vector<float> biasQuantScales{ 0.5f, 0.25f, 0.5f, 0.25f };
3849 constexpr unsigned int biasQuantDimension = 0;
3850 TensorInfo biasInfo({ 4 }, biasType, biasQuantScales, biasQuantDimension);
3851
3852 std::vector<uint8_t> inputData =
3853 {
3854 129, 130,
3855 129, 130,
3856 129, 130,
3857 129, 130,
3858 129, 130,
3859 129, 130,
3860 129, 130,
3861 129, 130,
3862 129, 130
3863 };
3864
3865 std::vector<int8_t> kernelData =
3866 {
3867 1, 1, 1, 1,
3868 1, 1, 1, 1,
3869 1, 1, 1, 1,
3870 1, 1, 1, 1
3871 };
3872
3873 if (workloadFactory.GetBackendId() == armnn::BackendId("GpuAcc") ||
3874 workloadFactory.GetBackendId() == armnn::BackendId("CpuAcc"))
3875 {
3876 if (layout == armnn::DataLayout::NCHW)
3877 {
3878 std::vector<int8_t> tmp(kernelData.size());
3879 kernelInfo.SetShape(armnnUtils::Permuted(kernelInfo.GetShape(), {0, 2, 3, 1}));
3880 armnnUtils::Permute(kernelInfo.GetShape(), {0, 2, 3, 1}, kernelData.data(), tmp.data(), sizeof(int8_t));
3881 kernelData = tmp;
3882 }
3883 }
3884
3885 std::vector<int32_t> biasData =
3886 {
3887 4, 4, 4, 4
3888 };
3889
3890 std::vector<uint8_t> expectedOutputData =
3891 {
3892 132, 130, 134, 131,
3893 132, 130, 134, 131,
3894 132, 130, 134, 131,
3895 132, 130, 134, 131
3896 };
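// Worked example for output channel 0 (fed by input channel 0): the input dequantises
// to (129 - 128) * 0.5 = 0.5, the channel-0 weights to 1.0 and the bias to 4 * 0.5 = 2.
// A 2x2 window gives 4 * 0.5 * 1.0 + 2 = 4, which requantises to 4 / 1.0 + 128 = 132.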
3897
3898 if (layout == DataLayout::NCHW)
3899 {
3900 PermuteTensorNhwcToNchw(inputInfo, inputData);
3901 PermuteTensorNhwcToNchw(outputInfo, expectedOutputData);
3902 }
3903
3904 std::vector<uint8_t> actualOutput(outputInfo.GetNumElements());
3905
3906 DepthwiseConvolution2dDescriptor descriptor;
3907 descriptor.m_StrideX = 1;
3908 descriptor.m_StrideY = 1;
3909 descriptor.m_PadLeft = 0;
3910 descriptor.m_PadRight = 0;
3911 descriptor.m_PadTop = 0;
3912 descriptor.m_PadBottom = 0;
3913 descriptor.m_DilationX = 1;
3914 descriptor.m_DilationY = 1;
3915 descriptor.m_BiasEnabled = true;
3916 descriptor.m_DataLayout = layout;
3917
3918 std::unique_ptr<ITensorHandle> inputHandle = tensorHandleFactory.CreateTensorHandle(inputInfo);
3919 std::unique_ptr<ITensorHandle> weightsHandle = tensorHandleFactory.CreateTensorHandle(kernelInfo);
3920 std::unique_ptr<ITensorHandle> biasHandle = tensorHandleFactory.CreateTensorHandle(biasInfo);
3921 std::unique_ptr<ITensorHandle> outputHandle = tensorHandleFactory.CreateTensorHandle(outputInfo);
3922
3923 DepthwiseConvolution2dQueueDescriptor queueDescriptor;
3924 WorkloadInfo workloadInfo;
3925
3926 AddInputToWorkload(queueDescriptor, workloadInfo, inputInfo, inputHandle.get());
3927 AddInputToWorkload(queueDescriptor, workloadInfo, kernelInfo, weightsHandle.get());
3928 AddOutputToWorkload(queueDescriptor, workloadInfo, outputInfo, outputHandle.get());
3929 AddInputToWorkload(queueDescriptor, workloadInfo, biasInfo, biasHandle.get());
3930
3931 AllocateAndCopyDataToITensorHandle(weightsHandle.get(), kernelData.data());
3932 AllocateAndCopyDataToITensorHandle(biasHandle.get(), biasData.data());
3933
3934 queueDescriptor.m_Parameters = descriptor;
3935
3936 std::unique_ptr<IWorkload> workload = workloadFactory.CreateWorkload(armnn::LayerType::DepthwiseConvolution2d,
3937 queueDescriptor,
3938 workloadInfo);
3939 inputHandle->Allocate();
3940 outputHandle->Allocate();
3941
3942 CopyDataToITensorHandle(inputHandle.get(), inputData.data());
3943
3944 ExecuteWorkload(*workload, memoryManager);
3945
3948 CopyDataFromITensorHandle(actualOutput.data(), outputHandle.get());
3949
3950 return LayerTestResult<uint8_t, 4>(actualOutput,
3951 expectedOutputData,
3952 outputHandle->GetShape(),
3953 outputInfo.GetShape());
3954 }
3955
3956 LayerTestResult<float, 4> CompareDepthwiseConvolution2dFloatTest(
3957 armnn::IWorkloadFactory& workloadFactory,
3958 const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
3959 armnn::IWorkloadFactory& refWorkloadFactory,
3960 const armnn::ITensorHandleFactory& tensorHandleFactory,
3961 const armnn::ITensorHandleFactory& refTensorHandleFactory,
3962 const armnn::DataLayout layout)
3963 {
3964 return CompareDepthwiseConvolution2dTestImpl<armnn::DataType::Float32>(
3965 workloadFactory, memoryManager, refWorkloadFactory, tensorHandleFactory, refTensorHandleFactory, layout);
3966 }
3967
3968 LayerTestResult<uint8_t, 4> CompareDepthwiseConvolution2dUint8Test(
3969 armnn::IWorkloadFactory& workloadFactory,
3970 const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
3971 armnn::IWorkloadFactory& refWorkloadFactory,
3972 const armnn::ITensorHandleFactory& tensorHandleFactory,
3973 const armnn::ITensorHandleFactory& refTensorHandleFactory,
3974 const armnn::DataLayout layout)
3975 {
3976 return CompareDepthwiseConvolution2dTestImpl<armnn::DataType::QAsymmU8>(
3977 workloadFactory, memoryManager, refWorkloadFactory, tensorHandleFactory, refTensorHandleFactory, layout);
3978 }
3979