xref: /aosp_15_r20/external/armnn/src/backends/reference/RefWorkloadFactory.cpp (revision 89c4ff92f2867872bb9e2354d150bf0c8c502810)
1 //
2 // Copyright © 2017-2023 Arm Ltd and Contributors. All rights reserved.
3 // SPDX-License-Identifier: MIT
4 //
#include <Layer.hpp>
#include <armnn/backends/MemCopyWorkload.hpp>
#include <backendsCommon/MemImportWorkload.hpp>
#include <backendsCommon/MakeWorkloadHelper.hpp>
#include <armnn/backends/TensorHandle.hpp>
#include "RefWorkloadFactory.hpp"
#include "RefBackendId.hpp"
#include "workloads/RefWorkloads.hpp"
#include "RefTensorHandle.hpp"

#include <algorithm>
#include <memory>
14 
15 
16 namespace armnn
17 {
18 
namespace
{
// File-local identifier for the reference backend; returned by GetBackendId().
static const BackendId s_Id{RefBackendId()};
}
template <typename F32Workload, typename U8Workload, typename QueueDescriptorType>
std::unique_ptr<IWorkload> RefWorkloadFactory::MakeWorkload(const QueueDescriptorType& descriptor,
                                                            const WorkloadInfo& info) const
{
    // Dispatch on the tensor data type via MakeWorkloadHelper. Only the Float32 and
    // QAsymmU8 slots are populated; every other slot (including Float16) is NullWorkload,
    // i.e. the layer type is unsupported for those data types through this path.
    return MakeWorkloadHelper<NullWorkload, F32Workload, U8Workload, NullWorkload, NullWorkload, NullWorkload>
           (descriptor, info);
}
30 
31 template <DataType ArmnnType>
IsDataType(const WorkloadInfo & info)32 bool IsDataType(const WorkloadInfo& info)
33 {
34     auto checkType = [](const TensorInfo& tensorInfo) {return tensorInfo.GetDataType() == ArmnnType;};
35     auto it = std::find_if(std::begin(info.m_InputTensorInfos), std::end(info.m_InputTensorInfos), checkType);
36     if (it != std::end(info.m_InputTensorInfos))
37     {
38         return true;
39     }
40     it = std::find_if(std::begin(info.m_OutputTensorInfos), std::end(info.m_OutputTensorInfos), checkType);
41     if (it != std::end(info.m_OutputTensorInfos))
42     {
43         return true;
44     }
45     return false;
46 }
47 
// True when any input or output tensor of the workload is Signed32.
bool IsSigned32(const WorkloadInfo& info)
{
    return IsDataType<DataType::Signed32>(info);
}
52 
// True when any input or output tensor of the workload is BFloat16.
bool IsBFloat16(const WorkloadInfo& info)
{
    return IsDataType<DataType::BFloat16>(info);
}
57 
// True when any input or output tensor of the workload is Float16.
bool IsFloat16(const WorkloadInfo& info)
{
    return IsDataType<DataType::Float16>(info);
}
62 
// True when any input or output tensor of the workload is QSymmS16.
bool IsQSymmS16(const WorkloadInfo& info)
{
    return IsDataType<DataType::QSymmS16>(info);
}
67 
// True when any input or output tensor of the workload is QSymmS8.
bool IsQSymmS8(const WorkloadInfo& info)
{
    return IsDataType<DataType::QSymmS8>(info);
}
72 
// True when any input or output tensor of the workload is QAsymmS8.
bool IsQAsymmS8(const WorkloadInfo& info)
{
    return IsDataType<DataType::QAsymmS8>(info);
}
77 
// True when any input or output tensor of the workload is QAsymmU8.
bool IsQAsymmU8(const WorkloadInfo& info)
{
    return IsDataType<DataType::QAsymmU8>(info);
}
82 
/// Construct the factory around an externally supplied (possibly shared) memory manager.
RefWorkloadFactory::RefWorkloadFactory(const std::shared_ptr<RefMemoryManager>& memoryManager)
    : m_MemoryManager(memoryManager)
{
}
87 
RefWorkloadFactory()88 RefWorkloadFactory::RefWorkloadFactory()
89     : m_MemoryManager(new RefMemoryManager())
90 {
91 }
92 
/// Identifies this factory as belonging to the reference (CpuRef) backend.
const BackendId& RefWorkloadFactory::GetBackendId() const
{
    return s_Id;
}
97 
/// Forwards the support query to the generic IWorkloadFactory implementation,
/// scoped to the reference backend id.
bool RefWorkloadFactory::IsLayerSupported(const Layer& layer,
                                          Optional<DataType> dataType,
                                          std::string& outReasonIfUnsupported)
{
    return IWorkloadFactory::IsLayerSupported(s_Id, layer, dataType, outReasonIfUnsupported);
}
104 
/// Overload that additionally passes backend-specific model options through to the
/// generic support check.
bool RefWorkloadFactory::IsLayerSupported(const IConnectableLayer& layer,
                                          Optional<DataType> dataType,
                                          std::string& outReasonIfUnsupported,
                                          const ModelOptions& modelOptions)
{
    return IWorkloadFactory::IsLayerSupported(s_Id, layer, dataType, outReasonIfUnsupported, modelOptions);
}
112 
CreateTensorHandle(const TensorInfo & tensorInfo,const bool isMemoryManaged) const113 std::unique_ptr<ITensorHandle> RefWorkloadFactory::CreateTensorHandle(const TensorInfo& tensorInfo,
114                                                                       const bool isMemoryManaged) const
115 {
116     if (isMemoryManaged)
117     {
118         return std::make_unique<RefTensorHandle>(tensorInfo, m_MemoryManager);
119     }
120     else
121     {
122         return std::make_unique<RefTensorHandle>(tensorInfo);
123     }
124 }
125 
CreateTensorHandle(const TensorInfo & tensorInfo,DataLayout dataLayout,const bool isMemoryManaged) const126 std::unique_ptr<ITensorHandle> RefWorkloadFactory::CreateTensorHandle(const TensorInfo& tensorInfo,
127                                                                       DataLayout dataLayout,
128                                                                       const bool isMemoryManaged) const
129 {
130     // For Ref it is okay to make the TensorHandle memory managed as it can also store a pointer
131     // to unmanaged memory. This also ensures memory alignment.
132     IgnoreUnused(isMemoryManaged, dataLayout);
133 
134     if (isMemoryManaged)
135     {
136         return std::make_unique<RefTensorHandle>(tensorInfo, m_MemoryManager);
137     }
138     else
139     {
140         return std::make_unique<RefTensorHandle>(tensorInfo);
141     }
142 }
143 
/// Central factory switch: downcasts the generic QueueDescriptor to the layer-specific
/// descriptor and instantiates the matching reference workload.
/// Returns nullptr for layer types the reference backend does not implement
/// (and for PreCompiled, which has no reference implementation).
std::unique_ptr<IWorkload> RefWorkloadFactory::CreateWorkload(LayerType type,
                                                              const QueueDescriptor& descriptor,
                                                              const WorkloadInfo& info) const
{
    switch(type)
    {
        case LayerType::Activation :
        {
            auto activationQueueDescriptor = PolymorphicDowncast<const ActivationQueueDescriptor*>(&descriptor);
            return std::make_unique<RefActivationWorkload>(*activationQueueDescriptor, info);
        }
        case LayerType::Addition :
        {
            auto additionQueueDescriptor = PolymorphicDowncast<const AdditionQueueDescriptor*>(&descriptor);

            // Signed32 inputs run integer arithmetic; all other types go through the float path.
            if (info.m_InputTensorInfos[0].GetDataType() == armnn::DataType::Signed32)
            {
                return std::make_unique<RefAdditionWorkload<int32_t>>(*additionQueueDescriptor, info);
            }
            else
            {
                return std::make_unique<RefAdditionWorkload<float>>(*additionQueueDescriptor, info);
            }
        }
        case LayerType::ArgMinMax :
        {
            auto argMinMaxQueueDescriptor = PolymorphicDowncast<const ArgMinMaxQueueDescriptor*>(&descriptor);
            return std::make_unique<RefArgMinMaxWorkload>(*argMinMaxQueueDescriptor, info);
        }
        case LayerType::BatchMatMul:
        {
            auto batchMatMulQueueDescriptor = PolymorphicDowncast<const BatchMatMulQueueDescriptor*>(&descriptor);
            return std::make_unique<RefBatchMatMulWorkload>(*batchMatMulQueueDescriptor, info);
        }
        case LayerType::BatchNormalization :
        {
            auto batchNormQueueDescriptor = PolymorphicDowncast<const BatchNormalizationQueueDescriptor*>(&descriptor);
            return std::make_unique<RefBatchNormalizationWorkload>(*batchNormQueueDescriptor, info);
        }
        case LayerType::BatchToSpaceNd :
        {
            auto batchToSpaceNdQueueDescriptor
                    = PolymorphicDowncast<const BatchToSpaceNdQueueDescriptor*>(&descriptor);
            return std::make_unique<RefBatchToSpaceNdWorkload>(*batchToSpaceNdQueueDescriptor, info);
       }
        case LayerType::Cast :
        {
            auto castQueueDescriptor = PolymorphicDowncast<const CastQueueDescriptor*>(&descriptor);
            return std::make_unique<RefCastWorkload>(*castQueueDescriptor, info);
        }
        case LayerType::ChannelShuffle :
        {
            auto channelShuffleQueueDescriptor
                    = PolymorphicDowncast<const ChannelShuffleQueueDescriptor*>(&descriptor);
            return std::make_unique<RefChannelShuffleWorkload>(*channelShuffleQueueDescriptor, info);
        }
        case LayerType::Comparison :
        {
            auto comparisonQueueDescriptor = PolymorphicDowncast<const ComparisonQueueDescriptor*>(&descriptor);
            return std::make_unique<RefComparisonWorkload>(*comparisonQueueDescriptor, info);
        }
        case LayerType::Concat :
        {
            auto concatQueueDescriptor = PolymorphicDowncast<const ConcatQueueDescriptor*>(&descriptor);
            return std::make_unique<RefConcatWorkload>(*concatQueueDescriptor, info);
        }
        case LayerType::Constant :
        {
            auto constantQueueDescriptor = PolymorphicDowncast<const ConstantQueueDescriptor*>(&descriptor);
            return std::make_unique<RefConstantWorkload>(*constantQueueDescriptor, info);
        }
        case LayerType::ConvertFp16ToFp32:
        {
            auto convertFp16ToFp32QueueDescriptor
                    = PolymorphicDowncast<const ConvertFp16ToFp32QueueDescriptor*>(&descriptor);
            return std::make_unique<RefConvertFp16ToFp32Workload>(*convertFp16ToFp32QueueDescriptor, info);
        }
        case LayerType::ConvertFp32ToFp16:
        {
            auto convertFp32ToFp16QueueDescriptor
                    = PolymorphicDowncast<const ConvertFp32ToFp16QueueDescriptor*>(&descriptor);
            return std::make_unique<RefConvertFp32ToFp16Workload>(*convertFp32ToFp16QueueDescriptor, info);
        }
        case LayerType::Convolution2d:
        {
            auto convolution2dQueueDescriptor = PolymorphicDowncast<const Convolution2dQueueDescriptor*>(&descriptor);
            return std::make_unique<RefConvolution2dWorkload>(*convolution2dQueueDescriptor, info);
        }
        case LayerType::Convolution3d:
        {
            auto convolution3dQueueDescriptor = PolymorphicDowncast<const Convolution3dQueueDescriptor*>(&descriptor);
            return std::make_unique<RefConvolution3dWorkload>(*convolution3dQueueDescriptor, info);
        }
        case LayerType::Debug:
        {
            auto debugQueueDescriptor = PolymorphicDowncast<const DebugQueueDescriptor*>(&descriptor);
            // Debug workloads are specialised per data type; test each type in turn and
            // fall back to the Float32/QAsymmU8 helper when none of the specific types match.
            if (IsBFloat16(info))
            {
                return std::make_unique<RefDebugBFloat16Workload>(*debugQueueDescriptor, info);
            }
            if (IsFloat16(info))
            {
                return std::make_unique<RefDebugFloat16Workload>(*debugQueueDescriptor, info);
            }
            if (IsQSymmS16(info))
            {
                return std::make_unique<RefDebugQSymmS16Workload>(*debugQueueDescriptor, info);
            }
            if (IsQSymmS8(info))
            {
                return std::make_unique<RefDebugQSymmS8Workload>(*debugQueueDescriptor, info);
            }
            if (IsQAsymmU8(info))
            {
                return std::make_unique<RefDebugQAsymmU8Workload>(*debugQueueDescriptor, info);
            }
            if (IsQAsymmS8(info))
            {
                return std::make_unique<RefDebugQAsymmS8Workload>(*debugQueueDescriptor, info);
            }
            if (IsSigned32(info))
            {
                return std::make_unique<RefDebugSigned32Workload>(*debugQueueDescriptor, info);
            }

            return MakeWorkload<RefDebugFloat32Workload, RefDebugQAsymmU8Workload>(*debugQueueDescriptor, info);
        }
        case LayerType::DepthToSpace:
        {
            auto depthToSpaceQueueDescriptor = PolymorphicDowncast<const DepthToSpaceQueueDescriptor*>(&descriptor);
            return std::make_unique<RefDepthToSpaceWorkload>(*depthToSpaceQueueDescriptor, info);
        }
        case LayerType::DepthwiseConvolution2d:
        {
            auto depthwiseConvolution2DQueueDescriptor
                = PolymorphicDowncast<const DepthwiseConvolution2dQueueDescriptor*>(&descriptor);
            return std::make_unique<RefDepthwiseConvolution2dWorkload>(*depthwiseConvolution2DQueueDescriptor, info);
        }
        case LayerType::Dequantize:
        {
            auto dequantizeQueueDescriptor = PolymorphicDowncast<const DequantizeQueueDescriptor*>(&descriptor);
            return std::make_unique<RefDequantizeWorkload>(*dequantizeQueueDescriptor, info);
        }
        case LayerType::DetectionPostProcess:
        {
            auto detectionPostProcessQueueDescriptor
                = PolymorphicDowncast<const DetectionPostProcessQueueDescriptor*>(&descriptor);
            return std::make_unique<RefDetectionPostProcessWorkload>(*detectionPostProcessQueueDescriptor, info);
        }
        case LayerType::Division:
        {
            auto divisionQueueDescriptor = PolymorphicDowncast<const DivisionQueueDescriptor*>(&descriptor);
            // Signed32 inputs run integer arithmetic; all other types go through the float path.
            if (info.m_InputTensorInfos[0].GetDataType() == armnn::DataType::Signed32)
            {
                return std::make_unique<RefDivisionWorkload<int32_t>>(*divisionQueueDescriptor, info);
            }
            else
            {
                return std::make_unique<RefDivisionWorkload<float>>(*divisionQueueDescriptor, info);
            }
        }
        case LayerType::ElementwiseBinary:
        {
            auto elementwiseBinaryQueueDescriptor
                    = PolymorphicDowncast<const ElementwiseBinaryQueueDescriptor*>(&descriptor);
            return std::make_unique<RefElementwiseBinaryWorkload>(*elementwiseBinaryQueueDescriptor, info);
        }
        case LayerType::ElementwiseUnary:
        {
            auto elementwiseUnaryQueueDescriptor
                = PolymorphicDowncast<const ElementwiseUnaryQueueDescriptor*>(&descriptor);
            // LogicalNot operates on boolean tensors and has a dedicated logical workload.
            if ((*elementwiseUnaryQueueDescriptor).m_Parameters.m_Operation == UnaryOperation::LogicalNot)
            {
                return std::make_unique<RefLogicalUnaryWorkload>(*elementwiseUnaryQueueDescriptor, info);
            }
            return std::make_unique<RefElementwiseUnaryWorkload>(*elementwiseUnaryQueueDescriptor, info);
        }
        case LayerType::FakeQuantization:
        {
            auto fakeQuantizationQueueDescriptor
                = PolymorphicDowncast<const FakeQuantizationQueueDescriptor*>(&descriptor);
            return std::make_unique<RefFakeQuantizationFloat32Workload>(*fakeQuantizationQueueDescriptor, info);
        }
        case LayerType::Fill:
        {
            auto fillQueueDescriptor = PolymorphicDowncast<const FillQueueDescriptor*>(&descriptor);
            return std::make_unique<RefFillWorkload>(*fillQueueDescriptor, info);
        }
        case LayerType::Floor:
        {
            auto floorQueueDescriptor = PolymorphicDowncast<const FloorQueueDescriptor*>(&descriptor);
            // Floor is not defined for quantized types in the reference backend.
            if(IsQuantizedType(info.m_InputTensorInfos[0].GetDataType()))
            {
                return nullptr;
            }
            else
            {
                return std::make_unique<RefFloorWorkload>(*floorQueueDescriptor, info);
            }
        }
        case LayerType::FullyConnected:
        {
            auto fullyConnectedQueueDescriptor
                    = PolymorphicDowncast<const FullyConnectedQueueDescriptor*>(&descriptor);
            return std::make_unique<RefFullyConnectedWorkload>(*fullyConnectedQueueDescriptor, info);
        }
        case LayerType::Gather:
        {
            auto gatherQueueDescriptor = PolymorphicDowncast<const GatherQueueDescriptor*>(&descriptor);
            return std::make_unique<RefGatherWorkload>(*gatherQueueDescriptor, info);
        }
        case LayerType::GatherNd:
        {
            auto gatherNdQueueDescriptor = PolymorphicDowncast<const GatherNdQueueDescriptor*>(&descriptor);
            return std::make_unique<RefGatherNdWorkload>(*gatherNdQueueDescriptor, info);
        }
        case LayerType::Input:
        {
            auto inputQueueDescriptor = PolymorphicDowncast<const InputQueueDescriptor*>(&descriptor);
            // Input is realised as a memcpy; validate that both sides exist and match in size.
            if (info.m_InputTensorInfos.empty() )
            {
                throw InvalidArgumentException("RefWorkloadFactory::CreateInput: Input cannot be zero length");
            }
            if (info.m_OutputTensorInfos.empty())
            {
                throw InvalidArgumentException("RefWorkloadFactory::CreateInput: Output cannot be zero length");
            }

            if (info.m_InputTensorInfos[0].GetNumBytes() != info.m_OutputTensorInfos[0].GetNumBytes())
            {
                throw InvalidArgumentException("RefWorkloadFactory::CreateInput: "
                                               "data input and output differ in byte count.");
            }

            return std::make_unique<CopyMemGenericWorkload>(*inputQueueDescriptor, info);
        }
        case LayerType::InstanceNormalization:
        {
            auto instanceNormalizationQueueDescriptor
                    = PolymorphicDowncast<const InstanceNormalizationQueueDescriptor*>(&descriptor);
            return std::make_unique<RefInstanceNormalizationWorkload>(*instanceNormalizationQueueDescriptor, info);
        }
        case LayerType::L2Normalization:
        {
            auto l2NormalizationQueueDescriptor
                    = PolymorphicDowncast<const L2NormalizationQueueDescriptor*>(&descriptor);
            return std::make_unique<RefL2NormalizationWorkload>(*l2NormalizationQueueDescriptor, info);
        }
        case LayerType::LogicalBinary:
        {
            auto logicalBinaryQueueDescriptor = PolymorphicDowncast<const LogicalBinaryQueueDescriptor*>(&descriptor);
            return std::make_unique<RefLogicalBinaryWorkload>(*logicalBinaryQueueDescriptor, info);
        }
        case LayerType::LogSoftmax:
        {
            auto logSoftmaxQueueDescriptor = PolymorphicDowncast<const LogSoftmaxQueueDescriptor*>(&descriptor);
            return std::make_unique<RefLogSoftmaxWorkload>(*logSoftmaxQueueDescriptor, info);
        }
        case LayerType::Lstm:
        {
            auto lstmQueueDescriptor = PolymorphicDowncast<const LstmQueueDescriptor*>(&descriptor);
            return std::make_unique<RefLstmWorkload>(*lstmQueueDescriptor, info);
        }
        case LayerType::Maximum:
        {
            auto maximumQueueDescriptor = PolymorphicDowncast<const MaximumQueueDescriptor*>(&descriptor);
            // Signed32 inputs run integer arithmetic; all other types go through the float path.
            if (info.m_InputTensorInfos[0].GetDataType() == armnn::DataType::Signed32)
            {
                return std::make_unique<RefMaximumWorkload<int32_t>>(*maximumQueueDescriptor, info);
            }
            else
            {
                return std::make_unique<RefMaximumWorkload<float>>(*maximumQueueDescriptor, info);
            }
        }
        case LayerType::Mean:
        {
            auto meanQueueDescriptor = PolymorphicDowncast<const MeanQueueDescriptor*>(&descriptor);
            return  std::make_unique<RefMeanWorkload>(*meanQueueDescriptor, info);
        }
        case LayerType::MemCopy:
        {
            auto memCopyQueueDescriptor = PolymorphicDowncast<const MemCopyQueueDescriptor*>(&descriptor);
            // A copy with no source is meaningless; fail fast.
            if (descriptor.m_Inputs.empty())
            {
                throw InvalidArgumentException("RefWorkloadFactory: CreateMemCopy() expected an input tensor.");
            }
            return std::make_unique<CopyMemGenericWorkload>(*memCopyQueueDescriptor, info);
        }
        case LayerType::MemImport:
        {
            auto memImportQueueDescriptor = PolymorphicDowncast<const MemImportQueueDescriptor*>(&descriptor);
            // An import with no source is meaningless; fail fast.
            if (descriptor.m_Inputs.empty())
            {
                throw InvalidArgumentException("RefWorkloadFactory: CreateMemImport() expected an input tensor.");
            }
            return std::make_unique<ImportMemGenericWorkload>(*memImportQueueDescriptor, info);
        }
        case LayerType::Minimum:
        {
            auto minimumQueueDescriptor = PolymorphicDowncast<const MinimumQueueDescriptor*>(&descriptor);
            // Signed32 inputs run integer arithmetic; all other types go through the float path.
            if (info.m_InputTensorInfos[0].GetDataType() == armnn::DataType::Signed32)
            {
                return std::make_unique<RefMinimumWorkload<int32_t>>(*minimumQueueDescriptor, info);
            }
            else
            {
                return std::make_unique<RefMinimumWorkload<float>>(*minimumQueueDescriptor, info);
            }
        }
        case LayerType::Multiplication:
        {
            auto multiplicationQueueDescriptor
                    = PolymorphicDowncast<const MultiplicationQueueDescriptor*>(&descriptor);
            // Signed32 inputs run integer arithmetic; all other types go through the float path.
            if (info.m_InputTensorInfos[0].GetDataType() == armnn::DataType::Signed32)
            {
                return std::make_unique<RefMultiplicationWorkload<int32_t>>(*multiplicationQueueDescriptor, info);
            }
            else
            {
                return std::make_unique<RefMultiplicationWorkload<float>>(*multiplicationQueueDescriptor, info);
            }
        }
        case LayerType::Normalization:
        {
            auto normalizationQueueDescriptor = PolymorphicDowncast<const NormalizationQueueDescriptor*>(&descriptor);
            return std::make_unique<RefNormalizationWorkload>(*normalizationQueueDescriptor, info);
        }
        case LayerType::Output:
        {
            auto outputQueueDescriptor = PolymorphicDowncast<const OutputQueueDescriptor*>(&descriptor);
            // Output is realised as a memcpy; validate that both sides exist and match in size.
            if (info.m_InputTensorInfos.empty() )
            {
                throw InvalidArgumentException("RefWorkloadFactory::CreateOutput: Input cannot be zero length");
            }
            if (info.m_OutputTensorInfos.empty())
            {
                throw InvalidArgumentException("RefWorkloadFactory::CreateOutput: Output cannot be zero length");
            }
            if (info.m_InputTensorInfos[0].GetNumBytes() != info.m_OutputTensorInfos[0].GetNumBytes())
            {
                throw InvalidArgumentException("RefWorkloadFactory::CreateOutput: data input and output "
                                               "differ in byte count.");
            }

            return std::make_unique<CopyMemGenericWorkload>(*outputQueueDescriptor, info);
        }
        case LayerType::Pad:
        {
            auto padQueueDescriptor = PolymorphicDowncast<const PadQueueDescriptor*>(&descriptor);
            return std::make_unique<RefPadWorkload>(*padQueueDescriptor, info);
        }
        case LayerType::Permute:
        {
            auto permuteQueueDescriptor = PolymorphicDowncast<const PermuteQueueDescriptor*>(&descriptor);
            // Per-data-type specialisations first; the helper call covers Float16/Float32/QAsymmU8.
            if (IsQSymmS16(info))
            {
                return std::make_unique<RefPermuteQSymm16Workload>(*permuteQueueDescriptor, info);
            }
            else if (IsBFloat16(info))
            {
                return std::make_unique<RefPermuteBFloat16Workload>(*permuteQueueDescriptor, info);
            }
            else if (IsQAsymmS8(info))
            {
                return std::make_unique<RefPermuteQAsymmS8Workload>(*permuteQueueDescriptor, info);
            }
            return MakeWorkloadHelper<RefPermuteFloat16Workload, RefPermuteFloat32Workload, RefPermuteQAsymm8Workload,
                    NullWorkload, NullWorkload, NullWorkload>(*permuteQueueDescriptor, info);
        }
        case LayerType::Pooling2d:
        {
            auto pooling2dQueueDescriptor = PolymorphicDowncast<const Pooling2dQueueDescriptor*>(&descriptor);
            return std::make_unique<RefPooling2dWorkload>(*pooling2dQueueDescriptor, info);
        }
        case LayerType::Pooling3d:
        {
            auto pooling3dQueueDescriptor = PolymorphicDowncast<const Pooling3dQueueDescriptor*>(&descriptor);
            return std::make_unique<RefPooling3dWorkload>(*pooling3dQueueDescriptor, info);
        }
        case LayerType::PreCompiled:
        {
            // The reference backend has no pre-compiled workload support.
            return nullptr;
        }
        case LayerType::Prelu:
        {
            auto preluQueueDescriptor = PolymorphicDowncast<const PreluQueueDescriptor*>(&descriptor);
            return std::make_unique<RefPreluWorkload>(*preluQueueDescriptor, info);
        }
        case LayerType::QLstm:
        {
            auto qlstmQueueDescriptor = PolymorphicDowncast<const QLstmQueueDescriptor*>(&descriptor);
            return std::make_unique<RefQLstmWorkload>(*qlstmQueueDescriptor, info);
        }
        case LayerType::Quantize:
        {
            auto quantizeQueueDescriptor = PolymorphicDowncast<const QuantizeQueueDescriptor*>(&descriptor);
            return std::make_unique<RefQuantizeWorkload>(*quantizeQueueDescriptor, info);
        }
        case LayerType::Rank:
        {
            auto rankQueueDescriptor = PolymorphicDowncast<const RankQueueDescriptor*>(&descriptor);
            return std::make_unique<RefRankWorkload>(*rankQueueDescriptor, info);
        }
        case LayerType::Reduce:
        {
            auto reduceQueueDescriptor = PolymorphicDowncast<const ReduceQueueDescriptor*>(&descriptor);
            return std::make_unique<RefReduceWorkload>(*reduceQueueDescriptor, info);
        }
        case LayerType::Reshape:
        {
            auto reshapeQueueDescriptor = PolymorphicDowncast<const ReshapeQueueDescriptor*>(&descriptor);
            return std::make_unique<RefReshapeWorkload>(*reshapeQueueDescriptor, info);
        }
        case LayerType::Resize:
        {
            auto resizeQueueDescriptor = PolymorphicDowncast<const ResizeQueueDescriptor*>(&descriptor);
            return std::make_unique<RefResizeWorkload>(*resizeQueueDescriptor, info);
        }
        case LayerType::Shape:
        {
            auto shapeQueueDescriptor = PolymorphicDowncast<const ShapeQueueDescriptor*>(&descriptor);
            return std::make_unique<RefShapeWorkload>(*shapeQueueDescriptor, info);
        }
        case LayerType::Slice:
        {
            auto sliceQueueDescriptor = PolymorphicDowncast<const SliceQueueDescriptor*>(&descriptor);
            return std::make_unique<RefSliceWorkload>(*sliceQueueDescriptor, info);
        }
        case LayerType::Softmax:
        {
            auto softmaxQueueDescriptor = PolymorphicDowncast<const SoftmaxQueueDescriptor*>(&descriptor);
            return std::make_unique<RefSoftmaxWorkload>(*softmaxQueueDescriptor, info);
        }
        case LayerType::SpaceToBatchNd:
        {
            auto spaceToBatchNdQueueDescriptor
                    = PolymorphicDowncast<const SpaceToBatchNdQueueDescriptor*>(&descriptor);
            return std::make_unique<RefSpaceToBatchNdWorkload>(*spaceToBatchNdQueueDescriptor, info);
        }
        case LayerType::SpaceToDepth:
        {
            auto spaceToDepthQueueDescriptor = PolymorphicDowncast<const SpaceToDepthQueueDescriptor*>(&descriptor);
            return std::make_unique<RefSpaceToDepthWorkload>(*spaceToDepthQueueDescriptor, info);
        }
        case LayerType::Splitter:
        {
            auto splitterQueueDescriptor = PolymorphicDowncast<const SplitterQueueDescriptor*>(&descriptor);
            return std::make_unique<RefSplitterWorkload>(*splitterQueueDescriptor, info);
        }
        case LayerType::Stack:
        {
            auto stackQueueDescriptor = PolymorphicDowncast<const StackQueueDescriptor*>(&descriptor);
            return std::make_unique<RefStackWorkload>(*stackQueueDescriptor, info);
        }
        case LayerType::StridedSlice:
        {
            auto stridedSliceQueueDescriptor = PolymorphicDowncast<const StridedSliceQueueDescriptor*>(&descriptor);
            return std::make_unique<RefStridedSliceWorkload>(*stridedSliceQueueDescriptor, info);
        }
        case LayerType::Subtraction:
        {
            auto subtractionQueueDescriptor = PolymorphicDowncast<const SubtractionQueueDescriptor*>(&descriptor);
            // Signed32 inputs run integer arithmetic; all other types go through the float path.
            if (info.m_InputTensorInfos[0].GetDataType() == armnn::DataType::Signed32)
            {
                return std::make_unique<RefSubtractionWorkload<int32_t>>(*subtractionQueueDescriptor, info);
            }
            else
            {
                return std::make_unique<RefSubtractionWorkload<float>>(*subtractionQueueDescriptor, info);
            }
        }
        case LayerType::Transpose:
        {
            auto transposeQueueDescriptor = PolymorphicDowncast<const TransposeQueueDescriptor*>(&descriptor);
            // Per-data-type specialisations first; the helper call covers Float16/Float32/QAsymmU8.
            if (IsQSymmS16(info))
            {
                return std::make_unique<RefTransposeQSymm16Workload>(*transposeQueueDescriptor, info);
            }
            else if (IsBFloat16(info))
            {
                return std::make_unique<RefTransposeBFloat16Workload>(*transposeQueueDescriptor, info);
            }
            else if (IsQAsymmS8(info))
            {
                return std::make_unique<RefTransposeQAsymmS8Workload>(*transposeQueueDescriptor, info);
            }
            return MakeWorkloadHelper<RefTransposeFloat16Workload, RefTransposeFloat32Workload,
                    RefTransposeQAsymm8Workload, NullWorkload, NullWorkload, NullWorkload>
                    (*transposeQueueDescriptor, info);
        }
        case LayerType::TransposeConvolution2d:
        {
            auto transposeConvolution2dQueueDescriptor
                    = PolymorphicDowncast<const TransposeConvolution2dQueueDescriptor*>(&descriptor);
            return std::make_unique<RefTransposeConvolution2dWorkload>(*transposeConvolution2dQueueDescriptor, info);
        }
        case LayerType::UnidirectionalSequenceLstm:
        {
            auto unidirectionalSequenceLstmQueueDescriptor
                    = PolymorphicDowncast<const UnidirectionalSequenceLstmQueueDescriptor*>(&descriptor);
            return std::make_unique<RefUnidirectionalSequenceLstmWorkload>(*unidirectionalSequenceLstmQueueDescriptor,
                                                                           info);
        }
        default:
            // Layer type not implemented by the reference backend.
            return nullptr;
    }
}
652 
// Deprecated entry point: forwards straight to the type-generic reference
// activation workload.
std::unique_ptr<IWorkload> RefWorkloadFactory::CreateActivation(const ActivationQueueDescriptor& descriptor,
                                                                const WorkloadInfo& info) const
{
    return std::make_unique<RefActivationWorkload>(descriptor, info);
}
658 
CreateAddition(const AdditionQueueDescriptor & descriptor,const WorkloadInfo & info) const659 std::unique_ptr<IWorkload> RefWorkloadFactory::CreateAddition(const AdditionQueueDescriptor& descriptor,
660                                                               const WorkloadInfo& info) const
661 {
662     if (info.m_InputTensorInfos[0].GetDataType() == armnn::DataType::Signed32)
663     {
664         return std::make_unique<RefAdditionWorkload<int32_t>>(descriptor, info);
665     }
666     else
667     {
668         return std::make_unique<RefAdditionWorkload<float>>(descriptor, info);
669     }
670 }
671 
// The deprecated CreateXxx entry points below are thin factories: each one
// forwards the queue descriptor and workload info unchanged to the matching
// type-generic reference workload.
std::unique_ptr<IWorkload> RefWorkloadFactory::CreateArgMinMax(const ArgMinMaxQueueDescriptor& descriptor,
                                                               const WorkloadInfo& info) const
{
    return std::make_unique<RefArgMinMaxWorkload>(descriptor, info);
}

std::unique_ptr<IWorkload> RefWorkloadFactory::CreateBatchNormalization(
    const BatchNormalizationQueueDescriptor& descriptor,
    const WorkloadInfo& info) const
{
    return std::make_unique<RefBatchNormalizationWorkload>(descriptor, info);
}

std::unique_ptr<IWorkload> RefWorkloadFactory::CreateBatchToSpaceNd(const BatchToSpaceNdQueueDescriptor& descriptor,
                                                                    const WorkloadInfo& info) const
{
    return std::make_unique<RefBatchToSpaceNdWorkload>(descriptor, info);
}

std::unique_ptr<IWorkload> RefWorkloadFactory::CreateCast(const CastQueueDescriptor& descriptor,
                                                          const WorkloadInfo& info) const
{
    return std::make_unique<RefCastWorkload>(descriptor, info);
}

std::unique_ptr<IWorkload> RefWorkloadFactory::CreateChannelShuffle(const ChannelShuffleQueueDescriptor &descriptor,
                                                                    const WorkloadInfo &info) const
{
    return std::make_unique<RefChannelShuffleWorkload>(descriptor,info);
}

std::unique_ptr<IWorkload> RefWorkloadFactory::CreateComparison(const ComparisonQueueDescriptor& descriptor,
                                                                const WorkloadInfo& info) const
{
    return std::make_unique<RefComparisonWorkload>(descriptor, info);
}

std::unique_ptr<IWorkload> RefWorkloadFactory::CreateConcat(const ConcatQueueDescriptor& descriptor,
                                                            const WorkloadInfo& info) const
{
    return std::make_unique<RefConcatWorkload>(descriptor, info);
}

std::unique_ptr<IWorkload> RefWorkloadFactory::CreateConstant(const ConstantQueueDescriptor& descriptor,
                                                              const WorkloadInfo& info) const
{
    return std::make_unique<RefConstantWorkload>(descriptor, info);
}

std::unique_ptr<IWorkload> RefWorkloadFactory::CreateConvertFp16ToFp32(
    const ConvertFp16ToFp32QueueDescriptor& descriptor,
    const WorkloadInfo& info) const
{
    return std::make_unique<RefConvertFp16ToFp32Workload>(descriptor, info);
}

std::unique_ptr<IWorkload> RefWorkloadFactory::CreateConvertFp32ToFp16(
    const ConvertFp32ToFp16QueueDescriptor& descriptor,
    const WorkloadInfo& info) const
{
    return std::make_unique<RefConvertFp32ToFp16Workload>(descriptor, info);
}

std::unique_ptr<IWorkload> RefWorkloadFactory::CreateConvolution2d(const Convolution2dQueueDescriptor& descriptor,
                                                                   const WorkloadInfo& info) const
{
    return std::make_unique<RefConvolution2dWorkload>(descriptor, info);
}

std::unique_ptr<IWorkload> RefWorkloadFactory::CreateConvolution3d(const Convolution3dQueueDescriptor& descriptor,
                                                                   const WorkloadInfo& info) const
{
    return std::make_unique<RefConvolution3dWorkload>(descriptor, info);
}
746 
CreateDebug(const DebugQueueDescriptor & descriptor,const WorkloadInfo & info) const747 std::unique_ptr<IWorkload> RefWorkloadFactory::CreateDebug(const DebugQueueDescriptor& descriptor,
748                                                            const WorkloadInfo& info) const
749 {
750     if (IsBFloat16(info))
751     {
752         return std::make_unique<RefDebugBFloat16Workload>(descriptor, info);
753     }
754     if (IsFloat16(info))
755     {
756         return std::make_unique<RefDebugFloat16Workload>(descriptor, info);
757     }
758     if (IsQSymmS16(info))
759     {
760         return std::make_unique<RefDebugQSymmS16Workload>(descriptor, info);
761     }
762     if (IsQSymmS8(info))
763     {
764         return std::make_unique<RefDebugQSymmS8Workload>(descriptor, info);
765     }
766     if (IsQAsymmU8(info))
767     {
768         return std::make_unique<RefDebugQAsymmU8Workload>(descriptor, info);
769     }
770     if (IsQAsymmS8(info))
771     {
772         return std::make_unique<RefDebugQAsymmS8Workload>(descriptor, info);
773     }
774     if (IsSigned32(info))
775     {
776         return std::make_unique<RefDebugSigned32Workload>(descriptor, info);
777     }
778 
779     return MakeWorkload<RefDebugFloat32Workload, RefDebugQAsymmU8Workload>(descriptor, info);
780 }
781 
// Thin deprecated factories: each delegates directly to the corresponding
// type-generic reference workload.
std::unique_ptr<IWorkload> RefWorkloadFactory::CreateDepthToSpace(const DepthToSpaceQueueDescriptor& descriptor,
                                                                  const WorkloadInfo& info) const
{
    return std::make_unique<RefDepthToSpaceWorkload>(descriptor, info);
}

std::unique_ptr<IWorkload> RefWorkloadFactory::CreateDepthwiseConvolution2d(
    const DepthwiseConvolution2dQueueDescriptor& descriptor,
    const WorkloadInfo& info) const
{
    return std::make_unique<RefDepthwiseConvolution2dWorkload>(descriptor, info);
}

std::unique_ptr<IWorkload> RefWorkloadFactory::CreateDequantize(const DequantizeQueueDescriptor& descriptor,
                                                                const WorkloadInfo& info) const
{
    return std::make_unique<RefDequantizeWorkload>(descriptor, info);
}

std::unique_ptr<IWorkload> RefWorkloadFactory::CreateDetectionPostProcess(
    const DetectionPostProcessQueueDescriptor& descriptor,
    const WorkloadInfo& info) const
{
    return std::make_unique<RefDetectionPostProcessWorkload>(descriptor, info);
}
807 
CreateDivision(const DivisionQueueDescriptor & descriptor,const WorkloadInfo & info) const808 std::unique_ptr<IWorkload> RefWorkloadFactory::CreateDivision(const DivisionQueueDescriptor& descriptor,
809                                                               const WorkloadInfo& info) const
810 {
811     if (info.m_InputTensorInfos[0].GetDataType() == armnn::DataType::Signed32)
812     {
813         return std::make_unique<RefDivisionWorkload<int32_t>>(descriptor, info);
814     }
815     else
816     {
817         return std::make_unique<RefDivisionWorkload<float>>(descriptor, info);
818     }
819 }
820 
CreateElementwiseUnary(const ElementwiseUnaryQueueDescriptor & descriptor,const WorkloadInfo & info) const821 std::unique_ptr<IWorkload> RefWorkloadFactory::CreateElementwiseUnary(const ElementwiseUnaryQueueDescriptor& descriptor,
822                                                                       const WorkloadInfo& info) const
823 {
824     if (descriptor.m_Parameters.m_Operation == UnaryOperation::LogicalNot)
825     {
826         return std::make_unique<RefLogicalUnaryWorkload>(descriptor, info);
827     }
828     return std::make_unique<RefElementwiseUnaryWorkload>(descriptor, info);
829 }
830 
// FakeQuantization only exists for Float32; MakeWorkload maps every other
// data type to NullWorkload (i.e. unsupported).
std::unique_ptr<IWorkload> RefWorkloadFactory::CreateFakeQuantization(const FakeQuantizationQueueDescriptor& descriptor,
                                                                      const WorkloadInfo& info) const
{
    return MakeWorkload<RefFakeQuantizationFloat32Workload, NullWorkload>(descriptor, info);
}

// Thin deprecated factory: delegates to the type-generic fill workload.
std::unique_ptr<IWorkload> RefWorkloadFactory::CreateFill(const FillQueueDescriptor& descriptor,
                                                          const WorkloadInfo& info) const
{
    return std::make_unique<RefFillWorkload>(descriptor, info);
}
842 
CreateFloor(const FloorQueueDescriptor & descriptor,const WorkloadInfo & info) const843 std::unique_ptr<IWorkload> RefWorkloadFactory::CreateFloor(const FloorQueueDescriptor& descriptor,
844                                                            const WorkloadInfo& info) const
845 {
846     if(IsQuantizedType(info.m_InputTensorInfos[0].GetDataType()))
847     {
848         return nullptr;
849     }
850     else
851     {
852         return std::make_unique<RefFloorWorkload>(descriptor, info);
853     }
854 }
855 
// Thin deprecated factories: each delegates directly to the corresponding
// type-generic reference workload.
std::unique_ptr<IWorkload> RefWorkloadFactory::CreateFullyConnected(
    const FullyConnectedQueueDescriptor& descriptor,
    const WorkloadInfo& info) const
{
    return std::make_unique<RefFullyConnectedWorkload>(descriptor, info);
}

std::unique_ptr<IWorkload> RefWorkloadFactory::CreateGather(const GatherQueueDescriptor& descriptor,
                                                            const WorkloadInfo& info) const
{
    return std::make_unique<RefGatherWorkload>(descriptor, info);
}
868 
CreateInput(const InputQueueDescriptor & descriptor,const WorkloadInfo & info) const869 std::unique_ptr<IWorkload> RefWorkloadFactory::CreateInput(const InputQueueDescriptor& descriptor,
870                                                            const WorkloadInfo& info) const
871 {
872     if (info.m_InputTensorInfos.empty() )
873     {
874         throw InvalidArgumentException("RefWorkloadFactory::CreateInput: Input cannot be zero length");
875     }
876     if (info.m_OutputTensorInfos.empty())
877     {
878         throw InvalidArgumentException("RefWorkloadFactory::CreateInput: Output cannot be zero length");
879     }
880 
881     if (info.m_InputTensorInfos[0].GetNumBytes() != info.m_OutputTensorInfos[0].GetNumBytes())
882     {
883         throw InvalidArgumentException("RefWorkloadFactory::CreateInput: data input and output differ in byte count.");
884     }
885 
886     return std::make_unique<CopyMemGenericWorkload>(descriptor, info);
887 }
888 
// Thin deprecated factories: each delegates directly to the corresponding
// type-generic reference workload.
std::unique_ptr<IWorkload> RefWorkloadFactory::CreateInstanceNormalization(
    const InstanceNormalizationQueueDescriptor& descriptor,
    const WorkloadInfo& info) const
{
    return std::make_unique<RefInstanceNormalizationWorkload>(descriptor, info);
}

std::unique_ptr<IWorkload> RefWorkloadFactory::CreateL2Normalization(const L2NormalizationQueueDescriptor& descriptor,
                                                                     const WorkloadInfo& info) const
{
    return std::make_unique<RefL2NormalizationWorkload>(descriptor, info);
}

std::unique_ptr<IWorkload> RefWorkloadFactory::CreateLogicalBinary(const LogicalBinaryQueueDescriptor& descriptor,
                                                                   const WorkloadInfo& info) const
{
    return std::make_unique<RefLogicalBinaryWorkload>(descriptor, info);
}

std::unique_ptr<IWorkload> RefWorkloadFactory::CreateLogSoftmax(const LogSoftmaxQueueDescriptor& descriptor,
                                                                const WorkloadInfo& info) const
{
    return std::make_unique<RefLogSoftmaxWorkload>(descriptor, info);
}

std::unique_ptr<IWorkload> RefWorkloadFactory::CreateLstm(const LstmQueueDescriptor& descriptor,
                                                          const WorkloadInfo& info) const
{
    return std::make_unique<RefLstmWorkload>(descriptor, info);
}
919 
CreateMaximum(const MaximumQueueDescriptor & descriptor,const WorkloadInfo & info) const920 std::unique_ptr<IWorkload> RefWorkloadFactory::CreateMaximum(const MaximumQueueDescriptor& descriptor,
921                                                              const WorkloadInfo& info) const
922 {
923     if (info.m_InputTensorInfos[0].GetDataType() == armnn::DataType::Signed32)
924     {
925         return std::make_unique<RefMaximumWorkload<int32_t>>(descriptor, info);
926     }
927     else
928     {
929         return std::make_unique<RefMaximumWorkload<float>>(descriptor, info);
930     }
931 }
932 
CreateMean(const MeanQueueDescriptor & descriptor,const WorkloadInfo & info) const933 std::unique_ptr<IWorkload> RefWorkloadFactory::CreateMean(const MeanQueueDescriptor& descriptor,
934                                                           const WorkloadInfo& info) const
935 {
936     return  std::make_unique<RefMeanWorkload>(descriptor, info);
937 }
938 
CreateMemCopy(const MemCopyQueueDescriptor & descriptor,const WorkloadInfo & info) const939 std::unique_ptr<IWorkload> RefWorkloadFactory::CreateMemCopy(const MemCopyQueueDescriptor& descriptor,
940                                                              const WorkloadInfo& info) const
941 {
942     if (descriptor.m_Inputs.empty())
943     {
944         throw InvalidArgumentException("RefWorkloadFactory: CreateMemCopy() expected an input tensor.");
945     }
946     return std::make_unique<CopyMemGenericWorkload>(descriptor, info);
947 }
948 
CreateMemImport(const MemImportQueueDescriptor & descriptor,const WorkloadInfo & info) const949 std::unique_ptr<IWorkload> RefWorkloadFactory::CreateMemImport(const MemImportQueueDescriptor& descriptor,
950                                                                const WorkloadInfo& info) const
951 {
952     if (descriptor.m_Inputs.empty())
953     {
954         throw InvalidArgumentException("RefWorkloadFactory: CreateMemImport() expected an input tensor.");
955     }
956     return std::make_unique<ImportMemGenericWorkload>(descriptor, info);
957 }
958 
CreateMinimum(const MinimumQueueDescriptor & descriptor,const WorkloadInfo & info) const959 std::unique_ptr<IWorkload> RefWorkloadFactory::CreateMinimum(const MinimumQueueDescriptor& descriptor,
960                                                              const WorkloadInfo& info) const
961 {
962     if (info.m_InputTensorInfos[0].GetDataType() == armnn::DataType::Signed32)
963     {
964         return std::make_unique<RefMinimumWorkload<int32_t>>(descriptor, info);
965     }
966     else
967     {
968         return std::make_unique<RefMinimumWorkload<float>>(descriptor, info);
969     }
970 }
971 
CreateMultiplication(const MultiplicationQueueDescriptor & descriptor,const WorkloadInfo & info) const972 std::unique_ptr<IWorkload> RefWorkloadFactory::CreateMultiplication(const MultiplicationQueueDescriptor& descriptor,
973                                                                     const WorkloadInfo& info) const
974 {
975     if (info.m_InputTensorInfos[0].GetDataType() == armnn::DataType::Signed32)
976     {
977         return std::make_unique<RefMultiplicationWorkload<int32_t>>(descriptor, info);
978     }
979     else
980     {
981         return std::make_unique<RefMultiplicationWorkload<float>>(descriptor, info);
982     }
983 }
984 
// Thin deprecated factory: delegates to the type-generic normalization
// reference workload.
std::unique_ptr<IWorkload> RefWorkloadFactory::CreateNormalization(const NormalizationQueueDescriptor& descriptor,
                                                                   const WorkloadInfo& info) const
{
    return std::make_unique<RefNormalizationWorkload>(descriptor, info);
}
990 
CreateOutput(const OutputQueueDescriptor & descriptor,const WorkloadInfo & info) const991 std::unique_ptr<IWorkload> RefWorkloadFactory::CreateOutput(const OutputQueueDescriptor& descriptor,
992                                                             const WorkloadInfo& info) const
993 {
994     if (info.m_InputTensorInfos.empty() )
995     {
996         throw InvalidArgumentException("RefWorkloadFactory::CreateOutput: Input cannot be zero length");
997     }
998     if (info.m_OutputTensorInfos.empty())
999     {
1000         throw InvalidArgumentException("RefWorkloadFactory::CreateOutput: Output cannot be zero length");
1001     }
1002     if (info.m_InputTensorInfos[0].GetNumBytes() != info.m_OutputTensorInfos[0].GetNumBytes())
1003     {
1004         throw InvalidArgumentException("RefWorkloadFactory::CreateOutput: data input and output differ in byte count.");
1005     }
1006 
1007     return std::make_unique<CopyMemGenericWorkload>(descriptor, info);
1008 }
1009 
// Thin deprecated factory: delegates to the type-generic pad reference
// workload.
std::unique_ptr<IWorkload> RefWorkloadFactory::CreatePad(const PadQueueDescriptor& descriptor,
                                                         const WorkloadInfo& info) const
{
    return std::make_unique<RefPadWorkload>(descriptor, info);
}
1015 
CreatePermute(const PermuteQueueDescriptor & descriptor,const WorkloadInfo & info) const1016 std::unique_ptr<IWorkload> RefWorkloadFactory::CreatePermute(const PermuteQueueDescriptor& descriptor,
1017                                                              const WorkloadInfo& info) const
1018 {
1019     if (IsQSymmS16(info))
1020     {
1021         return std::make_unique<RefPermuteQSymm16Workload>(descriptor, info);
1022     }
1023     else if (IsBFloat16(info))
1024     {
1025         return std::make_unique<RefPermuteBFloat16Workload>(descriptor, info);
1026     }
1027     else if (IsQAsymmS8(info))
1028     {
1029         return std::make_unique<RefPermuteQAsymmS8Workload>(descriptor, info);
1030     }
1031     return MakeWorkloadHelper<RefPermuteFloat16Workload, RefPermuteFloat32Workload, RefPermuteQAsymm8Workload,
1032         NullWorkload, NullWorkload, NullWorkload>(descriptor, info);
1033 }
1034 
// Thin deprecated factories: each delegates directly to the corresponding
// type-generic reference workload.
std::unique_ptr<IWorkload> RefWorkloadFactory::CreatePooling2d(const Pooling2dQueueDescriptor& descriptor,
                                                               const WorkloadInfo& info) const
{
    return std::make_unique<RefPooling2dWorkload>(descriptor, info);
}

std::unique_ptr<IWorkload> RefWorkloadFactory::CreatePooling3d(const Pooling3dQueueDescriptor& descriptor,
                                                               const WorkloadInfo& info) const
{
    return std::make_unique<RefPooling3dWorkload>(descriptor, info);
}

// Pre-compiled workloads are not supported by the reference backend, so this
// always reports "unsupported" via nullptr.
std::unique_ptr<IWorkload> RefWorkloadFactory::CreatePreCompiled(const PreCompiledQueueDescriptor& /*descriptor*/,
                                                                 const WorkloadInfo& /*info*/) const
{
    return nullptr;
}

std::unique_ptr<IWorkload> RefWorkloadFactory::CreatePrelu(const PreluQueueDescriptor& descriptor,
                                                           const WorkloadInfo& info) const
{
    return std::make_unique<RefPreluWorkload>(descriptor, info);
}
1058 
// The deprecated CreateXxx entry points below are thin factories: each one
// forwards the queue descriptor and workload info unchanged to the matching
// type-generic reference workload.
std::unique_ptr<IWorkload> RefWorkloadFactory::CreateQLstm(const QLstmQueueDescriptor& descriptor,
                                                           const WorkloadInfo& info) const
{
    return std::make_unique<RefQLstmWorkload>(descriptor, info);
}

std::unique_ptr<IWorkload> RefWorkloadFactory::CreateQuantize(const QuantizeQueueDescriptor& descriptor,
                                                              const WorkloadInfo& info) const
{
    return std::make_unique<RefQuantizeWorkload>(descriptor, info);
}

std::unique_ptr<IWorkload> RefWorkloadFactory::CreateRank(const RankQueueDescriptor& descriptor,
                                                          const WorkloadInfo& info) const
{
    return std::make_unique<RefRankWorkload>(descriptor, info);
}

std::unique_ptr<IWorkload> RefWorkloadFactory::CreateReduce(const ReduceQueueDescriptor& descriptor,
                                                            const WorkloadInfo& info) const
{
    return std::make_unique<RefReduceWorkload>(descriptor, info);
}

std::unique_ptr<IWorkload> RefWorkloadFactory::CreateReshape(const ReshapeQueueDescriptor& descriptor,
                                                             const WorkloadInfo& info) const
{
    return std::make_unique<RefReshapeWorkload>(descriptor, info);
}

std::unique_ptr<IWorkload> RefWorkloadFactory::CreateResize(const ResizeQueueDescriptor& descriptor,
                                                            const WorkloadInfo& info) const
{
    return std::make_unique<RefResizeWorkload>(descriptor, info);
}

std::unique_ptr<IWorkload> RefWorkloadFactory::CreateShape(const ShapeQueueDescriptor& descriptor,
                                                           const WorkloadInfo& info) const
{
    return std::make_unique<RefShapeWorkload>(descriptor, info);
}

std::unique_ptr<IWorkload> RefWorkloadFactory::CreateSlice(const SliceQueueDescriptor& descriptor,
                                                           const WorkloadInfo& info) const
{
    return std::make_unique<RefSliceWorkload>(descriptor, info);
}

std::unique_ptr<IWorkload> RefWorkloadFactory::CreateSoftmax(const SoftmaxQueueDescriptor& descriptor,
                                                             const WorkloadInfo& info) const
{
    return std::make_unique<RefSoftmaxWorkload>(descriptor, info);
}

std::unique_ptr<IWorkload> RefWorkloadFactory::CreateSpaceToBatchNd(const SpaceToBatchNdQueueDescriptor& descriptor,
                                                                    const WorkloadInfo& info) const
{
    return std::make_unique<RefSpaceToBatchNdWorkload>(descriptor, info);
}

std::unique_ptr<IWorkload> RefWorkloadFactory::CreateSpaceToDepth(const SpaceToDepthQueueDescriptor& descriptor,
                                                                  const WorkloadInfo& info) const
{
    return std::make_unique<RefSpaceToDepthWorkload>(descriptor, info);
}

std::unique_ptr<IWorkload> RefWorkloadFactory::CreateSplitter(const SplitterQueueDescriptor& descriptor,
                                                              const WorkloadInfo& info) const
{
    return std::make_unique<RefSplitterWorkload>(descriptor, info);
}

std::unique_ptr<IWorkload> RefWorkloadFactory::CreateStack(const StackQueueDescriptor& descriptor,
                                                           const WorkloadInfo& info) const
{
    return std::make_unique<RefStackWorkload>(descriptor, info);
}

std::unique_ptr<IWorkload> RefWorkloadFactory::CreateStridedSlice(const StridedSliceQueueDescriptor& descriptor,
                                                                  const WorkloadInfo& info) const
{
    return std::make_unique<RefStridedSliceWorkload>(descriptor, info);
}
1142 
CreateSubtraction(const SubtractionQueueDescriptor & descriptor,const WorkloadInfo & info) const1143 std::unique_ptr<IWorkload> RefWorkloadFactory::CreateSubtraction(const SubtractionQueueDescriptor& descriptor,
1144                                                                  const WorkloadInfo& info) const
1145 {
1146     if (info.m_InputTensorInfos[0].GetDataType() == armnn::DataType::Signed32)
1147     {
1148         return std::make_unique<RefSubtractionWorkload<int32_t>>(descriptor, info);
1149     }
1150     else
1151     {
1152         return std::make_unique<RefSubtractionWorkload<float>>(descriptor, info);
1153     }
1154 }
1155 
CreateTranspose(const TransposeQueueDescriptor & descriptor,const WorkloadInfo & info) const1156 std::unique_ptr<IWorkload> RefWorkloadFactory::CreateTranspose(const TransposeQueueDescriptor& descriptor,
1157                                                                const WorkloadInfo& info) const
1158 {
1159     if (IsQSymmS16(info))
1160     {
1161         return std::make_unique<RefTransposeQSymm16Workload>(descriptor, info);
1162     }
1163     else if (IsBFloat16(info))
1164     {
1165         return std::make_unique<RefTransposeBFloat16Workload>(descriptor, info);
1166     }
1167     else if (IsQAsymmS8(info))
1168     {
1169         return std::make_unique<RefTransposeQAsymmS8Workload>(descriptor, info);
1170     }
1171     return MakeWorkloadHelper<RefTransposeFloat16Workload, RefTransposeFloat32Workload, RefTransposeQAsymm8Workload,
1172             NullWorkload, NullWorkload, NullWorkload>(descriptor, info);
1173 }
1174 
// Thin deprecated factory: delegates to the type-generic transpose-convolution
// reference workload.
std::unique_ptr<IWorkload> RefWorkloadFactory::CreateTransposeConvolution2d(
    const TransposeConvolution2dQueueDescriptor& descriptor,
    const WorkloadInfo& info) const
{
    return std::make_unique<RefTransposeConvolution2dWorkload>(descriptor, info);
}
1181 
CreateUnidirectionalSequenceLstm(const UnidirectionalSequenceLstmQueueDescriptor & descriptor,const WorkloadInfo & info) const1182 std::unique_ptr<IWorkload> RefWorkloadFactory::CreateUnidirectionalSequenceLstm(
1183     const UnidirectionalSequenceLstmQueueDescriptor& descriptor,
1184     const WorkloadInfo& info) const
1185 {
1186     return std::make_unique<RefUnidirectionalSequenceLstmWorkload>(descriptor, info);;
1187 }
1188 
1189 } // namespace armnn
1190