//
// Copyright © 2017-2023 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//
#include <Layer.hpp>
#include <armnn/backends/MemCopyWorkload.hpp>
#include <backendsCommon/MemImportWorkload.hpp>
#include <backendsCommon/MakeWorkloadHelper.hpp>
#include <armnn/backends/TensorHandle.hpp>
#include "RefWorkloadFactory.hpp"
#include "RefBackendId.hpp"
#include "workloads/RefWorkloads.hpp"
#include "RefTensorHandle.hpp"

namespace armnn
{

namespace
{
static const BackendId s_Id{RefBackendId()};
}
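
// Builds a data-type-specialised workload: MakeWorkloadHelper picks the F32 or U8 template
// instantiation from the data type recorded in the WorkloadInfo, and every other data type
// resolves to NullWorkload, i.e. no workload is created.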
template <typename F32Workload, typename U8Workload, typename QueueDescriptorType>
std::unique_ptr<IWorkload> RefWorkloadFactory::MakeWorkload(const QueueDescriptorType& descriptor,
                                                            const WorkloadInfo& info) const
{
    return MakeWorkloadHelper<NullWorkload, F32Workload, U8Workload, NullWorkload, NullWorkload, NullWorkload>
           (descriptor, info);
}

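// Returns true if any input or output tensor described by the WorkloadInfo has the given
// data type.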
template <DataType ArmnnType>
bool IsDataType(const WorkloadInfo& info)
{
    auto checkType = [](const TensorInfo& tensorInfo) { return tensorInfo.GetDataType() == ArmnnType; };
    auto it = std::find_if(std::begin(info.m_InputTensorInfos), std::end(info.m_InputTensorInfos), checkType);
    if (it != std::end(info.m_InputTensorInfos))
    {
        return true;
    }
    it = std::find_if(std::begin(info.m_OutputTensorInfos), std::end(info.m_OutputTensorInfos), checkType);
    if (it != std::end(info.m_OutputTensorInfos))
    {
        return true;
    }
    return false;
}

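// Thin wrappers over IsDataType<>, used by the factory methods below to select
// data-type-specific workload instantiations.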
bool IsSigned32(const WorkloadInfo& info)
{
    return IsDataType<DataType::Signed32>(info);
}

bool IsBFloat16(const WorkloadInfo& info)
{
    return IsDataType<DataType::BFloat16>(info);
}

bool IsFloat16(const WorkloadInfo& info)
{
    return IsDataType<DataType::Float16>(info);
}

bool IsQSymmS16(const WorkloadInfo& info)
{
    return IsDataType<DataType::QSymmS16>(info);
}

bool IsQSymmS8(const WorkloadInfo& info)
{
    return IsDataType<DataType::QSymmS8>(info);
}

bool IsQAsymmS8(const WorkloadInfo& info)
{
    return IsDataType<DataType::QAsymmS8>(info);
}

bool IsQAsymmU8(const WorkloadInfo& info)
{
    return IsDataType<DataType::QAsymmU8>(info);
}

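// A factory can either share a caller-supplied RefMemoryManager or, via the default
// constructor, own a fresh one.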
RefWorkloadFactory::RefWorkloadFactory(const std::shared_ptr<RefMemoryManager>& memoryManager)
    : m_MemoryManager(memoryManager)
{
}

RefWorkloadFactory::RefWorkloadFactory()
    : m_MemoryManager(new RefMemoryManager())
{
}

const BackendId& RefWorkloadFactory::GetBackendId() const
{
    return s_Id;
}

bool RefWorkloadFactory::IsLayerSupported(const Layer& layer,
                                          Optional<DataType> dataType,
                                          std::string& outReasonIfUnsupported)
{
    return IWorkloadFactory::IsLayerSupported(s_Id, layer, dataType, outReasonIfUnsupported);
}

bool RefWorkloadFactory::IsLayerSupported(const IConnectableLayer& layer,
                                          Optional<DataType> dataType,
                                          std::string& outReasonIfUnsupported,
                                          const ModelOptions& modelOptions)
{
    return IWorkloadFactory::IsLayerSupported(s_Id, layer, dataType, outReasonIfUnsupported, modelOptions);
}

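// When isMemoryManaged is true the returned RefTensorHandle allocates from the factory's
// RefMemoryManager; otherwise the handle allocates (or imports) its own unmanaged memory.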
std::unique_ptr<ITensorHandle> RefWorkloadFactory::CreateTensorHandle(const TensorInfo& tensorInfo,
                                                                      const bool isMemoryManaged) const
{
    if (isMemoryManaged)
    {
        return std::make_unique<RefTensorHandle>(tensorInfo, m_MemoryManager);
    }
    else
    {
        return std::make_unique<RefTensorHandle>(tensorInfo);
    }
}

std::unique_ptr<ITensorHandle> RefWorkloadFactory::CreateTensorHandle(const TensorInfo& tensorInfo,
                                                                      DataLayout dataLayout,
                                                                      const bool isMemoryManaged) const
{
    // For Ref it is okay to make the TensorHandle memory managed as it can also store a pointer
    // to unmanaged memory. This also ensures memory alignment.
    IgnoreUnused(dataLayout);

    if (isMemoryManaged)
    {
        return std::make_unique<RefTensorHandle>(tensorInfo, m_MemoryManager);
    }
    else
    {
        return std::make_unique<RefTensorHandle>(tensorInfo);
    }
}

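// Unified factory entry point: downcasts the generic QueueDescriptor to the layer-specific
// descriptor type and constructs the matching reference workload. Returns nullptr for layer
// types the reference backend does not implement (e.g. PreCompiled). A typical case reads:
//
//     auto activationQueueDescriptor = PolymorphicDowncast<const ActivationQueueDescriptor*>(&descriptor);
//     return std::make_unique<RefActivationWorkload>(*activationQueueDescriptor, info);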
std::unique_ptr<IWorkload> RefWorkloadFactory::CreateWorkload(LayerType type,
                                                              const QueueDescriptor& descriptor,
                                                              const WorkloadInfo& info) const
{
    switch(type)
    {
        case LayerType::Activation:
        {
            auto activationQueueDescriptor = PolymorphicDowncast<const ActivationQueueDescriptor*>(&descriptor);
            return std::make_unique<RefActivationWorkload>(*activationQueueDescriptor, info);
        }
        case LayerType::Addition:
        {
            auto additionQueueDescriptor = PolymorphicDowncast<const AdditionQueueDescriptor*>(&descriptor);

            if (info.m_InputTensorInfos[0].GetDataType() == armnn::DataType::Signed32)
            {
                return std::make_unique<RefAdditionWorkload<int32_t>>(*additionQueueDescriptor, info);
            }
            else
            {
                return std::make_unique<RefAdditionWorkload<float>>(*additionQueueDescriptor, info);
            }
        }
        case LayerType::ArgMinMax:
        {
            auto argMinMaxQueueDescriptor = PolymorphicDowncast<const ArgMinMaxQueueDescriptor*>(&descriptor);
            return std::make_unique<RefArgMinMaxWorkload>(*argMinMaxQueueDescriptor, info);
        }
        case LayerType::BatchMatMul:
        {
            auto batchMatMulQueueDescriptor = PolymorphicDowncast<const BatchMatMulQueueDescriptor*>(&descriptor);
            return std::make_unique<RefBatchMatMulWorkload>(*batchMatMulQueueDescriptor, info);
        }
        case LayerType::BatchNormalization:
        {
            auto batchNormQueueDescriptor = PolymorphicDowncast<const BatchNormalizationQueueDescriptor*>(&descriptor);
            return std::make_unique<RefBatchNormalizationWorkload>(*batchNormQueueDescriptor, info);
        }
        case LayerType::BatchToSpaceNd:
        {
            auto batchToSpaceNdQueueDescriptor
                = PolymorphicDowncast<const BatchToSpaceNdQueueDescriptor*>(&descriptor);
            return std::make_unique<RefBatchToSpaceNdWorkload>(*batchToSpaceNdQueueDescriptor, info);
        }
        case LayerType::Cast:
        {
            auto castQueueDescriptor = PolymorphicDowncast<const CastQueueDescriptor*>(&descriptor);
            return std::make_unique<RefCastWorkload>(*castQueueDescriptor, info);
        }
        case LayerType::ChannelShuffle:
        {
            auto channelShuffleQueueDescriptor
                = PolymorphicDowncast<const ChannelShuffleQueueDescriptor*>(&descriptor);
            return std::make_unique<RefChannelShuffleWorkload>(*channelShuffleQueueDescriptor, info);
        }
        case LayerType::Comparison:
        {
            auto comparisonQueueDescriptor = PolymorphicDowncast<const ComparisonQueueDescriptor*>(&descriptor);
            return std::make_unique<RefComparisonWorkload>(*comparisonQueueDescriptor, info);
        }
        case LayerType::Concat:
        {
            auto concatQueueDescriptor = PolymorphicDowncast<const ConcatQueueDescriptor*>(&descriptor);
            return std::make_unique<RefConcatWorkload>(*concatQueueDescriptor, info);
        }
        case LayerType::Constant:
        {
            auto constantQueueDescriptor = PolymorphicDowncast<const ConstantQueueDescriptor*>(&descriptor);
            return std::make_unique<RefConstantWorkload>(*constantQueueDescriptor, info);
        }
        case LayerType::ConvertFp16ToFp32:
        {
            auto convertFp16ToFp32QueueDescriptor
                = PolymorphicDowncast<const ConvertFp16ToFp32QueueDescriptor*>(&descriptor);
            return std::make_unique<RefConvertFp16ToFp32Workload>(*convertFp16ToFp32QueueDescriptor, info);
        }
        case LayerType::ConvertFp32ToFp16:
        {
            auto convertFp32ToFp16QueueDescriptor
                = PolymorphicDowncast<const ConvertFp32ToFp16QueueDescriptor*>(&descriptor);
            return std::make_unique<RefConvertFp32ToFp16Workload>(*convertFp32ToFp16QueueDescriptor, info);
        }
        case LayerType::Convolution2d:
        {
            auto convolution2dQueueDescriptor = PolymorphicDowncast<const Convolution2dQueueDescriptor*>(&descriptor);
            return std::make_unique<RefConvolution2dWorkload>(*convolution2dQueueDescriptor, info);
        }
        case LayerType::Convolution3d:
        {
            auto convolution3dQueueDescriptor = PolymorphicDowncast<const Convolution3dQueueDescriptor*>(&descriptor);
            return std::make_unique<RefConvolution3dWorkload>(*convolution3dQueueDescriptor, info);
        }
        case LayerType::Debug:
        {
            auto debugQueueDescriptor = PolymorphicDowncast<const DebugQueueDescriptor*>(&descriptor);
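            // Debug workloads are templated on tensor data type, so probe the WorkloadInfo for
            // each supported type in turn; the MakeWorkload fallback covers the remaining
            // Float32 case.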
            if (IsBFloat16(info))
            {
                return std::make_unique<RefDebugBFloat16Workload>(*debugQueueDescriptor, info);
            }
            if (IsFloat16(info))
            {
                return std::make_unique<RefDebugFloat16Workload>(*debugQueueDescriptor, info);
            }
            if (IsQSymmS16(info))
            {
                return std::make_unique<RefDebugQSymmS16Workload>(*debugQueueDescriptor, info);
            }
            if (IsQSymmS8(info))
            {
                return std::make_unique<RefDebugQSymmS8Workload>(*debugQueueDescriptor, info);
            }
            if (IsQAsymmU8(info))
            {
                return std::make_unique<RefDebugQAsymmU8Workload>(*debugQueueDescriptor, info);
            }
            if (IsQAsymmS8(info))
            {
                return std::make_unique<RefDebugQAsymmS8Workload>(*debugQueueDescriptor, info);
            }
            if (IsSigned32(info))
            {
                return std::make_unique<RefDebugSigned32Workload>(*debugQueueDescriptor, info);
            }

            return MakeWorkload<RefDebugFloat32Workload, RefDebugQAsymmU8Workload>(*debugQueueDescriptor, info);
        }
        case LayerType::DepthToSpace:
        {
            auto depthToSpaceQueueDescriptor = PolymorphicDowncast<const DepthToSpaceQueueDescriptor*>(&descriptor);
            return std::make_unique<RefDepthToSpaceWorkload>(*depthToSpaceQueueDescriptor, info);
        }
        case LayerType::DepthwiseConvolution2d:
        {
            auto depthwiseConvolution2DQueueDescriptor
                = PolymorphicDowncast<const DepthwiseConvolution2dQueueDescriptor*>(&descriptor);
            return std::make_unique<RefDepthwiseConvolution2dWorkload>(*depthwiseConvolution2DQueueDescriptor, info);
        }
        case LayerType::Dequantize:
        {
            auto dequantizeQueueDescriptor = PolymorphicDowncast<const DequantizeQueueDescriptor*>(&descriptor);
            return std::make_unique<RefDequantizeWorkload>(*dequantizeQueueDescriptor, info);
        }
        case LayerType::DetectionPostProcess:
        {
            auto detectionPostProcessQueueDescriptor
                = PolymorphicDowncast<const DetectionPostProcessQueueDescriptor*>(&descriptor);
            return std::make_unique<RefDetectionPostProcessWorkload>(*detectionPostProcessQueueDescriptor, info);
        }
        case LayerType::Division:
        {
            auto divisionQueueDescriptor = PolymorphicDowncast<const DivisionQueueDescriptor*>(&descriptor);
            if (info.m_InputTensorInfos[0].GetDataType() == armnn::DataType::Signed32)
            {
                return std::make_unique<RefDivisionWorkload<int32_t>>(*divisionQueueDescriptor, info);
            }
            else
            {
                return std::make_unique<RefDivisionWorkload<float>>(*divisionQueueDescriptor, info);
            }
        }
        case LayerType::ElementwiseBinary:
        {
            auto elementwiseBinaryQueueDescriptor
                = PolymorphicDowncast<const ElementwiseBinaryQueueDescriptor*>(&descriptor);
            return std::make_unique<RefElementwiseBinaryWorkload>(*elementwiseBinaryQueueDescriptor, info);
        }
        case LayerType::ElementwiseUnary:
        {
            auto elementwiseUnaryQueueDescriptor
                = PolymorphicDowncast<const ElementwiseUnaryQueueDescriptor*>(&descriptor);
            if (elementwiseUnaryQueueDescriptor->m_Parameters.m_Operation == UnaryOperation::LogicalNot)
            {
                return std::make_unique<RefLogicalUnaryWorkload>(*elementwiseUnaryQueueDescriptor, info);
            }
            return std::make_unique<RefElementwiseUnaryWorkload>(*elementwiseUnaryQueueDescriptor, info);
        }
        case LayerType::FakeQuantization:
        {
            auto fakeQuantizationQueueDescriptor
                = PolymorphicDowncast<const FakeQuantizationQueueDescriptor*>(&descriptor);
            return std::make_unique<RefFakeQuantizationFloat32Workload>(*fakeQuantizationQueueDescriptor, info);
        }
        case LayerType::Fill:
        {
            auto fillQueueDescriptor = PolymorphicDowncast<const FillQueueDescriptor*>(&descriptor);
            return std::make_unique<RefFillWorkload>(*fillQueueDescriptor, info);
        }
        case LayerType::Floor:
        {
            auto floorQueueDescriptor = PolymorphicDowncast<const FloorQueueDescriptor*>(&descriptor);
            if (IsQuantizedType(info.m_InputTensorInfos[0].GetDataType()))
            {
                return nullptr;
            }
            else
            {
                return std::make_unique<RefFloorWorkload>(*floorQueueDescriptor, info);
            }
        }
        case LayerType::FullyConnected:
        {
            auto fullyConnectedQueueDescriptor
                = PolymorphicDowncast<const FullyConnectedQueueDescriptor*>(&descriptor);
            return std::make_unique<RefFullyConnectedWorkload>(*fullyConnectedQueueDescriptor, info);
        }
        case LayerType::Gather:
        {
            auto gatherQueueDescriptor = PolymorphicDowncast<const GatherQueueDescriptor*>(&descriptor);
            return std::make_unique<RefGatherWorkload>(*gatherQueueDescriptor, info);
        }
        case LayerType::GatherNd:
        {
            auto gatherNdQueueDescriptor = PolymorphicDowncast<const GatherNdQueueDescriptor*>(&descriptor);
            return std::make_unique<RefGatherNdWorkload>(*gatherNdQueueDescriptor, info);
        }
        case LayerType::Input:
        {
            auto inputQueueDescriptor = PolymorphicDowncast<const InputQueueDescriptor*>(&descriptor);
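            // Input workloads just copy the user-supplied buffer into the backend tensor
            // handle, so both sides must exist and agree on size.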
            if (info.m_InputTensorInfos.empty())
            {
                throw InvalidArgumentException("RefWorkloadFactory::CreateInput: Input cannot be zero length");
            }
            if (info.m_OutputTensorInfos.empty())
            {
                throw InvalidArgumentException("RefWorkloadFactory::CreateInput: Output cannot be zero length");
            }

            if (info.m_InputTensorInfos[0].GetNumBytes() != info.m_OutputTensorInfos[0].GetNumBytes())
            {
                throw InvalidArgumentException("RefWorkloadFactory::CreateInput: "
                                               "data input and output differ in byte count.");
            }

            return std::make_unique<CopyMemGenericWorkload>(*inputQueueDescriptor, info);
        }
        case LayerType::InstanceNormalization:
        {
            auto instanceNormalizationQueueDescriptor
                = PolymorphicDowncast<const InstanceNormalizationQueueDescriptor*>(&descriptor);
            return std::make_unique<RefInstanceNormalizationWorkload>(*instanceNormalizationQueueDescriptor, info);
        }
        case LayerType::L2Normalization:
        {
            auto l2NormalizationQueueDescriptor
                = PolymorphicDowncast<const L2NormalizationQueueDescriptor*>(&descriptor);
            return std::make_unique<RefL2NormalizationWorkload>(*l2NormalizationQueueDescriptor, info);
        }
        case LayerType::LogicalBinary:
        {
            auto logicalBinaryQueueDescriptor = PolymorphicDowncast<const LogicalBinaryQueueDescriptor*>(&descriptor);
            return std::make_unique<RefLogicalBinaryWorkload>(*logicalBinaryQueueDescriptor, info);
        }
        case LayerType::LogSoftmax:
        {
            auto logSoftmaxQueueDescriptor = PolymorphicDowncast<const LogSoftmaxQueueDescriptor*>(&descriptor);
            return std::make_unique<RefLogSoftmaxWorkload>(*logSoftmaxQueueDescriptor, info);
        }
        case LayerType::Lstm:
        {
            auto lstmQueueDescriptor = PolymorphicDowncast<const LstmQueueDescriptor*>(&descriptor);
            return std::make_unique<RefLstmWorkload>(*lstmQueueDescriptor, info);
        }
        case LayerType::Maximum:
        {
            auto maximumQueueDescriptor = PolymorphicDowncast<const MaximumQueueDescriptor*>(&descriptor);
            if (info.m_InputTensorInfos[0].GetDataType() == armnn::DataType::Signed32)
            {
                return std::make_unique<RefMaximumWorkload<int32_t>>(*maximumQueueDescriptor, info);
            }
            else
            {
                return std::make_unique<RefMaximumWorkload<float>>(*maximumQueueDescriptor, info);
            }
        }
        case LayerType::Mean:
        {
            auto meanQueueDescriptor = PolymorphicDowncast<const MeanQueueDescriptor*>(&descriptor);
            return std::make_unique<RefMeanWorkload>(*meanQueueDescriptor, info);
        }
        case LayerType::MemCopy:
        {
            auto memCopyQueueDescriptor = PolymorphicDowncast<const MemCopyQueueDescriptor*>(&descriptor);
            if (descriptor.m_Inputs.empty())
            {
                throw InvalidArgumentException("RefWorkloadFactory: CreateMemCopy() expected an input tensor.");
            }
            return std::make_unique<CopyMemGenericWorkload>(*memCopyQueueDescriptor, info);
        }
        case LayerType::MemImport:
        {
            auto memImportQueueDescriptor = PolymorphicDowncast<const MemImportQueueDescriptor*>(&descriptor);
            if (descriptor.m_Inputs.empty())
            {
                throw InvalidArgumentException("RefWorkloadFactory: CreateMemImport() expected an input tensor.");
            }
            return std::make_unique<ImportMemGenericWorkload>(*memImportQueueDescriptor, info);
        }
        case LayerType::Minimum:
        {
            auto minimumQueueDescriptor = PolymorphicDowncast<const MinimumQueueDescriptor*>(&descriptor);
            if (info.m_InputTensorInfos[0].GetDataType() == armnn::DataType::Signed32)
            {
                return std::make_unique<RefMinimumWorkload<int32_t>>(*minimumQueueDescriptor, info);
            }
            else
            {
                return std::make_unique<RefMinimumWorkload<float>>(*minimumQueueDescriptor, info);
            }
        }
        case LayerType::Multiplication:
        {
            auto multiplicationQueueDescriptor
                = PolymorphicDowncast<const MultiplicationQueueDescriptor*>(&descriptor);
            if (info.m_InputTensorInfos[0].GetDataType() == armnn::DataType::Signed32)
            {
                return std::make_unique<RefMultiplicationWorkload<int32_t>>(*multiplicationQueueDescriptor, info);
            }
            else
            {
                return std::make_unique<RefMultiplicationWorkload<float>>(*multiplicationQueueDescriptor, info);
            }
        }
        case LayerType::Normalization:
        {
            auto normalizationQueueDescriptor = PolymorphicDowncast<const NormalizationQueueDescriptor*>(&descriptor);
            return std::make_unique<RefNormalizationWorkload>(*normalizationQueueDescriptor, info);
        }
        case LayerType::Output:
        {
            auto outputQueueDescriptor = PolymorphicDowncast<const OutputQueueDescriptor*>(&descriptor);
            if (info.m_InputTensorInfos.empty())
            {
                throw InvalidArgumentException("RefWorkloadFactory::CreateOutput: Input cannot be zero length");
            }
            if (info.m_OutputTensorInfos.empty())
            {
                throw InvalidArgumentException("RefWorkloadFactory::CreateOutput: Output cannot be zero length");
            }
            if (info.m_InputTensorInfos[0].GetNumBytes() != info.m_OutputTensorInfos[0].GetNumBytes())
            {
                throw InvalidArgumentException("RefWorkloadFactory::CreateOutput: data input and output "
                                               "differ in byte count.");
            }

            return std::make_unique<CopyMemGenericWorkload>(*outputQueueDescriptor, info);
        }
        case LayerType::Pad:
        {
            auto padQueueDescriptor = PolymorphicDowncast<const PadQueueDescriptor*>(&descriptor);
            return std::make_unique<RefPadWorkload>(*padQueueDescriptor, info);
        }
        case LayerType::Permute:
        {
            auto permuteQueueDescriptor = PolymorphicDowncast<const PermuteQueueDescriptor*>(&descriptor);
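            // Permute workloads are templated on data type: QSymmS16, BFloat16 and QAsymmS8 are
            // handled explicitly, and MakeWorkloadHelper dispatches the Float16/Float32/QAsymmU8
            // cases.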
            if (IsQSymmS16(info))
            {
                return std::make_unique<RefPermuteQSymm16Workload>(*permuteQueueDescriptor, info);
            }
            else if (IsBFloat16(info))
            {
                return std::make_unique<RefPermuteBFloat16Workload>(*permuteQueueDescriptor, info);
            }
            else if (IsQAsymmS8(info))
            {
                return std::make_unique<RefPermuteQAsymmS8Workload>(*permuteQueueDescriptor, info);
            }
            return MakeWorkloadHelper<RefPermuteFloat16Workload, RefPermuteFloat32Workload, RefPermuteQAsymm8Workload,
                                      NullWorkload, NullWorkload, NullWorkload>(*permuteQueueDescriptor, info);
        }
        case LayerType::Pooling2d:
        {
            auto pooling2dQueueDescriptor = PolymorphicDowncast<const Pooling2dQueueDescriptor*>(&descriptor);
            return std::make_unique<RefPooling2dWorkload>(*pooling2dQueueDescriptor, info);
        }
        case LayerType::Pooling3d:
        {
            auto pooling3dQueueDescriptor = PolymorphicDowncast<const Pooling3dQueueDescriptor*>(&descriptor);
            return std::make_unique<RefPooling3dWorkload>(*pooling3dQueueDescriptor, info);
        }
        case LayerType::PreCompiled:
        {
            return nullptr;
        }
        case LayerType::Prelu:
        {
            auto preluQueueDescriptor = PolymorphicDowncast<const PreluQueueDescriptor*>(&descriptor);
            return std::make_unique<RefPreluWorkload>(*preluQueueDescriptor, info);
        }
        case LayerType::QLstm:
        {
            auto qlstmQueueDescriptor = PolymorphicDowncast<const QLstmQueueDescriptor*>(&descriptor);
            return std::make_unique<RefQLstmWorkload>(*qlstmQueueDescriptor, info);
        }
        case LayerType::Quantize:
        {
            auto quantizeQueueDescriptor = PolymorphicDowncast<const QuantizeQueueDescriptor*>(&descriptor);
            return std::make_unique<RefQuantizeWorkload>(*quantizeQueueDescriptor, info);
        }
        case LayerType::Rank:
        {
            auto rankQueueDescriptor = PolymorphicDowncast<const RankQueueDescriptor*>(&descriptor);
            return std::make_unique<RefRankWorkload>(*rankQueueDescriptor, info);
        }
        case LayerType::Reduce:
        {
            auto reduceQueueDescriptor = PolymorphicDowncast<const ReduceQueueDescriptor*>(&descriptor);
            return std::make_unique<RefReduceWorkload>(*reduceQueueDescriptor, info);
        }
        case LayerType::Reshape:
        {
            auto reshapeQueueDescriptor = PolymorphicDowncast<const ReshapeQueueDescriptor*>(&descriptor);
            return std::make_unique<RefReshapeWorkload>(*reshapeQueueDescriptor, info);
        }
        case LayerType::Resize:
        {
            auto resizeQueueDescriptor = PolymorphicDowncast<const ResizeQueueDescriptor*>(&descriptor);
            return std::make_unique<RefResizeWorkload>(*resizeQueueDescriptor, info);
        }
        case LayerType::Shape:
        {
            auto shapeQueueDescriptor = PolymorphicDowncast<const ShapeQueueDescriptor*>(&descriptor);
            return std::make_unique<RefShapeWorkload>(*shapeQueueDescriptor, info);
        }
        case LayerType::Slice:
        {
            auto sliceQueueDescriptor = PolymorphicDowncast<const SliceQueueDescriptor*>(&descriptor);
            return std::make_unique<RefSliceWorkload>(*sliceQueueDescriptor, info);
        }
        case LayerType::Softmax:
        {
            auto softmaxQueueDescriptor = PolymorphicDowncast<const SoftmaxQueueDescriptor*>(&descriptor);
            return std::make_unique<RefSoftmaxWorkload>(*softmaxQueueDescriptor, info);
        }
        case LayerType::SpaceToBatchNd:
        {
            auto spaceToBatchNdQueueDescriptor
                = PolymorphicDowncast<const SpaceToBatchNdQueueDescriptor*>(&descriptor);
            return std::make_unique<RefSpaceToBatchNdWorkload>(*spaceToBatchNdQueueDescriptor, info);
        }
        case LayerType::SpaceToDepth:
        {
            auto spaceToDepthQueueDescriptor = PolymorphicDowncast<const SpaceToDepthQueueDescriptor*>(&descriptor);
            return std::make_unique<RefSpaceToDepthWorkload>(*spaceToDepthQueueDescriptor, info);
        }
        case LayerType::Splitter:
        {
            auto splitterQueueDescriptor = PolymorphicDowncast<const SplitterQueueDescriptor*>(&descriptor);
            return std::make_unique<RefSplitterWorkload>(*splitterQueueDescriptor, info);
        }
        case LayerType::Stack:
        {
            auto stackQueueDescriptor = PolymorphicDowncast<const StackQueueDescriptor*>(&descriptor);
            return std::make_unique<RefStackWorkload>(*stackQueueDescriptor, info);
        }
        case LayerType::StridedSlice:
        {
            auto stridedSliceQueueDescriptor = PolymorphicDowncast<const StridedSliceQueueDescriptor*>(&descriptor);
            return std::make_unique<RefStridedSliceWorkload>(*stridedSliceQueueDescriptor, info);
        }
        case LayerType::Subtraction:
        {
            auto subtractionQueueDescriptor = PolymorphicDowncast<const SubtractionQueueDescriptor*>(&descriptor);
            if (info.m_InputTensorInfos[0].GetDataType() == armnn::DataType::Signed32)
            {
                return std::make_unique<RefSubtractionWorkload<int32_t>>(*subtractionQueueDescriptor, info);
            }
            else
            {
                return std::make_unique<RefSubtractionWorkload<float>>(*subtractionQueueDescriptor, info);
            }
        }
        case LayerType::Transpose:
        {
            auto transposeQueueDescriptor = PolymorphicDowncast<const TransposeQueueDescriptor*>(&descriptor);
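            // Same data-type dispatch pattern as the Permute case above.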
            if (IsQSymmS16(info))
            {
                return std::make_unique<RefTransposeQSymm16Workload>(*transposeQueueDescriptor, info);
            }
            else if (IsBFloat16(info))
            {
                return std::make_unique<RefTransposeBFloat16Workload>(*transposeQueueDescriptor, info);
            }
            else if (IsQAsymmS8(info))
            {
                return std::make_unique<RefTransposeQAsymmS8Workload>(*transposeQueueDescriptor, info);
            }
            return MakeWorkloadHelper<RefTransposeFloat16Workload, RefTransposeFloat32Workload,
                                      RefTransposeQAsymm8Workload, NullWorkload, NullWorkload, NullWorkload>
                                      (*transposeQueueDescriptor, info);
        }
        case LayerType::TransposeConvolution2d:
        {
            auto transposeConvolution2dQueueDescriptor
                = PolymorphicDowncast<const TransposeConvolution2dQueueDescriptor*>(&descriptor);
            return std::make_unique<RefTransposeConvolution2dWorkload>(*transposeConvolution2dQueueDescriptor, info);
        }
        case LayerType::UnidirectionalSequenceLstm:
        {
            auto unidirectionalSequenceLstmQueueDescriptor
                = PolymorphicDowncast<const UnidirectionalSequenceLstmQueueDescriptor*>(&descriptor);
            return std::make_unique<RefUnidirectionalSequenceLstmWorkload>(*unidirectionalSequenceLstmQueueDescriptor,
                                                                           info);
        }
        default:
            return nullptr;
    }
}

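// The per-layer Create* methods below duplicate the corresponding cases of
// CreateWorkload(LayerType, ...) above and are retained for API compatibility.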
std::unique_ptr<IWorkload> RefWorkloadFactory::CreateActivation(const ActivationQueueDescriptor& descriptor,
                                                                const WorkloadInfo& info) const
{
    return std::make_unique<RefActivationWorkload>(descriptor, info);
}

std::unique_ptr<IWorkload> RefWorkloadFactory::CreateAddition(const AdditionQueueDescriptor& descriptor,
                                                              const WorkloadInfo& info) const
{
    if (info.m_InputTensorInfos[0].GetDataType() == armnn::DataType::Signed32)
    {
        return std::make_unique<RefAdditionWorkload<int32_t>>(descriptor, info);
    }
    else
    {
        return std::make_unique<RefAdditionWorkload<float>>(descriptor, info);
    }
}

std::unique_ptr<IWorkload> RefWorkloadFactory::CreateArgMinMax(const ArgMinMaxQueueDescriptor& descriptor,
                                                               const WorkloadInfo& info) const
{
    return std::make_unique<RefArgMinMaxWorkload>(descriptor, info);
}

std::unique_ptr<IWorkload> RefWorkloadFactory::CreateBatchNormalization(
    const BatchNormalizationQueueDescriptor& descriptor,
    const WorkloadInfo& info) const
{
    return std::make_unique<RefBatchNormalizationWorkload>(descriptor, info);
}

std::unique_ptr<IWorkload> RefWorkloadFactory::CreateBatchToSpaceNd(const BatchToSpaceNdQueueDescriptor& descriptor,
                                                                    const WorkloadInfo& info) const
{
    return std::make_unique<RefBatchToSpaceNdWorkload>(descriptor, info);
}

std::unique_ptr<IWorkload> RefWorkloadFactory::CreateCast(const CastQueueDescriptor& descriptor,
                                                          const WorkloadInfo& info) const
{
    return std::make_unique<RefCastWorkload>(descriptor, info);
}

std::unique_ptr<IWorkload> RefWorkloadFactory::CreateChannelShuffle(const ChannelShuffleQueueDescriptor& descriptor,
                                                                    const WorkloadInfo& info) const
{
    return std::make_unique<RefChannelShuffleWorkload>(descriptor, info);
}

std::unique_ptr<IWorkload> RefWorkloadFactory::CreateComparison(const ComparisonQueueDescriptor& descriptor,
                                                                const WorkloadInfo& info) const
{
    return std::make_unique<RefComparisonWorkload>(descriptor, info);
}

std::unique_ptr<IWorkload> RefWorkloadFactory::CreateConcat(const ConcatQueueDescriptor& descriptor,
                                                            const WorkloadInfo& info) const
{
    return std::make_unique<RefConcatWorkload>(descriptor, info);
}

std::unique_ptr<IWorkload> RefWorkloadFactory::CreateConstant(const ConstantQueueDescriptor& descriptor,
                                                              const WorkloadInfo& info) const
{
    return std::make_unique<RefConstantWorkload>(descriptor, info);
}

std::unique_ptr<IWorkload> RefWorkloadFactory::CreateConvertFp16ToFp32(
    const ConvertFp16ToFp32QueueDescriptor& descriptor,
    const WorkloadInfo& info) const
{
    return std::make_unique<RefConvertFp16ToFp32Workload>(descriptor, info);
}

std::unique_ptr<IWorkload> RefWorkloadFactory::CreateConvertFp32ToFp16(
    const ConvertFp32ToFp16QueueDescriptor& descriptor,
    const WorkloadInfo& info) const
{
    return std::make_unique<RefConvertFp32ToFp16Workload>(descriptor, info);
}

std::unique_ptr<IWorkload> RefWorkloadFactory::CreateConvolution2d(const Convolution2dQueueDescriptor& descriptor,
                                                                   const WorkloadInfo& info) const
{
    return std::make_unique<RefConvolution2dWorkload>(descriptor, info);
}

std::unique_ptr<IWorkload> RefWorkloadFactory::CreateConvolution3d(const Convolution3dQueueDescriptor& descriptor,
                                                                   const WorkloadInfo& info) const
{
    return std::make_unique<RefConvolution3dWorkload>(descriptor, info);
}

std::unique_ptr<IWorkload> RefWorkloadFactory::CreateDebug(const DebugQueueDescriptor& descriptor,
                                                           const WorkloadInfo& info) const
{
    if (IsBFloat16(info))
    {
        return std::make_unique<RefDebugBFloat16Workload>(descriptor, info);
    }
    if (IsFloat16(info))
    {
        return std::make_unique<RefDebugFloat16Workload>(descriptor, info);
    }
    if (IsQSymmS16(info))
    {
        return std::make_unique<RefDebugQSymmS16Workload>(descriptor, info);
    }
    if (IsQSymmS8(info))
    {
        return std::make_unique<RefDebugQSymmS8Workload>(descriptor, info);
    }
    if (IsQAsymmU8(info))
    {
        return std::make_unique<RefDebugQAsymmU8Workload>(descriptor, info);
    }
    if (IsQAsymmS8(info))
    {
        return std::make_unique<RefDebugQAsymmS8Workload>(descriptor, info);
    }
    if (IsSigned32(info))
    {
        return std::make_unique<RefDebugSigned32Workload>(descriptor, info);
    }

    return MakeWorkload<RefDebugFloat32Workload, RefDebugQAsymmU8Workload>(descriptor, info);
}

std::unique_ptr<IWorkload> RefWorkloadFactory::CreateDepthToSpace(const DepthToSpaceQueueDescriptor& descriptor,
                                                                  const WorkloadInfo& info) const
{
    return std::make_unique<RefDepthToSpaceWorkload>(descriptor, info);
}

std::unique_ptr<IWorkload> RefWorkloadFactory::CreateDepthwiseConvolution2d(
    const DepthwiseConvolution2dQueueDescriptor& descriptor,
    const WorkloadInfo& info) const
{
    return std::make_unique<RefDepthwiseConvolution2dWorkload>(descriptor, info);
}

std::unique_ptr<IWorkload> RefWorkloadFactory::CreateDequantize(const DequantizeQueueDescriptor& descriptor,
                                                                const WorkloadInfo& info) const
{
    return std::make_unique<RefDequantizeWorkload>(descriptor, info);
}

std::unique_ptr<IWorkload> RefWorkloadFactory::CreateDetectionPostProcess(
    const DetectionPostProcessQueueDescriptor& descriptor,
    const WorkloadInfo& info) const
{
    return std::make_unique<RefDetectionPostProcessWorkload>(descriptor, info);
}

std::unique_ptr<IWorkload> RefWorkloadFactory::CreateDivision(const DivisionQueueDescriptor& descriptor,
                                                              const WorkloadInfo& info) const
{
    if (info.m_InputTensorInfos[0].GetDataType() == armnn::DataType::Signed32)
    {
        return std::make_unique<RefDivisionWorkload<int32_t>>(descriptor, info);
    }
    else
    {
        return std::make_unique<RefDivisionWorkload<float>>(descriptor, info);
    }
}

std::unique_ptr<IWorkload> RefWorkloadFactory::CreateElementwiseUnary(const ElementwiseUnaryQueueDescriptor& descriptor,
                                                                      const WorkloadInfo& info) const
{
    if (descriptor.m_Parameters.m_Operation == UnaryOperation::LogicalNot)
    {
        return std::make_unique<RefLogicalUnaryWorkload>(descriptor, info);
    }
    return std::make_unique<RefElementwiseUnaryWorkload>(descriptor, info);
}

std::unique_ptr<IWorkload> RefWorkloadFactory::CreateFakeQuantization(const FakeQuantizationQueueDescriptor& descriptor,
                                                                      const WorkloadInfo& info) const
{
    return MakeWorkload<RefFakeQuantizationFloat32Workload, NullWorkload>(descriptor, info);
}

std::unique_ptr<IWorkload> RefWorkloadFactory::CreateFill(const FillQueueDescriptor& descriptor,
                                                          const WorkloadInfo& info) const
{
    return std::make_unique<RefFillWorkload>(descriptor, info);
}

std::unique_ptr<IWorkload> RefWorkloadFactory::CreateFloor(const FloorQueueDescriptor& descriptor,
                                                           const WorkloadInfo& info) const
{
    if (IsQuantizedType(info.m_InputTensorInfos[0].GetDataType()))
    {
        return nullptr;
    }
    else
    {
        return std::make_unique<RefFloorWorkload>(descriptor, info);
    }
}

std::unique_ptr<IWorkload> RefWorkloadFactory::CreateFullyConnected(
    const FullyConnectedQueueDescriptor& descriptor,
    const WorkloadInfo& info) const
{
    return std::make_unique<RefFullyConnectedWorkload>(descriptor, info);
}

std::unique_ptr<IWorkload> RefWorkloadFactory::CreateGather(const GatherQueueDescriptor& descriptor,
                                                            const WorkloadInfo& info) const
{
    return std::make_unique<RefGatherWorkload>(descriptor, info);
}

std::unique_ptr<IWorkload> RefWorkloadFactory::CreateInput(const InputQueueDescriptor& descriptor,
                                                           const WorkloadInfo& info) const
{
    if (info.m_InputTensorInfos.empty())
    {
        throw InvalidArgumentException("RefWorkloadFactory::CreateInput: Input cannot be zero length");
    }
    if (info.m_OutputTensorInfos.empty())
    {
        throw InvalidArgumentException("RefWorkloadFactory::CreateInput: Output cannot be zero length");
    }

    if (info.m_InputTensorInfos[0].GetNumBytes() != info.m_OutputTensorInfos[0].GetNumBytes())
    {
        throw InvalidArgumentException("RefWorkloadFactory::CreateInput: data input and output differ in byte count.");
    }

    return std::make_unique<CopyMemGenericWorkload>(descriptor, info);
}

std::unique_ptr<IWorkload> RefWorkloadFactory::CreateInstanceNormalization(
    const InstanceNormalizationQueueDescriptor& descriptor,
    const WorkloadInfo& info) const
{
    return std::make_unique<RefInstanceNormalizationWorkload>(descriptor, info);
}

std::unique_ptr<IWorkload> RefWorkloadFactory::CreateL2Normalization(const L2NormalizationQueueDescriptor& descriptor,
                                                                     const WorkloadInfo& info) const
{
    return std::make_unique<RefL2NormalizationWorkload>(descriptor, info);
}

std::unique_ptr<IWorkload> RefWorkloadFactory::CreateLogicalBinary(const LogicalBinaryQueueDescriptor& descriptor,
                                                                   const WorkloadInfo& info) const
{
    return std::make_unique<RefLogicalBinaryWorkload>(descriptor, info);
}

std::unique_ptr<IWorkload> RefWorkloadFactory::CreateLogSoftmax(const LogSoftmaxQueueDescriptor& descriptor,
                                                                const WorkloadInfo& info) const
{
    return std::make_unique<RefLogSoftmaxWorkload>(descriptor, info);
}

std::unique_ptr<IWorkload> RefWorkloadFactory::CreateLstm(const LstmQueueDescriptor& descriptor,
                                                          const WorkloadInfo& info) const
{
    return std::make_unique<RefLstmWorkload>(descriptor, info);
}

std::unique_ptr<IWorkload> RefWorkloadFactory::CreateMaximum(const MaximumQueueDescriptor& descriptor,
                                                             const WorkloadInfo& info) const
{
    if (info.m_InputTensorInfos[0].GetDataType() == armnn::DataType::Signed32)
    {
        return std::make_unique<RefMaximumWorkload<int32_t>>(descriptor, info);
    }
    else
    {
        return std::make_unique<RefMaximumWorkload<float>>(descriptor, info);
    }
}

std::unique_ptr<IWorkload> RefWorkloadFactory::CreateMean(const MeanQueueDescriptor& descriptor,
                                                          const WorkloadInfo& info) const
{
    return std::make_unique<RefMeanWorkload>(descriptor, info);
}

std::unique_ptr<IWorkload> RefWorkloadFactory::CreateMemCopy(const MemCopyQueueDescriptor& descriptor,
                                                             const WorkloadInfo& info) const
{
    if (descriptor.m_Inputs.empty())
    {
        throw InvalidArgumentException("RefWorkloadFactory: CreateMemCopy() expected an input tensor.");
    }
    return std::make_unique<CopyMemGenericWorkload>(descriptor, info);
}

std::unique_ptr<IWorkload> RefWorkloadFactory::CreateMemImport(const MemImportQueueDescriptor& descriptor,
                                                               const WorkloadInfo& info) const
{
    if (descriptor.m_Inputs.empty())
    {
        throw InvalidArgumentException("RefWorkloadFactory: CreateMemImport() expected an input tensor.");
    }
    return std::make_unique<ImportMemGenericWorkload>(descriptor, info);
}

std::unique_ptr<IWorkload> RefWorkloadFactory::CreateMinimum(const MinimumQueueDescriptor& descriptor,
                                                             const WorkloadInfo& info) const
{
    if (info.m_InputTensorInfos[0].GetDataType() == armnn::DataType::Signed32)
    {
        return std::make_unique<RefMinimumWorkload<int32_t>>(descriptor, info);
    }
    else
    {
        return std::make_unique<RefMinimumWorkload<float>>(descriptor, info);
    }
}

std::unique_ptr<IWorkload> RefWorkloadFactory::CreateMultiplication(const MultiplicationQueueDescriptor& descriptor,
                                                                    const WorkloadInfo& info) const
{
    if (info.m_InputTensorInfos[0].GetDataType() == armnn::DataType::Signed32)
    {
        return std::make_unique<RefMultiplicationWorkload<int32_t>>(descriptor, info);
    }
    else
    {
        return std::make_unique<RefMultiplicationWorkload<float>>(descriptor, info);
    }
}

std::unique_ptr<IWorkload> RefWorkloadFactory::CreateNormalization(const NormalizationQueueDescriptor& descriptor,
                                                                   const WorkloadInfo& info) const
{
    return std::make_unique<RefNormalizationWorkload>(descriptor, info);
}

std::unique_ptr<IWorkload> RefWorkloadFactory::CreateOutput(const OutputQueueDescriptor& descriptor,
                                                            const WorkloadInfo& info) const
{
    if (info.m_InputTensorInfos.empty())
    {
        throw InvalidArgumentException("RefWorkloadFactory::CreateOutput: Input cannot be zero length");
    }
    if (info.m_OutputTensorInfos.empty())
    {
        throw InvalidArgumentException("RefWorkloadFactory::CreateOutput: Output cannot be zero length");
    }
    if (info.m_InputTensorInfos[0].GetNumBytes() != info.m_OutputTensorInfos[0].GetNumBytes())
    {
        throw InvalidArgumentException("RefWorkloadFactory::CreateOutput: data input and output differ in byte count.");
    }

    return std::make_unique<CopyMemGenericWorkload>(descriptor, info);
}

std::unique_ptr<IWorkload> RefWorkloadFactory::CreatePad(const PadQueueDescriptor& descriptor,
                                                         const WorkloadInfo& info) const
{
    return std::make_unique<RefPadWorkload>(descriptor, info);
}

std::unique_ptr<IWorkload> RefWorkloadFactory::CreatePermute(const PermuteQueueDescriptor& descriptor,
                                                             const WorkloadInfo& info) const
{
    if (IsQSymmS16(info))
    {
        return std::make_unique<RefPermuteQSymm16Workload>(descriptor, info);
    }
    else if (IsBFloat16(info))
    {
        return std::make_unique<RefPermuteBFloat16Workload>(descriptor, info);
    }
    else if (IsQAsymmS8(info))
    {
        return std::make_unique<RefPermuteQAsymmS8Workload>(descriptor, info);
    }
    return MakeWorkloadHelper<RefPermuteFloat16Workload, RefPermuteFloat32Workload, RefPermuteQAsymm8Workload,
                              NullWorkload, NullWorkload, NullWorkload>(descriptor, info);
}

std::unique_ptr<IWorkload> RefWorkloadFactory::CreatePooling2d(const Pooling2dQueueDescriptor& descriptor,
                                                               const WorkloadInfo& info) const
{
    return std::make_unique<RefPooling2dWorkload>(descriptor, info);
}

std::unique_ptr<IWorkload> RefWorkloadFactory::CreatePooling3d(const Pooling3dQueueDescriptor& descriptor,
                                                               const WorkloadInfo& info) const
{
    return std::make_unique<RefPooling3dWorkload>(descriptor, info);
}

std::unique_ptr<IWorkload> RefWorkloadFactory::CreatePreCompiled(const PreCompiledQueueDescriptor& /*descriptor*/,
                                                                 const WorkloadInfo& /*info*/) const
{
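    // The reference backend has no pre-compiled workload support.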
    return nullptr;
}

std::unique_ptr<IWorkload> RefWorkloadFactory::CreatePrelu(const PreluQueueDescriptor& descriptor,
                                                           const WorkloadInfo& info) const
{
    return std::make_unique<RefPreluWorkload>(descriptor, info);
}

std::unique_ptr<IWorkload> RefWorkloadFactory::CreateQLstm(const QLstmQueueDescriptor& descriptor,
                                                           const WorkloadInfo& info) const
{
    return std::make_unique<RefQLstmWorkload>(descriptor, info);
}

std::unique_ptr<IWorkload> RefWorkloadFactory::CreateQuantize(const QuantizeQueueDescriptor& descriptor,
                                                              const WorkloadInfo& info) const
{
    return std::make_unique<RefQuantizeWorkload>(descriptor, info);
}

std::unique_ptr<IWorkload> RefWorkloadFactory::CreateRank(const RankQueueDescriptor& descriptor,
                                                          const WorkloadInfo& info) const
{
    return std::make_unique<RefRankWorkload>(descriptor, info);
}

std::unique_ptr<IWorkload> RefWorkloadFactory::CreateReduce(const ReduceQueueDescriptor& descriptor,
                                                            const WorkloadInfo& info) const
{
    return std::make_unique<RefReduceWorkload>(descriptor, info);
}

std::unique_ptr<IWorkload> RefWorkloadFactory::CreateReshape(const ReshapeQueueDescriptor& descriptor,
                                                             const WorkloadInfo& info) const
{
    return std::make_unique<RefReshapeWorkload>(descriptor, info);
}

std::unique_ptr<IWorkload> RefWorkloadFactory::CreateResize(const ResizeQueueDescriptor& descriptor,
                                                            const WorkloadInfo& info) const
{
    return std::make_unique<RefResizeWorkload>(descriptor, info);
}

std::unique_ptr<IWorkload> RefWorkloadFactory::CreateShape(const ShapeQueueDescriptor& descriptor,
                                                           const WorkloadInfo& info) const
{
    return std::make_unique<RefShapeWorkload>(descriptor, info);
}

std::unique_ptr<IWorkload> RefWorkloadFactory::CreateSlice(const SliceQueueDescriptor& descriptor,
                                                           const WorkloadInfo& info) const
{
    return std::make_unique<RefSliceWorkload>(descriptor, info);
}

std::unique_ptr<IWorkload> RefWorkloadFactory::CreateSoftmax(const SoftmaxQueueDescriptor& descriptor,
                                                             const WorkloadInfo& info) const
{
    return std::make_unique<RefSoftmaxWorkload>(descriptor, info);
}

std::unique_ptr<IWorkload> RefWorkloadFactory::CreateSpaceToBatchNd(const SpaceToBatchNdQueueDescriptor& descriptor,
                                                                    const WorkloadInfo& info) const
{
    return std::make_unique<RefSpaceToBatchNdWorkload>(descriptor, info);
}

std::unique_ptr<IWorkload> RefWorkloadFactory::CreateSpaceToDepth(const SpaceToDepthQueueDescriptor& descriptor,
                                                                  const WorkloadInfo& info) const
{
    return std::make_unique<RefSpaceToDepthWorkload>(descriptor, info);
}

std::unique_ptr<IWorkload> RefWorkloadFactory::CreateSplitter(const SplitterQueueDescriptor& descriptor,
                                                              const WorkloadInfo& info) const
{
    return std::make_unique<RefSplitterWorkload>(descriptor, info);
}

std::unique_ptr<IWorkload> RefWorkloadFactory::CreateStack(const StackQueueDescriptor& descriptor,
                                                           const WorkloadInfo& info) const
{
    return std::make_unique<RefStackWorkload>(descriptor, info);
}

std::unique_ptr<IWorkload> RefWorkloadFactory::CreateStridedSlice(const StridedSliceQueueDescriptor& descriptor,
                                                                  const WorkloadInfo& info) const
{
    return std::make_unique<RefStridedSliceWorkload>(descriptor, info);
}

std::unique_ptr<IWorkload> RefWorkloadFactory::CreateSubtraction(const SubtractionQueueDescriptor& descriptor,
                                                                 const WorkloadInfo& info) const
{
    if (info.m_InputTensorInfos[0].GetDataType() == armnn::DataType::Signed32)
    {
        return std::make_unique<RefSubtractionWorkload<int32_t>>(descriptor, info);
    }
    else
    {
        return std::make_unique<RefSubtractionWorkload<float>>(descriptor, info);
    }
}

std::unique_ptr<IWorkload> RefWorkloadFactory::CreateTranspose(const TransposeQueueDescriptor& descriptor,
                                                               const WorkloadInfo& info) const
{
    if (IsQSymmS16(info))
    {
        return std::make_unique<RefTransposeQSymm16Workload>(descriptor, info);
    }
    else if (IsBFloat16(info))
    {
        return std::make_unique<RefTransposeBFloat16Workload>(descriptor, info);
    }
    else if (IsQAsymmS8(info))
    {
        return std::make_unique<RefTransposeQAsymmS8Workload>(descriptor, info);
    }
    return MakeWorkloadHelper<RefTransposeFloat16Workload, RefTransposeFloat32Workload, RefTransposeQAsymm8Workload,
                              NullWorkload, NullWorkload, NullWorkload>(descriptor, info);
}

std::unique_ptr<IWorkload> RefWorkloadFactory::CreateTransposeConvolution2d(
    const TransposeConvolution2dQueueDescriptor& descriptor,
    const WorkloadInfo& info) const
{
    return std::make_unique<RefTransposeConvolution2dWorkload>(descriptor, info);
}

std::unique_ptr<IWorkload> RefWorkloadFactory::CreateUnidirectionalSequenceLstm(
    const UnidirectionalSequenceLstmQueueDescriptor& descriptor,
    const WorkloadInfo& info) const
{
    return std::make_unique<RefUnidirectionalSequenceLstmWorkload>(descriptor, info);
}

} // namespace armnn