//
// Copyright © 2017-2023 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//
#include "ClWorkloadFactory.hpp"
#include "ClBackendId.hpp"
#include "ClBackendModelContext.hpp"
#include "ClContextDeserializer.hpp"
#include "ClContextSerializer.hpp"

#include <Layer.hpp>

#include <armnn/Exceptions.hpp>
#include <armnn/Logging.hpp>
#include <armnn/Utils.hpp>
#include <armnn/utility/IgnoreUnused.hpp>
#include <armnn/utility/NumericCast.hpp>
#include <armnn/utility/PolymorphicDowncast.hpp>

#include <backendsCommon/MakeWorkloadHelper.hpp>
#include <armnn/backends/MemCopyWorkload.hpp>
#include <backendsCommon/MemImportWorkload.hpp>
#include <armnn/backends/TensorHandle.hpp>

#include <cl/ClTensorHandle.hpp>
#include <cl/workloads/ClWorkloads.hpp>
#include <cl/workloads/ClWorkloadUtils.hpp>

#include <arm_compute/core/CL/CLKernelLibrary.h>
#include <arm_compute/runtime/CL/CLBufferAllocator.h>
#include <arm_compute/runtime/CL/CLScheduler.h>

#include <armnnUtils/Filesystem.hpp>
#include <fstream>

#include <sys/stat.h>
#include <unistd.h> // for write(), lseek() and pread() used by the cached-network code

namespace armnn
{

namespace
{
static const BackendId s_Id{ClBackendId()};
}

bool ClWorkloadFactory::IsLayerSupported(const Layer& layer,
                                         Optional<DataType> dataType,
                                         std::string& outReasonIfUnsupported)
{
    return IWorkloadFactory::IsLayerSupported(s_Id, layer, dataType, outReasonIfUnsupported);
}

bool ClWorkloadFactory::IsLayerSupported(const IConnectableLayer& layer,
                                         Optional<DataType> dataType,
                                         std::string& outReasonIfUnsupported,
                                         const ModelOptions& modelOptions)
{
    return IWorkloadFactory::IsLayerSupported(s_Id, layer, dataType, outReasonIfUnsupported, modelOptions);
}

const BackendId& ClWorkloadFactory::GetBackendId() const
{
    return s_Id;
}

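// Called once all workloads have been built. If the user asked for the network to be
// cached, the compiled OpenCL programs are serialized and written to the file
// descriptor and/or file path supplied through the ClBackendModelContext options.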
void ClWorkloadFactory::AfterWorkloadsCreated()
{
    if (m_ModelContextPtr)
    {
        auto modelOptions = dynamic_cast<ClBackendModelContext*>(m_ModelContextPtr.get());
        // Guard against a null modelOptions in case the context is not a ClBackendModelContext
        if (modelOptions && modelOptions->SaveCachedNetwork())
        {
            ClContextSerializer serializer;
            serializer.Serialize(m_CLCompileContext);
            auto cachedFd = modelOptions->GetCachedFileDescriptor();
            if (cachedFd != -1)
            {
                std::vector<uint8_t> compiledContextData;
                std::stringstream stream;
                bool serialized = serializer.SaveSerializedToStream(stream);
                if (serialized)
                {
                    std::string const serializedString{stream.str()};
                    std::copy(serializedString.begin(),
                              serializedString.end(),
                              std::back_inserter(compiledContextData));
                    auto success = write(cachedFd, compiledContextData.data(), compiledContextData.size());
                    if (success == -1)
                    {
                        ARMNN_LOG(info) << "ClWorkloadFactory: Could not cache the compiled context!";
                    }
                }
            }

            // Save map to a filepath provided in ModelOptions
            auto filePath = modelOptions->GetCachedNetworkFilePath();
            if (filePath != "" && fs::exists(filePath) && fs::is_regular_file(filePath))
            {
                // Serialize ClContext to the file specified
                std::ofstream file(filePath, std::ios::out | std::ios::binary);
                serializer.SaveSerializedToStream(file);
            }
        }
    }
}

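// Workload construction helpers. The two-workload overload picks a float or uint8
// implementation from the tensor data type via MakeWorkloadHelper; the single-workload
// overload constructs the workload directly. Both convert OpenCL exceptions
// (cl::Error) into armnn exceptions that carry the source location.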
template <typename FloatWorkload, typename Uint8Workload, typename QueueDescriptorType, typename... Args>
std::unique_ptr<IWorkload> ClWorkloadFactory::MakeWorkload(const QueueDescriptorType& descriptor,
                                                           const WorkloadInfo& info,
                                                           Args&&... args)
{
    try
    {
        return MakeWorkloadHelper<FloatWorkload, Uint8Workload>(descriptor, info, std::forward<Args>(args)...);
    }
    catch (const cl::Error& clError)
    {
        throw WrapClError(clError, CHECK_LOCATION());
    }
}

template <typename Workload, typename QueueDescriptorType, typename... Args>
std::unique_ptr<IWorkload> ClWorkloadFactory::MakeWorkload(const QueueDescriptorType& descriptor,
                                                           const WorkloadInfo& info,
                                                           Args&&... args)
{
    try
    {
        return std::make_unique<Workload>(descriptor, info, std::forward<Args>(args)...);
    }
    catch (const cl::Error& clError)
    {
        throw WrapClError(clError, CHECK_LOCATION());
    }
}

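// Builds m_CLCompileContext from the default OpenCL device and context. When a cached
// network is being loaded (rather than saved), previously compiled programs are
// restored from the cached file descriptor or file path given in the ModelOptions,
// so kernels do not have to be recompiled at load time.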
void ClWorkloadFactory::InitializeCLCompileContext()
{
    // Initialize our m_CLCompileContext using default device and context
    auto context = arm_compute::CLKernelLibrary::get().context();
    auto device = arm_compute::CLKernelLibrary::get().get_device();
    m_CLCompileContext = arm_compute::CLCompileContext(context, device);

    if (m_ModelContextPtr)
    {
        // Load saved programs if the user has set a filepath
        auto modelOptions = dynamic_cast<ClBackendModelContext*>(m_ModelContextPtr.get());
        // Guard against a null modelOptions in case the context is not a ClBackendModelContext
        if (modelOptions && !modelOptions->SaveCachedNetwork())
        {
            ClContextDeserializer deserializer;
            auto cachedFd = modelOptions->GetCachedFileDescriptor();
            if (cachedFd != -1)
            {
                struct stat statBuffer;
                if (fstat(cachedFd, &statBuffer) == 0)
                {
                    long dataSize = static_cast<long>(statBuffer.st_size);
                    if (dataSize > 0)
                    {
                        auto offset = lseek(cachedFd, 0, SEEK_CUR);
                        if (offset == 0)
                        {
                            std::vector<uint8_t> compiledContextData(static_cast<unsigned int>(dataSize));
                            auto success = pread(cachedFd, compiledContextData.data(), compiledContextData.size(), 0);
                            if (success != -1)
                            {
                                deserializer.DeserializeFromBinary(m_CLCompileContext,
                                                                   context,
                                                                   device,
                                                                   compiledContextData);
                            }
                        }
                    }
                }
            }

            auto filePath = modelOptions->GetCachedNetworkFilePath();
            if (filePath != "" && fs::exists(filePath) && fs::is_regular_file(filePath))
            {
                // Deserialize binary file and load into m_CLCompileContext
                deserializer.Deserialize(m_CLCompileContext, context, device, filePath);
            }
        }
    }
}

ClWorkloadFactory::ClWorkloadFactory(const std::shared_ptr<ClMemoryManager>& memoryManager)
    : m_MemoryManager(memoryManager), m_ModelContextPtr(IBackendInternal::IBackendSpecificModelContextPtr{})
{
    InitializeCLCompileContext();
}

ClWorkloadFactory::ClWorkloadFactory(const std::shared_ptr<ClMemoryManager>& memoryManager,
                                     const IBackendInternal::IBackendSpecificModelContextPtr& modelContextPtr)
    : m_MemoryManager(memoryManager), m_ModelContextPtr(modelContextPtr)
{
    InitializeCLCompileContext();
}

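// Tensor handle creation. Handles are registered with the factory's inter-layer
// memory group so buffers can be shared between layers; the IsMemoryManaged flag
// is ignored because CL tensor handles are always pooled through this group.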
std::unique_ptr<ITensorHandle> ClWorkloadFactory::CreateTensorHandle(const TensorInfo& tensorInfo,
                                                                     const bool IsMemoryManaged) const
{
    IgnoreUnused(IsMemoryManaged);
    std::unique_ptr<ClTensorHandle> tensorHandle = std::make_unique<ClTensorHandle>(tensorInfo);
    tensorHandle->SetMemoryGroup(m_MemoryManager->GetInterLayerMemoryGroup());

    return tensorHandle;
}

std::unique_ptr<ITensorHandle> ClWorkloadFactory::CreateTensorHandle(const TensorInfo& tensorInfo,
                                                                     DataLayout dataLayout,
                                                                     const bool IsMemoryManaged) const
{
    IgnoreUnused(IsMemoryManaged);
    std::unique_ptr<ClTensorHandle> tensorHandle = std::make_unique<ClTensorHandle>(tensorInfo, dataLayout);
    tensorHandle->SetMemoryGroup(m_MemoryManager->GetInterLayerMemoryGroup());

    return tensorHandle;
}

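// Creates a view into an existing parent tensor rather than allocating new device
// memory. Arm Compute Library indexes coordinates in reverse order relative to armnn,
// so the sub-tensor origin is flipped before validation against the parent shape.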
std::unique_ptr<ITensorHandle> ClWorkloadFactory::CreateSubTensorHandle(ITensorHandle& parent,
                                                                        TensorShape const& subTensorShape,
                                                                        unsigned int const* subTensorOrigin) const
{
    arm_compute::Coordinates coords;
    arm_compute::TensorShape shape = armcomputetensorutils::BuildArmComputeTensorShape(subTensorShape);

    coords.set_num_dimensions(subTensorShape.GetNumDimensions());
    for (unsigned int i = 0; i < subTensorShape.GetNumDimensions(); i++)
    {
        // Arm compute indexes tensor coords in reverse order.
        unsigned int revertedIndex = subTensorShape.GetNumDimensions() - i - 1;
        coords.set(i, armnn::numeric_cast<int>(subTensorOrigin[revertedIndex]));
    }

    const arm_compute::TensorShape parentShape = armcomputetensorutils::BuildArmComputeTensorShape(parent.GetShape());
    if (!::arm_compute::error_on_invalid_subtensor(__func__, __FILE__, __LINE__, parentShape, coords, shape))
    {
        return nullptr;
    }

    return std::make_unique<ClSubTensorHandle>(
        PolymorphicDowncast<IClTensorHandle*>(&parent), shape, coords);
}

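// Unified workload creation: downcast the generic QueueDescriptor to the
// layer-specific type and build the matching CL workload. A nullptr return means
// the layer type (or requested sub-operation) has no CL implementation here.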
std::unique_ptr<IWorkload> ClWorkloadFactory::CreateWorkload(LayerType type,
                                                             const QueueDescriptor& descriptor,
                                                             const WorkloadInfo& info) const
{
    switch(type)
    {
        case LayerType::Activation :
        {
            auto activationQueueDescriptor = PolymorphicDowncast<const ActivationQueueDescriptor*>(&descriptor);
            return MakeWorkload<ClActivationWorkload>(*activationQueueDescriptor, info, m_CLCompileContext);
        }
        case LayerType::Addition :
        {
            auto additionQueueDescriptor = PolymorphicDowncast<const AdditionQueueDescriptor*>(&descriptor);
            return MakeWorkload<ClAdditionWorkload>(*additionQueueDescriptor, info, m_CLCompileContext);
        }
        case LayerType::ArgMinMax :
        {
            auto argMinMaxQueueDescriptor = PolymorphicDowncast<const ArgMinMaxQueueDescriptor*>(&descriptor);
            return MakeWorkload<ClArgMinMaxWorkload>(*argMinMaxQueueDescriptor, info, m_CLCompileContext);
        }
        case LayerType::BatchMatMul :
        {
            auto batchMatMulQueueDescriptor = PolymorphicDowncast<const BatchMatMulQueueDescriptor*>(&descriptor);
            return std::make_unique<ClBatchMatMulWorkload>(*batchMatMulQueueDescriptor, info, m_CLCompileContext);
        }
        case LayerType::BatchNormalization :
        {
            auto batchNormalizationQueueDescriptor
                     = PolymorphicDowncast<const BatchNormalizationQueueDescriptor*>(&descriptor);
            return MakeWorkload<ClBatchNormalizationFloatWorkload, NullWorkload>
                (*batchNormalizationQueueDescriptor, info, m_CLCompileContext);
        }
        case LayerType::BatchToSpaceNd :
        {
            auto batchToSpaceNdQueueDescriptor
                     = PolymorphicDowncast<const BatchToSpaceNdQueueDescriptor*>(&descriptor);
            return MakeWorkload<ClBatchToSpaceNdWorkload>(*batchToSpaceNdQueueDescriptor, info, m_CLCompileContext);
        }
        case LayerType::Cast :
        {
            auto castQueueDescriptor = PolymorphicDowncast<const CastQueueDescriptor*>(&descriptor);
            return MakeWorkload<ClCastWorkload>(*castQueueDescriptor, info, m_CLCompileContext);
        }
        case LayerType::ChannelShuffle :
        {
            auto channelShuffleQueueDescriptor
                     = PolymorphicDowncast<const ChannelShuffleQueueDescriptor*>(&descriptor);
            return MakeWorkload<ClChannelShuffleWorkload>(*channelShuffleQueueDescriptor, info, m_CLCompileContext);
        }
        case LayerType::Comparison :
        {
            auto comparisonQueueDescriptor = PolymorphicDowncast<const ComparisonQueueDescriptor*>(&descriptor);
            return MakeWorkload<ClComparisonWorkload>(*comparisonQueueDescriptor, info, m_CLCompileContext);
        }
        case LayerType::Concat :
        {
            auto concatQueueDescriptor = PolymorphicDowncast<const ConcatQueueDescriptor*>(&descriptor);
            return MakeWorkload<ClConcatWorkload>(*concatQueueDescriptor, info, m_CLCompileContext);
        }
        case LayerType::Constant :
        {
            auto constantQueueDescriptor = PolymorphicDowncast<const ConstantQueueDescriptor*>(&descriptor);
            return MakeWorkload<ClConstantWorkload>(*constantQueueDescriptor, info, m_CLCompileContext);
        }
        case LayerType::ConvertFp16ToFp32 :
        {
            auto convertFp16ToFp32QueueDescriptor
                     = PolymorphicDowncast<const ConvertFp16ToFp32QueueDescriptor*>(&descriptor);
            return MakeWorkload<ClConvertFp16ToFp32Workload>(*convertFp16ToFp32QueueDescriptor,
                                                             info,
                                                             m_CLCompileContext);
        }
        case LayerType::ConvertFp32ToFp16 :
        {
            auto convertFp32ToFp16QueueDescriptor
                     = PolymorphicDowncast<const ConvertFp32ToFp16QueueDescriptor*>(&descriptor);
            return MakeWorkload<ClConvertFp32ToFp16Workload>(*convertFp32ToFp16QueueDescriptor,
                                                             info,
                                                             m_CLCompileContext);
        }
        case LayerType::Convolution2d :
        {
            auto convolution2dQueueDescriptor = PolymorphicDowncast<const Convolution2dQueueDescriptor*>(&descriptor);

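            // Fast math is opt-in via the ModelOptions and may let the Arm Compute
            // Library choose faster convolution algorithms (such as Winograd) that
            // can trade a small amount of numerical precision for speed.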
            bool isFastMathEnabled = false;
            if (m_ModelContextPtr)
            {
                auto modelOptions = dynamic_cast<ClBackendModelContext*>(m_ModelContextPtr.get());
                if (modelOptions)
                {
                    isFastMathEnabled = modelOptions->IsFastMathEnabled();
                }
            }
            return MakeWorkload<ClConvolution2dWorkload>(*convolution2dQueueDescriptor,
                                                         info,
                                                         m_MemoryManager->GetIntraLayerManager(),
                                                         m_CLCompileContext,
                                                         isFastMathEnabled);
        }
        case LayerType::Convolution3d :
        {
            auto convolution3dQueueDescriptor = PolymorphicDowncast<const Convolution3dQueueDescriptor*>(&descriptor);

            bool isFastMathEnabled = false;
            if (m_ModelContextPtr)
            {
                auto modelOptions = dynamic_cast<ClBackendModelContext*>(m_ModelContextPtr.get());
                if (modelOptions)
                {
                    isFastMathEnabled = modelOptions->IsFastMathEnabled();
                }
            }
            return MakeWorkload<ClConvolution3dWorkload>(*convolution3dQueueDescriptor,
                                                         info,
                                                         m_MemoryManager->GetIntraLayerManager(),
                                                         m_CLCompileContext,
                                                         isFastMathEnabled);
        }
        case LayerType::Debug :
        {
            auto debugQueueDescriptor = PolymorphicDowncast<const DebugQueueDescriptor*>(&descriptor);
            return MakeWorkload<NullWorkload, NullWorkload>(*debugQueueDescriptor, info, m_CLCompileContext);
        }
        case LayerType::DepthToSpace :
        {
            auto depthToSpaceQueueDescriptor = PolymorphicDowncast<const DepthToSpaceQueueDescriptor*>(&descriptor);
            return MakeWorkload<ClDepthToSpaceWorkload>(*depthToSpaceQueueDescriptor, info, m_CLCompileContext);
        }
        case LayerType::DepthwiseConvolution2d :
        {
            auto depthwiseConvolution2dQueueDescriptor
                     = PolymorphicDowncast<const DepthwiseConvolution2dQueueDescriptor*>(&descriptor);
            return MakeWorkload<ClDepthwiseConvolutionWorkload>(*depthwiseConvolution2dQueueDescriptor,
                                                                info,
                                                                m_CLCompileContext);
        }
        case LayerType::Dequantize :
        {
            auto dequantizeQueueDescriptor = PolymorphicDowncast<const DequantizeQueueDescriptor*>(&descriptor);
            return MakeWorkload<ClDequantizeWorkload>(*dequantizeQueueDescriptor, info, m_CLCompileContext);
        }
        case LayerType::DetectionPostProcess :
        {
            auto detectionPostProcessQueueDescriptor
                     = PolymorphicDowncast<const DetectionPostProcessQueueDescriptor*>(&descriptor);
            return MakeWorkload<NullWorkload, NullWorkload>(*detectionPostProcessQueueDescriptor,
                                                            info,
                                                            m_CLCompileContext);
        }
        case LayerType::Division :
        {
            auto divisionQueueDescriptor = PolymorphicDowncast<const DivisionQueueDescriptor*>(&descriptor);
            return std::make_unique<ClDivisionWorkload>(*divisionQueueDescriptor, info, m_CLCompileContext);
        }
        case LayerType::ElementwiseBinary :
        {
            auto elementwiseBinaryQueueDescriptor
                     = PolymorphicDowncast<const ElementwiseBinaryQueueDescriptor*>(&descriptor);

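            // ElementwiseBinary is lowered onto the dedicated per-operation CL
            // workloads: inputs, outputs and any fused additional info are repackaged
            // into the corresponding operation-specific descriptor.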
            switch (elementwiseBinaryQueueDescriptor->m_Parameters.m_Operation)
            {
                case BinaryOperation::Add:
                {
                    AdditionQueueDescriptor additionQueueDescriptor;
                    additionQueueDescriptor.m_Inputs = descriptor.m_Inputs;
                    additionQueueDescriptor.m_Outputs = descriptor.m_Outputs;
                    additionQueueDescriptor.m_AdditionalInfoObject =
                        elementwiseBinaryQueueDescriptor->m_AdditionalInfoObject;
                    return std::make_unique<ClAdditionWorkload>(additionQueueDescriptor, info, m_CLCompileContext);
                }
                case BinaryOperation::Div:
                {
                    DivisionQueueDescriptor divisionQueueDescriptor;
                    divisionQueueDescriptor.m_Inputs = descriptor.m_Inputs;
                    divisionQueueDescriptor.m_Outputs = descriptor.m_Outputs;
                    divisionQueueDescriptor.m_AdditionalInfoObject =
                        elementwiseBinaryQueueDescriptor->m_AdditionalInfoObject;
                    return std::make_unique<ClDivisionWorkload>(divisionQueueDescriptor, info, m_CLCompileContext);
                }
                case BinaryOperation::Maximum:
                {
                    MaximumQueueDescriptor maximumQueueDescriptor;
                    maximumQueueDescriptor.m_Inputs = descriptor.m_Inputs;
                    maximumQueueDescriptor.m_Outputs = descriptor.m_Outputs;
                    maximumQueueDescriptor.m_AdditionalInfoObject =
                        elementwiseBinaryQueueDescriptor->m_AdditionalInfoObject;
                    return std::make_unique<ClMaximumWorkload>(maximumQueueDescriptor, info, m_CLCompileContext);
                }
                case BinaryOperation::Minimum:
                {
                    MinimumQueueDescriptor minimumQueueDescriptor;
                    minimumQueueDescriptor.m_Inputs = descriptor.m_Inputs;
                    minimumQueueDescriptor.m_Outputs = descriptor.m_Outputs;
                    minimumQueueDescriptor.m_AdditionalInfoObject =
                        elementwiseBinaryQueueDescriptor->m_AdditionalInfoObject;
                    return std::make_unique<ClMinimumWorkload>(minimumQueueDescriptor, info, m_CLCompileContext);
                }
                case BinaryOperation::Mul:
                {
                    MultiplicationQueueDescriptor multiplicationQueueDescriptor;
                    multiplicationQueueDescriptor.m_Inputs = descriptor.m_Inputs;
                    multiplicationQueueDescriptor.m_Outputs = descriptor.m_Outputs;
                    multiplicationQueueDescriptor.m_AdditionalInfoObject =
                        elementwiseBinaryQueueDescriptor->m_AdditionalInfoObject;
                    return std::make_unique<ClMultiplicationWorkload>(multiplicationQueueDescriptor,
                                                                      info,
                                                                      m_CLCompileContext);
                }
                case BinaryOperation::Sub:
                {
                    SubtractionQueueDescriptor subtractionQueueDescriptor;
                    subtractionQueueDescriptor.m_Inputs = descriptor.m_Inputs;
                    subtractionQueueDescriptor.m_Outputs = descriptor.m_Outputs;
                    subtractionQueueDescriptor.m_AdditionalInfoObject =
                        elementwiseBinaryQueueDescriptor->m_AdditionalInfoObject;
                    return std::make_unique<ClSubtractionWorkload>(subtractionQueueDescriptor,
                                                                   info,
                                                                   m_CLCompileContext);
                }
                default:
                    return nullptr;
            }
        }
        case LayerType::ElementwiseUnary :
        {
            auto elementwiseUnaryQueueDescriptor
                     = PolymorphicDowncast<const ElementwiseUnaryQueueDescriptor*>(&descriptor);

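            // Most unary operations take the ElementwiseUnary descriptor directly;
            // Abs and Rsqrt have dedicated descriptor types and are repackaged first.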
            switch(elementwiseUnaryQueueDescriptor->m_Parameters.m_Operation)
            {
                case UnaryOperation::Abs:
                {
                    AbsQueueDescriptor absQueueDescriptor;
                    absQueueDescriptor.m_Inputs = elementwiseUnaryQueueDescriptor->m_Inputs;
                    absQueueDescriptor.m_Outputs = elementwiseUnaryQueueDescriptor->m_Outputs;

                    return std::make_unique<ClAbsWorkload>(absQueueDescriptor, info, m_CLCompileContext);
                }
                case UnaryOperation::Exp:
                    return std::make_unique<ClExpWorkload>(*elementwiseUnaryQueueDescriptor, info, m_CLCompileContext);
                case UnaryOperation::Log:
                    return std::make_unique<ClLogWorkload>(*elementwiseUnaryQueueDescriptor, info, m_CLCompileContext);
                case UnaryOperation::LogicalNot:
                    return std::make_unique<ClLogicalNotWorkload>(*elementwiseUnaryQueueDescriptor,
                                                                  info,
                                                                  m_CLCompileContext);
                case UnaryOperation::Neg:
                    return std::make_unique<ClNegWorkload>(*elementwiseUnaryQueueDescriptor, info, m_CLCompileContext);
                case UnaryOperation::Rsqrt:
                {
                    RsqrtQueueDescriptor rsqrtQueueDescriptor;
                    rsqrtQueueDescriptor.m_Inputs = elementwiseUnaryQueueDescriptor->m_Inputs;
                    rsqrtQueueDescriptor.m_Outputs = elementwiseUnaryQueueDescriptor->m_Outputs;

                    return std::make_unique<ClRsqrtWorkload>(rsqrtQueueDescriptor, info, m_CLCompileContext);
                }
                case UnaryOperation::Sin:
                    return std::make_unique<ClSinWorkload>(*elementwiseUnaryQueueDescriptor, info, m_CLCompileContext);
                case UnaryOperation::Sqrt:
                    return std::make_unique<ClSqrtWorkload>(*elementwiseUnaryQueueDescriptor, info, m_CLCompileContext);
                default:
                    return nullptr;
            }
        }
        case LayerType::Fill :
        {
            auto fillQueueDescriptor = PolymorphicDowncast<const FillQueueDescriptor*>(&descriptor);
            return std::make_unique<ClFillWorkload>(*fillQueueDescriptor, info, m_CLCompileContext);
        }
        case LayerType::Floor :
        {
            auto floorQueueDescriptor = PolymorphicDowncast<const FloorQueueDescriptor*>(&descriptor);
            return MakeWorkload<ClFloorFloatWorkload, NullWorkload>(*floorQueueDescriptor, info, m_CLCompileContext);
        }
        case LayerType::FullyConnected :
        {
            auto fullyConnectedQueueDescriptor
                     = PolymorphicDowncast<const FullyConnectedQueueDescriptor*>(&descriptor);
            return MakeWorkload<ClFullyConnectedWorkload>(*fullyConnectedQueueDescriptor,
                                                          info,
                                                          m_MemoryManager->GetIntraLayerManager(),
                                                          m_CLCompileContext);
        }
        case LayerType::Gather :
        {
            auto gatherQueueDescriptor = PolymorphicDowncast<const GatherQueueDescriptor*>(&descriptor);
            return MakeWorkload<ClGatherWorkload>(*gatherQueueDescriptor, info, m_CLCompileContext);
        }
        case LayerType::GatherNd :
        {
            auto gatherNdQueueDescriptor = PolymorphicDowncast<const GatherNdQueueDescriptor*>(&descriptor);
            return MakeWorkload<ClGatherNdWorkload>(*gatherNdQueueDescriptor, info, m_CLCompileContext);
        }
        case LayerType::Input :
        {
            auto inputQueueDescriptor = PolymorphicDowncast<const InputQueueDescriptor*>(&descriptor);
            return std::make_unique<CopyMemGenericWorkload>(*inputQueueDescriptor, info);
        }
        case LayerType::InstanceNormalization :
        {
            auto instanceNormalizationQueueDescriptor
                     = PolymorphicDowncast<const InstanceNormalizationQueueDescriptor*>(&descriptor);
            return MakeWorkload<ClInstanceNormalizationWorkload>(*instanceNormalizationQueueDescriptor,
                                                                 info,
                                                                 m_CLCompileContext);
        }
        case LayerType::L2Normalization :
        {
            auto l2NormalizationQueueDescriptor
                     = PolymorphicDowncast<const L2NormalizationQueueDescriptor*>(&descriptor);
            return MakeWorkload<ClL2NormalizationFloatWorkload, NullWorkload>(*l2NormalizationQueueDescriptor,
                                                                              info,
                                                                              m_CLCompileContext);
        }
        case LayerType::LogicalBinary :
        {
            auto logicalBinaryQueueDescriptor = PolymorphicDowncast<const LogicalBinaryQueueDescriptor*>(&descriptor);

            switch(logicalBinaryQueueDescriptor->m_Parameters.m_Operation)
            {
                case LogicalBinaryOperation::LogicalAnd:
                    return std::make_unique<ClLogicalAndWorkload>(*logicalBinaryQueueDescriptor,
                                                                  info,
                                                                  m_CLCompileContext);
                case LogicalBinaryOperation::LogicalOr:
                    return std::make_unique<ClLogicalOrWorkload>(*logicalBinaryQueueDescriptor,
                                                                 info,
                                                                 m_CLCompileContext);
                default:
                    return nullptr;
            }
        }
        case LayerType::LogSoftmax :
        {
            auto logSoftmaxQueueDescriptor = PolymorphicDowncast<const LogSoftmaxQueueDescriptor*>(&descriptor);

            return MakeWorkload<ClLogSoftmaxWorkload>(*logSoftmaxQueueDescriptor,
                                                      info,
                                                      m_MemoryManager->GetIntraLayerManager(),
                                                      m_CLCompileContext);
        }
        case LayerType::Lstm :
        {
            auto lstmQueueDescriptor = PolymorphicDowncast<const LstmQueueDescriptor*>(&descriptor);
            return MakeWorkload<ClLstmFloatWorkload, NullWorkload>(*lstmQueueDescriptor, info, m_CLCompileContext);
        }
        case LayerType::Maximum :
        {
            auto maximumQueueDescriptor = PolymorphicDowncast<const MaximumQueueDescriptor*>(&descriptor);
            return MakeWorkload<ClMaximumWorkload>(*maximumQueueDescriptor, info, m_CLCompileContext);
        }
        case LayerType::Mean :
        {
            auto meanQueueDescriptor = PolymorphicDowncast<const MeanQueueDescriptor*>(&descriptor);
            return MakeWorkload<ClMeanWorkload>(*meanQueueDescriptor, info, m_CLCompileContext);
        }
        case LayerType::MemCopy :
        {
            auto memCopyQueueDescriptor = PolymorphicDowncast<const MemCopyQueueDescriptor*>(&descriptor);
            if (memCopyQueueDescriptor->m_Inputs.empty() || !memCopyQueueDescriptor->m_Inputs[0])
            {
                throw InvalidArgumentException("ClWorkloadFactory: Invalid null input for MemCopy workload");
            }
            return MakeWorkload<CopyMemGenericWorkload>(*memCopyQueueDescriptor, info);
        }
        case LayerType::MemImport :
        {
            auto memImportQueueDescriptor = PolymorphicDowncast<const MemImportQueueDescriptor*>(&descriptor);
            if (memImportQueueDescriptor->m_Inputs.empty() || !memImportQueueDescriptor->m_Inputs[0])
            {
                throw InvalidArgumentException("ClWorkloadFactory: Invalid null input for MemImport workload");
            }
            return std::make_unique<ImportMemGenericWorkload>(*memImportQueueDescriptor, info);
        }
        case LayerType::Minimum :
        {
            auto minimumQueueDescriptor = PolymorphicDowncast<const MinimumQueueDescriptor*>(&descriptor);
            return MakeWorkload<ClMinimumWorkload>(*minimumQueueDescriptor, info, m_CLCompileContext);
        }
        case LayerType::Multiplication :
        {
            auto multiplicationQueueDescriptor = PolymorphicDowncast<const MultiplicationQueueDescriptor*>(&descriptor);
            return MakeWorkload<ClMultiplicationWorkload>(*multiplicationQueueDescriptor, info, m_CLCompileContext);
        }
        case LayerType::Normalization :
        {
            auto normalizationQueueDescriptor = PolymorphicDowncast<const NormalizationQueueDescriptor*>(&descriptor);
            return MakeWorkload<ClNormalizationFloatWorkload, NullWorkload>(*normalizationQueueDescriptor,
                                                                            info,
                                                                            m_CLCompileContext);
        }
        case LayerType::Output :
        {
            auto outputQueueDescriptor = PolymorphicDowncast<const OutputQueueDescriptor*>(&descriptor);
            return std::make_unique<CopyMemGenericWorkload>(*outputQueueDescriptor, info);
        }
        case LayerType::Pad :
        {
            auto padQueueDescriptor = PolymorphicDowncast<const PadQueueDescriptor*>(&descriptor);
            return MakeWorkload<ClPadWorkload>(*padQueueDescriptor, info, m_CLCompileContext);
        }
        case LayerType::Permute :
        {
            auto permuteQueueDescriptor = PolymorphicDowncast<const PermuteQueueDescriptor*>(&descriptor);
            return MakeWorkload<ClPermuteWorkload>(*permuteQueueDescriptor, info, m_CLCompileContext);
        }
        case LayerType::Pooling2d :
        {
            auto pooling2dQueueDescriptor = PolymorphicDowncast<const Pooling2dQueueDescriptor*>(&descriptor);
            return MakeWorkload<ClPooling2dWorkload>(*pooling2dQueueDescriptor, info, m_CLCompileContext);
        }
        case LayerType::Pooling3d :
        {
            auto pooling3dQueueDescriptor = PolymorphicDowncast<const Pooling3dQueueDescriptor*>(&descriptor);
            return MakeWorkload<ClPooling3dWorkload>(*pooling3dQueueDescriptor, info, m_CLCompileContext);
        }
        case LayerType::PreCompiled :
        {
            auto preCompiledQueueDescriptor = PolymorphicDowncast<const PreCompiledQueueDescriptor*>(&descriptor);
            return MakeWorkload<NullWorkload, NullWorkload>(*preCompiledQueueDescriptor, info, m_CLCompileContext);
        }
        case LayerType::Prelu :
        {
            auto preluQueueDescriptor = PolymorphicDowncast<const PreluQueueDescriptor*>(&descriptor);
            return MakeWorkload<ClPreluWorkload>(*preluQueueDescriptor, info, m_CLCompileContext);
        }
        case LayerType::QLstm :
        {
            auto qLstmQueueDescriptor = PolymorphicDowncast<const QLstmQueueDescriptor*>(&descriptor);
            return std::make_unique<ClQLstmWorkload>(*qLstmQueueDescriptor, info, m_CLCompileContext);
        }
        case LayerType::Quantize :
        {
            auto quantizeQueueDescriptor = PolymorphicDowncast<const QuantizeQueueDescriptor*>(&descriptor);
            return MakeWorkload<ClQuantizeWorkload>(*quantizeQueueDescriptor, info, m_CLCompileContext);
        }
        case LayerType::QuantizedLstm :
        {
            auto quantizedLstmQueueDescriptor = PolymorphicDowncast<const QuantizedLstmQueueDescriptor*>(&descriptor);
            return MakeWorkload<ClQuantizedLstmWorkload>(*quantizedLstmQueueDescriptor, info, m_CLCompileContext);
        }
        case LayerType::Rank :
        {
            auto rankQueueDescriptor = PolymorphicDowncast<const RankQueueDescriptor*>(&descriptor);
            return std::make_unique<ClRankWorkload>(*rankQueueDescriptor, info);
        }
        case LayerType::Reduce :
        {
            auto reduceQueueDescriptor = PolymorphicDowncast<const ReduceQueueDescriptor*>(&descriptor);
            return std::make_unique<ClReduceWorkload>(*reduceQueueDescriptor, info);
        }
        case LayerType::Reshape :
        {
            auto reshapeQueueDescriptor = PolymorphicDowncast<const ReshapeQueueDescriptor*>(&descriptor);
            return MakeWorkload<ClReshapeWorkload>(*reshapeQueueDescriptor, info, m_CLCompileContext);
        }
        case LayerType::Resize :
        {
            auto resizeQueueDescriptor = PolymorphicDowncast<const ResizeQueueDescriptor*>(&descriptor);
            return MakeWorkload<ClResizeWorkload>(*resizeQueueDescriptor, info, m_CLCompileContext);
        }
        case LayerType::Slice :
        {
            auto sliceQueueDescriptor = PolymorphicDowncast<const SliceQueueDescriptor*>(&descriptor);
            return MakeWorkload<ClSliceWorkload>(*sliceQueueDescriptor, info, m_CLCompileContext);
        }
        case LayerType::Softmax :
        {
            auto softmaxQueueDescriptor = PolymorphicDowncast<const SoftmaxQueueDescriptor*>(&descriptor);
            return std::make_unique<ClSoftmaxWorkload>(*softmaxQueueDescriptor,
                                                       info,
                                                       m_MemoryManager->GetIntraLayerManager(),
                                                       m_CLCompileContext);
        }
        case LayerType::SpaceToBatchNd :
        {
            auto spaceToBatchNdQueueDescriptor
                     = PolymorphicDowncast<const SpaceToBatchNdQueueDescriptor*>(&descriptor);
            return MakeWorkload<ClSpaceToBatchNdWorkload>(*spaceToBatchNdQueueDescriptor, info, m_CLCompileContext);
        }
        case LayerType::SpaceToDepth :
        {
            auto spaceToDepthQueueDescriptor = PolymorphicDowncast<const SpaceToDepthQueueDescriptor*>(&descriptor);
            return MakeWorkload<ClSpaceToDepthWorkload>(*spaceToDepthQueueDescriptor, info, m_CLCompileContext);
        }
        case LayerType::Splitter :
        {
            auto splitterQueueDescriptor = PolymorphicDowncast<const SplitterQueueDescriptor*>(&descriptor);
            return MakeWorkload<ClSplitterWorkload>(*splitterQueueDescriptor, info, m_CLCompileContext);
        }
        case LayerType::Stack :
        {
            auto stackQueueDescriptor = PolymorphicDowncast<const StackQueueDescriptor*>(&descriptor);
            return MakeWorkload<ClStackWorkload>(*stackQueueDescriptor, info, m_CLCompileContext);
        }
        case LayerType::StridedSlice :
        {
            auto stridedSliceQueueDescriptor = PolymorphicDowncast<const StridedSliceQueueDescriptor*>(&descriptor);
            return MakeWorkload<ClStridedSliceWorkload>(*stridedSliceQueueDescriptor, info, m_CLCompileContext);
        }
        case LayerType::Subtraction :
        {
            auto subtractionQueueDescriptor = PolymorphicDowncast<const SubtractionQueueDescriptor*>(&descriptor);
            return MakeWorkload<ClSubtractionWorkload>(*subtractionQueueDescriptor, info, m_CLCompileContext);
        }
        case LayerType::Transpose :
        {
            auto transposeQueueDescriptor = PolymorphicDowncast<const TransposeQueueDescriptor*>(&descriptor);
            return MakeWorkload<ClTransposeWorkload>(*transposeQueueDescriptor, info, m_CLCompileContext);
        }
        case LayerType::TransposeConvolution2d :
        {
            auto transposeConvolution2dQueueDescriptor
                     = PolymorphicDowncast<const TransposeConvolution2dQueueDescriptor*>(&descriptor);
            return MakeWorkload<ClTransposeConvolution2dWorkload>(*transposeConvolution2dQueueDescriptor,
                                                                  info,
                                                                  m_MemoryManager->GetIntraLayerManager(),
                                                                  m_CLCompileContext);
        }
        case LayerType::UnidirectionalSequenceLstm :
        {
            auto desc = PolymorphicDowncast<const UnidirectionalSequenceLstmQueueDescriptor*>(&descriptor);
            return MakeWorkloadHelper<ClUnidirectionalSequenceLstmFloatWorkload, NullWorkload>(*desc,
                                                                                               info,
                                                                                               m_CLCompileContext);
        }
        default:
            return nullptr;
    }
}

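// The per-layer Create* methods below predate the unified CreateWorkload entry point
// above and mirror its behaviour for individual layer types.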
std::unique_ptr<IWorkload> ClWorkloadFactory::CreateActivation(const ActivationQueueDescriptor& descriptor,
                                                               const WorkloadInfo& info) const
{
    return MakeWorkload<ClActivationWorkload>(descriptor, info, m_CLCompileContext);
}

std::unique_ptr<IWorkload> ClWorkloadFactory::CreateAddition(const AdditionQueueDescriptor& descriptor,
                                                             const WorkloadInfo& info) const
{
    return MakeWorkload<ClAdditionWorkload>(descriptor, info, m_CLCompileContext);
}

std::unique_ptr<IWorkload> ClWorkloadFactory::CreateArgMinMax(const ArgMinMaxQueueDescriptor& descriptor,
                                                              const WorkloadInfo& info) const
{
    return std::make_unique<ClArgMinMaxWorkload>(descriptor, info, m_CLCompileContext);
}

std::unique_ptr<IWorkload> ClWorkloadFactory::CreateBatchNormalization(
    const BatchNormalizationQueueDescriptor& descriptor,
    const WorkloadInfo& info) const
{
    return MakeWorkload<ClBatchNormalizationFloatWorkload, NullWorkload>(descriptor, info, m_CLCompileContext);
}

std::unique_ptr<IWorkload> ClWorkloadFactory::CreateBatchToSpaceNd(const BatchToSpaceNdQueueDescriptor& descriptor,
                                                                   const WorkloadInfo& info) const
{
    return MakeWorkload<ClBatchToSpaceNdWorkload>(descriptor, info, m_CLCompileContext);
}

std::unique_ptr<IWorkload> ClWorkloadFactory::CreateCast(const CastQueueDescriptor& descriptor,
                                                         const WorkloadInfo& info) const
{
    return MakeWorkload<ClCastWorkload>(descriptor, info, m_CLCompileContext);
}

std::unique_ptr<IWorkload> ClWorkloadFactory::CreateChannelShuffle(const ChannelShuffleQueueDescriptor& descriptor,
                                                                   const WorkloadInfo& info) const
{
    return MakeWorkload<ClChannelShuffleWorkload>(descriptor, info, m_CLCompileContext);
}

std::unique_ptr<IWorkload> ClWorkloadFactory::CreateComparison(const ComparisonQueueDescriptor& descriptor,
                                                               const WorkloadInfo& info) const
{
    return MakeWorkload<ClComparisonWorkload>(descriptor, info, m_CLCompileContext);
}

std::unique_ptr<IWorkload> ClWorkloadFactory::CreateConcat(const ConcatQueueDescriptor& descriptor,
                                                           const WorkloadInfo& info) const
{
    return MakeWorkload<ClConcatWorkload>(descriptor, info, m_CLCompileContext);
}

std::unique_ptr<IWorkload> ClWorkloadFactory::CreateConstant(const ConstantQueueDescriptor& descriptor,
                                                             const WorkloadInfo& info) const
{
    return MakeWorkload<ClConstantWorkload>(descriptor, info, m_CLCompileContext);
}

std::unique_ptr<IWorkload> ClWorkloadFactory::CreateConvertFp16ToFp32(
    const ConvertFp16ToFp32QueueDescriptor& descriptor,
    const WorkloadInfo& info) const
{
    return MakeWorkload<ClConvertFp16ToFp32Workload>(descriptor, info, m_CLCompileContext);
}

std::unique_ptr<IWorkload> ClWorkloadFactory::CreateConvertFp32ToFp16(
    const ConvertFp32ToFp16QueueDescriptor& descriptor,
    const WorkloadInfo& info) const
{
    return MakeWorkload<ClConvertFp32ToFp16Workload>(descriptor, info, m_CLCompileContext);
}

std::unique_ptr<IWorkload> ClWorkloadFactory::CreateConvolution2d(const Convolution2dQueueDescriptor& descriptor,
                                                                  const WorkloadInfo& info) const
{
    bool isFastMathEnabled = false;
    if (m_ModelContextPtr)
    {
        auto modelOptions = dynamic_cast<ClBackendModelContext*>(m_ModelContextPtr.get());
        if (modelOptions)
        {
            isFastMathEnabled = modelOptions->IsFastMathEnabled();
        }
    }
    return MakeWorkload<ClConvolution2dWorkload>(descriptor,
                                                 info,
                                                 m_MemoryManager->GetIntraLayerManager(),
                                                 m_CLCompileContext,
                                                 isFastMathEnabled);
}

std::unique_ptr<IWorkload> ClWorkloadFactory::CreateConvolution3d(const Convolution3dQueueDescriptor& descriptor,
                                                                  const WorkloadInfo& info) const
{
    bool isFastMathEnabled = false;
    if (m_ModelContextPtr)
    {
        auto modelOptions = dynamic_cast<ClBackendModelContext*>(m_ModelContextPtr.get());
        if (modelOptions)
        {
            isFastMathEnabled = modelOptions->IsFastMathEnabled();
        }
    }
    return MakeWorkload<ClConvolution3dWorkload>(descriptor,
                                                 info,
                                                 m_MemoryManager->GetIntraLayerManager(),
                                                 m_CLCompileContext,
                                                 isFastMathEnabled);
}

std::unique_ptr<IWorkload> ClWorkloadFactory::CreateDebug(const DebugQueueDescriptor& descriptor,
                                                          const WorkloadInfo& info) const
{
    return MakeWorkload<NullWorkload, NullWorkload>(descriptor, info, m_CLCompileContext);
}

std::unique_ptr<IWorkload> ClWorkloadFactory::CreateDepthToSpace(const DepthToSpaceQueueDescriptor& descriptor,
                                                                 const WorkloadInfo& info) const
{
    return MakeWorkload<ClDepthToSpaceWorkload>(descriptor, info, m_CLCompileContext);
}

std::unique_ptr<IWorkload> ClWorkloadFactory::CreateDepthwiseConvolution2d(
    const DepthwiseConvolution2dQueueDescriptor& descriptor,
    const WorkloadInfo& info) const
{
    return MakeWorkload<ClDepthwiseConvolutionWorkload>(descriptor, info, m_CLCompileContext);
}

std::unique_ptr<IWorkload> ClWorkloadFactory::CreateDequantize(const DequantizeQueueDescriptor& descriptor,
                                                               const WorkloadInfo& info) const
{
    return MakeWorkload<ClDequantizeWorkload>(descriptor, info, m_CLCompileContext);
}

std::unique_ptr<IWorkload> ClWorkloadFactory::CreateDetectionPostProcess(
    const DetectionPostProcessQueueDescriptor& descriptor,
    const WorkloadInfo& info) const
{
    return MakeWorkload<NullWorkload, NullWorkload>(descriptor, info, m_CLCompileContext);
}

std::unique_ptr<IWorkload> ClWorkloadFactory::CreateDivision(const DivisionQueueDescriptor& descriptor,
                                                             const WorkloadInfo& info) const
{
    return std::make_unique<ClDivisionWorkload>(descriptor, info, m_CLCompileContext);
}

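// Mirrors the ElementwiseUnary handling in CreateWorkload above, except that
// UnaryOperation::Sqrt is only supported through the unified entry point.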
std::unique_ptr<IWorkload> ClWorkloadFactory::CreateElementwiseUnary(const ElementwiseUnaryQueueDescriptor& descriptor,
                                                                     const WorkloadInfo& info) const
{
    switch(descriptor.m_Parameters.m_Operation)
    {
        case UnaryOperation::Abs:
        {
            AbsQueueDescriptor absQueueDescriptor;
            absQueueDescriptor.m_Inputs = descriptor.m_Inputs;
            absQueueDescriptor.m_Outputs = descriptor.m_Outputs;

            return std::make_unique<ClAbsWorkload>(absQueueDescriptor, info, m_CLCompileContext);
        }
        case UnaryOperation::Exp:
            return std::make_unique<ClExpWorkload>(descriptor, info, m_CLCompileContext);
        case UnaryOperation::Log:
            return std::make_unique<ClLogWorkload>(descriptor, info, m_CLCompileContext);
        case UnaryOperation::LogicalNot:
            return std::make_unique<ClLogicalNotWorkload>(descriptor, info, m_CLCompileContext);
        case UnaryOperation::Neg:
            return std::make_unique<ClNegWorkload>(descriptor, info, m_CLCompileContext);
        case UnaryOperation::Rsqrt:
        {
            RsqrtQueueDescriptor rsqrtQueueDescriptor;
            rsqrtQueueDescriptor.m_Inputs = descriptor.m_Inputs;
            rsqrtQueueDescriptor.m_Outputs = descriptor.m_Outputs;

            return std::make_unique<ClRsqrtWorkload>(rsqrtQueueDescriptor, info, m_CLCompileContext);
        }
        case UnaryOperation::Sin:
            return std::make_unique<ClSinWorkload>(descriptor, info, m_CLCompileContext);
        default:
            return nullptr;
    }
}

std::unique_ptr<IWorkload> ClWorkloadFactory::CreateFill(const FillQueueDescriptor& descriptor,
                                                         const WorkloadInfo& info) const
{
    return std::make_unique<ClFillWorkload>(descriptor, info, m_CLCompileContext);
}

std::unique_ptr<IWorkload> ClWorkloadFactory::CreateFloor(const FloorQueueDescriptor& descriptor,
                                                          const WorkloadInfo& info) const
{
    return MakeWorkload<ClFloorFloatWorkload, NullWorkload>(descriptor, info, m_CLCompileContext);
}

std::unique_ptr<IWorkload> ClWorkloadFactory::CreateFullyConnected(const FullyConnectedQueueDescriptor& descriptor,
                                                                   const WorkloadInfo& info) const
{
    return MakeWorkload<ClFullyConnectedWorkload>(descriptor,
                                                  info,
                                                  m_MemoryManager->GetIntraLayerManager(),
                                                  m_CLCompileContext);
}

std::unique_ptr<IWorkload> ClWorkloadFactory::CreateGather(const GatherQueueDescriptor& descriptor,
                                                           const WorkloadInfo& info) const
{
    return MakeWorkload<ClGatherWorkload>(descriptor, info, m_CLCompileContext);
}

std::unique_ptr<IWorkload> ClWorkloadFactory::CreateInput(const InputQueueDescriptor& descriptor,
                                                          const WorkloadInfo& info) const
{
    return std::make_unique<CopyMemGenericWorkload>(descriptor, info);
}

std::unique_ptr<IWorkload> ClWorkloadFactory::CreateInstanceNormalization(
    const InstanceNormalizationQueueDescriptor& descriptor,
    const WorkloadInfo& info) const
{
    return MakeWorkload<ClInstanceNormalizationWorkload>(descriptor, info, m_CLCompileContext);
}

std::unique_ptr<IWorkload> ClWorkloadFactory::CreateL2Normalization(const L2NormalizationQueueDescriptor& descriptor,
                                                                    const WorkloadInfo& info) const
{
    return MakeWorkload<ClL2NormalizationFloatWorkload, NullWorkload>(descriptor, info, m_CLCompileContext);
}

std::unique_ptr<IWorkload> ClWorkloadFactory::CreateLogicalBinary(const LogicalBinaryQueueDescriptor& descriptor,
                                                                  const WorkloadInfo& info) const
{
    switch(descriptor.m_Parameters.m_Operation)
    {
        case LogicalBinaryOperation::LogicalAnd:
            return std::make_unique<ClLogicalAndWorkload>(descriptor, info, m_CLCompileContext);
        case LogicalBinaryOperation::LogicalOr:
            return std::make_unique<ClLogicalOrWorkload>(descriptor, info, m_CLCompileContext);
        default:
            return nullptr;
    }
}

std::unique_ptr<IWorkload> ClWorkloadFactory::CreateLogSoftmax(const LogSoftmaxQueueDescriptor& descriptor,
                                                               const WorkloadInfo& info) const
{
    return MakeWorkload<ClLogSoftmaxWorkload>(descriptor,
                                              info,
                                              m_MemoryManager->GetIntraLayerManager(),
                                              m_CLCompileContext);
}

std::unique_ptr<IWorkload> ClWorkloadFactory::CreateLstm(const LstmQueueDescriptor& descriptor,
                                                         const WorkloadInfo& info) const
{
    return MakeWorkload<ClLstmFloatWorkload, NullWorkload>(descriptor, info, m_CLCompileContext);
}

std::unique_ptr<IWorkload> ClWorkloadFactory::CreateMaximum(const MaximumQueueDescriptor& descriptor,
                                                            const WorkloadInfo& info) const
{
    return MakeWorkload<ClMaximumWorkload>(descriptor, info, m_CLCompileContext);
}

std::unique_ptr<IWorkload> ClWorkloadFactory::CreateMean(const MeanQueueDescriptor& descriptor,
                                                         const WorkloadInfo& info) const
{
    return MakeWorkload<ClMeanWorkload>(descriptor, info, m_CLCompileContext);
}

std::unique_ptr<IWorkload> ClWorkloadFactory::CreateMemCopy(const MemCopyQueueDescriptor& descriptor,
                                                            const WorkloadInfo& info) const
{
    if (descriptor.m_Inputs.empty() || !descriptor.m_Inputs[0])
    {
        throw InvalidArgumentException("ClWorkloadFactory: Invalid null input for MemCopy workload");
    }

    return MakeWorkload<CopyMemGenericWorkload>(descriptor, info);
}

std::unique_ptr<IWorkload> ClWorkloadFactory::CreateMemImport(const MemImportQueueDescriptor& descriptor,
                                                              const WorkloadInfo& info) const
{
    if (descriptor.m_Inputs.empty() || !descriptor.m_Inputs[0])
    {
        throw InvalidArgumentException("ClWorkloadFactory: Invalid null input for MemImport workload");
    }

    return std::make_unique<ImportMemGenericWorkload>(descriptor, info);
}

std::unique_ptr<IWorkload> ClWorkloadFactory::CreateMinimum(const MinimumQueueDescriptor& descriptor,
                                                            const WorkloadInfo& info) const
{
    return MakeWorkload<ClMinimumWorkload>(descriptor, info, m_CLCompileContext);
}

std::unique_ptr<IWorkload> ClWorkloadFactory::CreateMultiplication(const MultiplicationQueueDescriptor& descriptor,
                                                                   const WorkloadInfo& info) const
{
    return MakeWorkload<ClMultiplicationWorkload>(descriptor, info, m_CLCompileContext);
}

std::unique_ptr<IWorkload> ClWorkloadFactory::CreateNormalization(const NormalizationQueueDescriptor& descriptor,
                                                                  const WorkloadInfo& info) const
{
    return MakeWorkload<ClNormalizationFloatWorkload, NullWorkload>(descriptor, info, m_CLCompileContext);
}

std::unique_ptr<IWorkload> ClWorkloadFactory::CreateOutput(const OutputQueueDescriptor& descriptor,
                                                           const WorkloadInfo& info) const
{
    return std::make_unique<CopyMemGenericWorkload>(descriptor, info);
}

std::unique_ptr<IWorkload> ClWorkloadFactory::CreatePad(const PadQueueDescriptor& descriptor,
                                                        const WorkloadInfo& info) const
{
    return MakeWorkload<ClPadWorkload>(descriptor, info, m_CLCompileContext);
}

std::unique_ptr<IWorkload> ClWorkloadFactory::CreatePermute(const PermuteQueueDescriptor& descriptor,
                                                            const WorkloadInfo& info) const
{
    return MakeWorkload<ClPermuteWorkload>(descriptor, info, m_CLCompileContext);
}

std::unique_ptr<IWorkload> ClWorkloadFactory::CreatePooling2d(const Pooling2dQueueDescriptor& descriptor,
                                                              const WorkloadInfo& info) const
{
    return MakeWorkload<ClPooling2dWorkload>(descriptor, info, m_CLCompileContext);
}

std::unique_ptr<IWorkload> ClWorkloadFactory::CreatePreCompiled(const PreCompiledQueueDescriptor& descriptor,
                                                                const WorkloadInfo& info) const
{
    return MakeWorkload<NullWorkload, NullWorkload>(descriptor, info, m_CLCompileContext);
}

std::unique_ptr<IWorkload> ClWorkloadFactory::CreatePrelu(const PreluQueueDescriptor& descriptor,
                                                          const WorkloadInfo& info) const
{
    return MakeWorkload<ClPreluWorkload>(descriptor, info, m_CLCompileContext);
}

std::unique_ptr<IWorkload> ClWorkloadFactory::CreateQLstm(const QLstmQueueDescriptor& descriptor,
                                                          const WorkloadInfo& info) const
{
    return std::make_unique<ClQLstmWorkload>(descriptor, info, m_CLCompileContext);
}

std::unique_ptr<IWorkload> ClWorkloadFactory::CreateQuantize(const QuantizeQueueDescriptor& descriptor,
                                                             const WorkloadInfo& info) const
{
    return MakeWorkload<ClQuantizeWorkload>(descriptor, info, m_CLCompileContext);
}

std::unique_ptr<IWorkload> ClWorkloadFactory::CreateQuantizedLstm(const QuantizedLstmQueueDescriptor& descriptor,
                                                                  const WorkloadInfo& info) const
{
    return MakeWorkload<ClQuantizedLstmWorkload>(descriptor, info, m_CLCompileContext);
}

std::unique_ptr<IWorkload> ClWorkloadFactory::CreateRank(const RankQueueDescriptor& descriptor,
                                                         const WorkloadInfo& info) const
{
    return std::make_unique<ClRankWorkload>(descriptor, info);
}

std::unique_ptr<IWorkload> ClWorkloadFactory::CreateReduce(const ReduceQueueDescriptor& descriptor,
                                                           const WorkloadInfo& info) const
{
    return std::make_unique<ClReduceWorkload>(descriptor, info);
}

std::unique_ptr<IWorkload> ClWorkloadFactory::CreateReshape(const ReshapeQueueDescriptor& descriptor,
                                                            const WorkloadInfo& info) const
{
    return MakeWorkload<ClReshapeWorkload>(descriptor, info, m_CLCompileContext);
}

std::unique_ptr<IWorkload> ClWorkloadFactory::CreateResize(const ResizeQueueDescriptor& descriptor,
                                                           const WorkloadInfo& info) const
{
    return MakeWorkload<ClResizeWorkload>(descriptor, info, m_CLCompileContext);
}

std::unique_ptr<IWorkload> ClWorkloadFactory::CreateSlice(const SliceQueueDescriptor& descriptor,
                                                          const WorkloadInfo& info) const
{
    return MakeWorkload<ClSliceWorkload>(descriptor, info, m_CLCompileContext);
}

std::unique_ptr<IWorkload> ClWorkloadFactory::CreateSoftmax(const SoftmaxQueueDescriptor& descriptor,
                                                            const WorkloadInfo& info) const
{
    return std::make_unique<ClSoftmaxWorkload>(descriptor,
                                               info,
                                               m_MemoryManager->GetIntraLayerManager(),
                                               m_CLCompileContext);
}

std::unique_ptr<IWorkload> ClWorkloadFactory::CreateSpaceToBatchNd(const SpaceToBatchNdQueueDescriptor& descriptor,
                                                                   const WorkloadInfo& info) const
{
    return MakeWorkload<ClSpaceToBatchNdWorkload>(descriptor, info, m_CLCompileContext);
}

std::unique_ptr<IWorkload> ClWorkloadFactory::CreateSpaceToDepth(const SpaceToDepthQueueDescriptor& descriptor,
                                                                 const WorkloadInfo& info) const
{
    return MakeWorkload<ClSpaceToDepthWorkload>(descriptor, info, m_CLCompileContext);
}

std::unique_ptr<IWorkload> ClWorkloadFactory::CreateSplitter(const SplitterQueueDescriptor& descriptor,
                                                             const WorkloadInfo& info) const
{
    return MakeWorkload<ClSplitterWorkload>(descriptor, info, m_CLCompileContext);
}

std::unique_ptr<IWorkload> ClWorkloadFactory::CreateStack(const StackQueueDescriptor& descriptor,
                                                          const WorkloadInfo& info) const
{
    return MakeWorkload<ClStackWorkload>(descriptor, info, m_CLCompileContext);
}

std::unique_ptr<IWorkload> ClWorkloadFactory::CreateStridedSlice(const StridedSliceQueueDescriptor& descriptor,
                                                                 const WorkloadInfo& info) const
{
    return MakeWorkload<ClStridedSliceWorkload>(descriptor, info, m_CLCompileContext);
}

std::unique_ptr<IWorkload> ClWorkloadFactory::CreateSubtraction(const SubtractionQueueDescriptor& descriptor,
                                                                const WorkloadInfo& info) const
{
    return MakeWorkload<ClSubtractionWorkload>(descriptor, info, m_CLCompileContext);
}

std::unique_ptr<IWorkload> ClWorkloadFactory::CreateTranspose(const TransposeQueueDescriptor& descriptor,
                                                              const WorkloadInfo& info) const
{
    return MakeWorkload<ClTransposeWorkload>(descriptor, info, m_CLCompileContext);
}

std::unique_ptr<IWorkload> ClWorkloadFactory::CreateTransposeConvolution2d(
    const TransposeConvolution2dQueueDescriptor& descriptor,
    const WorkloadInfo& info) const
{
    return MakeWorkload<ClTransposeConvolution2dWorkload>(descriptor,
                                                          info,
                                                          m_MemoryManager->GetIntraLayerManager(),
                                                          m_CLCompileContext);
}

} // namespace armnn