xref: /aosp_15_r20/external/armnn/src/backends/cl/ClWorkloadFactory.cpp (revision 89c4ff92f2867872bb9e2354d150bf0c8c502810)
1 //
2 // Copyright © 2017-2023 Arm Ltd and Contributors. All rights reserved.
3 // SPDX-License-Identifier: MIT
4 //
5 #include "ClWorkloadFactory.hpp"
6 #include "ClBackendId.hpp"
7 #include "ClBackendModelContext.hpp"
8 #include "ClContextDeserializer.hpp"
9 #include "ClContextSerializer.hpp"
10 
11 #include <Layer.hpp>
12 
13 #include <armnn/Exceptions.hpp>
14 #include <armnn/Logging.hpp>
15 #include <armnn/Utils.hpp>
16 #include <armnn/utility/IgnoreUnused.hpp>
17 #include <armnn/utility/NumericCast.hpp>
18 #include <armnn/utility/PolymorphicDowncast.hpp>
19 
20 #include <backendsCommon/MakeWorkloadHelper.hpp>
21 #include <armnn/backends/MemCopyWorkload.hpp>
22 #include <backendsCommon/MemImportWorkload.hpp>
23 #include <armnn/backends/TensorHandle.hpp>
24 
25 #include <cl/ClTensorHandle.hpp>
26 #include <cl/workloads/ClWorkloads.hpp>
27 #include <cl/workloads/ClWorkloadUtils.hpp>
28 
29 #include <arm_compute/core/CL/CLKernelLibrary.h>
30 #include <arm_compute/runtime/CL/CLBufferAllocator.h>
31 #include <arm_compute/runtime/CL/CLScheduler.h>
32 
33 #include <armnnUtils/Filesystem.hpp>
34 #include <fstream>
35 
36 #include <sys/stat.h>
37 
38 namespace armnn
39 {
40 
41 namespace
42 {
43 static const BackendId s_Id{ClBackendId()};
44 }
45 
IsLayerSupported(const Layer & layer,Optional<DataType> dataType,std::string & outReasonIfUnsupported)46 bool ClWorkloadFactory::IsLayerSupported(const Layer& layer,
47                                          Optional<DataType> dataType,
48                                          std::string& outReasonIfUnsupported)
49 {
50     return IWorkloadFactory::IsLayerSupported(s_Id, layer, dataType, outReasonIfUnsupported);
51 }
52 
IsLayerSupported(const IConnectableLayer & layer,Optional<DataType> dataType,std::string & outReasonIfUnsupported,const ModelOptions & modelOptions)53 bool ClWorkloadFactory::IsLayerSupported(const IConnectableLayer& layer,
54                                          Optional<DataType> dataType,
55                                          std::string& outReasonIfUnsupported,
56                                          const ModelOptions& modelOptions)
57 {
58     return IWorkloadFactory::IsLayerSupported(s_Id, layer, dataType, outReasonIfUnsupported, modelOptions);
59 }
60 
GetBackendId() const61 const BackendId& ClWorkloadFactory::GetBackendId() const
62 {
63     return s_Id;
64 }
65 
AfterWorkloadsCreated()66 void ClWorkloadFactory::AfterWorkloadsCreated()
67 {
68     if(m_ModelContextPtr)
69     {
70         auto modelOptions = dynamic_cast<ClBackendModelContext*>(m_ModelContextPtr.get());
71         if (modelOptions->SaveCachedNetwork())
72         {
73             ClContextSerializer serializer;
74             serializer.Serialize(m_CLCompileContext);
75             auto cachedFd = modelOptions->GetCachedFileDescriptor();
76             if (cachedFd != -1)
77             {
78                 std::vector<uint8_t> compiledContextData;
79                 std::stringstream stream;
80                 bool serialized = serializer.SaveSerializedToStream(stream);
81                 if (serialized)
82                 {
83                     std::string const serializedString{stream.str()};
84                     std::copy(serializedString.begin(),
85                               serializedString.end(),
86                               std::back_inserter(compiledContextData));
87                     auto success = write(cachedFd, compiledContextData.data(), compiledContextData.size());
88                     if (success == -1)
89                     {
90                         ARMNN_LOG(info) << "ClWorkloadFactory:: Could not cache the compiled context!";
91                     }
92                 }
93             }
94 
95             // Save map to a filepath provided in ModelOptions
96             auto filePath = modelOptions->GetCachedNetworkFilePath();
97             if (filePath != "" && fs::exists(filePath) && fs::is_regular_file(filePath))
98             {
99                 // Serialize ClContext to the file specified
100                 std::ofstream file(filePath, std::ios::out | std::ios::binary);
101                 serializer.SaveSerializedToStream(file);
102             }
103         }
104     }
105 }
106 
107 template <typename FloatWorkload, typename Uint8Workload, typename QueueDescriptorType, typename... Args>
MakeWorkload(const QueueDescriptorType & descriptor,const WorkloadInfo & info,Args &&...args)108 std::unique_ptr<IWorkload> ClWorkloadFactory::MakeWorkload(const QueueDescriptorType& descriptor,
109                                                            const WorkloadInfo& info,
110                                                            Args&&... args)
111 {
112     try
113     {
114         return MakeWorkloadHelper<FloatWorkload, Uint8Workload>(descriptor, info, std::forward<Args>(args)...);
115     }
116     catch (const cl::Error& clError)
117     {
118         throw WrapClError(clError, CHECK_LOCATION());
119     }
120 }
121 
122 template <typename Workload, typename QueueDescriptorType, typename... Args>
MakeWorkload(const QueueDescriptorType & descriptor,const WorkloadInfo & info,Args &&...args)123 std::unique_ptr<IWorkload> ClWorkloadFactory::MakeWorkload(const QueueDescriptorType& descriptor,
124                                                            const WorkloadInfo& info,
125                                                            Args&&... args)
126 {
127     try
128     {
129         return std::make_unique<Workload>(descriptor, info, std::forward<Args>(args)...);
130     }
131     catch (const cl::Error& clError)
132     {
133         throw WrapClError(clError, CHECK_LOCATION());
134     }
135 }
136 
InitializeCLCompileContext()137 void ClWorkloadFactory::InitializeCLCompileContext()
138 {
139     // Initialize our m_CLCompileContext using default device and context
140     auto context = arm_compute::CLKernelLibrary::get().context();
141     auto device  = arm_compute::CLKernelLibrary::get().get_device();
142     m_CLCompileContext = arm_compute::CLCompileContext(context, device);
143 
144     if (m_ModelContextPtr)
145     {
146         // Load saved programs if the user has set a filepath
147         auto modelOptions = dynamic_cast<ClBackendModelContext*>(m_ModelContextPtr.get());
148         auto filePath = modelOptions->GetCachedNetworkFilePath();
149         if (!(modelOptions->SaveCachedNetwork()))
150         {
151             ClContextDeserializer deserializer;
152             auto cachedFd = modelOptions->GetCachedFileDescriptor();
153             if (cachedFd != -1)
154             {
155                 struct stat statBuffer;
156                 if (fstat(cachedFd, &statBuffer) == 0)
157                 {
158                     long dataSize = static_cast<long>(statBuffer.st_size);
159                     if( dataSize > 0)
160                     {
161                         auto offset = lseek(cachedFd, 0, SEEK_CUR);
162                         if (offset == 0)
163                         {
164                             std::vector <uint8_t> compiledContextData(static_cast<unsigned int>(dataSize));
165                             auto success = pread(cachedFd, compiledContextData.data(), compiledContextData.size(), 0);
166                             if (success != -1)
167                             {
168                                 deserializer.DeserializeFromBinary(m_CLCompileContext,
169                                                                    context,
170                                                                    device,
171                                                                    compiledContextData);
172                             }
173                         }
174                     }
175 
176                 }
177             }
178 
179             if (filePath != "" && fs::exists(filePath) && fs::is_regular_file(filePath))
180             {
181                 // Deserialize binary file and load into m_CLCompileContext
182                 deserializer.Deserialize(m_CLCompileContext, context, device, filePath);
183             }
184         }
185     }
186 }
187 
ClWorkloadFactory(const std::shared_ptr<ClMemoryManager> & memoryManager)188 ClWorkloadFactory::ClWorkloadFactory(const std::shared_ptr<ClMemoryManager>& memoryManager)
189     : m_MemoryManager(memoryManager), m_ModelContextPtr(IBackendInternal::IBackendSpecificModelContextPtr{})
190 {
191     InitializeCLCompileContext();
192 }
193 
ClWorkloadFactory(const std::shared_ptr<ClMemoryManager> & memoryManager,const IBackendInternal::IBackendSpecificModelContextPtr & modelContextPtr)194 ClWorkloadFactory::ClWorkloadFactory(const std::shared_ptr<ClMemoryManager>& memoryManager,
195                                      const IBackendInternal::IBackendSpecificModelContextPtr& modelContextPtr)
196     : m_MemoryManager(memoryManager), m_ModelContextPtr(modelContextPtr)
197 {
198     InitializeCLCompileContext();
199 }
200 
CreateTensorHandle(const TensorInfo & tensorInfo,const bool IsMemoryManaged) const201 std::unique_ptr<ITensorHandle> ClWorkloadFactory::CreateTensorHandle(const TensorInfo& tensorInfo,
202                                                                      const bool IsMemoryManaged) const
203 {
204     IgnoreUnused(IsMemoryManaged);
205     std::unique_ptr<ClTensorHandle> tensorHandle = std::make_unique<ClTensorHandle>(tensorInfo);
206     tensorHandle->SetMemoryGroup(m_MemoryManager->GetInterLayerMemoryGroup());
207 
208     return tensorHandle;
209 }
210 
CreateTensorHandle(const TensorInfo & tensorInfo,DataLayout dataLayout,const bool IsMemoryManaged) const211 std::unique_ptr<ITensorHandle> ClWorkloadFactory::CreateTensorHandle(const TensorInfo& tensorInfo,
212                                                                      DataLayout dataLayout,
213                                                                      const bool IsMemoryManaged) const
214 {
215     IgnoreUnused(IsMemoryManaged);
216     std::unique_ptr<ClTensorHandle> tensorHandle = std::make_unique<ClTensorHandle>(tensorInfo, dataLayout);
217     tensorHandle->SetMemoryGroup(m_MemoryManager->GetInterLayerMemoryGroup());
218 
219     return tensorHandle;
220 }
221 
CreateSubTensorHandle(ITensorHandle & parent,TensorShape const & subTensorShape,unsigned int const * subTensorOrigin) const222 std::unique_ptr<ITensorHandle> ClWorkloadFactory::CreateSubTensorHandle(ITensorHandle& parent,
223                                                                         TensorShape const& subTensorShape,
224                                                                         unsigned int const* subTensorOrigin) const
225 {
226     arm_compute::Coordinates coords;
227     arm_compute::TensorShape shape = armcomputetensorutils::BuildArmComputeTensorShape(subTensorShape);
228 
229     coords.set_num_dimensions(subTensorShape.GetNumDimensions());
230     for (unsigned int i = 0; i < subTensorShape.GetNumDimensions(); i++)
231     {
232         // Arm compute indexes tensor coords in reverse order.
233         unsigned int revertedIndex = subTensorShape.GetNumDimensions() - i - 1;
234         coords.set(i, armnn::numeric_cast<int>(subTensorOrigin[revertedIndex]));
235     }
236 
237     const arm_compute::TensorShape parentShape = armcomputetensorutils::BuildArmComputeTensorShape(parent.GetShape());
238     if (!::arm_compute::error_on_invalid_subtensor(__func__, __FILE__, __LINE__, parentShape, coords, shape))
239     {
240         return nullptr;
241     }
242 
243     return std::make_unique<ClSubTensorHandle>(
244         PolymorphicDowncast<IClTensorHandle*>(&parent), shape, coords);
245 }
246 
CreateWorkload(LayerType type,const QueueDescriptor & descriptor,const WorkloadInfo & info) const247 std::unique_ptr<IWorkload> ClWorkloadFactory::CreateWorkload(LayerType type,
248                                                              const QueueDescriptor& descriptor,
249                                                              const WorkloadInfo& info) const
250 {
251     switch(type)
252     {
253         case LayerType::Activation :
254         {
255             auto activationQueueDescriptor = PolymorphicDowncast<const ActivationQueueDescriptor*>(&descriptor);
256             return MakeWorkload<ClActivationWorkload>(*activationQueueDescriptor, info, m_CLCompileContext);
257         }
258         case LayerType::Addition :
259         {
260             auto additionQueueDescriptor = PolymorphicDowncast<const AdditionQueueDescriptor*>(&descriptor);
261             return MakeWorkload<ClAdditionWorkload>(*additionQueueDescriptor, info, m_CLCompileContext);
262         }
263         case LayerType::ArgMinMax :
264         {
265             auto argMinMaxQueueDescriptor = PolymorphicDowncast<const ArgMinMaxQueueDescriptor*>(&descriptor);
266             return MakeWorkload<ClArgMinMaxWorkload>(*argMinMaxQueueDescriptor, info, m_CLCompileContext);
267         }
268         case LayerType::BatchMatMul :
269         {
270             auto batchMatMulQueueDescriptor = PolymorphicDowncast<const BatchMatMulQueueDescriptor*>(&descriptor);
271             return std::make_unique<ClBatchMatMulWorkload>(*batchMatMulQueueDescriptor, info, m_CLCompileContext);
272         }
273         case LayerType::BatchNormalization :
274         {
275             auto batchNormalizationQueueDescriptor
276                     = PolymorphicDowncast<const BatchNormalizationQueueDescriptor*>(&descriptor);
277             return MakeWorkload<ClBatchNormalizationFloatWorkload, NullWorkload>
278                     (*batchNormalizationQueueDescriptor, info, m_CLCompileContext);
279         }
280         case LayerType::BatchToSpaceNd :
281         {
282             auto batchToSpaceNdQueueDescriptor
283                     = PolymorphicDowncast<const BatchToSpaceNdQueueDescriptor*>(&descriptor);
284             return MakeWorkload<ClBatchToSpaceNdWorkload>(*batchToSpaceNdQueueDescriptor, info, m_CLCompileContext);
285         }
286         case LayerType::Cast :
287         {
288             auto castQueueDescriptor = PolymorphicDowncast<const CastQueueDescriptor*>(&descriptor);
289             return MakeWorkload<ClCastWorkload>(*castQueueDescriptor, info, m_CLCompileContext);
290         }
291         case LayerType::ChannelShuffle :
292         {
293             auto channelShuffleQueueDescriptor
294                     = PolymorphicDowncast<const ChannelShuffleQueueDescriptor*>(&descriptor);
295             return MakeWorkload<ClChannelShuffleWorkload>(*channelShuffleQueueDescriptor, info, m_CLCompileContext);
296         }
297         case LayerType::Comparison :
298         {
299             auto comparisonQueueDescriptor = PolymorphicDowncast<const ComparisonQueueDescriptor*>(&descriptor);
300             return MakeWorkload<ClComparisonWorkload>(*comparisonQueueDescriptor, info, m_CLCompileContext);
301         }
302         case LayerType::Concat :
303         {
304             auto concatQueueDescriptor = PolymorphicDowncast<const ConcatQueueDescriptor*>(&descriptor);
305             return MakeWorkload<ClConcatWorkload>(*concatQueueDescriptor, info, m_CLCompileContext);
306         }
307         case LayerType::Constant :
308         {
309             auto constantQueueDescriptor = PolymorphicDowncast<const ConstantQueueDescriptor*>(&descriptor);
310             return MakeWorkload<ClConstantWorkload>(*constantQueueDescriptor, info, m_CLCompileContext);
311         }
312         case LayerType::ConvertFp16ToFp32 :
313         {
314             auto convertFp16ToFp32QueueDescriptor
315                     = PolymorphicDowncast<const ConvertFp16ToFp32QueueDescriptor*>(&descriptor);
316             return MakeWorkload<ClConvertFp16ToFp32Workload>(*convertFp16ToFp32QueueDescriptor,
317                                                              info,
318                                                              m_CLCompileContext);
319         }
320         case LayerType::ConvertFp32ToFp16 :
321         {
322             auto convertFp32ToFp16QueueDescriptor
323                     = PolymorphicDowncast<const ConvertFp32ToFp16QueueDescriptor*>(&descriptor);
324             return MakeWorkload<ClConvertFp32ToFp16Workload>(*convertFp32ToFp16QueueDescriptor,
325                                                              info,
326                                                              m_CLCompileContext);
327         }
328         case LayerType::Convolution2d :
329         {
330             auto convolution2dQueueDescriptor = PolymorphicDowncast<const Convolution2dQueueDescriptor*>(&descriptor);
331 
332             bool isFastMathEnabled = false;
333             if (m_ModelContextPtr)
334             {
335                 if (m_ModelContextPtr.get() != nullptr)
336                 {
337                     auto modelOptions = dynamic_cast<ClBackendModelContext*>(m_ModelContextPtr.get());
338                     if (modelOptions)
339                     {
340                         isFastMathEnabled = modelOptions->IsFastMathEnabled();
341                     }
342                 }
343             }
344             return MakeWorkload<ClConvolution2dWorkload>(*convolution2dQueueDescriptor,
345                                                          info,
346                                                          m_MemoryManager->GetIntraLayerManager(),
347                                                          m_CLCompileContext,
348                                                          isFastMathEnabled);
349         }
350         case LayerType::Convolution3d :
351         {
352             auto convolution3dQueueDescriptor = PolymorphicDowncast<const Convolution3dQueueDescriptor*>(&descriptor);
353 
354             bool isFastMathEnabled = false;
355             if (m_ModelContextPtr)
356             {
357                 if (m_ModelContextPtr.get() != nullptr)
358                 {
359                     auto modelOptions = dynamic_cast<ClBackendModelContext*>(m_ModelContextPtr.get());
360                     if (modelOptions)
361                     {
362                         isFastMathEnabled = modelOptions->IsFastMathEnabled();
363                     }
364                 }
365             }
366             return MakeWorkload<ClConvolution3dWorkload>(*convolution3dQueueDescriptor,
367                                                          info,
368                                                          m_MemoryManager->GetIntraLayerManager(),
369                                                          m_CLCompileContext,
370                                                          isFastMathEnabled);
371         }
372         case LayerType::Debug :
373         {
374             auto debugQueueDescriptor = PolymorphicDowncast<const DebugQueueDescriptor*>(&descriptor);
375             return MakeWorkload<NullWorkload, NullWorkload>(*debugQueueDescriptor, info, m_CLCompileContext);
376         }
377         case LayerType::DepthToSpace :
378         {
379             auto depthToSpaceQueueDescriptor = PolymorphicDowncast<const DepthToSpaceQueueDescriptor*>(&descriptor);
380             return MakeWorkload<ClDepthToSpaceWorkload>(*depthToSpaceQueueDescriptor, info, m_CLCompileContext);
381         }
382         case LayerType::DepthwiseConvolution2d :
383         {
384             auto depthwiseConvolution2dQueueDescriptor
385                     = PolymorphicDowncast<const DepthwiseConvolution2dQueueDescriptor*>(&descriptor);
386             return MakeWorkload<ClDepthwiseConvolutionWorkload>(*depthwiseConvolution2dQueueDescriptor,
387                                                                 info,
388                                                                 m_CLCompileContext);
389         }
390         case LayerType::Dequantize :
391         {
392             auto dequantizeQueueDescriptor = PolymorphicDowncast<const DequantizeQueueDescriptor*>(&descriptor);
393             return MakeWorkload<ClDequantizeWorkload>(*dequantizeQueueDescriptor, info, m_CLCompileContext);
394         }
395         case LayerType::DetectionPostProcess :
396         {
397             auto detectionPostProcessQueueDescriptor
398                     = PolymorphicDowncast<const DetectionPostProcessQueueDescriptor*>(&descriptor);
399             return MakeWorkload<NullWorkload, NullWorkload>(*detectionPostProcessQueueDescriptor,
400                                                             info,
401                                                             m_CLCompileContext);
402         }
403         case LayerType::Division :
404         {
405             auto divisionQueueDescriptor = PolymorphicDowncast<const DivisionQueueDescriptor*>(&descriptor);
406             return std::make_unique<ClDivisionWorkload>(*divisionQueueDescriptor, info, m_CLCompileContext);
407         }
408         case LayerType::ElementwiseBinary :
409         {
410             auto elementwiseBinaryQueueDescriptor
411                     = PolymorphicDowncast<const ElementwiseBinaryQueueDescriptor*>(&descriptor);
412 
413             switch (elementwiseBinaryQueueDescriptor->m_Parameters.m_Operation)
414             {
415                 case BinaryOperation::Add:
416                 {
417                     AdditionQueueDescriptor additionQueueDescriptor;
418                     additionQueueDescriptor.m_Inputs = descriptor.m_Inputs;
419                     additionQueueDescriptor.m_Outputs = descriptor.m_Outputs;
420                     additionQueueDescriptor.m_AdditionalInfoObject =
421                             elementwiseBinaryQueueDescriptor->m_AdditionalInfoObject;
422                     return std::make_unique<ClAdditionWorkload>(additionQueueDescriptor, info, m_CLCompileContext);
423                 }
424                 case BinaryOperation::Div:
425                 {
426                     DivisionQueueDescriptor divisionQueueDescriptor;
427                     divisionQueueDescriptor.m_Inputs = descriptor.m_Inputs;
428                     divisionQueueDescriptor.m_Outputs = descriptor.m_Outputs;
429                     divisionQueueDescriptor.m_AdditionalInfoObject =
430                             elementwiseBinaryQueueDescriptor->m_AdditionalInfoObject;
431                     return std::make_unique<ClDivisionWorkload>(divisionQueueDescriptor, info, m_CLCompileContext);
432                 }
433                 case BinaryOperation::Maximum:
434                 {
435                     MaximumQueueDescriptor maximumQueueDescriptor;
436                     maximumQueueDescriptor.m_Inputs = descriptor.m_Inputs;
437                     maximumQueueDescriptor.m_Outputs = descriptor.m_Outputs;
438                     maximumQueueDescriptor.m_AdditionalInfoObject =
439                             elementwiseBinaryQueueDescriptor->m_AdditionalInfoObject;
440                     return std::make_unique<ClMaximumWorkload>(maximumQueueDescriptor, info, m_CLCompileContext);
441                 }
442                 case BinaryOperation::Minimum:
443                 {
444                     MinimumQueueDescriptor minimumQueueDescriptor;
445                     minimumQueueDescriptor.m_Inputs = descriptor.m_Inputs;
446                     minimumQueueDescriptor.m_Outputs = descriptor.m_Outputs;
447                     minimumQueueDescriptor.m_AdditionalInfoObject =
448                             elementwiseBinaryQueueDescriptor->m_AdditionalInfoObject;
449                     return std::make_unique<ClMinimumWorkload>(minimumQueueDescriptor, info, m_CLCompileContext);
450                 }
451                 case BinaryOperation::Mul:
452                 {
453                     MultiplicationQueueDescriptor multiplicationQueueDescriptor;
454                     multiplicationQueueDescriptor.m_Inputs = descriptor.m_Inputs;
455                     multiplicationQueueDescriptor.m_Outputs = descriptor.m_Outputs;
456                     multiplicationQueueDescriptor.m_AdditionalInfoObject =
457                             elementwiseBinaryQueueDescriptor->m_AdditionalInfoObject;
458                     return std::make_unique<ClMultiplicationWorkload>(multiplicationQueueDescriptor,
459                                                                       info,
460                                                                       m_CLCompileContext);
461                 }
462                 case BinaryOperation::Sub:
463                 {
464                     SubtractionQueueDescriptor subtractionQueueDescriptor;
465                     subtractionQueueDescriptor.m_Inputs = descriptor.m_Inputs;
466                     subtractionQueueDescriptor.m_Outputs = descriptor.m_Outputs;
467                     subtractionQueueDescriptor.m_AdditionalInfoObject =
468                             elementwiseBinaryQueueDescriptor->m_AdditionalInfoObject;
469                     return std::make_unique<ClSubtractionWorkload>(subtractionQueueDescriptor,
470                                                                    info,
471                                                                    m_CLCompileContext);
472                 }
473                 default:
474                     return nullptr;
475             }
476         }
477         case LayerType::ElementwiseUnary :
478         {
479             auto elementwiseUnaryQueueDescriptor
480                     = PolymorphicDowncast<const ElementwiseUnaryQueueDescriptor*>(&descriptor);
481 
482             switch(elementwiseUnaryQueueDescriptor->m_Parameters.m_Operation)
483             {
484                 case UnaryOperation::Abs:
485                 {
486                     AbsQueueDescriptor absQueueDescriptor;
487                     absQueueDescriptor.m_Inputs  = elementwiseUnaryQueueDescriptor->m_Inputs;
488                     absQueueDescriptor.m_Outputs = elementwiseUnaryQueueDescriptor->m_Outputs;
489 
490                     return  std::make_unique<ClAbsWorkload>(absQueueDescriptor, info, m_CLCompileContext);
491                 }
492                 case UnaryOperation::Exp:
493                     return std::make_unique<ClExpWorkload>(*elementwiseUnaryQueueDescriptor, info, m_CLCompileContext);
494                 case UnaryOperation::Log:
495                     return std::make_unique<ClLogWorkload>(*elementwiseUnaryQueueDescriptor, info, m_CLCompileContext);
496                 case UnaryOperation::LogicalNot:
497                     return std::make_unique<ClLogicalNotWorkload>(*elementwiseUnaryQueueDescriptor,
498                                                                   info,
499                                                                   m_CLCompileContext);
500                 case UnaryOperation::Neg:
501                     return std::make_unique<ClNegWorkload>(*elementwiseUnaryQueueDescriptor, info, m_CLCompileContext);
502                 case UnaryOperation::Rsqrt:
503                 {
504                     RsqrtQueueDescriptor rsqrtQueueDescriptor;
505                     rsqrtQueueDescriptor.m_Inputs  = elementwiseUnaryQueueDescriptor->m_Inputs;
506                     rsqrtQueueDescriptor.m_Outputs = elementwiseUnaryQueueDescriptor->m_Outputs;
507 
508                     return std::make_unique<ClRsqrtWorkload>(rsqrtQueueDescriptor, info, m_CLCompileContext);
509                 }
510                 case UnaryOperation::Sin:
511                     return std::make_unique<ClSinWorkload>(*elementwiseUnaryQueueDescriptor, info, m_CLCompileContext);
512                 case UnaryOperation::Sqrt:
513                     return std::make_unique<ClSqrtWorkload>(*elementwiseUnaryQueueDescriptor, info, m_CLCompileContext);
514                 default:
515                     return nullptr;
516             }
517         }
518         case LayerType::Fill :
519         {
520             auto fillQueueDescriptor = PolymorphicDowncast<const FillQueueDescriptor*>(&descriptor);
521             return std::make_unique<ClFillWorkload>(*fillQueueDescriptor, info, m_CLCompileContext);
522         }
523         case LayerType::Floor :
524         {
525             auto floorQueueDescriptor = PolymorphicDowncast<const FloorQueueDescriptor*>(&descriptor);
526             return MakeWorkload<ClFloorFloatWorkload, NullWorkload>(*floorQueueDescriptor, info, m_CLCompileContext);
527         }
528         case LayerType::FullyConnected :
529         {
530             auto fullyConnectedQueueDescriptor
531                     = PolymorphicDowncast<const FullyConnectedQueueDescriptor*>(&descriptor);
532             return MakeWorkload<ClFullyConnectedWorkload>(*fullyConnectedQueueDescriptor,
533                                                           info,
534                                                           m_MemoryManager->GetIntraLayerManager(),
535                                                           m_CLCompileContext);
536         }
537         case LayerType::Gather :
538         {
539             auto gatherQueueDescriptor = PolymorphicDowncast<const GatherQueueDescriptor*>(&descriptor);
540             return MakeWorkload<ClGatherWorkload>(*gatherQueueDescriptor, info, m_CLCompileContext);
541         }
542         case LayerType::GatherNd :
543         {
544             auto gatherNdQueueDescriptor = PolymorphicDowncast<const GatherNdQueueDescriptor*>(&descriptor);
545             return MakeWorkload<ClGatherNdWorkload>(*gatherNdQueueDescriptor, info, m_CLCompileContext);
546         }
547         case LayerType::Input :
548         {
549             auto inputQueueDescriptor = PolymorphicDowncast<const InputQueueDescriptor*>(&descriptor);
550             return std::make_unique<CopyMemGenericWorkload>(*inputQueueDescriptor, info);
551         }
552         case LayerType::InstanceNormalization :
553         {
554             auto instanceNormalizationQueueDescriptor
555                     = PolymorphicDowncast<const InstanceNormalizationQueueDescriptor*>(&descriptor);
556             return MakeWorkload<ClInstanceNormalizationWorkload>(*instanceNormalizationQueueDescriptor,
557                                                                  info,
558                                                                  m_CLCompileContext);
559         }
560         case LayerType::L2Normalization :
561         {
562             auto l2NormalizationQueueDescriptor
563                     = PolymorphicDowncast<const L2NormalizationQueueDescriptor*>(&descriptor);
564             return MakeWorkload<ClL2NormalizationFloatWorkload, NullWorkload>(*l2NormalizationQueueDescriptor,
565                                                                               info,
566                                                                               m_CLCompileContext);
567         }
568         case LayerType::LogicalBinary :
569         {
570             auto logicalBinaryQueueDescriptor = PolymorphicDowncast<const LogicalBinaryQueueDescriptor*>(&descriptor);
571 
572             switch(logicalBinaryQueueDescriptor->m_Parameters.m_Operation)
573             {
574                 case LogicalBinaryOperation::LogicalAnd:
575                     return std::make_unique<ClLogicalAndWorkload>(*logicalBinaryQueueDescriptor,
576                                                                   info,
577                                                                   m_CLCompileContext);
578                 case LogicalBinaryOperation::LogicalOr:
579                     return std::make_unique<ClLogicalOrWorkload>(*logicalBinaryQueueDescriptor,
580                                                                  info,
581                                                                  m_CLCompileContext);
582                 default:
583                     return nullptr;
584             }
585         }
586         case LayerType::LogSoftmax :
587         {
588             auto logSoftmaxQueueDescriptor = PolymorphicDowncast<const LogSoftmaxQueueDescriptor*>(&descriptor);
589 
590             return MakeWorkload<ClLogSoftmaxWorkload>(*logSoftmaxQueueDescriptor,
591                                                       info,
592                                                       m_MemoryManager->GetIntraLayerManager(),
593                                                       m_CLCompileContext);
594         }
595         case LayerType::Lstm :
596         {
597             auto lstmQueueDescriptor = PolymorphicDowncast<const LstmQueueDescriptor*>(&descriptor);
598             return MakeWorkload<ClLstmFloatWorkload, NullWorkload>(*lstmQueueDescriptor, info, m_CLCompileContext);
599         }
600         case LayerType::Maximum :
601         {
602             auto maximumQueueDescriptor = PolymorphicDowncast<const MaximumQueueDescriptor*>(&descriptor);
603             return MakeWorkload<ClMaximumWorkload>(*maximumQueueDescriptor, info, m_CLCompileContext);
604         }
605         case LayerType::Mean :
606         {
607             auto meanQueueDescriptor = PolymorphicDowncast<const MeanQueueDescriptor*>(&descriptor);
608             return MakeWorkload<ClMeanWorkload>(*meanQueueDescriptor, info, m_CLCompileContext);
609         }
610         case LayerType::MemCopy :
611         {
612             auto memCopyQueueDescriptor = PolymorphicDowncast<const MemCopyQueueDescriptor*>(&descriptor);
613             if (memCopyQueueDescriptor->m_Inputs.empty() || !memCopyQueueDescriptor->m_Inputs[0])
614             {
615                 throw InvalidArgumentException("ClWorkloadFactory: Invalid null input for MemCopy workload");
616             }
617             return MakeWorkload<CopyMemGenericWorkload>(*memCopyQueueDescriptor, info);
618         }
619         case LayerType::MemImport :
620         {
621             auto memImportQueueDescriptor = PolymorphicDowncast<const MemImportQueueDescriptor*>(&descriptor);
622             if (memImportQueueDescriptor->m_Inputs.empty() || !memImportQueueDescriptor->m_Inputs[0])
623             {
624                 throw InvalidArgumentException("ClWorkloadFactory: Invalid null input for MemImport workload");
625             }
626             return std::make_unique<ImportMemGenericWorkload>(*memImportQueueDescriptor, info);
627         }
628         case LayerType::Minimum :
629         {
630             auto minimumQueueDescriptor = PolymorphicDowncast<const MinimumQueueDescriptor*>(&descriptor);
631             return MakeWorkload<ClMinimumWorkload>(*minimumQueueDescriptor, info, m_CLCompileContext);
632         }
633         case LayerType::Multiplication :
634         {
635             auto multiplicationQueueDescriptor = PolymorphicDowncast<const MultiplicationQueueDescriptor*>(&descriptor);
636             return MakeWorkload<ClMultiplicationWorkload>(*multiplicationQueueDescriptor, info, m_CLCompileContext);
637         }
638         case LayerType::Normalization :
639         {
640             auto normalizationQueueDescriptor = PolymorphicDowncast<const NormalizationQueueDescriptor*>(&descriptor);
641             return MakeWorkload<ClNormalizationFloatWorkload, NullWorkload>(*normalizationQueueDescriptor,
642                                                                             info,
643                                                                             m_CLCompileContext);
644         }
645         case LayerType::Output :
646         {
647             auto outputQueueDescriptor = PolymorphicDowncast<const OutputQueueDescriptor*>(&descriptor);
648             return std::make_unique<CopyMemGenericWorkload>(*outputQueueDescriptor, info);
649         }
650         case LayerType::Pad :
651         {
652             auto padQueueDescriptor = PolymorphicDowncast<const PadQueueDescriptor*>(&descriptor);
653             return MakeWorkload<ClPadWorkload>(*padQueueDescriptor, info, m_CLCompileContext);
654         }
655         case LayerType::Permute :
656         {
657             auto permuteQueueDescriptor = PolymorphicDowncast<const PermuteQueueDescriptor*>(&descriptor);
658             return MakeWorkload<ClPermuteWorkload>(*permuteQueueDescriptor, info, m_CLCompileContext);
659         }
660         case LayerType::Pooling2d :
661         {
662             auto pooling2dQueueDescriptor = PolymorphicDowncast<const Pooling2dQueueDescriptor*>(&descriptor);
663             return MakeWorkload<ClPooling2dWorkload>(*pooling2dQueueDescriptor, info, m_CLCompileContext);
664         }
665         case LayerType::Pooling3d :
666         {
667             auto pooling3dQueueDescriptor = PolymorphicDowncast<const Pooling3dQueueDescriptor*>(&descriptor);
668             return MakeWorkload<ClPooling3dWorkload>(*pooling3dQueueDescriptor, info, m_CLCompileContext);
669         }
670         case LayerType::PreCompiled :
671         {
672             auto preCompiledQueueDescriptor = PolymorphicDowncast<const PreCompiledQueueDescriptor*>(&descriptor);
673             return MakeWorkload<NullWorkload, NullWorkload>(*preCompiledQueueDescriptor, info, m_CLCompileContext);
674         }
675         case LayerType::Prelu :
676         {
677             auto preluQueueDescriptor = PolymorphicDowncast<const PreluQueueDescriptor*>(&descriptor);
678             return MakeWorkload<ClPreluWorkload>(*preluQueueDescriptor, info, m_CLCompileContext);
679         }
680         case LayerType::QLstm :
681         {
682             auto qLstmQueueDescriptor = PolymorphicDowncast<const QLstmQueueDescriptor*>(&descriptor);
683             return std::make_unique<ClQLstmWorkload>(*qLstmQueueDescriptor, info, m_CLCompileContext);
684         }
685         case LayerType::Quantize :
686         {
687             auto quantizeQueueDescriptor = PolymorphicDowncast<const QuantizeQueueDescriptor*>(&descriptor);
688             return MakeWorkload<ClQuantizeWorkload>(*quantizeQueueDescriptor, info, m_CLCompileContext);
689         }
690         case LayerType::QuantizedLstm :
691         {
692             auto quantizedLstmQueueDescriptor = PolymorphicDowncast<const QuantizedLstmQueueDescriptor*>(&descriptor);
693             return MakeWorkload<ClQuantizedLstmWorkload>(*quantizedLstmQueueDescriptor, info, m_CLCompileContext);
694         }
695         case LayerType::Rank :
696         {
697             auto rankQueueDescriptor = PolymorphicDowncast<const RankQueueDescriptor*>(&descriptor);
698             return std::make_unique<ClRankWorkload>(*rankQueueDescriptor, info);
699         }
700         case LayerType::Reduce :
701         {
702             auto reduceQueueDescriptor = PolymorphicDowncast<const ReduceQueueDescriptor*>(&descriptor);
703             return std::make_unique<ClReduceWorkload>(*reduceQueueDescriptor, info);
704         }
705         case LayerType::Reshape :
706         {
707             auto reshapeQueueDescriptor = PolymorphicDowncast<const ReshapeQueueDescriptor*>(&descriptor);
708             return MakeWorkload<ClReshapeWorkload>(*reshapeQueueDescriptor, info, m_CLCompileContext);
709         }
710         case LayerType::Resize :
711         {
712             auto resizeQueueDescriptor = PolymorphicDowncast<const ResizeQueueDescriptor*>(&descriptor);
713             return MakeWorkload<ClResizeWorkload>(*resizeQueueDescriptor, info, m_CLCompileContext);
714         }
715         case LayerType::Slice :
716         {
717             auto sliceQueueDescriptor = PolymorphicDowncast<const SliceQueueDescriptor*>(&descriptor);
718             return MakeWorkload<ClSliceWorkload>(*sliceQueueDescriptor, info, m_CLCompileContext);
719         }
720         case LayerType::Softmax :
721         {
722             auto softmaxQueueDescriptor = PolymorphicDowncast<const SoftmaxQueueDescriptor*>(&descriptor);
723             return std::make_unique<ClSoftmaxWorkload>(*softmaxQueueDescriptor,
724                                                        info,
725                                                        m_MemoryManager->GetIntraLayerManager(),
726                                                        m_CLCompileContext);
727         }
728         case LayerType::SpaceToBatchNd :
729         {
730             auto spaceToBatchNdQueueDescriptor
731                     = PolymorphicDowncast<const SpaceToBatchNdQueueDescriptor*>(&descriptor);
732             return MakeWorkload<ClSpaceToBatchNdWorkload>(*spaceToBatchNdQueueDescriptor, info, m_CLCompileContext);
733         }
734         case LayerType::SpaceToDepth :
735         {
736             auto spaceToDepthQueueDescriptor = PolymorphicDowncast<const SpaceToDepthQueueDescriptor*>(&descriptor);
737             return MakeWorkload<ClSpaceToDepthWorkload>(*spaceToDepthQueueDescriptor, info, m_CLCompileContext);
738         }
739         case LayerType::Splitter :
740         {
741             auto splitterQueueDescriptor = PolymorphicDowncast<const SplitterQueueDescriptor*>(&descriptor);
742             return MakeWorkload<ClSplitterWorkload>(*splitterQueueDescriptor, info, m_CLCompileContext);
743         }
744         case LayerType::Stack :
745         {
746             auto stackQueueDescriptor = PolymorphicDowncast<const StackQueueDescriptor*>(&descriptor);
747             return MakeWorkload<ClStackWorkload>(*stackQueueDescriptor, info, m_CLCompileContext);
748         }
749         case LayerType::StridedSlice :
750         {
751             auto stridedSliceQueueDescriptor = PolymorphicDowncast<const StridedSliceQueueDescriptor*>(&descriptor);
752             return MakeWorkload<ClStridedSliceWorkload>(*stridedSliceQueueDescriptor, info, m_CLCompileContext);
753         }
754         case LayerType::Subtraction :
755         {
756             auto subtractionQueueDescriptor = PolymorphicDowncast<const SubtractionQueueDescriptor*>(&descriptor);
757             return MakeWorkload<ClSubtractionWorkload>(*subtractionQueueDescriptor, info, m_CLCompileContext);
758         }
759         case LayerType::Transpose :
760         {
761             auto transposeQueueDescriptor = PolymorphicDowncast<const TransposeQueueDescriptor*>(&descriptor);
762             return MakeWorkload<ClTransposeWorkload>(*transposeQueueDescriptor, info, m_CLCompileContext);
763         }
764         case LayerType::TransposeConvolution2d :
765         {
766             auto transposeConvolution2dQueueDescriptor
767                     = PolymorphicDowncast<const TransposeConvolution2dQueueDescriptor*>(&descriptor);
768             return MakeWorkload<ClTransposeConvolution2dWorkload>(*transposeConvolution2dQueueDescriptor,
769                                                                   info,
770                                                                   m_MemoryManager->GetIntraLayerManager(),
771                                                                   m_CLCompileContext);
772         }
773         case LayerType::UnidirectionalSequenceLstm :
774         {
775             auto desc = PolymorphicDowncast<const UnidirectionalSequenceLstmQueueDescriptor*>(&descriptor);
776             return MakeWorkloadHelper<ClUnidirectionalSequenceLstmFloatWorkload, NullWorkload>(*desc,
777                                                                                                info,
778                                                                                                m_CLCompileContext);
779         }
780         default:
781             return nullptr;
782     }
783 }
784 
CreateActivation(const ActivationQueueDescriptor & descriptor,const WorkloadInfo & info) const785 std::unique_ptr<IWorkload> ClWorkloadFactory::CreateActivation(const ActivationQueueDescriptor& descriptor,
786                                                                const WorkloadInfo& info) const
787 {
788     return MakeWorkload<ClActivationWorkload>(descriptor, info, m_CLCompileContext);
789 }
790 
CreateAddition(const AdditionQueueDescriptor & descriptor,const WorkloadInfo & info) const791 std::unique_ptr<IWorkload> ClWorkloadFactory::CreateAddition(const AdditionQueueDescriptor& descriptor,
792                                                              const WorkloadInfo& info) const
793 {
794     return MakeWorkload<ClAdditionWorkload>(descriptor, info, m_CLCompileContext);
795 }
796 
CreateArgMinMax(const ArgMinMaxQueueDescriptor & descriptor,const WorkloadInfo & info) const797 std::unique_ptr<IWorkload> ClWorkloadFactory::CreateArgMinMax(const ArgMinMaxQueueDescriptor& descriptor,
798                                                               const WorkloadInfo& info) const
799 {
800     return std::make_unique<ClArgMinMaxWorkload>(descriptor, info, m_CLCompileContext);
801 }
802 
CreateBatchNormalization(const BatchNormalizationQueueDescriptor & descriptor,const WorkloadInfo & info) const803 std::unique_ptr<IWorkload> ClWorkloadFactory::CreateBatchNormalization(
804     const BatchNormalizationQueueDescriptor& descriptor,
805     const WorkloadInfo& info) const
806 {
807     return MakeWorkload<ClBatchNormalizationFloatWorkload, NullWorkload>(descriptor, info, m_CLCompileContext);
808 }
809 
CreateBatchToSpaceNd(const BatchToSpaceNdQueueDescriptor & descriptor,const WorkloadInfo & info) const810 std::unique_ptr<IWorkload> ClWorkloadFactory::CreateBatchToSpaceNd(const BatchToSpaceNdQueueDescriptor& descriptor,
811                                                                    const WorkloadInfo& info) const
812 {
813     return MakeWorkload<ClBatchToSpaceNdWorkload>(descriptor, info, m_CLCompileContext);
814 }
815 
CreateCast(const CastQueueDescriptor & descriptor,const WorkloadInfo & info) const816 std::unique_ptr<IWorkload> ClWorkloadFactory::CreateCast(const CastQueueDescriptor& descriptor,
817                                                          const WorkloadInfo& info) const
818 {
819     return MakeWorkload<ClCastWorkload>(descriptor, info, m_CLCompileContext);
820 }
821 
CreateChannelShuffle(const ChannelShuffleQueueDescriptor & descriptor,const WorkloadInfo & info) const822 std::unique_ptr<IWorkload> ClWorkloadFactory::CreateChannelShuffle(const ChannelShuffleQueueDescriptor& descriptor,
823                                                                    const WorkloadInfo& info) const
824 {
825     return MakeWorkload<ClChannelShuffleWorkload>(descriptor, info, m_CLCompileContext);
826 }
827 
CreateComparison(const ComparisonQueueDescriptor & descriptor,const WorkloadInfo & info) const828 std::unique_ptr<IWorkload> ClWorkloadFactory::CreateComparison(const ComparisonQueueDescriptor& descriptor,
829                                                                const WorkloadInfo& info) const
830 {
831     return MakeWorkload<ClComparisonWorkload>(descriptor, info, m_CLCompileContext);
832 }
833 
CreateConcat(const ConcatQueueDescriptor & descriptor,const WorkloadInfo & info) const834 std::unique_ptr<IWorkload> ClWorkloadFactory::CreateConcat(const ConcatQueueDescriptor& descriptor,
835                                                            const WorkloadInfo& info) const
836 {
837     return MakeWorkload<ClConcatWorkload>(descriptor, info, m_CLCompileContext);
838 }
839 
CreateConstant(const ConstantQueueDescriptor & descriptor,const WorkloadInfo & info) const840 std::unique_ptr<IWorkload> ClWorkloadFactory::CreateConstant(const ConstantQueueDescriptor& descriptor,
841                                                              const WorkloadInfo& info) const
842 {
843     return MakeWorkload<ClConstantWorkload>(descriptor, info, m_CLCompileContext);
844 }
845 
CreateConvertFp16ToFp32(const ConvertFp16ToFp32QueueDescriptor & descriptor,const WorkloadInfo & info) const846 std::unique_ptr<IWorkload> ClWorkloadFactory::CreateConvertFp16ToFp32(
847     const ConvertFp16ToFp32QueueDescriptor& descriptor,
848     const WorkloadInfo& info) const
849 {
850     return MakeWorkload<ClConvertFp16ToFp32Workload>(descriptor, info, m_CLCompileContext);
851 }
852 
CreateConvertFp32ToFp16(const ConvertFp32ToFp16QueueDescriptor & descriptor,const WorkloadInfo & info) const853 std::unique_ptr<IWorkload> ClWorkloadFactory::CreateConvertFp32ToFp16(
854     const ConvertFp32ToFp16QueueDescriptor& descriptor,
855     const WorkloadInfo& info) const
856 {
857     return MakeWorkload<ClConvertFp32ToFp16Workload>(descriptor, info, m_CLCompileContext);
858 }
859 
CreateConvolution2d(const Convolution2dQueueDescriptor & descriptor,const WorkloadInfo & info) const860 std::unique_ptr<IWorkload> ClWorkloadFactory::CreateConvolution2d(const Convolution2dQueueDescriptor& descriptor,
861                                                                   const WorkloadInfo& info) const
862 {
863     bool isFastMathEnabled = false;
864     if (m_ModelContextPtr)
865     {
866         if (m_ModelContextPtr.get() != nullptr)
867         {
868             auto modelOptions = dynamic_cast<ClBackendModelContext*>(m_ModelContextPtr.get());
869             if (modelOptions)
870             {
871                 isFastMathEnabled = modelOptions->IsFastMathEnabled();
872             }
873         }
874     }
875     return MakeWorkload<ClConvolution2dWorkload>(descriptor,
876                                                  info,
877                                                  m_MemoryManager->GetIntraLayerManager(),
878                                                  m_CLCompileContext,
879                                                  isFastMathEnabled);
880 }
881 
CreateConvolution3d(const Convolution3dQueueDescriptor & descriptor,const WorkloadInfo & info) const882 std::unique_ptr<IWorkload> ClWorkloadFactory::CreateConvolution3d(const Convolution3dQueueDescriptor& descriptor,
883                                                                   const WorkloadInfo& info) const
884 {
885     bool isFastMathEnabled = false;
886     if (m_ModelContextPtr)
887     {
888         if (m_ModelContextPtr.get() != nullptr)
889         {
890             auto modelOptions = dynamic_cast<ClBackendModelContext*>(m_ModelContextPtr.get());
891             if (modelOptions)
892             {
893                 isFastMathEnabled = modelOptions->IsFastMathEnabled();
894             }
895         }
896     }
897     return MakeWorkload<ClConvolution3dWorkload>(descriptor,
898                                                  info,
899                                                  m_MemoryManager->GetIntraLayerManager(),
900                                                  m_CLCompileContext,
901                                                  isFastMathEnabled);
902 }
903 
CreateDebug(const DebugQueueDescriptor & descriptor,const WorkloadInfo & info) const904 std::unique_ptr<IWorkload> ClWorkloadFactory::CreateDebug(const DebugQueueDescriptor& descriptor,
905                                                           const WorkloadInfo& info) const
906 {
907     return MakeWorkload<NullWorkload, NullWorkload>(descriptor, info, m_CLCompileContext);
908 }
909 
CreateDepthToSpace(const DepthToSpaceQueueDescriptor & descriptor,const WorkloadInfo & info) const910 std::unique_ptr<IWorkload> ClWorkloadFactory::CreateDepthToSpace(const DepthToSpaceQueueDescriptor& descriptor,
911                                                                  const WorkloadInfo& info) const
912 {
913     return MakeWorkload<ClDepthToSpaceWorkload>(descriptor, info, m_CLCompileContext);
914 }
915 
CreateDepthwiseConvolution2d(const DepthwiseConvolution2dQueueDescriptor & descriptor,const WorkloadInfo & info) const916 std::unique_ptr<IWorkload> ClWorkloadFactory::CreateDepthwiseConvolution2d(
917     const DepthwiseConvolution2dQueueDescriptor& descriptor,
918     const WorkloadInfo& info) const
919 {
920     return MakeWorkload<ClDepthwiseConvolutionWorkload>(descriptor, info, m_CLCompileContext);
921 }
922 
CreateDequantize(const DequantizeQueueDescriptor & descriptor,const WorkloadInfo & info) const923 std::unique_ptr<IWorkload> ClWorkloadFactory::CreateDequantize(const DequantizeQueueDescriptor& descriptor,
924                                                                const WorkloadInfo& info) const
925 {
926     return MakeWorkload<ClDequantizeWorkload>(descriptor, info, m_CLCompileContext);
927 }
928 
CreateDetectionPostProcess(const DetectionPostProcessQueueDescriptor & descriptor,const WorkloadInfo & info) const929 std::unique_ptr<IWorkload> ClWorkloadFactory::CreateDetectionPostProcess(
930     const DetectionPostProcessQueueDescriptor& descriptor,
931     const WorkloadInfo& info) const
932 {
933     return MakeWorkload<NullWorkload, NullWorkload>(descriptor, info, m_CLCompileContext);
934 }
935 
CreateDivision(const DivisionQueueDescriptor & descriptor,const WorkloadInfo & info) const936 std::unique_ptr<IWorkload> ClWorkloadFactory::CreateDivision(const DivisionQueueDescriptor& descriptor,
937                                                              const WorkloadInfo& info) const
938 {
939     return std::make_unique<ClDivisionWorkload>(descriptor, info, m_CLCompileContext);
940 }
941 
CreateElementwiseUnary(const ElementwiseUnaryQueueDescriptor & descriptor,const WorkloadInfo & info) const942 std::unique_ptr<IWorkload> ClWorkloadFactory::CreateElementwiseUnary(const ElementwiseUnaryQueueDescriptor& descriptor,
943                                                                      const WorkloadInfo& info) const
944 {
945     switch(descriptor.m_Parameters.m_Operation)
946     {
947         case UnaryOperation::Abs:
948         {
949             AbsQueueDescriptor absQueueDescriptor;
950             absQueueDescriptor.m_Inputs  = descriptor.m_Inputs;
951             absQueueDescriptor.m_Outputs = descriptor.m_Outputs;
952 
953             return  std::make_unique<ClAbsWorkload>(absQueueDescriptor, info, m_CLCompileContext);
954         }
955         case UnaryOperation::Exp:
956             return std::make_unique<ClExpWorkload>(descriptor, info, m_CLCompileContext);
957          case UnaryOperation::Log:
958             return std::make_unique<ClLogWorkload>(descriptor, info, m_CLCompileContext);
959         case UnaryOperation::LogicalNot:
960             return std::make_unique<ClLogicalNotWorkload>(descriptor, info, m_CLCompileContext);
961         case UnaryOperation::Neg:
962             return std::make_unique<ClNegWorkload>(descriptor, info, m_CLCompileContext);
963         case UnaryOperation::Rsqrt:
964         {
965             RsqrtQueueDescriptor rsqrtQueueDescriptor;
966             rsqrtQueueDescriptor.m_Inputs  = descriptor.m_Inputs;
967             rsqrtQueueDescriptor.m_Outputs = descriptor.m_Outputs;
968 
969             return std::make_unique<ClRsqrtWorkload>(rsqrtQueueDescriptor, info, m_CLCompileContext);
970         }
971         case UnaryOperation::Sin:
972             return std::make_unique<ClSinWorkload>(descriptor, info, m_CLCompileContext);
973         default:
974             return nullptr;
975     }
976 }
977 
CreateFill(const FillQueueDescriptor & descriptor,const WorkloadInfo & info) const978 std::unique_ptr<IWorkload> ClWorkloadFactory::CreateFill(const FillQueueDescriptor& descriptor,
979                                                          const WorkloadInfo& info) const
980 {
981     return std::make_unique<ClFillWorkload>(descriptor, info, m_CLCompileContext);
982 }
983 
CreateFloor(const FloorQueueDescriptor & descriptor,const WorkloadInfo & info) const984 std::unique_ptr<IWorkload> ClWorkloadFactory::CreateFloor(const FloorQueueDescriptor& descriptor,
985                                                           const WorkloadInfo& info) const
986 {
987     return MakeWorkload<ClFloorFloatWorkload, NullWorkload>(descriptor, info, m_CLCompileContext);
988 }
989 
CreateFullyConnected(const FullyConnectedQueueDescriptor & descriptor,const WorkloadInfo & info) const990 std::unique_ptr<IWorkload> ClWorkloadFactory::CreateFullyConnected(const FullyConnectedQueueDescriptor& descriptor,
991                                                                    const WorkloadInfo& info) const
992 {
993     return MakeWorkload<ClFullyConnectedWorkload>(descriptor,
994                                                   info,
995                                                   m_MemoryManager->GetIntraLayerManager(),
996                                                   m_CLCompileContext);
997 }
998 
CreateGather(const GatherQueueDescriptor & descriptor,const WorkloadInfo & info) const999 std::unique_ptr<IWorkload> ClWorkloadFactory::CreateGather(const GatherQueueDescriptor& descriptor,
1000                                                            const WorkloadInfo& info) const
1001 {
1002     return MakeWorkload<ClGatherWorkload>(descriptor, info, m_CLCompileContext);
1003 }
1004 
CreateInput(const InputQueueDescriptor & descriptor,const WorkloadInfo & info) const1005 std::unique_ptr<IWorkload> ClWorkloadFactory::CreateInput(const InputQueueDescriptor& descriptor,
1006                                                           const WorkloadInfo& info) const
1007 {
1008     return std::make_unique<CopyMemGenericWorkload>(descriptor, info);
1009 }
1010 
CreateInstanceNormalization(const InstanceNormalizationQueueDescriptor & descriptor,const WorkloadInfo & info) const1011 std::unique_ptr<IWorkload> ClWorkloadFactory::CreateInstanceNormalization(
1012     const InstanceNormalizationQueueDescriptor& descriptor,
1013     const WorkloadInfo& info) const
1014 {
1015     return MakeWorkload<ClInstanceNormalizationWorkload>(descriptor, info, m_CLCompileContext);
1016 }
1017 
CreateL2Normalization(const L2NormalizationQueueDescriptor & descriptor,const WorkloadInfo & info) const1018 std::unique_ptr<IWorkload> ClWorkloadFactory::CreateL2Normalization(const L2NormalizationQueueDescriptor& descriptor,
1019                                                                     const WorkloadInfo& info) const
1020 {
1021     return MakeWorkload<ClL2NormalizationFloatWorkload, NullWorkload>(descriptor, info, m_CLCompileContext);
1022 }
1023 
CreateLogicalBinary(const LogicalBinaryQueueDescriptor & descriptor,const WorkloadInfo & info) const1024 std::unique_ptr<IWorkload> ClWorkloadFactory::CreateLogicalBinary(const LogicalBinaryQueueDescriptor& descriptor,
1025                                                                   const WorkloadInfo& info) const
1026 {
1027     switch(descriptor.m_Parameters.m_Operation)
1028     {
1029         case LogicalBinaryOperation::LogicalAnd:
1030             return std::make_unique<ClLogicalAndWorkload>(descriptor, info, m_CLCompileContext);
1031         case LogicalBinaryOperation::LogicalOr:
1032             return std::make_unique<ClLogicalOrWorkload>(descriptor, info, m_CLCompileContext);
1033         default:
1034             return nullptr;
1035     }
1036 }
1037 
CreateLogSoftmax(const LogSoftmaxQueueDescriptor & descriptor,const WorkloadInfo & info) const1038 std::unique_ptr<IWorkload> ClWorkloadFactory::CreateLogSoftmax(const LogSoftmaxQueueDescriptor& descriptor,
1039                                                                const WorkloadInfo& info) const
1040 {
1041     return MakeWorkload<ClLogSoftmaxWorkload>(descriptor,
1042                                               info,
1043                                               m_MemoryManager->GetIntraLayerManager(),
1044                                               m_CLCompileContext);
1045 }
1046 
CreateLstm(const LstmQueueDescriptor & descriptor,const WorkloadInfo & info) const1047 std::unique_ptr<IWorkload> ClWorkloadFactory::CreateLstm(const LstmQueueDescriptor& descriptor,
1048                                                          const WorkloadInfo& info) const
1049 {
1050     return MakeWorkload<ClLstmFloatWorkload, NullWorkload>(descriptor, info, m_CLCompileContext);
1051 }
1052 
CreateMaximum(const MaximumQueueDescriptor & descriptor,const WorkloadInfo & info) const1053 std::unique_ptr<IWorkload> ClWorkloadFactory::CreateMaximum(const MaximumQueueDescriptor& descriptor,
1054                                                             const WorkloadInfo& info) const
1055 {
1056     return MakeWorkload<ClMaximumWorkload>(descriptor, info, m_CLCompileContext);
1057 }
1058 
CreateMean(const MeanQueueDescriptor & descriptor,const WorkloadInfo & info) const1059 std::unique_ptr<IWorkload> ClWorkloadFactory::CreateMean(const MeanQueueDescriptor& descriptor,
1060                                                          const WorkloadInfo& info) const
1061 {
1062     return MakeWorkload<ClMeanWorkload>(descriptor, info, m_CLCompileContext);
1063 }
1064 
CreateMemCopy(const MemCopyQueueDescriptor & descriptor,const WorkloadInfo & info) const1065 std::unique_ptr<IWorkload> ClWorkloadFactory::CreateMemCopy(const MemCopyQueueDescriptor& descriptor,
1066                                                             const WorkloadInfo& info) const
1067 {
1068     if (descriptor.m_Inputs.empty() || !descriptor.m_Inputs[0])
1069     {
1070         throw InvalidArgumentException("ClWorkloadFactory: Invalid null input for MemCopy workload");
1071     }
1072 
1073     return MakeWorkload<CopyMemGenericWorkload>(descriptor, info);
1074 }
1075 
CreateMemImport(const MemImportQueueDescriptor & descriptor,const WorkloadInfo & info) const1076 std::unique_ptr<IWorkload> ClWorkloadFactory::CreateMemImport(const MemImportQueueDescriptor& descriptor,
1077                                                               const WorkloadInfo& info) const
1078 {
1079     if (descriptor.m_Inputs.empty() || !descriptor.m_Inputs[0])
1080     {
1081         throw InvalidArgumentException("ClWorkloadFactory: Invalid null input for MemImport workload");
1082     }
1083 
1084     return std::make_unique<ImportMemGenericWorkload>(descriptor, info);
1085 }
1086 
CreateMinimum(const MinimumQueueDescriptor & descriptor,const WorkloadInfo & info) const1087 std::unique_ptr<IWorkload> ClWorkloadFactory::CreateMinimum(const MinimumQueueDescriptor& descriptor,
1088                                                             const WorkloadInfo& info) const
1089 {
1090     return MakeWorkload<ClMinimumWorkload>(descriptor, info, m_CLCompileContext);
1091 }
1092 
CreateMultiplication(const MultiplicationQueueDescriptor & descriptor,const WorkloadInfo & info) const1093 std::unique_ptr<IWorkload> ClWorkloadFactory::CreateMultiplication(const MultiplicationQueueDescriptor& descriptor,
1094                                                                    const WorkloadInfo& info) const
1095 {
1096     return MakeWorkload<ClMultiplicationWorkload>(descriptor, info, m_CLCompileContext);
1097 }
1098 
CreateNormalization(const NormalizationQueueDescriptor & descriptor,const WorkloadInfo & info) const1099 std::unique_ptr<IWorkload> ClWorkloadFactory::CreateNormalization(const NormalizationQueueDescriptor& descriptor,
1100                                                                   const WorkloadInfo& info) const
1101 {
1102     return MakeWorkload<ClNormalizationFloatWorkload, NullWorkload>(descriptor, info, m_CLCompileContext);
1103 }
1104 
CreateOutput(const OutputQueueDescriptor & descriptor,const WorkloadInfo & info) const1105 std::unique_ptr<IWorkload> ClWorkloadFactory::CreateOutput(const OutputQueueDescriptor& descriptor,
1106                                                            const WorkloadInfo& info) const
1107 {
1108     return std::make_unique<CopyMemGenericWorkload>(descriptor, info);
1109 }
1110 
CreatePad(const PadQueueDescriptor & descriptor,const WorkloadInfo & info) const1111 std::unique_ptr<IWorkload> ClWorkloadFactory::CreatePad(const PadQueueDescriptor& descriptor,
1112                                                         const WorkloadInfo& info) const
1113 {
1114     return MakeWorkload<ClPadWorkload>(descriptor, info, m_CLCompileContext);
1115 }
1116 
CreatePermute(const PermuteQueueDescriptor & descriptor,const WorkloadInfo & info) const1117 std::unique_ptr<IWorkload> ClWorkloadFactory::CreatePermute(const PermuteQueueDescriptor& descriptor,
1118                                                             const WorkloadInfo& info) const
1119 {
1120     return MakeWorkload<ClPermuteWorkload>(descriptor, info, m_CLCompileContext);
1121 }
1122 
CreatePooling2d(const Pooling2dQueueDescriptor & descriptor,const WorkloadInfo & info) const1123 std::unique_ptr<IWorkload> ClWorkloadFactory::CreatePooling2d(const Pooling2dQueueDescriptor& descriptor,
1124                                                               const WorkloadInfo& info) const
1125 {
1126     return MakeWorkload<ClPooling2dWorkload>(descriptor, info, m_CLCompileContext);
1127 }
1128 
CreatePreCompiled(const PreCompiledQueueDescriptor & descriptor,const WorkloadInfo & info) const1129 std::unique_ptr<IWorkload> ClWorkloadFactory::CreatePreCompiled(const PreCompiledQueueDescriptor& descriptor,
1130                                                                 const WorkloadInfo& info) const
1131 {
1132     return MakeWorkload<NullWorkload, NullWorkload>(descriptor, info, m_CLCompileContext);
1133 }
1134 
CreatePrelu(const PreluQueueDescriptor & descriptor,const WorkloadInfo & info) const1135 std::unique_ptr<IWorkload> ClWorkloadFactory::CreatePrelu(const PreluQueueDescriptor &descriptor,
1136                                                           const WorkloadInfo &info) const
1137 {
1138     return MakeWorkload<ClPreluWorkload>(descriptor, info, m_CLCompileContext);
1139 }
1140 
CreateQLstm(const QLstmQueueDescriptor & descriptor,const WorkloadInfo & info) const1141 std::unique_ptr<IWorkload> ClWorkloadFactory::CreateQLstm(const QLstmQueueDescriptor& descriptor,
1142                                                           const WorkloadInfo& info) const
1143 {
1144     return std::make_unique<ClQLstmWorkload>(descriptor, info, m_CLCompileContext);
1145 }
1146 
CreateQuantize(const QuantizeQueueDescriptor & descriptor,const WorkloadInfo & info) const1147 std::unique_ptr<IWorkload> ClWorkloadFactory::CreateQuantize(const QuantizeQueueDescriptor& descriptor,
1148                                                              const WorkloadInfo& info) const
1149 {
1150     return MakeWorkload<ClQuantizeWorkload>(descriptor, info, m_CLCompileContext);
1151 }
1152 
CreateQuantizedLstm(const QuantizedLstmQueueDescriptor & descriptor,const WorkloadInfo & info) const1153 std::unique_ptr<IWorkload> ClWorkloadFactory::CreateQuantizedLstm(const QuantizedLstmQueueDescriptor& descriptor,
1154                                                                   const WorkloadInfo& info) const
1155 {
1156     return MakeWorkload<ClQuantizedLstmWorkload>(descriptor, info, m_CLCompileContext);
1157 }
1158 
CreateRank(const RankQueueDescriptor & descriptor,const WorkloadInfo & info) const1159 std::unique_ptr<IWorkload> ClWorkloadFactory::CreateRank(const RankQueueDescriptor& descriptor,
1160                                                          const WorkloadInfo& info) const
1161 {
1162     return std::make_unique<ClRankWorkload>(descriptor, info);
1163 }
1164 
CreateReduce(const ReduceQueueDescriptor & descriptor,const WorkloadInfo & info) const1165 std::unique_ptr<IWorkload> ClWorkloadFactory::CreateReduce(const ReduceQueueDescriptor& descriptor,
1166                                                            const WorkloadInfo& info) const
1167 {
1168     return std::make_unique<ClReduceWorkload>(descriptor, info);
1169 }
1170 
CreateReshape(const ReshapeQueueDescriptor & descriptor,const WorkloadInfo & info) const1171 std::unique_ptr<IWorkload> ClWorkloadFactory::CreateReshape(const ReshapeQueueDescriptor& descriptor,
1172                                                             const WorkloadInfo& info) const
1173 {
1174     return MakeWorkload<ClReshapeWorkload>(descriptor, info, m_CLCompileContext);
1175 }
1176 
CreateResize(const ResizeQueueDescriptor & descriptor,const WorkloadInfo & info) const1177 std::unique_ptr<IWorkload> ClWorkloadFactory::CreateResize(const ResizeQueueDescriptor& descriptor,
1178                                                            const WorkloadInfo& info) const
1179 {
1180     return MakeWorkload<ClResizeWorkload>(descriptor, info, m_CLCompileContext);
1181 }
1182 
CreateSlice(const SliceQueueDescriptor & descriptor,const WorkloadInfo & info) const1183 std::unique_ptr<IWorkload> ClWorkloadFactory::CreateSlice(const SliceQueueDescriptor& descriptor,
1184                                                           const WorkloadInfo& info) const
1185 {
1186     return MakeWorkload<ClSliceWorkload>(descriptor, info, m_CLCompileContext);
1187 }
1188 
CreateSoftmax(const SoftmaxQueueDescriptor & descriptor,const WorkloadInfo & info) const1189 std::unique_ptr<IWorkload> ClWorkloadFactory::CreateSoftmax(const SoftmaxQueueDescriptor& descriptor,
1190                                                             const WorkloadInfo& info) const
1191 {
1192     return std::make_unique<ClSoftmaxWorkload>(descriptor,
1193                                                info,
1194                                                m_MemoryManager->GetIntraLayerManager(),
1195                                                m_CLCompileContext);
1196 }
1197 
CreateSpaceToBatchNd(const SpaceToBatchNdQueueDescriptor & descriptor,const WorkloadInfo & info) const1198 std::unique_ptr<IWorkload> ClWorkloadFactory::CreateSpaceToBatchNd(const SpaceToBatchNdQueueDescriptor& descriptor,
1199                                                                    const WorkloadInfo& info) const
1200 {
1201     return MakeWorkload<ClSpaceToBatchNdWorkload>(descriptor, info, m_CLCompileContext);
1202 }
1203 
CreateSpaceToDepth(const SpaceToDepthQueueDescriptor & descriptor,const WorkloadInfo & info) const1204 std::unique_ptr<IWorkload> ClWorkloadFactory::CreateSpaceToDepth(const SpaceToDepthQueueDescriptor& descriptor,
1205                                                                  const WorkloadInfo& info) const
1206 {
1207     return MakeWorkload<ClSpaceToDepthWorkload>(descriptor, info, m_CLCompileContext);
1208 }
1209 
CreateSplitter(const SplitterQueueDescriptor & descriptor,const WorkloadInfo & info) const1210 std::unique_ptr<IWorkload> ClWorkloadFactory::CreateSplitter(const SplitterQueueDescriptor& descriptor,
1211                                                              const WorkloadInfo& info) const
1212 {
1213     return MakeWorkload<ClSplitterWorkload>(descriptor, info, m_CLCompileContext);
1214 }
1215 
CreateStack(const StackQueueDescriptor & descriptor,const WorkloadInfo & info) const1216 std::unique_ptr<IWorkload> ClWorkloadFactory::CreateStack(const StackQueueDescriptor& descriptor,
1217                                                           const WorkloadInfo& info) const
1218 {
1219     return MakeWorkload<ClStackWorkload>(descriptor, info, m_CLCompileContext);
1220 }
1221 
CreateStridedSlice(const StridedSliceQueueDescriptor & descriptor,const WorkloadInfo & info) const1222 std::unique_ptr<IWorkload> ClWorkloadFactory::CreateStridedSlice(const StridedSliceQueueDescriptor& descriptor,
1223                                                                  const WorkloadInfo& info) const
1224 {
1225     return MakeWorkload<ClStridedSliceWorkload>(descriptor, info, m_CLCompileContext);
1226 }
1227 
CreateSubtraction(const SubtractionQueueDescriptor & descriptor,const WorkloadInfo & info) const1228 std::unique_ptr<IWorkload> ClWorkloadFactory::CreateSubtraction(const SubtractionQueueDescriptor& descriptor,
1229                                                                 const WorkloadInfo& info) const
1230 {
1231     return MakeWorkload<ClSubtractionWorkload>(descriptor, info, m_CLCompileContext);
1232 }
1233 
CreateTranspose(const TransposeQueueDescriptor & descriptor,const WorkloadInfo & info) const1234 std::unique_ptr<IWorkload> ClWorkloadFactory::CreateTranspose(const TransposeQueueDescriptor& descriptor,
1235                                                               const WorkloadInfo& info) const
1236 {
1237     return MakeWorkload<ClTransposeWorkload>(descriptor, info, m_CLCompileContext);
1238 }
1239 
CreateTransposeConvolution2d(const TransposeConvolution2dQueueDescriptor & descriptor,const WorkloadInfo & info) const1240 std::unique_ptr<IWorkload> ClWorkloadFactory::CreateTransposeConvolution2d(
1241     const TransposeConvolution2dQueueDescriptor& descriptor,
1242     const WorkloadInfo& info) const
1243 {
1244     return MakeWorkload<ClTransposeConvolution2dWorkload>(descriptor,
1245                                                           info,
1246                                                           m_MemoryManager->GetIntraLayerManager(),
1247                                                           m_CLCompileContext);
1248 }
1249 
1250 } // namespace armnn
1251