xref: /aosp_15_r20/external/armnn/src/backends/neon/NeonBackend.cpp (revision 89c4ff92f2867872bb9e2354d150bf0c8c502810)
1 //
2 // Copyright © 2017-2023 Arm Ltd and Contributors. All rights reserved.
3 // SPDX-License-Identifier: MIT
4 //
5 
6 #include "NeonBackend.hpp"
7 #include "NeonBackendId.hpp"
8 #include "NeonBackendModelContext.hpp"
9 #include "NeonWorkloadFactory.hpp"
10 #include "NeonLayerSupport.hpp"
11 #include "NeonTensorHandleFactory.hpp"
12 
13 #include <armnn/BackendRegistry.hpp>
14 #include <armnn/Descriptors.hpp>
15 
16 #include <aclCommon/ArmComputeSubgraphUtils.hpp>
17 #include <aclCommon/ArmComputeUtils.hpp>
18 #include <aclCommon/BaseMemoryManager.hpp>
19 
20 #include <armnn/backends/IBackendContext.hpp>
21 #include <armnn/backends/IMemoryManager.hpp>
22 
23 #include <armnn/utility/PolymorphicDowncast.hpp>
24 
25 #include <neon/workloads/NeonAdditionWorkload.hpp>
26 #include <neon/workloads/NeonBatchNormalizationWorkload.hpp>
27 #include <neon/workloads/NeonConvolution2dWorkload.hpp>
28 #include <neon/workloads/NeonDepthwiseConvolutionWorkload.hpp>
29 #include <neon/workloads/NeonDivisionWorkload.hpp>
30 #include <neon/workloads/NeonFullyConnectedWorkload.hpp>
31 #include <neon/workloads/NeonMultiplicationWorkload.hpp>
32 #include <neon/workloads/NeonReduceWorkload.hpp>
33 #include <neon/workloads/NeonSubtractionWorkload.hpp>
34 #include <backendsCommon/DefaultAllocator.hpp>
35 
36 #include <Optimizer.hpp>
37 
38 #include <arm_compute/core/Types.h>
39 #include <arm_compute/runtime/Allocator.h>
40 
41 namespace armnn
42 {
43 
GetIdStatic()44 const BackendId& NeonBackend::GetIdStatic()
45 {
46     static const BackendId s_Id{NeonBackendId()};
47     return s_Id;
48 }
49 
CreateMemoryManager() const50 IBackendInternal::IMemoryManagerUniquePtr NeonBackend::CreateMemoryManager() const
51 {
52     return std::make_unique<NeonMemoryManager>(std::make_unique<arm_compute::Allocator>(),
53                                                BaseMemoryManager::MemoryAffinity::Offset);
54 }
55 
// Creates a workload factory that shares the given memory manager.
IBackendInternal::IWorkloadFactoryPtr NeonBackend::CreateWorkloadFactory(
    const IBackendInternal::IMemoryManagerSharedPtr& memoryManager) const
{
    // The shared pointer handed in must actually manage a NeonMemoryManager.
    auto neonMemoryManager = PolymorphicPointerDowncast<NeonMemoryManager>(memoryManager);
    return std::make_unique<NeonWorkloadFactory>(neonMemoryManager);
}
62 
// Creates a workload factory that shares the given memory manager and carries
// a backend-specific model context built from the supplied model options.
IBackendInternal::IWorkloadFactoryPtr NeonBackend::CreateWorkloadFactory(
    const IBackendInternal::IMemoryManagerSharedPtr& memoryManager, const ModelOptions& modelOptions) const
{
    auto neonMemoryManager = PolymorphicPointerDowncast<NeonMemoryManager>(memoryManager);
    auto modelContext      = CreateBackendSpecificModelContext(modelOptions);
    return std::make_unique<NeonWorkloadFactory>(neonMemoryManager, modelContext);
}
69 
CreateWorkloadFactory(class TensorHandleFactoryRegistry & tensorHandleFactoryRegistry) const70 IBackendInternal::IWorkloadFactoryPtr NeonBackend::CreateWorkloadFactory(
71     class TensorHandleFactoryRegistry& tensorHandleFactoryRegistry) const
72 {
73     auto memoryManager = std::make_shared<NeonMemoryManager>(std::make_unique<arm_compute::Allocator>(),
74                                                              BaseMemoryManager::MemoryAffinity::Offset);
75 
76     tensorHandleFactoryRegistry.RegisterMemoryManager(memoryManager);
77 
78     auto factory = std::make_unique<NeonTensorHandleFactory>(memoryManager);
79     // Register copy and import factory pair
80     tensorHandleFactoryRegistry.RegisterCopyAndImportFactoryPair(factory->GetId(), factory->GetId());
81     // Register the factory
82     tensorHandleFactoryRegistry.RegisterFactory(std::move(factory));
83 
84 
85     return std::make_unique<NeonWorkloadFactory>(
86         PolymorphicPointerDowncast<NeonMemoryManager>(memoryManager));
87 }
88 
CreateWorkloadFactory(TensorHandleFactoryRegistry & tensorHandleFactoryRegistry,const ModelOptions & modelOptions) const89 IBackendInternal::IWorkloadFactoryPtr NeonBackend::CreateWorkloadFactory(
90     TensorHandleFactoryRegistry& tensorHandleFactoryRegistry, const ModelOptions& modelOptions) const
91 {
92     auto memoryManager = std::make_shared<NeonMemoryManager>(std::make_unique<arm_compute::Allocator>(),
93                                                              BaseMemoryManager::MemoryAffinity::Offset);
94 
95     tensorHandleFactoryRegistry.RegisterMemoryManager(memoryManager);
96 
97     auto factory = std::make_unique<NeonTensorHandleFactory>(memoryManager);
98     // Register copy and import factory pair
99     tensorHandleFactoryRegistry.RegisterCopyAndImportFactoryPair(factory->GetId(), factory->GetId());
100     // Register the factory
101     tensorHandleFactoryRegistry.RegisterFactory(std::move(factory));
102 
103     return std::make_unique<NeonWorkloadFactory>(
104         PolymorphicPointerDowncast<NeonMemoryManager>(memoryManager), CreateBackendSpecificModelContext(modelOptions));
105 }
106 
CreateBackendContext(const IRuntime::CreationOptions &) const107 IBackendInternal::IBackendContextPtr NeonBackend::CreateBackendContext(const IRuntime::CreationOptions&) const
108 {
109     return IBackendContextPtr{};
110 }
111 
CreateBackendProfilingContext(const IRuntime::CreationOptions &,IBackendProfilingPtr &)112 IBackendInternal::IBackendProfilingContextPtr NeonBackend::CreateBackendProfilingContext(
113     const IRuntime::CreationOptions&, IBackendProfilingPtr&)
114 {
115     return IBackendProfilingContextPtr{};
116 }
117 
CreateBackendSpecificModelContext(const ModelOptions & modelOptions) const118 IBackendInternal::IBackendSpecificModelContextPtr NeonBackend::CreateBackendSpecificModelContext(
119     const ModelOptions& modelOptions) const
120 {
121     return IBackendSpecificModelContextPtr{new NeonBackendModelContext{modelOptions}};
122 }
123 
GetLayerSupport() const124 IBackendInternal::ILayerSupportSharedPtr NeonBackend::GetLayerSupport() const
125 {
126     static ILayerSupportSharedPtr layerSupport
127         {
128             new NeonLayerSupport(IBackendInternal::IBackendSpecificModelContextPtr{})
129         };
130     return layerSupport;
131 }
132 
GetLayerSupport(const ModelOptions & modelOptions) const133 IBackendInternal::ILayerSupportSharedPtr NeonBackend::GetLayerSupport(const ModelOptions& modelOptions) const
134 {
135     static ILayerSupportSharedPtr layerSupport
136         {
137             new NeonLayerSupport(CreateBackendSpecificModelContext(modelOptions))
138         };
139     return layerSupport;
140 }
141 
OptimizeSubgraphView(const SubgraphView & subgraph,const ModelOptions & modelOptions) const142 OptimizationViews NeonBackend::OptimizeSubgraphView(const SubgraphView& subgraph,
143                                                     const ModelOptions& modelOptions) const
144 {
145     OptimizationViews optimizationViews(modelOptions);
146 
147     auto it = subgraph.endIConnectable();
148     std::map<LayerGuid, Layer*> untouched;
149 
150     while (it != subgraph.beginIConnectable())
151     {
152         --it;
153         Layer& base = *(PolymorphicDowncast<Layer*>(*it));
154         untouched.insert({base.GetGuid(), &base});
155     }
156 
157     it = subgraph.endIConnectable();
158     while (it != subgraph.beginIConnectable())
159     {
160         --it;
161         Layer& base = *(PolymorphicDowncast<Layer*>(*it));
162 
163         // Fuse activation into previous layer if supported by backend
164         if ((base.GetType() == LayerType::DepthwiseConvolution2d || base.GetType() == LayerType::Convolution2d
165              || base.GetType() == LayerType::BatchNormalization || base.GetType() == LayerType::FullyConnected
166              || base.GetType() == LayerType::Addition || base.GetType() == LayerType::Multiplication
167              || base.GetType() == LayerType::Subtraction || base.GetType() == LayerType::Division)
168             && (base.GetAdditionalInformation<ActivationDescriptor>() == nullptr))
169         {
170             for (auto output = base.BeginOutputSlots(); output != base.EndOutputSlots(); ++output)
171             {
172                 if (output->GetNumConnections() == 1)
173                 {
174                     for (auto&& childInput : output->GetConnections())
175                     {
176                         if ((childInput->GetOwningLayer().GetType() == LayerType::Activation) &&
177                             (checkDataTypeInputandOutput(childInput->GetOwningLayer())))
178                         {
179                             Layer& child = childInput->GetOwningLayer();
180 
181                             auto* activationLayer = PolymorphicDowncast<ActivationLayer*>(&child);
182 
183                             const std::string name = std::string("fused-") + child.GetName() + std::string("-into-") +
184                                                      base.GetName();
185 
186                             // Get params from activation layer
187                             ActivationDescriptor activationDesc = activationLayer->GetParameters();
188 
189                             if (base.GetType() == LayerType::Convolution2d)
190                             {
191                                 Convolution2dLayer* baseLayer = PolymorphicDowncast<Convolution2dLayer*>(&base);
192 
193                                 Optional<TensorInfo> biases;
194 
195                                 if (baseLayer->GetParameters().m_BiasEnabled)
196                                 {
197                                     biases = baseLayer->GetInputSlot(2).GetConnectedOutputSlot()->GetTensorInfo();
198                                 }
199 
200                                 arm_compute::Status status = NeonConvolution2dWorkloadValidate(
201                                         baseLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
202                                         activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
203                                         baseLayer->GetParameters(),
204                                         baseLayer->GetInputSlot(1).GetConnectedOutputSlot()->GetTensorInfo(),
205                                         biases,
206                                         false,
207                                         &activationDesc);
208 
209                                 if (status)
210                                 {
211                                     FuseConvolution2dLayer<Convolution2dLayer>(optimizationViews,
212                                                                                baseLayer,
213                                                                                activationLayer,
214                                                                                activationDesc,
215                                                                                name);
216                                     untouched.erase(baseLayer->GetGuid());
217                                     untouched.erase(activationLayer->GetGuid());
218                                 }
219                             }
220                             else if (base.GetType() == LayerType::DepthwiseConvolution2d)
221                             {
222                                 DepthwiseConvolution2dLayer* baseLayer =
223                                         PolymorphicDowncast<DepthwiseConvolution2dLayer*>(&base);
224 
225                                 Optional<TensorInfo> biases;
226 
227                                 if (baseLayer->GetParameters().m_BiasEnabled)
228                                 {
229                                     biases = baseLayer->GetInputSlot(2).GetConnectedOutputSlot()->GetTensorInfo();
230                                 }
231 
232                                 arm_compute::Status status = NeonDepthwiseConvolutionWorkloadValidate(
233                                         baseLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
234                                         activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
235                                         baseLayer->GetParameters(),
236                                         baseLayer->GetInputSlot(1).GetConnectedOutputSlot()->GetTensorInfo(),
237                                         biases,
238                                         &activationDesc);
239 
240                                 if (status)
241                                 {
242                                     FuseDepthwiseConvolution2dLayer<DepthwiseConvolution2dLayer>(optimizationViews,
243                                                                                                  baseLayer,
244                                                                                                  activationLayer,
245                                                                                                  activationDesc,
246                                                                                                  name);
247                                     untouched.erase(baseLayer->GetGuid());
248                                     untouched.erase(activationLayer->GetGuid());
249                                 }
250                             }
251                             else if (base.GetType() == LayerType::FullyConnected)
252                             {
253                                 FullyConnectedLayer* baseLayer = PolymorphicDowncast<FullyConnectedLayer*>(&base);
254                                 FullyConnectedDescriptor descriptor = baseLayer->GetParameters();
255 
256                                 // As bias is optional only try to get TensorInfo from input if bias is enabled.
257                                 Optional<TensorInfo> biases;
258                                 if (descriptor.m_BiasEnabled)
259                                 {
260                                     biases = baseLayer->GetInputSlot(2).GetConnectedOutputSlot()->GetTensorInfo();
261                                 }
262 
263                                 arm_compute::Status status = NeonFullyConnectedWorkloadValidate(
264                                         baseLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
265                                         activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
266                                         baseLayer->GetInputSlot(1).GetConnectedOutputSlot()->GetTensorInfo(),
267                                         biases,
268                                         baseLayer->GetParameters(),
269                                         &activationDesc);
270 
271                                 if (status)
272                                 {
273                                     FuseFullyConnectedLayer<FullyConnectedLayer>(optimizationViews,
274                                                                                  baseLayer,
275                                                                                  activationLayer,
276                                                                                  activationDesc,
277                                                                                  name);
278                                     untouched.erase(baseLayer->GetGuid());
279                                     untouched.erase(activationLayer->GetGuid());
280                                 }
281                             }
282                             else if (base.GetType() == LayerType::BatchNormalization)
283                             {
284                                 BatchNormalizationLayer* baseLayer =
285                                         PolymorphicDowncast<BatchNormalizationLayer*>(&base);
286 
287                                 arm_compute::Status status = NeonBatchNormalizationValidate(
288                                         baseLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
289                                         activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
290                                         baseLayer->m_Mean->GetTensorInfo(),
291                                         baseLayer->m_Variance->GetTensorInfo(),
292                                         baseLayer->m_Beta->GetTensorInfo(),
293                                         baseLayer->m_Gamma->GetTensorInfo(),
294                                         baseLayer->GetParameters(),
295                                         &activationDesc);
296 
297                                 if (status)
298                                 {
299                                     BatchNormalizationLayer* replacementLayer =
300                                         FuseBatchNormalizationLayer<BatchNormalizationLayer>(optimizationViews,
301                                                                                              baseLayer,
302                                                                                              activationLayer,
303                                                                                              activationDesc,
304                                                                                              name);
305 
306                                     replacementLayer->m_Beta     = std::move(baseLayer->m_Beta);
307                                     replacementLayer->m_Gamma    = std::move(baseLayer->m_Gamma);
308                                     replacementLayer->m_Mean     = std::move(baseLayer->m_Mean);
309                                     replacementLayer->m_Variance = std::move(baseLayer->m_Variance);
310                                     untouched.erase(baseLayer->GetGuid());
311                                     untouched.erase(activationLayer->GetGuid());
312                                 }
313                             }
314                             else if (base.GetType() == LayerType::Addition)
315                             {
316                                 AdditionLayer* baseLayer = PolymorphicDowncast<AdditionLayer*>(&base);
317 
318                                 arm_compute::Status status = NeonAdditionWorkloadValidate(
319                                         baseLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
320                                         baseLayer->GetInputSlot(1).GetConnectedOutputSlot()->GetTensorInfo(),
321                                         activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
322                                         &activationDesc);
323 
324                                 if (status)
325                                 {
326                                     FuseAdditionLayer<AdditionLayer>(optimizationViews,
327                                                                      baseLayer,
328                                                                      activationLayer,
329                                                                      activationDesc,
330                                                                      name);
331                                     untouched.erase(baseLayer->GetGuid());
332                                     untouched.erase(activationLayer->GetGuid());
333                                 }
334                             }
335                             else if (base.GetType() == LayerType::Division)
336                             {
337                                 DivisionLayer* baseLayer = PolymorphicDowncast<DivisionLayer*>(&base);
338 
339                                 arm_compute::Status status = NeonDivisionWorkloadValidate(
340                                         baseLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
341                                         baseLayer->GetInputSlot(1).GetConnectedOutputSlot()->GetTensorInfo(),
342                                         activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
343                                         &activationDesc);
344 
345                                 if (status)
346                                 {
347                                     FuseDivisionLayer<DivisionLayer>(optimizationViews,
348                                                                      baseLayer,
349                                                                      activationLayer,
350                                                                      activationDesc,
351                                                                      name);
352                                     untouched.erase(baseLayer->GetGuid());
353                                     untouched.erase(activationLayer->GetGuid());
354                                 }
355                             }
356                             else if (base.GetType() == LayerType::Multiplication)
357                             {
358                                 MultiplicationLayer* baseLayer = PolymorphicDowncast<MultiplicationLayer*>(&base);
359 
360                                 arm_compute::Status status = NeonMultiplicationWorkloadValidate(
361                                         baseLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
362                                         baseLayer->GetInputSlot(1).GetConnectedOutputSlot()->GetTensorInfo(),
363                                         activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
364                                         &activationDesc);
365 
366                                 if (status)
367                                 {
368                                     FuseMultiplicationLayer<MultiplicationLayer>(optimizationViews,
369                                                                                  baseLayer,
370                                                                                  activationLayer,
371                                                                                  activationDesc,
372                                                                                  name);
373                                     untouched.erase(baseLayer->GetGuid());
374                                     untouched.erase(activationLayer->GetGuid());
375                                 }
376                             }
377                             else if (base.GetType() == LayerType::Subtraction)
378                             {
379                                 SubtractionLayer* baseLayer = PolymorphicDowncast<SubtractionLayer*>(&base);
380 
381                                 arm_compute::Status status = NeonSubtractionWorkloadValidate(
382                                         baseLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
383                                         baseLayer->GetInputSlot(1).GetConnectedOutputSlot()->GetTensorInfo(),
384                                         activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
385                                         &activationDesc);
386 
387                                 if (status)
388                                 {
389                                     FuseSubtractionLayer<SubtractionLayer>(optimizationViews,
390                                                                            baseLayer,
391                                                                            activationLayer,
392                                                                            activationDesc,
393                                                                            name);
394                                     untouched.erase(baseLayer->GetGuid());
395                                     untouched.erase(activationLayer->GetGuid());
396                                 }
397                             }
398                             else if (base.GetType() == LayerType::ElementwiseBinary)
399                             {
400                                 ElementwiseBinaryLayer* baseLayer = PolymorphicDowncast<ElementwiseBinaryLayer*>(&base);
401 
402                                 if (baseLayer->GetParameters().m_Operation == BinaryOperation::Add)
403                                 {
404                                     arm_compute::Status status = NeonAdditionWorkloadValidate(
405                                             baseLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
406                                             baseLayer->GetInputSlot(1).GetConnectedOutputSlot()->GetTensorInfo(),
407                                             activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
408                                             &activationDesc);
409 
410                                     if (status)
411                                     {
412                                         FuseElementwiseBinaryLayer<ElementwiseBinaryLayer>(optimizationViews,
413                                                                                            baseLayer,
414                                                                                            activationLayer,
415                                                                                            activationDesc,
416                                                                                            BinaryOperation::Add,
417                                                                                            name);
418                                         untouched.erase(baseLayer->GetGuid());
419                                         untouched.erase(activationLayer->GetGuid());
420                                     }
421                                 }
422                                 else if (baseLayer->GetParameters().m_Operation == BinaryOperation::Div)
423                                 {
424                                     arm_compute::Status status = NeonDivisionWorkloadValidate(
425                                             baseLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
426                                             baseLayer->GetInputSlot(1).GetConnectedOutputSlot()->GetTensorInfo(),
427                                             activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
428                                             &activationDesc);
429 
430                                     if (status)
431                                     {
432                                         FuseElementwiseBinaryLayer<ElementwiseBinaryLayer>(optimizationViews,
433                                                                                            baseLayer,
434                                                                                            activationLayer,
435                                                                                            activationDesc,
436                                                                                            BinaryOperation::Div,
437                                                                                            name);
438                                         untouched.erase(baseLayer->GetGuid());
439                                         untouched.erase(activationLayer->GetGuid());
440                                     }
441                                 }
442                                 else if (baseLayer->GetParameters().m_Operation == BinaryOperation::Mul)
443                                 {
444                                     arm_compute::Status status = NeonMultiplicationWorkloadValidate(
445                                             baseLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
446                                             baseLayer->GetInputSlot(1).GetConnectedOutputSlot()->GetTensorInfo(),
447                                             activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
448                                             &activationDesc);
449 
450                                     if (status)
451                                     {
452                                         FuseElementwiseBinaryLayer<ElementwiseBinaryLayer>(optimizationViews,
453                                                                                            baseLayer,
454                                                                                            activationLayer,
455                                                                                            activationDesc,
456                                                                                            BinaryOperation::Mul,
457                                                                                            name);
458                                         untouched.erase(baseLayer->GetGuid());
459                                         untouched.erase(activationLayer->GetGuid());
460                                     }
461                                 }
462                                 else if (baseLayer->GetParameters().m_Operation == BinaryOperation::Sub)
463                                 {
464                                     arm_compute::Status status = NeonSubtractionWorkloadValidate(
465                                             baseLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
466                                             baseLayer->GetInputSlot(1).GetConnectedOutputSlot()->GetTensorInfo(),
467                                             activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
468                                             &activationDesc);
469 
470                                     if (status)
471                                     {
472                                         FuseElementwiseBinaryLayer<ElementwiseBinaryLayer>(optimizationViews,
473                                                                                            baseLayer,
474                                                                                            activationLayer,
475                                                                                            activationDesc,
476                                                                                            BinaryOperation::Sub,
477                                                                                            name);
478                                         untouched.erase(baseLayer->GetGuid());
479                                         untouched.erase(activationLayer->GetGuid());
480                                     }
481                                 }
482                                 // No fusion available for other BinaryOperations
483                             }
484                         }
485                     }
486                 }
487             }
488         }
489 
490         // Separate reduce layer with multiple axes into multiple reduce layers with 1 axis.
491         if (base.GetType() == LayerType::Reduce)
492         {
493             ReduceLayer* baseLayer            = PolymorphicDowncast<ReduceLayer*>(&base);
494             ReduceDescriptor reduceDescriptor = baseLayer->GetParameters();
495 
496             if (!reduceDescriptor.m_vAxis.empty() && reduceDescriptor.m_vAxis.size() > 1)
497             {
498                 // Add new layers to the graph and connect them.
499                 std::vector<IConnectableLayer*> layers = ChainReduceLayers<ReduceLayer>(optimizationViews,
500                                                                                         baseLayer,
501                                                                                         reduceDescriptor);
502 
503                 // Replace existing baselayer with new subgraph.
504                 ReplaceLayers<ReduceLayer>(optimizationViews, baseLayer, layers);
505                 untouched.erase(baseLayer->GetGuid());
506             }
507         }
508     }
509 
510     if (optimizationViews.GetSubstitutions().empty())
511     {
512         optimizationViews.AddUntouchedSubgraph(SubgraphView(subgraph));
513     }
514     else
515     {
516         ReportUntouchedLayers(optimizationViews, untouched);
517     }
518 
519     return optimizationViews;
520 }
521 
GetHandleFactoryPreferences() const522 std::vector<ITensorHandleFactory::FactoryId> NeonBackend::GetHandleFactoryPreferences() const
523 {
524     return std::vector<ITensorHandleFactory::FactoryId>() = { NeonTensorHandleFactory::GetIdStatic() };
525 }
526 
RegisterTensorHandleFactories(class TensorHandleFactoryRegistry & registry)527 void NeonBackend::RegisterTensorHandleFactories(class TensorHandleFactoryRegistry& registry)
528 {
529     auto memoryManager = std::make_shared<NeonMemoryManager>(std::make_unique<arm_compute::Allocator>(),
530                                                              BaseMemoryManager::MemoryAffinity::Offset);
531 
532     registry.RegisterMemoryManager(memoryManager);
533 
534     auto factory = std::make_unique<NeonTensorHandleFactory>(memoryManager);
535     // Register copy and import factory pair
536     registry.RegisterCopyAndImportFactoryPair(factory->GetId(), factory->GetId());
537     // Register the factory
538     registry.RegisterFactory(std::move(factory));
539 }
540 
GetDefaultAllocator() const541 std::unique_ptr<ICustomAllocator> NeonBackend::GetDefaultAllocator() const
542 {
543     return std::make_unique<DefaultAllocator>();
544 }
545 
546 
547 } // namespace armnn
548