1 // 2 // Copyright © 2022 Arm Ltd and Contributors. All rights reserved. 3 // SPDX-License-Identifier: MIT 4 // 5 #include "NeonPooling3dWorkload.hpp" 6 #include "NeonWorkloadUtils.hpp" 7 #include <neon/NeonLayerSupport.hpp> 8 #include <neon/NeonTensorHandle.hpp> 9 #include <aclCommon/ArmComputeUtils.hpp> 10 #include <aclCommon/ArmComputeTensorUtils.hpp> 11 12 namespace armnn 13 { 14 using namespace armcomputetensorutils; NeonPooling3dWorkloadValidate(const TensorInfo & input,const TensorInfo & output,const Pooling3dDescriptor & descriptor)15 arm_compute::Status NeonPooling3dWorkloadValidate(const TensorInfo& input, 16 const TensorInfo& output, 17 const Pooling3dDescriptor& descriptor) 18 { 19 const arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input, descriptor.m_DataLayout); 20 const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output, descriptor.m_DataLayout); 21 arm_compute::Pooling3dLayerInfo layerInfo = BuildArmComputePooling3dLayerInfo(descriptor); 22 return arm_compute::NEPooling3dLayer::validate(&aclInputInfo, &aclOutputInfo, layerInfo); 23 } 24 NeonPooling3dWorkload(const Pooling3dQueueDescriptor & descriptor,const WorkloadInfo & info)25 NeonPooling3dWorkload::NeonPooling3dWorkload( const Pooling3dQueueDescriptor& descriptor, 26 const WorkloadInfo& info) 27 : NeonBaseWorkload<Pooling3dQueueDescriptor>(descriptor, info) 28 { 29 // Report Profiling Details 30 ARMNN_REPORT_PROFILING_WORKLOAD_DESC("NeonPooling3dWorkload_Construct", 31 descriptor.m_Parameters, 32 info, 33 this->GetGuid()); 34 35 m_Data.ValidateInputsOutputs("NeonPooling3dWorkload", 1, 1); 36 37 arm_compute::ITensor& input = static_cast<IAclTensorHandle*>(m_Data.m_Inputs[0])->GetTensor(); 38 arm_compute::ITensor& output = static_cast<IAclTensorHandle*>(m_Data.m_Outputs[0])->GetTensor(); 39 40 arm_compute::DataLayout aclDataLayout = ConvertDataLayout(m_Data.m_Parameters.m_DataLayout); 41 input.info()->set_data_layout(aclDataLayout); 42 output.info()->set_data_layout(aclDataLayout); 43 44 // flag to use wider accumulators (32 bit instead of 16 for FP16) to improve accuracy 45 // enable fp_mixed_precision for the the FP16 cases that 46 // accumulation reaches a limit beyond which there is no more increment of the value 47 bool fpMixedPrecision = false; 48 49 arm_compute::Pooling3dLayerInfo layerInfo = BuildArmComputePooling3dLayerInfo(m_Data.m_Parameters, 50 fpMixedPrecision); 51 { 52 ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "NeonPooling3dWorkload_configure"); 53 54 auto layer = std::make_unique<arm_compute::NEPooling3dLayer>(); 55 layer->configure(&input, &output, layerInfo); 56 m_PoolingLayer.reset(layer.release()); 57 } 58 } Execute() const59 void NeonPooling3dWorkload::Execute() const 60 { 61 ARMNN_SCOPED_PROFILING_EVENT_NEON_GUID("NeonPooling3dWorkload_Execute", this->GetGuid()); 62 m_PoolingLayer->run(); 63 } 64 } 65