xref: /aosp_15_r20/external/armnn/src/backends/cl/test/ClCustomAllocatorTests.cpp (revision 89c4ff92f2867872bb9e2354d150bf0c8c502810)
1 //
2 // Copyright © 2021, 2023 Arm Ltd and Contributors. All rights reserved.
3 // SPDX-License-Identifier: MIT
4 //
5 
#include <armnn/backends/ICustomAllocator.hpp>
#include <armnn/Descriptors.hpp>
#include <armnn/Exceptions.hpp>
#include <armnn/INetwork.hpp>
#include <armnn/IRuntime.hpp>
#include <armnn/Utils.hpp>
#include <armnn/BackendRegistry.hpp>

#include <cl/ClBackend.hpp>
#if defined(ARMCOMPUTENEON_ENABLED)
#include <neon/NeonBackend.hpp>
#endif
#include <doctest/doctest.h>
#include <armnn/utility/IgnoreUnused.hpp>
// Contains the OpenCl interfaces for mapping memory in the Gpu Page Tables
// Requires the OpenCl backend to be included (GpuAcc)
#include <arm_compute/core/CL/CLKernelLibrary.h>
#include <CL/cl_ext.h>
#include <arm_compute/runtime/CL/CLScheduler.h>

#include <algorithm>
#include <cstddef>
#include <cstdlib>
#include <cstring>
#include <memory>
#include <string>
#include <vector>
25 
26 /** Sample implementation of ICustomAllocator for use with the ClBackend.
27  *  Note: any memory allocated must be host accessible with write access to allow for weights and biases
28  *  to be passed in. Read access is not required.. */
29 class SampleClBackendCustomAllocator : public armnn::ICustomAllocator
30 {
31 public:
32     SampleClBackendCustomAllocator() = default;
33 
allocate(size_t size,size_t alignment)34     void* allocate(size_t size, size_t alignment) override
35     {
36         // If alignment is 0 just use the CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE for alignment
37         if (alignment == 0)
38         {
39             alignment = arm_compute::CLKernelLibrary::get().get_device().getInfo<CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE>();
40         }
41         size_t space = size + alignment + alignment;
42         auto allocatedMemPtr = std::malloc(space * sizeof(size_t));
43         if (std::align(alignment, size, allocatedMemPtr, space) == nullptr)
44         {
45             throw armnn::Exception("SampleClBackendCustomAllocator::Alignment failed");
46         }
47         return allocatedMemPtr;
48     }
49 
50     /** Interface to be implemented by the child class to free the allocated tensor */
free(void * ptr)51     void free(void* ptr) override
52     {
53         std::free(ptr);
54     }
55 
GetMemorySourceType()56     armnn::MemorySource GetMemorySourceType() override
57     {
58         return armnn::MemorySource::Malloc;
59     }
60 };
61 
CreateTestNetwork(armnn::TensorInfo & inputTensorInfo)62 armnn::INetworkPtr CreateTestNetwork(armnn::TensorInfo& inputTensorInfo)
63 {
64     using namespace armnn;
65 
66     armnn::FullyConnectedDescriptor fullyConnectedDesc;
67     float weightsData[] = {1.0f}; // Identity
68     TensorInfo weightsInfo(TensorShape({1, 1}), DataType::Float32, 0.0f, 0, true);
69     weightsInfo.SetConstant(true);
70     armnn::ConstTensor weights(weightsInfo, weightsData);
71 
72     armnn::INetworkPtr network = armnn::INetwork::Create();
73     armnn::IConnectableLayer* const inputLayer = network->AddInputLayer(0);
74     armnn::IConnectableLayer* const weightsLayer = network->AddConstantLayer(weights, "Weights");
75     armnn::IConnectableLayer* const fullyConnectedLayer =
76         network->AddFullyConnectedLayer(fullyConnectedDesc, "fully connected");
77     armnn::IConnectableLayer* const outputLayer = network->AddOutputLayer(0);
78 
79     inputLayer->GetOutputSlot(0).Connect(fullyConnectedLayer->GetInputSlot(0));
80     weightsLayer->GetOutputSlot(0).Connect(fullyConnectedLayer->GetInputSlot(1));
81     fullyConnectedLayer->GetOutputSlot(0).Connect(outputLayer->GetInputSlot(0));
82 
83     weightsLayer->GetOutputSlot(0).SetTensorInfo(weightsInfo);
84 
85     //Set the tensors in the network.
86 
87     inputLayer->GetOutputSlot(0).SetTensorInfo(inputTensorInfo);
88 
89     TensorInfo outputTensorInfo(TensorShape({1, 1}), DataType::Float32);
90     fullyConnectedLayer->GetOutputSlot(0).SetTensorInfo(outputTensorInfo);
91 
92     return network;
93 }
94 
95 TEST_SUITE("ClCustomAllocatorTests")
96 {
97 
98 // This is a copy of the SimpleSample app modified to use a custom
99 // allocator for the clbackend. It creates a FullyConnected network with a single layer
100 // taking a single number as an input
101 TEST_CASE("ClCustomAllocatorTest")
102 {
103     using namespace armnn;
104 
105     float number = 3;
106 
107     // Construct ArmNN network
108     armnn::NetworkId networkIdentifier;
109 
110     TensorInfo inputTensorInfo(TensorShape({1, 1}), DataType::Float32);
111 
112     INetworkPtr myNetwork = CreateTestNetwork(inputTensorInfo);
113 
114     // Create ArmNN runtime
115     IRuntime::CreationOptions options; // default options
116     auto customAllocator = std::make_shared<SampleClBackendCustomAllocator>();
117     options.m_CustomAllocatorMap = {{"GpuAcc", std::move(customAllocator)}};
118     IRuntimePtr run = IRuntime::Create(options);
119 
120     // Optimise ArmNN network
121     OptimizerOptionsOpaque optOptions;
122     optOptions.SetImportEnabled(true);
123     optOptions.SetExportEnabled(true);
124     armnn::IOptimizedNetworkPtr optNet = Optimize(*myNetwork, {"GpuAcc"}, run->GetDeviceSpec(), optOptions);
125     CHECK(optNet);
126 
127     // Load graph into runtime
128     std::string ignoredErrorMessage;
129     INetworkProperties networkProperties(false, MemorySource::Malloc, MemorySource::Malloc);
130     run->LoadNetwork(networkIdentifier, std::move(optNet), ignoredErrorMessage, networkProperties);
131 
132     // Creates structures for input & output
133     unsigned int numElements = inputTensorInfo.GetNumElements();
134     size_t totalBytes = numElements * sizeof(float);
135 
136     const size_t alignment =
137             arm_compute::CLKernelLibrary::get().get_device().getInfo<CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE>();
138 
139     void* alignedInputPtr = options.m_CustomAllocatorMap["GpuAcc"]->allocate(totalBytes, alignment);
140 
141     // Input with negative values
142     auto* inputPtr = reinterpret_cast<float*>(alignedInputPtr);
143     std::fill_n(inputPtr, numElements, number);
144 
145     void* alignedOutputPtr = options.m_CustomAllocatorMap["GpuAcc"]->allocate(totalBytes, alignment);
146     auto* outputPtr = reinterpret_cast<float*>(alignedOutputPtr);
147     std::fill_n(outputPtr, numElements, -10.0f);
148 
149     armnn::TensorInfo inputTensorInfo2 = run->GetInputTensorInfo(networkIdentifier, 0);
150     inputTensorInfo2.SetConstant(true);
151     armnn::InputTensors inputTensors
152     {
153         {0, armnn::ConstTensor(inputTensorInfo2, alignedInputPtr)},
154     };
155     armnn::OutputTensors outputTensors
156     {
157         {0, armnn::Tensor(run->GetOutputTensorInfo(networkIdentifier, 0), alignedOutputPtr)}
158     };
159 
160     // Execute network
161     run->EnqueueWorkload(networkIdentifier, inputTensors, outputTensors);
162     run->UnloadNetwork(networkIdentifier);
163 
164 
165     // Tell the CLBackend to sync memory so we can read the output.
166     arm_compute::CLScheduler::get().sync();
167     auto* outputResult = reinterpret_cast<float*>(alignedOutputPtr);
168 
169     run->UnloadNetwork(networkIdentifier);
170     CHECK(outputResult[0] == number);
171     auto& backendRegistry = armnn::BackendRegistryInstance();
172     backendRegistry.DeregisterAllocator(ClBackend::GetIdStatic());
173 }
174 
175 // Only run this test if NEON is enabled
176 #if defined(ARMCOMPUTENEON_ENABLED)
177 
178 TEST_CASE("ClCustomAllocatorCpuAccNegativeTest")
179 {
180     using namespace armnn;
181 
182     // Create ArmNN runtime
183     IRuntime::CreationOptions options; // default options
184     auto customAllocator = std::make_shared<SampleClBackendCustomAllocator>();
185     options.m_CustomAllocatorMap = {{"CpuAcc", std::move(customAllocator)}};
186     IRuntimePtr run = IRuntime::Create(options);
187     TensorInfo inputTensorInfo(TensorShape({1, 1}), DataType::Float32);
188     INetworkPtr myNetwork = CreateTestNetwork(inputTensorInfo);
189 
190     // Optimise ArmNN network
191     OptimizerOptionsOpaque optOptions;
192     optOptions.SetImportEnabled(true);
193     IOptimizedNetworkPtr optNet(nullptr, nullptr);
194     std::vector<std::string> errMessages;
195 
196     CHECK_THROWS_AS_MESSAGE(Optimize(*myNetwork, {"CpuAcc"}, run->GetDeviceSpec(), optOptions, errMessages),
197                             armnn::InvalidArgumentException,
198                             "Expected an exception as GetAvailablePreferredBackends() should be empty in Optimize().");
199 
200     auto& backendRegistry = armnn::BackendRegistryInstance();
201     backendRegistry.DeregisterAllocator(NeonBackend::GetIdStatic());
202 }
203 
204 #endif
205 
206 TEST_CASE("ClCustomAllocatorGpuAccNullptrTest")
207 {
208     using namespace armnn;
209 
210     // Create ArmNN runtime
211     IRuntime::CreationOptions options; // default options
212     auto customAllocator = std::make_shared<SampleClBackendCustomAllocator>();
213     options.m_CustomAllocatorMap = {{"GpuAcc", nullptr}};
214 
215     CHECK_THROWS_AS_MESSAGE(IRuntimePtr run = IRuntime::Create(options),
216                             armnn::Exception,
217                             "Expected exception in RuntimeImpl::RuntimeImpl() as allocator was nullptr.");
218 }
219 
220 } // test suite ClCustomAllocatorTests
221