//
// Copyright © 2021 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//

#pragma once

#include <ResolveType.hpp>

#include <armnn/IWorkingMemHandle.hpp>
#include <armnn/INetwork.hpp>
#include <armnn/Threadpool.hpp>
#include <armnn/IAsyncExecutionCallback.hpp>

#include <AsyncExecutionCallback.hpp>
#include <CommonTestUtils.hpp>

#include <doctest/doctest.h>

#include <vector>

namespace armnn
{

namespace experimental
{

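// Runs one inference per std::thread, each with its own IWorkingMemHandle,
// then compares every output buffer against the expected data.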
template<DataType ArmnnIType, DataType ArmnnOType,
        typename TInput = ResolveType<ArmnnIType>, typename TOutput = ResolveType<ArmnnOType>>
void AsyncThreadedEndToEndTestImpl(INetworkPtr network,
                                   const std::vector<std::map<int, std::vector<TInput>>>& inputTensorData,
                                   const std::vector<std::map<int, std::vector<TOutput>>>& expectedOutputData,
                                   std::vector<BackendId> backends,
                                   const size_t numberOfInferences,
                                   float tolerance = 0.000001f)
{
    // Create the runtime in which the test will run
    IRuntime::CreationOptions options;
    IRuntimePtr runtime(IRuntime::Create(options));

    // Optimize the network
    IOptimizedNetworkPtr optNet = Optimize(*network, backends, runtime->GetDeviceSpec());

    // Load the network into the runtime with asynchronous execution enabled;
    // input/output memory sources are left Undefined (no import/export)
    NetworkId networkId = 0;
    std::string errorMessage;
    const INetworkProperties networkProperties(true, MemorySource::Undefined, MemorySource::Undefined);
    runtime->LoadNetwork(networkId, std::move(optNet), errorMessage, networkProperties);

    std::vector<InputTensors> inputTensorsVec;
    std::vector<OutputTensors> outputTensorsVec;
    std::vector<std::map<int, std::vector<TOutput>>> outputStorageVec;
    std::vector<std::unique_ptr<IWorkingMemHandle>> workingMemHandles;

    for (unsigned int i = 0; i < numberOfInferences; ++i)
    {
        InputTensors inputTensors;
        OutputTensors outputTensors;
        outputStorageVec.emplace_back(std::map<int, std::vector<TOutput>>());

        inputTensors.reserve(inputTensorData[i].size());
        for (auto&& it : inputTensorData[i])
        {
            TensorInfo inputTensorInfo = runtime->GetInputTensorInfo(networkId, it.first);
            inputTensorInfo.SetConstant(true);
            inputTensors.push_back({it.first,
                                    ConstTensor(inputTensorInfo, it.second.data())});
        }

        outputTensors.reserve(expectedOutputData[i].size());
        for (auto&& it : expectedOutputData[i])
        {
            std::vector<TOutput> out(it.second.size());
            outputStorageVec[i].emplace(it.first, out);
            outputTensors.push_back({it.first,
                                     Tensor(runtime->GetOutputTensorInfo(networkId, it.first),
                                            outputStorageVec[i].at(it.first).data())});
        }

        inputTensorsVec.push_back(inputTensors);
        outputTensorsVec.push_back(outputTensors);

        workingMemHandles.push_back(runtime->CreateWorkingMemHandle(networkId));
    }

    std::vector<std::thread> threads;
    for (unsigned int i = 0; i < numberOfInferences; ++i)
    {
        // Access the vectors before we do anything multi-threaded
        InputTensors& inputTensors = inputTensorsVec[i];
        OutputTensors& outputTensors = outputTensorsVec[i];
        IWorkingMemHandle& workingMemHandle = *workingMemHandles[i].get();

        threads.emplace_back([&]()
        {
            // Run the async network
            runtime->Execute(workingMemHandle, inputTensors, outputTensors);
        });
    }

    for (unsigned int i = 0; i < numberOfInferences; ++i)
    {
        threads[i].join();
    }

    // Checks the results.
    for (unsigned int i = 0; i < numberOfInferences; ++i)
    {
        for (auto&& it : expectedOutputData[i])
        {
            std::vector<TOutput> out = outputStorageVec[i].at(it.first);
            for (unsigned int j = 0; j < out.size(); ++j)
            {
                CHECK(Compare<ArmnnOType>(it.second[j], out[j], tolerance));
            }
        }
    }
}

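// Runs a network asynchronously, either inline on the calling thread
// (numThreads == 1) or scheduled on a Threadpool, and checks the outputs
// against the expected data.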
template<DataType ArmnnIType, DataType ArmnnOType,
    typename TInput = ResolveType<ArmnnIType>, typename TOutput = ResolveType<ArmnnOType>>
void AsyncEndToEndTestImpl(INetworkPtr network,
                           const std::map<int, std::vector<TInput>>& inputTensorData,
                           const std::map<int, std::vector<TOutput>>& expectedOutputData,
                           std::vector<BackendId> backends,
                           float tolerance = 0.000001f,
                           size_t numThreads = 1)
{
    ARMNN_ASSERT(numThreads >= 1);
    const unsigned int numberOfInferences = numThreads == 1 ? 1 : 1000;
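    // A single-threaded run executes one inference inline; a threadpool run
    // schedules 1000 inferences so that all worker threads are exercised
    // concurrently.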

    // Create the runtime in which the test will run
    IRuntime::CreationOptions options;
    IRuntimePtr runtime(IRuntime::Create(options));

    // Optimize the network
    IOptimizedNetworkPtr optNet = Optimize(*network, backends, runtime->GetDeviceSpec());

    // Load the network into the runtime with asynchronous execution enabled;
    // input/output memory sources are left Undefined (no import/export)
    NetworkId networkId = 0;
    std::string errorMessage;
    const INetworkProperties networkProperties(true, MemorySource::Undefined, MemorySource::Undefined);
    runtime->LoadNetwork(networkId, std::move(optNet), errorMessage, networkProperties);

    InputTensors inputTensors;
    inputTensors.reserve(inputTensorData.size());
    for (auto&& it : inputTensorData)
    {
        TensorInfo inputTensorInfo = runtime->GetInputTensorInfo(networkId, it.first);
        inputTensorInfo.SetConstant(true);
        inputTensors.push_back({it.first,
                                ConstTensor(inputTensorInfo, it.second.data())});
    }

    std::vector<OutputTensors> outputTensorsVec;
    std::vector<std::map<int, std::vector<TOutput>>> outputStorageVec;

    outputTensorsVec.reserve(numberOfInferences);
    outputStorageVec.reserve(numberOfInferences);
    for (unsigned int i = 0; i < numberOfInferences; ++i)
    {
        OutputTensors outputTensors;
        outputStorageVec.emplace_back(std::map<int, std::vector<TOutput>>());

        outputTensors.reserve(expectedOutputData.size());
        for (auto&& it : expectedOutputData)
        {
            std::vector<TOutput> out(it.second.size());
            outputStorageVec[i].emplace(it.first, out);
            outputTensors.push_back({it.first,
                                     Tensor(runtime->GetOutputTensorInfo(networkId, it.first),
                                            outputStorageVec[i].at(it.first).data())});
        }

        outputTensorsVec.push_back(outputTensors);
    }

    if (numThreads == 1)
    {
        // Create WorkingMemHandle for this async network
        std::unique_ptr<IWorkingMemHandle> workingMemHandle = runtime->CreateWorkingMemHandle(networkId);
        IWorkingMemHandle& workingMemHandleRef = *workingMemHandle.get();

        // Run the async network
        runtime->Execute(workingMemHandleRef, inputTensors, outputTensorsVec[0]);
    }
    else
    {
        std::vector<std::shared_ptr<IWorkingMemHandle>> memHandles;

        for (size_t i = 0; i < numThreads; ++i)
        {
            memHandles.emplace_back(runtime->CreateWorkingMemHandle(networkId));
        }

        Threadpool threadpool(numThreads, runtime.get(), memHandles);
        AsyncCallbackManager callbackManager;

        // For the asynchronous execution we add a pool of working memory handles (one per thread) to the
        // LoadedNetwork, with each scheduled inference given a random priority
        for (size_t i = 0; i < numberOfInferences; ++i)
        {
            threadpool.Schedule(networkId,
                                inputTensors,
                                outputTensorsVec[i],
                                static_cast<QosExecPriority>(rand() % 3),
                                callbackManager.GetNewCallback());
        }

        // Wait for each inference to signal that it has completed
        for (size_t i = 0; i < numberOfInferences; ++i)
        {
            auto cb = callbackManager.GetNotifiedCallback();

            // Check that the inference completed successfully
            CHECK(cb->GetStatus() == Status::Success);
        }
    }

    for (auto&& outputStorage : outputStorageVec)
    {
        for (auto&& it : expectedOutputData)
        {
            std::vector<TOutput> out = outputStorage.at(it.first);

            for (unsigned int i = 0; i < out.size(); ++i)
            {
                CHECK(it.second[i] == doctest::Approx(out[i]).epsilon(tolerance));
            }
        }
    }
}

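// Builds a minimal network: Input -> StridedSlice -> Output.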
template<armnn::DataType DataType>
INetworkPtr CreateStridedSliceNetwork(const TensorShape& inputShape,
                                      const TensorShape& outputShape,
                                      const std::vector<int>& beginData,
                                      const std::vector<int>& endData,
                                      const std::vector<int>& stridesData,
                                      int beginMask = 0,
                                      int endMask = 0,
                                      int shrinkAxisMask = 0,
                                      int ellipsisMask = 0,
                                      int newAxisMask = 0,
                                      const float qScale = 1.0f,
                                      const int32_t qOffset = 0)
{
    using namespace armnn;
    // Builds up the structure of the network.
    INetworkPtr net(INetwork::Create());

    TensorInfo inputTensorInfo(inputShape, DataType, qScale, qOffset);
    TensorInfo outputTensorInfo(outputShape, DataType, qScale, qOffset);

    armnn::StridedSliceDescriptor stridedSliceDescriptor;
    stridedSliceDescriptor.m_Begin = beginData;
    stridedSliceDescriptor.m_End = endData;
    stridedSliceDescriptor.m_Stride = stridesData;
    stridedSliceDescriptor.m_BeginMask = beginMask;
    stridedSliceDescriptor.m_EndMask = endMask;
    stridedSliceDescriptor.m_ShrinkAxisMask = shrinkAxisMask;
    stridedSliceDescriptor.m_EllipsisMask = ellipsisMask;
    stridedSliceDescriptor.m_NewAxisMask = newAxisMask;

    IConnectableLayer* input = net->AddInputLayer(0, "Input_Layer");
    IConnectableLayer* stridedSlice = net->AddStridedSliceLayer(stridedSliceDescriptor, "StridedSlice_Layer");
    IConnectableLayer* output = net->AddOutputLayer(0);

    Connect(input, stridedSlice, inputTensorInfo, 0, 0);
    Connect(stridedSlice, output, outputTensorInfo, 0, 0);

    return net;
}

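// Slices the middle 1x2x3x1 block out of a 3x2x3x1 input and verifies the
// result via AsyncEndToEndTestImpl.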
template<armnn::DataType ArmnnType>
void StridedSlicedEndToEndTest(const std::vector<BackendId>& backends, size_t numThreads)
{
    using namespace armnn;
    using T = ResolveType<ArmnnType>;

    const TensorShape inputShape = {3, 2, 3, 1};
    const TensorShape outputShape = {1, 2, 3, 1};
    const std::vector<int> beginData = {1, 0, 0, 0};
    const std::vector<int> endData = {2, 2, 3, 1};
    const std::vector<int> stridesData = {1, 1, 1, 1};
    int beginMask = 0;
    int endMask = 0;
    int shrinkAxisMask = 0;
    int ellipsisMask = 0;
    int newAxisMask = 0;

    // Builds up the structure of the network
    INetworkPtr net = CreateStridedSliceNetwork<ArmnnType>(inputShape,
                                                           outputShape,
                                                           beginData,
                                                           endData,
                                                           stridesData,
                                                           beginMask,
                                                           endMask,
                                                           shrinkAxisMask,
                                                           ellipsisMask,
                                                           newAxisMask);

    CHECK(net);

    // Creates structures for input & output
    std::vector<T> inputData{
            1.0f, 1.0f, 1.0f, 2.0f, 2.0f, 2.0f,

            3.0f, 3.0f, 3.0f, 4.0f, 4.0f, 4.0f,

            5.0f, 5.0f, 5.0f, 6.0f, 6.0f, 6.0f
    };

    std::vector<T> outputExpected{
            3.0f, 3.0f, 3.0f, 4.0f, 4.0f, 4.0f
    };

    std::map<int, std::vector<T>> inputTensorData = {{0, inputData}};
    std::map<int, std::vector<T>> expectedOutputData = {{0, outputExpected}};

    AsyncEndToEndTestImpl<ArmnnType, ArmnnType>(std::move(net),
                                                inputTensorData,
                                                expectedOutputData,
                                                backends,
                                                0.000001f,
                                                numThreads);
}

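// Runs two StridedSlice inferences with different input data on separate
// std::threads via AsyncThreadedEndToEndTestImpl.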
template<armnn::DataType ArmnnType>
void StridedSlicedMultiThreadedEndToEndTest(const std::vector<BackendId>& backends)
{
    using namespace armnn;
    using T = ResolveType<ArmnnType>;

    const TensorShape inputShape = {3, 2, 3, 1};
    const TensorShape outputShape = {1, 2, 3, 1};
    const std::vector<int> beginData = {1, 0, 0, 0};
    const std::vector<int> endData = {2, 2, 3, 1};
    const std::vector<int> stridesData = {1, 1, 1, 1};
    int beginMask = 0;
    int endMask = 0;
    int shrinkAxisMask = 0;
    int ellipsisMask = 0;
    int newAxisMask = 0;

    // Builds up the structure of the network
    INetworkPtr net = CreateStridedSliceNetwork<ArmnnType>(inputShape,
                                                           outputShape,
                                                           beginData,
                                                           endData,
                                                           stridesData,
                                                           beginMask,
                                                           endMask,
                                                           shrinkAxisMask,
                                                           ellipsisMask,
                                                           newAxisMask);

    CHECK(net);

    // Creates structures for the first input & output
    std::vector<T> inputData1{
            1.0f, 1.0f, 1.0f, 2.0f, 2.0f, 2.0f,

            3.0f, 3.0f, 3.0f, 4.0f, 4.0f, 4.0f,

            5.0f, 5.0f, 5.0f, 6.0f, 6.0f, 6.0f
    };

    std::vector<T> outputExpected1{ 3.0f, 3.0f, 3.0f, 4.0f, 4.0f, 4.0f };

    // Creates structures for the second input & output
    std::vector<T> inputData2{
            1.0f, 1.0f, 1.0f, 2.0f, 2.0f, 2.0f,

            8.0f, 8.0f, 8.0f, 7.0f, 7.0f, 7.0f,

            5.0f, 5.0f, 5.0f, 6.0f, 6.0f, 6.0f
    };

    std::vector<T> outputExpected2{ 8.0f, 8.0f, 8.0f, 7.0f, 7.0f, 7.0f };

    std::vector<std::map<int, std::vector<T>>> inputTensors;
    std::vector<std::map<int, std::vector<T>>> outputTensors;

    inputTensors.push_back(std::map<int, std::vector<T>> {{0, inputData1}});
    inputTensors.push_back(std::map<int, std::vector<T>> {{0, inputData2}});
    outputTensors.push_back(std::map<int, std::vector<T>> {{0, outputExpected1}});
    outputTensors.push_back(std::map<int, std::vector<T>> {{0, outputExpected2}});

    AsyncThreadedEndToEndTestImpl<ArmnnType, ArmnnType>(std::move(net), inputTensors, outputTensors, backends, 2);
}

} // experimental namespace

} // armnn namespace