//
// Copyright © 2017-2023 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//

#include <CommonTestUtils.hpp>

#include <Graph.hpp>
#include <Network.hpp>

#include <reference/RefWorkloadFactory.hpp>

#include <doctest/doctest.h>

TEST_SUITE("OptimizedNetwork")
{
TEST_CASE("SerializeToDot")
{
    // build up the structure of the network
    armnn::INetworkPtr net(armnn::INetwork::Create());

    // Defines layers.
    auto input = net->AddInputLayer(0);
    auto add = net->AddElementwiseBinaryLayer(armnn::BinaryOperation::Add);
    auto output = net->AddOutputLayer(0);

    // Connects layers.
    input->GetOutputSlot(0).Connect(add->GetInputSlot(0));
    input->GetOutputSlot(0).Connect(add->GetInputSlot(1));
    add->GetOutputSlot(0).Connect(output->GetInputSlot(0));

    armnn::TensorShape shape({4});
    armnn::TensorInfo info(shape, armnn::DataType::Float32);
    input->GetOutputSlot(0).SetTensorInfo(info);
    add->GetOutputSlot(0).SetTensorInfo(info);

    armnn::IRuntime::CreationOptions options;
    armnn::IRuntimePtr runtime(armnn::IRuntime::Create(options));

    std::vector<armnn::BackendId> backends = {armnn::Compute::CpuRef};
    armnn::IOptimizedNetworkPtr optimizedNet = armnn::Optimize(*net, backends, runtime->GetDeviceSpec());

    std::ostringstream ss;
    optimizedNet->SerializeToDot(ss);

    auto inputId = input->GetGuid();
    auto addId = add->GetGuid();
    auto outputId = output->GetGuid();

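    // The expected output contains one record-shaped node per layer, keyed by the layer's GUID,
    // followed by one edge per connection, each labelled with the tensor shape [4].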
    std::stringstream expected;
    expected <<
        "digraph Optimized {\n"
        "    node [shape=\"record\"];\n"
        "    edge [fontsize=8 fontcolor=\"blue\" fontname=\"arial-bold\"];\n"
        "    " << inputId << " [label=\"{Input|Guid : " << inputId << "\\lLayerType : Input\\l"
                             "BackendID : CpuRef\\l}\"];\n"
        "    " << addId << " [label=\"{ElementwiseBinary|Guid : " << addId << "\\lLayerType : ElementwiseBinary\\l"
                           "BackendID : CpuRef\\l}\"];\n"
        "    " << outputId << " [label=\"{Output|Guid : " << outputId << "\\lLayerType : Output\\l"
                              "BackendID : CpuRef\\l}\"];\n"
        "    " << inputId << " -> " << addId << " [label=< [4] >];\n"
        "    " << inputId << " -> " << addId << " [label=< [4] >];\n"
        "    " << addId << " -> " << outputId << " [label=< [4] >];\n"
        "}\n";

    CHECK(ss.str() == expected.str());
}

TEST_CASE("OptimizeValidateDeviceNonSupportLayerNoFallback")
{
    // build up the structure of the network
    armnn::INetworkPtr net(armnn::INetwork::Create());

    armnn::IConnectableLayer* input = net->AddInputLayer(0);

    // This layer configuration isn't supported by CpuAcc and isn't allowed to fall back, so Optimize will throw.
    armnn::NormalizationDescriptor descriptor;
    armnn::IConnectableLayer* normalize = net->AddNormalizationLayer(descriptor);

    armnn::IConnectableLayer* output = net->AddOutputLayer(0);

    input->GetOutputSlot(0).Connect(normalize->GetInputSlot(0));
    normalize->GetOutputSlot(0).Connect(output->GetInputSlot(0));

    input->GetOutputSlot(0).SetTensorInfo(armnn::TensorInfo({ 1, 1, 4, 4 }, armnn::DataType::Float32));
    normalize->GetOutputSlot(0).SetTensorInfo(armnn::TensorInfo({ 1, 1, 4, 4 }, armnn::DataType::Float32));

    armnn::IRuntime::CreationOptions options;
    armnn::IRuntimePtr runtime(armnn::IRuntime::Create(options));

    std::vector<armnn::BackendId> backends = { armnn::Compute::CpuAcc };
    std::vector<std::string> errMessages;

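    // With no fallback backend in the list, optimization of the unsupported Normalization layer is expected to fail.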
    try
    {
        Optimize(*net, backends, runtime->GetDeviceSpec(), armnn::OptimizerOptionsOpaque(), errMessages);
        FAIL("Should have thrown an exception.");
    }
    catch (const armnn::InvalidArgumentException&)
    {
        // Different exceptions are thrown on different backends
    }
    CHECK(errMessages.size() > 0);
}

TEST_CASE("OptimizeValidateDeviceNonSupportLayerWithFallback")
{
    // build up the structure of the network
    armnn::INetworkPtr net(armnn::INetwork::Create());

    armnn::IConnectableLayer* input = net->AddInputLayer(0);

    // This layer configuration isn't supported by CpuAcc, but it is allowed to fall back to CpuRef.
    armnn::NormalizationDescriptor descriptor;
    armnn::IConnectableLayer* normalize = net->AddNormalizationLayer(descriptor);

    armnn::IConnectableLayer* output = net->AddOutputLayer(0);

    input->GetOutputSlot(0).Connect(normalize->GetInputSlot(0));
    normalize->GetOutputSlot(0).Connect(output->GetInputSlot(0));

    input->GetOutputSlot(0).SetTensorInfo(armnn::TensorInfo({ 1, 1, 4, 4 }, armnn::DataType::Float32));
    normalize->GetOutputSlot(0).SetTensorInfo(armnn::TensorInfo({ 1, 1, 4, 4 }, armnn::DataType::Float32));

    armnn::IRuntime::CreationOptions options;
    armnn::IRuntimePtr runtime(armnn::IRuntime::Create(options));

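    // CpuAcc is the preferred backend; CpuRef is listed second as the fallback for layers CpuAcc cannot handle.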
    std::vector<armnn::BackendId> backends = { armnn::Compute::CpuAcc, armnn::Compute::CpuRef };
    armnn::IOptimizedNetworkPtr optNet = armnn::Optimize(*net, backends, runtime->GetDeviceSpec());
    REQUIRE(optNet);

    armnn::Graph& graph = GetGraphForTesting(optNet.get());
    graph.AllocateDynamicBuffers();

    for (auto&& layer : graph)
    {
        // If NEON is enabled, Input and Output layers are supported by CpuAcc,
        // the other layers are supported by CpuRef.
        // If NEON is not enabled, all layers are supported by CpuRef.
#if defined(ARMCOMPUTENEON_ENABLED)
        if (layer->GetType() == armnn::LayerType::Output)
        {
            CHECK(layer->GetBackendId() == armnn::Compute::CpuAcc);
        }
        else if (layer->GetType() == armnn::LayerType::Normalization)
        {
            CHECK(layer->GetBackendId() == armnn::Compute::CpuRef);
        }
#else
        CHECK(layer->GetBackendId() == armnn::Compute::CpuRef);
#endif
    }
}

TEST_CASE("OptimizeValidateWorkloadsUndefinedComputeDevice")
{
    const armnn::TensorInfo desc({3, 5}, armnn::DataType::Float32);

    // build up the structure of the network
    armnn::INetworkPtr net(armnn::INetwork::Create());

    armnn::NormalizationDescriptor nmDesc;
    armnn::ActivationDescriptor acDesc;

    //    in
    //     |
    //    nm
    //   /  |
    //  ac  |
    //   \  |
    //    ml
    //     |
    //    sm
    //     |
    //    ot
    armnn::IConnectableLayer* layer = net->AddInputLayer(0, "in");
    layer->GetOutputSlot(0).SetTensorInfo(desc);

    armnn::IConnectableLayer* const normLayer = net->AddNormalizationLayer(nmDesc, "nm");

    layer->GetOutputSlot(0).Connect(normLayer->GetInputSlot(0));
    normLayer->GetOutputSlot(0).SetTensorInfo(desc);

    layer = net->AddActivationLayer(acDesc, "ac");

    normLayer->GetOutputSlot(0).Connect(layer->GetInputSlot(0));
    layer->GetOutputSlot(0).SetTensorInfo(desc);

    armnn::IConnectableLayer* prevLayer = layer;
    layer = net->AddElementwiseBinaryLayer(armnn::BinaryOperation::Mul, "ml");

    prevLayer->GetOutputSlot(0).Connect(layer->GetInputSlot(0));
    normLayer->GetOutputSlot(0).Connect(layer->GetInputSlot(1));
    layer->GetOutputSlot(0).SetTensorInfo(desc);

    prevLayer = layer;
    armnn::SoftmaxDescriptor softmaxDescriptor;
    layer = net->AddSoftmaxLayer(softmaxDescriptor, "sm");

    prevLayer->GetOutputSlot(0).Connect(layer->GetInputSlot(0));
    layer->GetOutputSlot(0).SetTensorInfo(desc);

    prevLayer = layer;
    layer = net->AddOutputLayer(0, "ot");

    prevLayer->GetOutputSlot(0).Connect(layer->GetInputSlot(0));

    armnn::IRuntime::CreationOptions options;
    armnn::IRuntimePtr runtime(armnn::IRuntime::Create(options));

    std::vector<armnn::BackendId> backends = { armnn::Compute::Undefined };
    std::vector<std::string> errMessages;

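    // No layer can be assigned to Compute::Undefined, so optimization is expected to fail.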
    try
    {
        Optimize(*net, backends, runtime->GetDeviceSpec(),
                 armnn::OptimizerOptionsOpaque(), errMessages);
        FAIL("Should have thrown an exception.");
    }
    catch (const armnn::InvalidArgumentException&)
    {
        // Different exceptions are thrown on different backends
    }
    CHECK(errMessages.size() > 0);
}

TEST_CASE("OptimizeValidateWorkloadsUndefinedComputeDeviceWithFallback")
{
    const armnn::TensorInfo desc({3, 5}, armnn::DataType::Float32);

    // build up the structure of the network
    armnn::INetworkPtr net(armnn::INetwork::Create());

    armnn::NormalizationDescriptor nmDesc;
    armnn::ActivationDescriptor acDesc;

    //    in
    //     |
    //    nm
    //   /  |
    //  ac  |
    //   \  |
    //    ml
    //     |
    //    sm
    //     |
    //    ot
    armnn::IConnectableLayer* layer = net->AddInputLayer(0, "in");
    layer->GetOutputSlot(0).SetTensorInfo(desc);

    armnn::IConnectableLayer* const normLayer = net->AddNormalizationLayer(nmDesc, "nm");

    layer->GetOutputSlot(0).Connect(normLayer->GetInputSlot(0));
    normLayer->GetOutputSlot(0).SetTensorInfo(desc);

    layer = net->AddActivationLayer(acDesc, "ac");

    normLayer->GetOutputSlot(0).Connect(layer->GetInputSlot(0));
    layer->GetOutputSlot(0).SetTensorInfo(desc);

    armnn::IConnectableLayer* prevLayer = layer;
    layer = net->AddElementwiseBinaryLayer(armnn::BinaryOperation::Mul, "ml");

    prevLayer->GetOutputSlot(0).Connect(layer->GetInputSlot(0));
    normLayer->GetOutputSlot(0).Connect(layer->GetInputSlot(1));
    layer->GetOutputSlot(0).SetTensorInfo(desc);

    prevLayer = layer;
    armnn::SoftmaxDescriptor softmaxDescriptor;
    layer = net->AddSoftmaxLayer(softmaxDescriptor, "sm");

    prevLayer->GetOutputSlot(0).Connect(layer->GetInputSlot(0));
    layer->GetOutputSlot(0).SetTensorInfo(desc);

    prevLayer = layer;
    layer = net->AddOutputLayer(0, "ot");

    prevLayer->GetOutputSlot(0).Connect(layer->GetInputSlot(0));

    armnn::IRuntime::CreationOptions options;
    armnn::IRuntimePtr runtime(armnn::IRuntime::Create(options));

    std::vector<armnn::BackendId> backends = { armnn::Compute::Undefined, armnn::Compute::CpuRef };

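    // Compute::Undefined cannot be assigned to any layer, so every layer is expected to fall back to CpuRef.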
    armnn::IOptimizedNetworkPtr optNet = armnn::Optimize(*net, backends, runtime->GetDeviceSpec());
    CHECK(optNet);

    armnn::Graph& graph = GetGraphForTesting(optNet.get());
    graph.AllocateDynamicBuffers();

    // validate workloads
    armnn::RefWorkloadFactory fact;
    for (auto&& layer : graph)
    {
        CHECK(layer->GetBackendId() == armnn::Compute::CpuRef);
        CHECK_NOTHROW(
            layer->CreateWorkload(fact));
    }
}

TEST_CASE("OptimizeValidateWorkloadsDuplicateComputeDeviceWithFallback")
{
    // build up the structure of the network
    armnn::INetworkPtr net(armnn::INetwork::Create());

    armnn::IConnectableLayer* input = net->AddInputLayer(0);

    // This layer configuration isn't supported by CpuAcc, but it is allowed to fall back to CpuRef.
    armnn::NormalizationDescriptor descriptor;
    armnn::IConnectableLayer* normalize = net->AddNormalizationLayer(descriptor);

    armnn::IConnectableLayer* output = net->AddOutputLayer(0);

    input->GetOutputSlot(0).Connect(normalize->GetInputSlot(0));
    normalize->GetOutputSlot(0).Connect(output->GetInputSlot(0));

    input->GetOutputSlot(0).SetTensorInfo(armnn::TensorInfo({ 1, 1, 4, 4 }, armnn::DataType::Float32));
    normalize->GetOutputSlot(0).SetTensorInfo(armnn::TensorInfo({ 1, 1, 4, 4 }, armnn::DataType::Float32));

    armnn::IRuntime::CreationOptions options;
    armnn::IRuntimePtr runtime(armnn::IRuntime::Create(options));

    std::vector<armnn::BackendId> backends = { armnn::Compute::CpuAcc,
                                               armnn::Compute::GpuAcc,
                                               armnn::Compute::CpuRef };

    armnn::IOptimizedNetworkPtr optNet = armnn::Optimize(*net, backends, runtime->GetDeviceSpec());
    REQUIRE(optNet);

    armnn::Graph& graph = GetGraphForTesting(optNet.get());
    graph.AllocateDynamicBuffers();

    for (auto&& layer : graph)
    {
        // If NEON is enabled, Input and Output layers are supported by CpuAcc,
        // the other layers are supported by CpuRef.
        // If only CL is enabled, Input and Output layers are supported by GpuAcc,
        // the other layers are supported by CpuRef.
        // If neither NEON, nor CL is enabled, all layers are supported by CpuRef.
#if defined(ARMCOMPUTENEON_ENABLED)
        if (layer->GetType() == armnn::LayerType::Input)
        {
            CHECK(layer->GetBackendId() == armnn::Compute::CpuRef);
        }
        else if (layer->GetType() == armnn::LayerType::Output)
        {
            CHECK(layer->GetBackendId() == armnn::Compute::CpuAcc);
        }
        else if (layer->GetType() == armnn::LayerType::Normalization)
        {
            CHECK(layer->GetBackendId() == armnn::Compute::CpuRef);
        }
#elif defined(ARMCOMPUTECL_ENABLED)
        if (layer->GetType() == armnn::LayerType::Input)
        {
            CHECK(layer->GetBackendId() == armnn::Compute::CpuRef);
        }
        else if (layer->GetType() == armnn::LayerType::Output)
        {
            CHECK(layer->GetBackendId() == armnn::Compute::GpuAcc);
        }
        else if (layer->GetType() == armnn::LayerType::Normalization)
        {
            CHECK(layer->GetBackendId() == armnn::Compute::CpuRef);
        }
#else
        CHECK(layer->GetBackendId() == armnn::Compute::CpuRef);
#endif
    }
}

TEST_CASE("OptimizeNetworkCopy")
{
    armnn::IRuntime::CreationOptions options;
    armnn::IRuntimePtr runtime = armnn::IRuntime::Create(options);
    std::vector<armnn::NetworkId> networkIds;

    const std::string layerName("convolution2d");
    const armnn::TensorInfo inputInfo ({ 1, 5, 5, 1 }, armnn::DataType::Float32);
    const armnn::TensorInfo outputInfo({ 1, 2, 2, 1 }, armnn::DataType::Float32);

    const armnn::TensorInfo weightsInfo({ 1, 3, 3, 1 }, armnn::DataType::Float32, 0.0f, 0, true);
    const armnn::TensorInfo biasesInfo ({ 1 }, armnn::DataType::Float32, 0.0f, 0, true);

    std::vector<float> weightsData = GenerateRandomData<float>(weightsInfo.GetNumElements());
    armnn::ConstTensor weights(weightsInfo, weightsData);

    std::vector<float> biasesData = GenerateRandomData<float>(biasesInfo.GetNumElements());
    armnn::ConstTensor biases(biasesInfo, biasesData);

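    // 3x3 convolution with 1-pixel padding, stride 2, dilation 2 and a bias, operating on NHWC data.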
    armnn::Convolution2dDescriptor descriptor;
    descriptor.m_PadLeft     = 1;
    descriptor.m_PadRight    = 1;
    descriptor.m_PadTop      = 1;
    descriptor.m_PadBottom   = 1;
    descriptor.m_StrideX     = 2;
    descriptor.m_StrideY     = 2;
    descriptor.m_DilationX   = 2;
    descriptor.m_DilationY   = 2;
    descriptor.m_BiasEnabled = true;
    descriptor.m_DataLayout  = armnn::DataLayout::NHWC;

    armnn::INetworkPtr network = armnn::INetwork::Create();
    armnn::IConnectableLayer* const inputLayer  = network->AddInputLayer(0);

    armnn::IConnectableLayer* const convLayer   = network->AddConvolution2dLayer(descriptor, layerName.c_str());
    armnn::IConnectableLayer* const outputLayer = network->AddOutputLayer(0);
    armnn::IConnectableLayer* weightsLayer = network->AddConstantLayer(weights);
    armnn::IConnectableLayer* biasLayer = network->AddConstantLayer(biases);

    weightsLayer->GetOutputSlot(0).SetTensorInfo(weightsInfo);
    weightsLayer->GetOutputSlot(0).Connect(convLayer->GetInputSlot(1u));

    biasLayer->GetOutputSlot(0).SetTensorInfo(biasesInfo);
    biasLayer->GetOutputSlot(0).Connect(convLayer->GetInputSlot(2u));

    inputLayer->GetOutputSlot(0).Connect(convLayer->GetInputSlot(0));
    convLayer->GetOutputSlot(0).Connect(outputLayer->GetInputSlot(0));

    inputLayer->GetOutputSlot(0).SetTensorInfo(inputInfo);
    convLayer->GetOutputSlot(0).SetTensorInfo(outputInfo);

    std::vector<armnn::BackendId> preferredBackends { "CpuRef" };
    armnn::ModelOptions modelOptions;
    armnn::OptimizerOptionsOpaque optimizerOptions(false, false, false,
                                                   false, modelOptions, false);
    std::vector<std::string> errorMessages;

    // optimize the network.
    armnn::IOptimizedNetworkPtr optNet = Optimize(*network,
                                                  preferredBackends,
                                                  runtime->GetDeviceSpec(),
                                                  optimizerOptions,
                                                  armnn::Optional<std::vector<std::string>&>(errorMessages));

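    // Create two copies of the optimized network and load each copy into the runtime.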
    for (unsigned int i = 0; i < 2; ++i)
    {
        armnn::ModelOptions optimizedModelOptions;
        auto copy = armnn::IOptimizedNetworkPtr(new armnn::IOptimizedNetwork(*optNet.get(), optimizedModelOptions),
                                                &armnn::IOptimizedNetwork::Destroy);

        CHECK(copy);

        armnn::NetworkId netId;
        std::string errorMessage;

        CHECK(armnn::Status::Success == runtime->LoadNetwork(netId, std::move(copy), errorMessage));

        // Record the networkID for the loaded network
        networkIds.emplace_back(netId);
    }
    armnn::NetworkId optNetId;
    std::string errorMessage;

    // Load the original optNet
    CHECK(armnn::Status::Success == runtime->LoadNetwork(optNetId, std::move(optNet), errorMessage));

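    // Run the original optimized network on random input to produce the reference results.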
    std::vector<float> inputData = GenerateRandomData<float>(runtime->GetInputTensorInfo(optNetId, 0).GetNumElements());
    std::vector<float> outputData(runtime->GetOutputTensorInfo(optNetId, 0).GetNumElements());

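    // The input TensorInfo is marked constant so the data can be wrapped in a ConstTensor below.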
    armnn::TensorInfo inputTensorInfo = runtime->GetInputTensorInfo(optNetId, 0);
    inputTensorInfo.SetConstant(true);
    armnn::InputTensors inputTensors
    {
        {
            0, armnn::ConstTensor(inputTensorInfo, inputData.data())
        }
    };
    armnn::OutputTensors outputTensors
    {
        {
            0, armnn::Tensor(runtime->GetOutputTensorInfo(optNetId, 0), outputData.data())
        }
    };
    runtime->EnqueueWorkload(optNetId, inputTensors, outputTensors);
    runtime->UnloadNetwork(optNetId);

    // Run the same input through each copied network and compare against the original results
    for (unsigned int i = 0; i < networkIds.size(); ++i)
    {
        armnn::NetworkId netId = networkIds[i];
        std::vector<float> copyOutputData(runtime->GetOutputTensorInfo(netId, 0).GetNumElements());

        armnn::TensorInfo inputTensorInfo2 = runtime->GetInputTensorInfo(netId, 0);
        inputTensorInfo2.SetConstant(true);
        armnn::InputTensors copyInputTensors
        {
            {
                0, armnn::ConstTensor(inputTensorInfo2, inputData.data())
            }
        };
        armnn::OutputTensors copyOutputTensors
        {
            {
                0, armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), copyOutputData.data())
            }
        };
        runtime->EnqueueWorkload(netId, copyInputTensors, copyOutputTensors);
        runtime->UnloadNetwork(netId);

        // Check results are identical to "original" version
        for (unsigned int j = 0; j < outputData.size(); ++j)
        {
            CHECK(outputData[j] == copyOutputData[j]);
        }
    }
}

}