//
// Copyright © 2017-2023 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//

#include <CommonTestUtils.hpp>

#include <Graph.hpp>
#include <Network.hpp>

#include <reference/RefWorkloadFactory.hpp>

#include <doctest/doctest.h>

TEST_SUITE("OptimizedNetwork")
{
TEST_CASE("SerializeToDot")
{
    // build up the structure of the network
    armnn::INetworkPtr net(armnn::INetwork::Create());

    // Defines layers.
    auto input = net->AddInputLayer(0);
    auto add = net->AddElementwiseBinaryLayer(armnn::BinaryOperation::Add);
    auto output = net->AddOutputLayer(0);

    // Connects layers.
    input->GetOutputSlot(0).Connect(add->GetInputSlot(0));
    input->GetOutputSlot(0).Connect(add->GetInputSlot(1));
    add->GetOutputSlot(0).Connect(output->GetInputSlot(0));

    armnn::TensorShape shape({4});
    armnn::TensorInfo info(shape, armnn::DataType::Float32);
    input->GetOutputSlot(0).SetTensorInfo(info);
    add->GetOutputSlot(0).SetTensorInfo(info);

    armnn::IRuntime::CreationOptions options;
    armnn::IRuntimePtr runtime(armnn::IRuntime::Create(options));

    std::vector<armnn::BackendId> backends = {armnn::Compute::CpuRef};
    armnn::IOptimizedNetworkPtr optimizedNet = armnn::Optimize(*net, backends, runtime->GetDeviceSpec());

    std::ostringstream ss;
    optimizedNet->SerializeToDot(ss);

    auto inputId = input->GetGuid();
    auto addId = add->GetGuid();
    auto outputId = output->GetGuid();

    std::stringstream expected;
    expected <<
        "digraph Optimized {\n"
        "    node [shape=\"record\"];\n"
        "    edge [fontsize=8 fontcolor=\"blue\" fontname=\"arial-bold\"];\n"
        "    " << inputId << " [label=\"{Input|Guid : " << inputId << "\\lLayerType : Input\\l"
                              "BackendID : CpuRef\\l}\"];\n"
        "    " << addId << " [label=\"{ElementwiseBinary|Guid : " << addId << "\\lLayerType : ElementwiseBinary\\l"
                            "BackendID : CpuRef\\l}\"];\n"
        "    " << outputId << " [label=\"{Output|Guid : " << outputId << "\\lLayerType : Output\\l"
                               "BackendID : CpuRef\\l}\"];\n"
        "    " << inputId << " -> " << addId << " [label=< [4] >];\n"
        "    " << inputId << " -> " << addId << " [label=< [4] >];\n"
        "    " << addId << " -> " << outputId << " [label=< [4] >];\n"
        "}\n";

    CHECK(ss.str() == expected.str());
}

TEST_CASE("OptimizeValidateDeviceNonSupportLayerNoFallback")
{
    // build up the structure of the network
    armnn::INetworkPtr net(armnn::INetwork::Create());

    armnn::IConnectableLayer* input = net->AddInputLayer(0);

    // This layer configuration isn't supported by CpuAcc and isn't allowed to fall back,
    // so Optimize will throw an exception.
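    // The descriptor below is left at its defaults; with CpuAcc as the only entry in the
    // backend preference list, the optimizer has nowhere to place this layer, which is the
    // failure this test relies on.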
    armnn::NormalizationDescriptor descriptor;
    armnn::IConnectableLayer* normalize = net->AddNormalizationLayer(descriptor);

    armnn::IConnectableLayer* output = net->AddOutputLayer(0);

    input->GetOutputSlot(0).Connect(normalize->GetInputSlot(0));
    normalize->GetOutputSlot(0).Connect(output->GetInputSlot(0));

    input->GetOutputSlot(0).SetTensorInfo(armnn::TensorInfo({ 1, 1, 4, 4 }, armnn::DataType::Float32));
    normalize->GetOutputSlot(0).SetTensorInfo(armnn::TensorInfo({ 1, 1, 4, 4 }, armnn::DataType::Float32));

    armnn::IRuntime::CreationOptions options;
    armnn::IRuntimePtr runtime(armnn::IRuntime::Create(options));

    std::vector<armnn::BackendId> backends = { armnn::Compute::CpuAcc };
    std::vector<std::string> errMessages;

    try
    {
        Optimize(*net, backends, runtime->GetDeviceSpec(), armnn::OptimizerOptionsOpaque(), errMessages);
        FAIL("Should have thrown an exception.");
    }
    catch (const armnn::InvalidArgumentException&)
    {
        // Different exceptions are thrown on different backends
    }
    CHECK(errMessages.size() > 0);
}

TEST_CASE("OptimizeValidateDeviceNonSupportLayerWithFallback")
{
    // build up the structure of the network
    armnn::INetworkPtr net(armnn::INetwork::Create());

    armnn::IConnectableLayer* input = net->AddInputLayer(0);

    // This layer configuration isn't supported by CpuAcc but it is allowed to fall back to CpuRef.
    armnn::NormalizationDescriptor descriptor;
    armnn::IConnectableLayer* normalize = net->AddNormalizationLayer(descriptor);

    armnn::IConnectableLayer* output = net->AddOutputLayer(0);

    input->GetOutputSlot(0).Connect(normalize->GetInputSlot(0));
    normalize->GetOutputSlot(0).Connect(output->GetInputSlot(0));

    input->GetOutputSlot(0).SetTensorInfo(armnn::TensorInfo({ 1, 1, 4, 4 }, armnn::DataType::Float32));
    normalize->GetOutputSlot(0).SetTensorInfo(armnn::TensorInfo({ 1, 1, 4, 4 }, armnn::DataType::Float32));

    armnn::IRuntime::CreationOptions options;
    armnn::IRuntimePtr runtime(armnn::IRuntime::Create(options));

    std::vector<armnn::BackendId> backends = { armnn::Compute::CpuAcc, armnn::Compute::CpuRef };
    armnn::IOptimizedNetworkPtr optNet = armnn::Optimize(*net, backends, runtime->GetDeviceSpec());
    REQUIRE(optNet);

    armnn::Graph& graph = GetGraphForTesting(optNet.get());
    graph.AllocateDynamicBuffers();

    for (auto&& layer : graph)
    {
        // If NEON is enabled, Input and Output layers are supported by CpuAcc,
        // the other layers are supported by CpuRef.
        // If NEON is not enabled, all layers are supported by CpuRef.
#if defined(ARMCOMPUTENEON_ENABLED)
        if (layer->GetType() == armnn::LayerType::Output)
        {
            CHECK(layer->GetBackendId() == armnn::Compute::CpuAcc);
        }
        else if (layer->GetType() == armnn::LayerType::Normalization)
        {
            CHECK(layer->GetBackendId() == armnn::Compute::CpuRef);
        }
#else
        CHECK(layer->GetBackendId() == armnn::Compute::CpuRef);
#endif
    }
}

TEST_CASE("OptimizeValidateWorkloadsUndefinedComputeDevice")
{
    const armnn::TensorInfo desc({3, 5}, armnn::DataType::Float32);

    // build up the structure of the network
    armnn::INetworkPtr net(armnn::INetwork::Create());

    armnn::NormalizationDescriptor nmDesc;
    armnn::ActivationDescriptor acDesc;

    //    in
    //     |
    //    nm
    //   /  |
    //  ac  |
    //   \  |
    //    ml
    //     |
    //    sm
    //     |
    //    ot
    armnn::IConnectableLayer* layer = net->AddInputLayer(0, "in");
    layer->GetOutputSlot(0).SetTensorInfo(desc);

    armnn::IConnectableLayer* const normLayer = net->AddNormalizationLayer(nmDesc, "nm");

    layer->GetOutputSlot(0).Connect(normLayer->GetInputSlot(0));
    normLayer->GetOutputSlot(0).SetTensorInfo(desc);

    layer = net->AddActivationLayer(acDesc, "ac");

    normLayer->GetOutputSlot(0).Connect(layer->GetInputSlot(0));
    layer->GetOutputSlot(0).SetTensorInfo(desc);

    armnn::IConnectableLayer* prevLayer = layer;
    layer = net->AddElementwiseBinaryLayer(armnn::BinaryOperation::Mul, "ml");

    prevLayer->GetOutputSlot(0).Connect(layer->GetInputSlot(0));
    normLayer->GetOutputSlot(0).Connect(layer->GetInputSlot(1));
    layer->GetOutputSlot(0).SetTensorInfo(desc);

    prevLayer = layer;
    armnn::SoftmaxDescriptor softmaxDescriptor;
    layer = net->AddSoftmaxLayer(softmaxDescriptor, "sm");

    prevLayer->GetOutputSlot(0).Connect(layer->GetInputSlot(0));
    layer->GetOutputSlot(0).SetTensorInfo(desc);

    prevLayer = layer;
    layer = net->AddOutputLayer(0, "ot");

    prevLayer->GetOutputSlot(0).Connect(layer->GetInputSlot(0));

    armnn::IRuntime::CreationOptions options;
    armnn::IRuntimePtr runtime(armnn::IRuntime::Create(options));

    std::vector<armnn::BackendId> backends = { armnn::Compute::Undefined };
    std::vector<std::string> errMessages;

    try
    {
        Optimize(*net, backends, runtime->GetDeviceSpec(),
                 armnn::OptimizerOptionsOpaque(), errMessages);
        FAIL("Should have thrown an exception.");
    }
    catch (const armnn::InvalidArgumentException&)
    {
        // Different exceptions are thrown on different backends
    }
    CHECK(errMessages.size() > 0);
}

TEST_CASE("OptimizeValidateWorkloadsUndefinedComputeDeviceWithFallback")
{
    const armnn::TensorInfo desc({3, 5}, armnn::DataType::Float32);

    // build up the structure of the network
    armnn::INetworkPtr net(armnn::INetwork::Create());

    armnn::NormalizationDescriptor nmDesc;
    armnn::ActivationDescriptor acDesc;

    //    in
    //     |
    //    nm
    //   /  |
    //  ac  |
    //   \  |
    //    ml
    //     |
    //    sm
    //     |
    //    ot
    armnn::IConnectableLayer* layer = net->AddInputLayer(0, "in");
    layer->GetOutputSlot(0).SetTensorInfo(desc);

    armnn::IConnectableLayer* const normLayer = net->AddNormalizationLayer(nmDesc, "nm");

    layer->GetOutputSlot(0).Connect(normLayer->GetInputSlot(0));
    normLayer->GetOutputSlot(0).SetTensorInfo(desc);

    layer = net->AddActivationLayer(acDesc, "ac");

    normLayer->GetOutputSlot(0).Connect(layer->GetInputSlot(0));
    layer->GetOutputSlot(0).SetTensorInfo(desc);

    armnn::IConnectableLayer* prevLayer = layer;
    layer = net->AddElementwiseBinaryLayer(armnn::BinaryOperation::Mul, "ml");

    prevLayer->GetOutputSlot(0).Connect(layer->GetInputSlot(0));
    normLayer->GetOutputSlot(0).Connect(layer->GetInputSlot(1));
    layer->GetOutputSlot(0).SetTensorInfo(desc);

    prevLayer = layer;
    armnn::SoftmaxDescriptor softmaxDescriptor;
    layer = net->AddSoftmaxLayer(softmaxDescriptor, "sm");

    prevLayer->GetOutputSlot(0).Connect(layer->GetInputSlot(0));
    layer->GetOutputSlot(0).SetTensorInfo(desc);

    prevLayer = layer;
    layer = net->AddOutputLayer(0, "ot");

    prevLayer->GetOutputSlot(0).Connect(layer->GetInputSlot(0));

    armnn::IRuntime::CreationOptions options;
    armnn::IRuntimePtr runtime(armnn::IRuntime::Create(options));

    std::vector<armnn::BackendId> backends = { armnn::Compute::Undefined, armnn::Compute::CpuRef };

    armnn::IOptimizedNetworkPtr optNet = armnn::Optimize(*net, backends, runtime->GetDeviceSpec());
    CHECK(optNet);

    armnn::Graph& graph = GetGraphForTesting(optNet.get());
    graph.AllocateDynamicBuffers();

    // validate workloads
    armnn::RefWorkloadFactory fact;
    for (auto&& layer : graph)
    {
        CHECK(layer->GetBackendId() == armnn::Compute::CpuRef);
        CHECK_NOTHROW(layer->CreateWorkload(fact));
    }
}

TEST_CASE("OptimizeValidateWorkloadsDuplicateComputeDeviceWithFallback")
{
    // build up the structure of the network
    armnn::INetworkPtr net(armnn::INetwork::Create());

    armnn::IConnectableLayer* input = net->AddInputLayer(0);

    // This layer configuration isn't supported by CpuAcc but it is allowed to fall back to CpuRef.
    armnn::NormalizationDescriptor descriptor;
    armnn::IConnectableLayer* normalize = net->AddNormalizationLayer(descriptor);

    armnn::IConnectableLayer* output = net->AddOutputLayer(0);

    input->GetOutputSlot(0).Connect(normalize->GetInputSlot(0));
    normalize->GetOutputSlot(0).Connect(output->GetInputSlot(0));

    input->GetOutputSlot(0).SetTensorInfo(armnn::TensorInfo({ 1, 1, 4, 4 }, armnn::DataType::Float32));
    normalize->GetOutputSlot(0).SetTensorInfo(armnn::TensorInfo({ 1, 1, 4, 4 }, armnn::DataType::Float32));

    armnn::IRuntime::CreationOptions options;
    armnn::IRuntimePtr runtime(armnn::IRuntime::Create(options));

    std::vector<armnn::BackendId> backends = { armnn::Compute::CpuAcc,
                                               armnn::Compute::GpuAcc,
                                               armnn::Compute::CpuRef };

    armnn::IOptimizedNetworkPtr optNet = armnn::Optimize(*net, backends, runtime->GetDeviceSpec());
    REQUIRE(optNet);

    armnn::Graph& graph = GetGraphForTesting(optNet.get());
    graph.AllocateDynamicBuffers();

    for (auto&& layer : graph)
    {
        // If NEON is enabled, Input and Output layers are supported by CpuAcc,
        // the other layers are supported by CpuRef.
        // If only CL is enabled, Input and Output layers are supported by GpuAcc,
        // the other layers are supported by CpuRef.
        // If neither NEON nor CL is enabled, all layers are supported by CpuRef.
#if defined(ARMCOMPUTENEON_ENABLED)
        if (layer->GetType() == armnn::LayerType::Input)
        {
            CHECK(layer->GetBackendId() == armnn::Compute::CpuRef);
        }
        else if (layer->GetType() == armnn::LayerType::Output)
        {
            CHECK(layer->GetBackendId() == armnn::Compute::CpuAcc);
        }
        else if (layer->GetType() == armnn::LayerType::Normalization)
        {
            CHECK(layer->GetBackendId() == armnn::Compute::CpuRef);
        }
#elif defined(ARMCOMPUTECL_ENABLED)
        if (layer->GetType() == armnn::LayerType::Input)
        {
            CHECK(layer->GetBackendId() == armnn::Compute::CpuRef);
        }
        else if (layer->GetType() == armnn::LayerType::Output)
        {
            CHECK(layer->GetBackendId() == armnn::Compute::GpuAcc);
        }
        else if (layer->GetType() == armnn::LayerType::Normalization)
        {
            CHECK(layer->GetBackendId() == armnn::Compute::CpuRef);
        }
#else
        CHECK(layer->GetBackendId() == armnn::Compute::CpuRef);
#endif
    }
}

TEST_CASE("OptimizeNetworkCopy")
{
    armnn::IRuntime::CreationOptions options;
    armnn::IRuntimePtr runtime = armnn::IRuntime::Create(options);
    std::vector<armnn::NetworkId> networkIds;

    const std::string layerName("convolution2d");
    const armnn::TensorInfo inputInfo ({ 1, 5, 5, 1 }, armnn::DataType::Float32);
    const armnn::TensorInfo outputInfo({ 1, 2, 2, 1 }, armnn::DataType::Float32);

    const armnn::TensorInfo weightsInfo({ 1, 3, 3, 1 }, armnn::DataType::Float32, 0.0f, 0, true);
    const armnn::TensorInfo biasesInfo ({ 1 }, armnn::DataType::Float32, 0.0f, 0, true);

    std::vector<float> weightsData = GenerateRandomData<float>(weightsInfo.GetNumElements());
    armnn::ConstTensor weights(weightsInfo, weightsData);

    std::vector<float> biasesData = GenerateRandomData<float>(biasesInfo.GetNumElements());
    armnn::ConstTensor biases(biasesInfo, biasesData);

    armnn::Convolution2dDescriptor descriptor;
    descriptor.m_PadLeft     = 1;
    descriptor.m_PadRight    = 1;
    descriptor.m_PadTop      = 1;
    descriptor.m_PadBottom   = 1;
    descriptor.m_StrideX     = 2;
    descriptor.m_StrideY     = 2;
    descriptor.m_DilationX   = 2;
    descriptor.m_DilationY   = 2;
    descriptor.m_BiasEnabled = true;
    descriptor.m_DataLayout  = armnn::DataLayout::NHWC;

    armnn::INetworkPtr network = armnn::INetwork::Create();
    armnn::IConnectableLayer* const inputLayer = network->AddInputLayer(0);

    armnn::IConnectableLayer* const convLayer = network->AddConvolution2dLayer(descriptor, layerName.c_str());
    armnn::IConnectableLayer* const outputLayer = network->AddOutputLayer(0);
    armnn::IConnectableLayer* weightsLayer = network->AddConstantLayer(weights);
    armnn::IConnectableLayer* biasLayer = network->AddConstantLayer(biases);

    weightsLayer->GetOutputSlot(0).SetTensorInfo(weightsInfo);
    weightsLayer->GetOutputSlot(0).Connect(convLayer->GetInputSlot(1u));

    biasLayer->GetOutputSlot(0).SetTensorInfo(biasesInfo);
    biasLayer->GetOutputSlot(0).Connect(convLayer->GetInputSlot(2u));

    inputLayer->GetOutputSlot(0).Connect(convLayer->GetInputSlot(0));
    convLayer->GetOutputSlot(0).Connect(outputLayer->GetInputSlot(0));

    inputLayer->GetOutputSlot(0).SetTensorInfo(inputInfo);
    convLayer->GetOutputSlot(0).SetTensorInfo(outputInfo);

    std::vector<armnn::BackendId> preferredBackends { "CpuRef" };
    armnn::ModelOptions modelOptions;
    armnn::OptimizerOptionsOpaque optimizerOptions(false, false, false,
                                                   false, modelOptions, false);
    std::vector<std::string> errorMessages;

    // Optimize the network.
    armnn::IOptimizedNetworkPtr optNet = Optimize(*network,
                                                  preferredBackends,
                                                  runtime->GetDeviceSpec(),
                                                  optimizerOptions,
                                                  armnn::Optional<std::vector<std::string>&>(errorMessages));

    for (unsigned int i = 0; i < 2; ++i)
    {
        armnn::ModelOptions optimizedModelOptions;
        auto copy = armnn::IOptimizedNetworkPtr(new armnn::IOptimizedNetwork(*optNet.get(), optimizedModelOptions),
                                                &armnn::IOptimizedNetwork::Destroy);

        CHECK(copy);

        armnn::NetworkId netId;
        std::string errorMessage;

        CHECK(armnn::Status::Success == runtime->LoadNetwork(netId, std::move(copy), errorMessage));

        // Record the networkID for the loaded network
        networkIds.emplace_back(netId);
    }
    armnn::NetworkId optNetId;
    std::string errorMessage;

    // Load the original optNet
    CHECK(armnn::Status::Success == runtime->LoadNetwork(optNetId, std::move(optNet), errorMessage));

    std::vector<float> inputData = GenerateRandomData<float>(runtime->GetInputTensorInfo(optNetId, 0).GetNumElements());
    std::vector<float> outputData(runtime->GetOutputTensorInfo(optNetId, 0).GetNumElements());

    armnn::TensorInfo inputTensorInfo = runtime->GetInputTensorInfo(optNetId, 0);
    inputTensorInfo.SetConstant(true);
    armnn::InputTensors inputTensors
    {
        {
            0, armnn::ConstTensor(inputTensorInfo, inputData.data())
        }
    };
    armnn::OutputTensors outputTensors
    {
        {
            0, armnn::Tensor(runtime->GetOutputTensorInfo(optNetId, 0), outputData.data())
        }
    };
    runtime->EnqueueWorkload(optNetId, inputTensors, outputTensors);
    runtime->UnloadNetwork(optNetId);

    // Run each of the loaded copies and compare their outputs against the original network's outputs
    for (unsigned int i = 0; i < networkIds.size(); ++i)
    {
        armnn::NetworkId netId = networkIds[i];
        std::vector<float> copyOutputData(runtime->GetOutputTensorInfo(netId, 0).GetNumElements());

        armnn::TensorInfo inputTensorInfo2 = runtime->GetInputTensorInfo(netId, 0);
        inputTensorInfo2.SetConstant(true);
        armnn::InputTensors copyInputTensors
        {
            {
                0, armnn::ConstTensor(inputTensorInfo2, inputData.data())
            }
        };
        armnn::OutputTensors copyOutputTensors
        {
            {
                0, armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), copyOutputData.data())
            }
        };
        runtime->EnqueueWorkload(netId, copyInputTensors, copyOutputTensors);
        runtime->UnloadNetwork(netId);

        // Check results are identical to "original" version
        for (unsigned int j = 0; j < outputData.size(); ++j)
        {
            CHECK(outputData[j] == copyOutputData[j]);
        }
    }
}

}