//
// Copyright © 2022 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//

#include "NetworkUtils.hpp"

#include "SubgraphViewSelector.hpp"

#include <armnn/Exceptions.hpp>
#include <armnn/BackendRegistry.hpp>

namespace armnn
{

namespace
{

UpdateOutputSlotToFp32(OutputSlot & outputSlot)19 void UpdateOutputSlotToFp32(OutputSlot& outputSlot)
20 {
21 const TensorInfo& origTensorInfo = outputSlot.GetTensorInfo();
22 TensorInfo newTensorInfo(origTensorInfo);
23 newTensorInfo.SetDataType(DataType::Float32);
24 outputSlot.SetTensorInfo(newTensorInfo);
25 }
ChangeOutputFp16ToFp32(Layer & layer)27 void ChangeOutputFp16ToFp32(Layer& layer)
28 {
29 for (auto&& outputSlot = layer.BeginOutputSlots(); outputSlot != layer.EndOutputSlots(); ++outputSlot)
30 {
31 if (outputSlot->GetTensorInfo().GetDataType() == DataType::Float16)
32 {
33 UpdateOutputSlotToFp32(*outputSlot);
34 }
35 }
36 }

} // anonymous namespace

InsertConvertFp16ToFp32LayersBefore(Graph & graph,Layer & layer,bool expectCorrectInputType)40 std::vector<ConvertFp16ToFp32Layer*> InsertConvertFp16ToFp32LayersBefore(Graph& graph,
41 Layer& layer,
42 bool expectCorrectInputType)
43 {
44 std::vector<ConvertFp16ToFp32Layer*> convertLayers;
45 convertLayers.reserve(layer.GetNumInputSlots());
46
47 // Insert a ConvertFp16ToFp32Layer before each input slot
48 for (auto&& inputSlot = layer.BeginInputSlots(); inputSlot != layer.EndInputSlots(); ++inputSlot)
49 {
50 bool allowInsert = true;
51 if (expectCorrectInputType)
52 {
53 // Only insert ConvertFp16ToFp32Layer before FP16 input slots
54 OutputSlot* connectedOutputSlot = inputSlot->GetConnectedOutputSlot();
55 allowInsert =
56 connectedOutputSlot && connectedOutputSlot->GetTensorInfo().GetDataType() == DataType::Float16;
57 }
58
59 if (allowInsert)
60 {
61 const std::string name =
62 std::string("convert_fp16_to_fp32-" + std::to_string(inputSlot->GetSlotIndex()) + "-") +
63 layer.GetName();
64 ConvertFp16ToFp32Layer* convertLayer =
65 graph.InsertNewLayer<ConvertFp16ToFp32Layer>(*inputSlot, name.c_str());
66
67 TensorInfo convertInfo = convertLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo();
68 convertInfo.SetDataType(DataType::Float32);
69
70 convertLayer->GetOutputSlot().SetTensorInfo(convertInfo);
71
72 convertLayers.emplace_back(convertLayer);
73 }
74 }
75
76 return convertLayers;
77 }
InsertConvertFp32ToFp16LayersAfter(Graph & graph,Layer & layer)79 std::vector<ConvertFp32ToFp16Layer*> InsertConvertFp32ToFp16LayersAfter(Graph& graph, Layer& layer)
80 {
81 const unsigned int numOutputSlots = layer.GetNumOutputSlots();
82
83 std::vector<ConvertFp32ToFp16Layer*> convertLayers;
84 convertLayers.reserve(numOutputSlots);
85
86 // Update FP16 output slots to FP32 on current layer
87 ChangeOutputFp16ToFp32(layer);
88
89 // Insert a ConvertFp32ToFp16Layer after each FP32 output slot
90 for (unsigned int slotIndex = 0u; slotIndex < numOutputSlots; ++slotIndex)
91 {
92 OutputSlot& outputSlot = layer.GetOutputSlot(slotIndex);
93 if(outputSlot.GetTensorInfo().GetDataType() == DataType::Float32)
94 {
95 const std::string name =
96 std::string("convert_fp32_to_fp16-" + std::to_string(slotIndex) + "-") + layer.GetName();
97 ConvertFp32ToFp16Layer* convertLayer =
98 graph.InsertNewLayer<ConvertFp32ToFp16Layer>(outputSlot, name.c_str());
99
100 TensorInfo convertInfo = convertLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo();
101 convertInfo.SetDataType(DataType::Float16);
102
103 convertLayer->GetOutputSlot().SetTensorInfo(convertInfo);
104
105 convertLayers.emplace_back(convertLayer);
106 }
107 }
108
109 return convertLayers;
110 }
InsertDebugLayerAfter(Graph & graph,Layer & layer,bool toFile)112 std::vector<DebugLayer*> InsertDebugLayerAfter(Graph& graph, Layer& layer, bool toFile)
113 {
114 std::vector<DebugLayer*> debugLayers;
115 debugLayers.reserve(layer.GetNumOutputSlots());
116
117 // Connect a DebugLayer to each output slot of the layer
118 uint32_t outputSlotIdx = 0;
119 for (auto outputSlot = layer.BeginOutputSlots(); outputSlot != layer.EndOutputSlots(); ++outputSlot)
120 {
121 const std::string debugName = std::string("DebugLayerAfter") + layer.GetNameStr() + "_" +
122 std::to_string(outputSlotIdx);
123
124 DebugLayer* debugLayer =
125 graph.InsertNewLayer<DebugLayer>(*outputSlot, debugName.c_str(), toFile);
126
127 // Sets output tensor info for the debug layer.
128 ARMNN_ASSERT(debugLayer->GetInputSlot(0).GetConnectedOutputSlot() == &(*outputSlot));
129 TensorInfo debugInfo = debugLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo();
130
131 debugLayer->GetOutputSlot().SetTensorInfo(debugInfo);
132
133 // NOTE: It is OK to do this because DebugLayer is only supported on CpuRef
134 debugLayer->SetBackendId(Compute::CpuRef);
135
136 debugLayers.emplace_back(debugLayer);
137
138 ++outputSlotIdx;
139 }
140
141 return debugLayers;
142 }

} // namespace armnn