//
// Copyright © 2022 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//

#include "NetworkUtils.hpp"

#include "SubgraphViewSelector.hpp"

#include <armnn/Exceptions.hpp>
#include <armnn/BackendRegistry.hpp>

namespace armnn
{

namespace
{

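// Replaces the data type of an output slot's TensorInfo with Float32 while
// keeping the rest of the tensor metadata unchanged.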
void UpdateOutputSlotToFp32(OutputSlot& outputSlot)
{
    const TensorInfo& origTensorInfo = outputSlot.GetTensorInfo();
    TensorInfo newTensorInfo(origTensorInfo);
    newTensorInfo.SetDataType(DataType::Float32);
    outputSlot.SetTensorInfo(newTensorInfo);
}

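// Switches every FP16 output slot of the given layer to FP32.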
void ChangeOutputFp16ToFp32(Layer& layer)
{
    for (auto&& outputSlot = layer.BeginOutputSlots(); outputSlot != layer.EndOutputSlots(); ++outputSlot)
    {
        if (outputSlot->GetTensorInfo().GetDataType() == DataType::Float16)
        {
            UpdateOutputSlotToFp32(*outputSlot);
        }
    }
}

} // anonymous namespace

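// Inserts a ConvertFp16ToFp32Layer in front of each input slot of the given
// layer (or, when expectCorrectInputType is set, only in front of slots fed by
// FP16 outputs) and returns the newly created conversion layers.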
std::vector<ConvertFp16ToFp32Layer*> InsertConvertFp16ToFp32LayersBefore(Graph& graph,
                                                                         Layer& layer,
                                                                         bool expectCorrectInputType)
{
    std::vector<ConvertFp16ToFp32Layer*> convertLayers;
    convertLayers.reserve(layer.GetNumInputSlots());

    // Insert a ConvertFp16ToFp32Layer before each input slot (only before
    // FP16-fed slots when expectCorrectInputType is set)
    for (auto&& inputSlot = layer.BeginInputSlots(); inputSlot != layer.EndInputSlots(); ++inputSlot)
    {
        bool allowInsert = true;
        if (expectCorrectInputType)
        {
            // Only insert ConvertFp16ToFp32Layer before FP16 input slots
            OutputSlot* connectedOutputSlot = inputSlot->GetConnectedOutputSlot();
            allowInsert =
                connectedOutputSlot && connectedOutputSlot->GetTensorInfo().GetDataType() == DataType::Float16;
        }

        if (allowInsert)
        {
            const std::string name =
                std::string("convert_fp16_to_fp32-" + std::to_string(inputSlot->GetSlotIndex()) + "-") +
                layer.GetName();
            ConvertFp16ToFp32Layer* convertLayer =
                graph.InsertNewLayer<ConvertFp16ToFp32Layer>(*inputSlot, name.c_str());

            TensorInfo convertInfo = convertLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo();
            convertInfo.SetDataType(DataType::Float32);

            convertLayer->GetOutputSlot().SetTensorInfo(convertInfo);

            convertLayers.emplace_back(convertLayer);
        }
    }

    return convertLayers;
}

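// Converts the layer's FP16 output slots to FP32 and inserts a
// ConvertFp32ToFp16Layer after each FP32 output slot, returning the newly
// created conversion layers.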
std::vector<ConvertFp32ToFp16Layer*> InsertConvertFp32ToFp16LayersAfter(Graph& graph, Layer& layer)
{
    const unsigned int numOutputSlots = layer.GetNumOutputSlots();

    std::vector<ConvertFp32ToFp16Layer*> convertLayers;
    convertLayers.reserve(numOutputSlots);

    // Update FP16 output slots to FP32 on the current layer
    ChangeOutputFp16ToFp32(layer);

    // Insert a ConvertFp32ToFp16Layer after each FP32 output slot
    for (unsigned int slotIndex = 0u; slotIndex < numOutputSlots; ++slotIndex)
    {
        OutputSlot& outputSlot = layer.GetOutputSlot(slotIndex);
        if (outputSlot.GetTensorInfo().GetDataType() == DataType::Float32)
        {
            const std::string name =
                std::string("convert_fp32_to_fp16-" + std::to_string(slotIndex) + "-") + layer.GetName();
            ConvertFp32ToFp16Layer* convertLayer =
                graph.InsertNewLayer<ConvertFp32ToFp16Layer>(outputSlot, name.c_str());

            TensorInfo convertInfo = convertLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo();
            convertInfo.SetDataType(DataType::Float16);

            convertLayer->GetOutputSlot().SetTensorInfo(convertInfo);

            convertLayers.emplace_back(convertLayer);
        }
    }

    return convertLayers;
}

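// Inserts a DebugLayer after each output slot of the given layer and pins it
// to CpuRef, the only backend that supports DebugLayer.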
std::vector<DebugLayer*> InsertDebugLayerAfter(Graph& graph, Layer& layer, bool toFile)
{
    std::vector<DebugLayer*> debugLayers;
    debugLayers.reserve(layer.GetNumOutputSlots());

    // Connect a DebugLayer to each output slot of the layer
    uint32_t outputSlotIdx = 0;
    for (auto outputSlot = layer.BeginOutputSlots(); outputSlot != layer.EndOutputSlots(); ++outputSlot)
    {
        const std::string debugName = std::string("DebugLayerAfter") + layer.GetNameStr() + "_" +
            std::to_string(outputSlotIdx);

        DebugLayer* debugLayer =
            graph.InsertNewLayer<DebugLayer>(*outputSlot, debugName.c_str(), toFile);

        // Sets output tensor info for the debug layer.
        ARMNN_ASSERT(debugLayer->GetInputSlot(0).GetConnectedOutputSlot() == &(*outputSlot));
        TensorInfo debugInfo = debugLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo();

        debugLayer->GetOutputSlot().SetTensorInfo(debugInfo);

        // NOTE: It is OK to do this because DebugLayer is only supported on CpuRef
        debugLayer->SetBackendId(Compute::CpuRef);

        debugLayers.emplace_back(debugLayer);

        ++outputSlotIdx;
    }

    return debugLayers;
}

} // namespace armnn