//
// Copyright © 2017 Arm Ltd. All rights reserved.
// SPDX-License-Identifier: MIT
//

#include "Pooling2d.hpp"

#include <armnn/Exceptions.hpp>
#include <armnn/Types.hpp>

#include <armnnUtils/DataLayoutIndexed.hpp>
#include <armnn/utility/NumericCast.hpp>

#include <cmath>
#include <limits>
#include <algorithm>
#include <functional>

namespace
{
    using PoolingAlgorithm = armnn::PoolingAlgorithm;

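    // Returns the starting value for the pooling accumulator: the lowest representable
    // float for Max (so any real input replaces it), and zero for Average and L2, which
    // accumulate a sum before the final division.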
    float DefaultInitializer(PoolingAlgorithm algorithm)
    {
        switch (algorithm)
        {
            case PoolingAlgorithm::Max:
            {
                return std::numeric_limits<float>::lowest();
            }
            case PoolingAlgorithm::Average:
            case PoolingAlgorithm::L2:
            {
                return 0.0f;
            }
            default:
            {
                throw armnn::InvalidArgumentException("Unsupported pooling algorithm");
            }
        }
    }

    using Accumulator = std::function<void(float & accu, float value)>;

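    // Returns the per-element accumulation step for the given algorithm: running
    // maximum for Max, running sum for Average, and sum of squares for L2.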
    Accumulator GetAccumulator(PoolingAlgorithm algorithm)
    {
        switch (algorithm)
        {
            case PoolingAlgorithm::Max:
            {
                return [](float & accu, float value) {
                    if (value > accu) {
                        accu = value;
                    }
                };
            }

            case PoolingAlgorithm::Average:
            {
                return [](float & accu, float value) {
                    accu += value;
                };
            }

            case PoolingAlgorithm::L2:
            {
                return [](float & accu, float value) {
                    accu += (value*value);
                };
            }

            default:
            {
                throw armnn::InvalidArgumentException("Unsupported pooling algorithm");
            }
        }
    }

    using Executor = std::function<void(float & accumulated, float kernelSize)>;

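    // Returns the finalisation step applied after the kernel window has been
    // accumulated: a no-op for Max, division by the pool area for Average, and the
    // square root of the mean of squares for L2.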
    Executor GetExecutor(PoolingAlgorithm algorithm)
    {
        switch (algorithm)
        {
            case PoolingAlgorithm::Max:
            {
                return [](float & /*accumulated*/, float /*kernelSize*/) {};
            }

            case PoolingAlgorithm::Average:
            {
                return [](float & accumulated, float kernelSize) {
                    accumulated /= kernelSize;
                };
            }

            case PoolingAlgorithm::L2:
            {
                return [](float & accumulated, float kernelSize) {
                    accumulated = sqrtf(accumulated / kernelSize);
                };
            }

            default:
            {
                throw armnn::InvalidArgumentException("Unsupported pooling algorithm");
            }
        }
    }

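    // Returns true when the window [start, end) lies entirely in the padding region,
    // i.e. it ends before the first real input element or starts beyond the last one.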
    bool OnPaddingOnly(int start, int end, int maxRange)
    {
        if (end <= 0 || start > maxRange)
        {
            return true;
        }
        else
        {
            return false;
        }
    }

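    // Clamps [start, end) to the valid input extent [0, maxRange] and returns true if
    // any clamping was applied, i.e. the window overlapped the padding.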
    bool ClampRange(int & start, int & end, int maxRange)
    {
        if (start < 0 || end > maxRange)
        {
            start = std::min(std::max(start, 0), maxRange);
            end   = std::min(std::max(end, 0), maxRange);
            return true;
        }
        else
        {
            return false;
        }
    }
}

using namespace armnnUtils;

namespace armnn
{
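// Applies Max, Average or L2 pooling over the spatial dimensions of the input tensor,
// reading values through the Decoder and writing pooled results through the Encoder.
// Supports NHWC and NCHW data layouts and the IgnoreValue and Exclude padding methods.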
void Pooling2d(Decoder<float>& rInputDecoder,
               Encoder<float>& rOutputEncoder,
               const TensorInfo& inputInfo,
               const TensorInfo& outputInfo,
               const Pooling2dDescriptor& params)
{
    const DataLayoutIndexed dataLayout(params.m_DataLayout);
    auto channelsIndex = dataLayout.GetChannelsIndex();
    auto heightIndex = dataLayout.GetHeightIndex();
    auto widthIndex = dataLayout.GetWidthIndex();

    const int batchSize    = armnn::numeric_cast<int>(outputInfo.GetShape()[0]);
    const int channels     = armnn::numeric_cast<int>(outputInfo.GetShape()[channelsIndex]);
    const int heightOutput = armnn::numeric_cast<int>(outputInfo.GetShape()[heightIndex]);
    const int widthOutput  = armnn::numeric_cast<int>(outputInfo.GetShape()[widthIndex]);
    const int heightInput  = armnn::numeric_cast<int>(inputInfo.GetShape()[heightIndex]);
    const int widthInput   = armnn::numeric_cast<int>(inputInfo.GetShape()[widthIndex]);
    const int padLeft      = armnn::numeric_cast<int>(params.m_PadLeft);
    const int padRight     = armnn::numeric_cast<int>(params.m_PadRight);
    const int padTop       = armnn::numeric_cast<int>(params.m_PadTop);
    const int padBottom    = armnn::numeric_cast<int>(params.m_PadBottom);
    const int strideX      = armnn::numeric_cast<int>(params.m_StrideX);
    const int strideY      = armnn::numeric_cast<int>(params.m_StrideY);
    const int poolHeight   = armnn::numeric_cast<int>(params.m_PoolHeight);
    const int poolWidth    = armnn::numeric_cast<int>(params.m_PoolWidth);

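    // Note: the output spatial dimensions are assumed to follow the usual pooling
    // relationship (subject to the descriptor's output shape rounding), e.g.
    //   heightOutput = (heightInput + padTop + padBottom - poolHeight) / strideY + 1
    //   widthOutput  = (widthInput  + padLeft + padRight  - poolWidth) / strideX + 1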
    float defaultInitializer = DefaultInitializer(params.m_PoolType);

    Accumulator accumulate = GetAccumulator(params.m_PoolType);
    Executor execute       = GetExecutor(params.m_PoolType);

    // Check the supported padding methods outside the loop to simplify the inner loop.
    if (params.m_PaddingMethod != PaddingMethod::Exclude &&
        params.m_PaddingMethod != PaddingMethod::IgnoreValue)
    {
        throw armnn::InvalidArgumentException("Unsupported padding type");
    }

    const std::vector<float> decodedInputVec = rInputDecoder.DecodeTensor(inputInfo.GetShape());

    for (int n = 0; n < batchSize; n++)
    {
        for (int c = 0; c < channels; c++)
        {
            for (int yOutput = 0; yOutput < heightOutput; yOutput++)
            {
                // Calculate values independent of the x axis.
                int hstart = (yOutput * strideY) - padTop;
                int hend = hstart + poolHeight;
                // Clamp the pooling region inside the valid input area (which includes the padding).
                // This is necessary because the final pooling in a row may overlap beyond the padding.
                hend = std::min(hend, heightInput + padBottom);

                int height = hend - hstart;
                bool hclamped = ClampRange(hstart, hend, heightInput);

                for (int xOutput = 0; xOutput < widthOutput; xOutput++)
                {
                    int wstart = (xOutput * strideX) - padLeft;
                    int wend = wstart + poolWidth;

                    // Clamp the pooling region inside the valid input area (which includes the padding).
                    // This is necessary because the final pooling in a row may overlap beyond the padding.
                    wend = std::min(wend, widthInput + padRight);

                    float result = defaultInitializer;
                    float poolAreaSize = armnn::numeric_cast<float>(height * (wend - wstart));

                    // Special case: if the kernel window covers only padding and no real input
                    //               values (possible when the padding is at least as large as the
                    //               kernel), the result is set to zero by convention, since every
                    //               value in the window is padding and is ignored.
                    if (OnPaddingOnly(hstart, hend, heightInput) ||
                        OnPaddingOnly(wstart, wend, widthInput))
                    {
                        result = 0.0f;

                        int outputIndex;

                        if (dataLayout.GetDataLayout() == DataLayout::NHWC)
                        {
                            outputIndex = n * heightOutput * widthOutput * channels +
                                          yOutput * widthOutput * channels +
                                          xOutput * channels +
                                          c;
                        }
                        else
                        {
                            outputIndex = n * heightOutput * widthOutput * channels +
                                          c * heightOutput * widthOutput +
                                          yOutput * widthOutput +
                                          xOutput;
                        }

                        // operator[] repositions the encoder at outputIndex; Set() then writes there.
                        rOutputEncoder[static_cast<unsigned int>(outputIndex)];
                        rOutputEncoder.Set(result);
                        continue;
                    }

                    bool clamped = hclamped |= ClampRange(wstart, wend, widthInput);

                    if (clamped && params.m_PaddingMethod == PaddingMethod::Exclude)
                    {
                        // When the padding is excluded, the effective kernel is smaller,
                        // so the divisor is reduced to the clamped window size.
                        poolAreaSize = armnn::numeric_cast<float>((hend - hstart) * (wend - wstart));
                    }

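                    // Accumulate over the (possibly clamped) pooling window, then apply
                    // the algorithm-specific finalisation via the executor below.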
                    for (auto yInput = hstart; yInput < hend; yInput++)
                    {
                        for (auto xInput = wstart; xInput < wend; xInput++)
                        {
                            int inputIndex;
                            if (dataLayout.GetDataLayout() == DataLayout::NHWC)
                            {
                                inputIndex = n * heightInput * widthInput * channels +
                                             yInput * widthInput * channels +
                                             xInput * channels +
                                             c;
                            }
                            else
                            {
                                inputIndex = n * heightInput * widthInput * channels +
                                             c * heightInput * widthInput +
                                             yInput * widthInput +
                                             xInput;
                            }

                            accumulate(result, decodedInputVec[static_cast<unsigned int>(inputIndex)]);
                        }
                    }

                    execute(result, poolAreaSize);

                    int outputIndex;

                    if (dataLayout.GetDataLayout() == DataLayout::NHWC)
                    {
                        outputIndex = n * heightOutput * widthOutput * channels +
                                      yOutput * widthOutput * channels +
                                      xOutput * channels +
                                      c;
                    }
                    else
                    {
                        outputIndex = n * heightOutput * widthOutput * channels +
                                      c * heightOutput * widthOutput +
                                      yOutput * widthOutput +
                                      xOutput;
                    }

                    rOutputEncoder[static_cast<unsigned int>(outputIndex)];
                    rOutputEncoder.Set(result);
                }
            }
        }
    }
}

} //namespace armnn