//
// Copyright © 2021 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//

#include "Pooling3d.hpp"

#include <armnn/Exceptions.hpp>
#include <armnn/Types.hpp>

#include <armnnUtils/DataLayoutIndexed.hpp>
#include <armnn/utility/NumericCast.hpp>

#include <limits>
#include <algorithm>
#include <functional>
#include <cmath> // for sqrtf

namespace
{
    using PoolingAlgorithm = armnn::PoolingAlgorithm;

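    // Returns the value used to initialise the accumulator for the given pooling
    // algorithm: the lowest representable float for Max, and zero for Average and L2.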
    float DefaultInitializer(PoolingAlgorithm algorithm)
    {
        switch (algorithm)
        {
            case PoolingAlgorithm::Max:
            {
                return std::numeric_limits<float>::lowest();
            }
            case PoolingAlgorithm::Average:
            case PoolingAlgorithm::L2:
            {
                return 0.0f;
            }
            default:
            {
                throw armnn::InvalidArgumentException("Unsupported pooling algorithm");
            }
        }
    }

    using Accumulator = std::function<void(float & accu, float value)>;

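    // Returns a functor that folds one input value into the running accumulator:
    // running maximum for Max, running sum for Average, and sum of squares for L2.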
    Accumulator GetAccumulator(PoolingAlgorithm algorithm)
    {
        switch (algorithm)
        {
            case PoolingAlgorithm::Max:
            {
                return [](float & accu, float value) {
                    if (value > accu) {
                        accu = value;
                    }
                };
            }

            case PoolingAlgorithm::Average:
            {
                return [](float & accu, float value) {
                    accu += value;
                };
            }

            case PoolingAlgorithm::L2:
            {
                return [](float & accu, float value) {
                    accu += (value*value);
                };
            }

            default:
            {
                throw armnn::InvalidArgumentException("Unsupported pooling algorithm");
            }
        }
    }

    using Executor = std::function<void(float & accumulated, float kernelSize)>;

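    // Returns a functor that finalises the accumulated value once the whole pooling
    // window has been processed: no-op for Max, mean for Average, root mean square for L2.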
    Executor GetExecutor(PoolingAlgorithm algorithm)
    {
        switch (algorithm)
        {
            case PoolingAlgorithm::Max:
            {
                return [](float & /*accumulated*/, float /*kernelSize*/) {};
            }

            case PoolingAlgorithm::Average:
            {
                return [](float & accumulated, float kernelSize) {
                    accumulated /= kernelSize;
                };
            }

            case PoolingAlgorithm::L2:
            {
                return [](float & accumulated, float kernelSize) {
                    accumulated = sqrtf(accumulated / kernelSize);
                };
            }

            default:
            {
                throw armnn::InvalidArgumentException("Unsupported pooling algorithm");
            }
        }
    }

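    // Returns true when the pooling window [start, end) lies entirely before the input
    // (end <= 0) or entirely after it (start > maxRange), i.e. it covers padding only.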
    bool OnPaddingOnly(int start, int end, int maxRange)
    {
        if (end <= 0 || start > maxRange)
        {
            return true;
        }
        else
        {
            return false;
        }
    }

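    // Clamps the pooling window [start, end) to the valid input extent [0, maxRange]
    // and returns true if any clamping was required.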
    bool ClampRange(int & start, int & end, int maxRange)
    {
        if (start < 0 || end > maxRange)
        {
            start = std::min(std::max(start, 0), maxRange);
            end   = std::min(std::max(end, 0), maxRange);
            return true;
        }
        else
        {
            return false;
        }
    }

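    // Computes the flat buffer index of the element at (n, c, z, y, x) in a 5D tensor
    // with the given dimensions, for either the NDHWC or the NCDHW data layout.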
    int CalculateIndex(int channels, int depth, int height, int width,
                       int n, int c, int z, int y, int x,
                       armnnUtils::DataLayoutIndexed dataLayout)
    {
        switch (dataLayout.GetDataLayout())
        {
            case armnn::DataLayout::NDHWC:
            {
                int outputIndex = n * depth * height * width * channels +
                                  z * height * width * channels +
                                  y * width * channels +
                                  x * channels +
                                  c;
                return outputIndex;
            }
            case armnn::DataLayout::NCDHW:
            {
                int outputIndex = n * channels * depth * height * width +
                                  c * depth * height * width +
                                  z * height * width +
                                  y * width +
                                  x;
                return outputIndex;
            }
            default:
            {
                throw armnn::InvalidArgumentException("Unsupported data layout.");
            }
        }
    }
}

using namespace armnnUtils;

namespace armnn
{
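// Applies 3D pooling (Max, Average or L2) to the decoded input tensor and writes one
// pooled value per output element through the output encoder.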
void Pooling3d(Decoder<float>& rInputDecoder,
               Encoder<float>& rOutputEncoder,
               const TensorInfo& inputInfo,
               const TensorInfo& outputInfo,
               const Pooling3dDescriptor& params)
{
    const DataLayoutIndexed dataLayout(params.m_DataLayout);

    auto channelsIndex = dataLayout.GetChannelsIndex();

    auto depthIndex = dataLayout.GetDepthIndex();
    auto heightIndex = dataLayout.GetHeightIndex();
    auto widthIndex = dataLayout.GetWidthIndex();

    const int batchSize    = armnn::numeric_cast<int>(outputInfo.GetShape()[0]);
    const int channels     = armnn::numeric_cast<int>(outputInfo.GetShape()[channelsIndex]);

    const int depthOutput  = armnn::numeric_cast<int>(outputInfo.GetShape()[depthIndex]);
    const int heightOutput = armnn::numeric_cast<int>(outputInfo.GetShape()[heightIndex]);
    const int widthOutput  = armnn::numeric_cast<int>(outputInfo.GetShape()[widthIndex]);

    const int depthInput   = armnn::numeric_cast<int>(inputInfo.GetShape()[depthIndex]);
    const int heightInput  = armnn::numeric_cast<int>(inputInfo.GetShape()[heightIndex]);
    const int widthInput   = armnn::numeric_cast<int>(inputInfo.GetShape()[widthIndex]);

    const int padLeft      = armnn::numeric_cast<int>(params.m_PadLeft);
    const int padRight     = armnn::numeric_cast<int>(params.m_PadRight);
    const int padTop       = armnn::numeric_cast<int>(params.m_PadTop);
    const int padBottom    = armnn::numeric_cast<int>(params.m_PadBottom);
    const int padFront     = armnn::numeric_cast<int>(params.m_PadFront);
    const int padBack      = armnn::numeric_cast<int>(params.m_PadBack);

    const int strideX      = armnn::numeric_cast<int>(params.m_StrideX);
    const int strideY      = armnn::numeric_cast<int>(params.m_StrideY);
    const int strideZ      = armnn::numeric_cast<int>(params.m_StrideZ);

    const int poolHeight   = armnn::numeric_cast<int>(params.m_PoolHeight);
    const int poolWidth    = armnn::numeric_cast<int>(params.m_PoolWidth);
    const int poolDepth    = armnn::numeric_cast<int>(params.m_PoolDepth);

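    // Resolve the algorithm-specific initial value, accumulation step and finalisation
    // step once, outside the loops.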
    float defaultInitializer = DefaultInitializer(params.m_PoolType);
    Accumulator accumulate = GetAccumulator(params.m_PoolType);
    Executor execute       = GetExecutor(params.m_PoolType);

    // Check supported padding methods outside the loop to simplify
    // the inner loop.
    if (params.m_PaddingMethod != PaddingMethod::Exclude &&
        params.m_PaddingMethod != PaddingMethod::IgnoreValue)
    {
        throw armnn::InvalidArgumentException("Unsupported padding type");
    }

    const std::vector<float> decodedInputVec = rInputDecoder.DecodeTensor(inputInfo.GetShape());

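    // Visit every output element; the position of the corresponding pooling window in the
    // input is derived from the output coordinates, the strides and the padding.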
    for (int n = 0; n < batchSize; n++)
    {
        for (int c = 0; c < channels; c++)
        {
            for (int zOutput = 0; zOutput < depthOutput; zOutput++)
            {
                // Calculate values independent of the x and y axes.
                int dstart = (zOutput * strideZ) - padFront;
                int dend = dstart + poolDepth;
                // Clamp the pooling region inside the valid input area (which includes the padding).
                // This is necessary because the final pooling in a row may overlap beyond the padding.
                dend = std::min(dend, depthInput + padBack);

                int depth = dend - dstart;
                bool dclamped = ClampRange(dstart, dend, depthInput);
                int depthClamped = dend - dstart;

                for (int yOutput = 0; yOutput < heightOutput; yOutput++)
                {
                    int hstart = (yOutput * strideY) - padTop;
                    int hend = hstart + poolHeight;
                    // Clamp the pooling region inside the valid input area (which includes the padding).
                    // This is necessary because the final pooling in a row may overlap beyond the padding.
                    hend = std::min(hend, heightInput + padBottom);

                    int height = hend - hstart;
                    bool hclamped = ClampRange(hstart, hend, heightInput);
                    int heightClamped = hend - hstart;

                    for (int xOutput = 0; xOutput < widthOutput; xOutput++)
                    {
                        int wstart = (xOutput * strideX) - padLeft;
                        int wend = wstart + poolWidth;
                        // Clamp the pooling region inside the valid input area (which includes the padding).
                        // This is necessary because the final pooling in a row may overlap beyond the padding.
                        wend = std::min(wend, widthInput + padRight);

                        int width = wend - wstart;
                        bool wclamped = ClampRange(wstart, wend, widthInput);
                        int widthClamped = wend - wstart;

                        float result = defaultInitializer;
                        float poolAreaSize = armnn::numeric_cast<float>(depth * height * width);

                        // Special case: if the pooling window covers only padding and no real
                        // input values (which happens when the padding is at least as large as
                        // the kernel), set the result to zero by convention: every value the
                        // window covers is padding, and padding values are ignored.
                        if (OnPaddingOnly(dstart, dend, depthInput) ||
                            OnPaddingOnly(hstart, hend, heightInput) ||
                            OnPaddingOnly(wstart, wend, widthInput))
                        {
                            result = 0.0f;

                            int outputIndex = CalculateIndex(channels, depthOutput, heightOutput, widthOutput,
                                                             n, c, zOutput, yOutput, xOutput, dataLayout);

                            rOutputEncoder[static_cast<unsigned int>(outputIndex)];
                            rOutputEncoder.Set(result);

                            continue;
                        }

                        bool clamped = (dclamped | hclamped | wclamped);

                        if (clamped && params.m_PaddingMethod == PaddingMethod::Exclude)
                        {
                            // When padding is excluded, divide by the number of real (non-padding)
                            // input elements the kernel actually covers, i.e. the clamped window size.
                            poolAreaSize = armnn::numeric_cast<float>(depthClamped * heightClamped * widthClamped);
                        }

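                        // Accumulate over every real (non-padding) input element covered by the
                        // clamped pooling window.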
                        for (auto zInput = dstart; zInput < dend; zInput++)
                        {
                            for (auto yInput = hstart; yInput < hend; yInput++)
                            {
                                for (auto xInput = wstart; xInput < wend; xInput++)
                                {
                                    int inputIndex = CalculateIndex(channels, depthInput, heightInput, widthInput,
                                                                    n, c, zInput, yInput, xInput, dataLayout);

                                    accumulate(result, decodedInputVec[static_cast<unsigned int>(inputIndex)]);
                                }
                            }
                        }

                        execute(result, poolAreaSize);

                        int outputIndex = CalculateIndex(channels, depthOutput, heightOutput, widthOutput,
                                                         n, c, zOutput, yOutput, xOutput, dataLayout);

                        rOutputEncoder[static_cast<unsigned int>(outputIndex)];
                        rOutputEncoder.Set(result);
                    }
                }
            }
        }
    }
}

} // namespace armnn