/* Copyright 2015 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#ifndef TENSORFLOW_CORE_KERNELS_EIGEN_POOLING_H_
#define TENSORFLOW_CORE_KERNELS_EIGEN_POOLING_H_

#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"

namespace Eigen {

/** SpatialMaxPooling
 * \ingroup CXX11_NeuralNetworks_Module
 *
 * \brief Applies a max-pooling over a multichannel input image.
 *
 * The input parameter is expected to be a tensor with a rank of 4 (channels,
 * height, width, others in col-major, and the reverse of that in row-major).
 *
 * The result can be assigned to a tensor of rank equal to the rank of the
 * input. The dimensions of the result will be channels, height, width, and
 * others (in col-major, and the reverse of that if the input was row-major).
 *
 * The order of the width and height dimensions can be swapped if needed.
 *
 */
template <typename Input>
EIGEN_ALWAYS_INLINE static const TensorReshapingOp<
    const Eigen::DSizes<typename internal::traits<Input>::Index,
                        internal::traits<Input>::NumDimensions>,
    const TensorReductionOp<
        internal::MaxReducer<
            std::remove_const_t<typename internal::traits<Input>::Scalar>>,
        std::conditional_t<
            internal::traits<Input>::Layout == ColMajor,
            const Eigen::IndexList<Eigen::type2index<1>, Eigen::type2index<2>>,
            const Eigen::IndexList<Eigen::type2index<2>, Eigen::type2index<3>>>,
        const TensorImagePatchOp<Dynamic, Dynamic, const Input>>>
SpatialMaxPooling(const Input& input, DenseIndex patchRows,
                  DenseIndex patchCols, DenseIndex strideRows,
                  DenseIndex strideCols, const PaddingType padding_type,
                  DenseIndex in_strideRows = 1, DenseIndex in_strideCols = 1) {
  EIGEN_STATIC_ASSERT(internal::traits<Input>::NumDimensions == 4,
                      YOU_MADE_A_PROGRAMMING_MISTAKE);

  typedef typename internal::traits<Input>::Index TensorIndex;
  TensorRef<Tensor<typename internal::traits<Input>::Scalar,
                   internal::traits<Input>::NumDimensions,
                   internal::traits<Input>::Layout, TensorIndex> >
      in(input);

  const DenseIndex patchRowsEff =
      patchRows + (patchRows - 1) * (in_strideRows - 1);
  const DenseIndex patchColsEff =
      patchCols + (patchCols - 1) * (in_strideCols - 1);

  static const bool isColMajor = (internal::traits<Input>::Layout == ColMajor);
  static const int idxRows = isColMajor ? 1 : 2;
  static const int idxCols = isColMajor ? 2 : 1;

  // Molds the output of the reduction into the shape expected by the user
  // (assuming col-major):
  // - 1st dim: channels
  // - 2nd dim: output height
  // - 3rd dim: output width
  // - 4th dim and beyond: everything else including batch size
  Eigen::DSizes<TensorIndex, internal::traits<Input>::NumDimensions>
      post_reduce_dims;
  post_reduce_dims[0] = in.dimension(0);
  if (padding_type == PADDING_VALID) {
    post_reduce_dims[idxRows] = Eigen::divup(
        static_cast<DenseIndex>(in.dimension(idxRows)) - patchRowsEff + 1,
        strideRows);
    post_reduce_dims[idxCols] = Eigen::divup(
        static_cast<DenseIndex>(in.dimension(idxCols)) - patchColsEff + 1,
        strideCols);
  } else {
    post_reduce_dims[idxRows] = Eigen::divup(
        static_cast<DenseIndex>(in.dimension(idxRows)), strideRows);
    post_reduce_dims[idxCols] = Eigen::divup(
        static_cast<DenseIndex>(in.dimension(idxCols)), strideCols);
  }
  post_reduce_dims[3] = in.dimension(3);

  // Take advantage of cxx11 to give the compiler information it can use to
  // optimize the code.
  std::conditional_t<
      internal::traits<Input>::Layout == ColMajor,
      const Eigen::IndexList<Eigen::type2index<1>, Eigen::type2index<2>>,
      const Eigen::IndexList<Eigen::type2index<2>, Eigen::type2index<3>>>
      reduction_dims;

  return input
      .extract_image_patches(
          patchRows, patchCols, strideRows, strideCols, in_strideRows,
          in_strideCols, padding_type,
          Eigen::NumTraits<std::remove_const_t<
              typename internal::traits<Input>::Scalar>>::lowest())
      .maximum(reduction_dims)
      .reshape(post_reduce_dims);
}
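
// Example (a minimal usage sketch, assuming a col-major float input of shape
// (channels, height, width, batch)):
//
//   Eigen::Tensor<float, 4> input(3, 32, 32, 8);
//   input.setRandom();
//   // 2x2 max pooling with stride 2 and VALID padding produces a
//   // (3, 16, 16, 8) result.
//   Eigen::Tensor<float, 4> result = Eigen::SpatialMaxPooling(
//       input, /*patchRows=*/2, /*patchCols=*/2, /*strideRows=*/2,
//       /*strideCols=*/2, Eigen::PADDING_VALID);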

/** CuboidMaxPooling
 * \ingroup CXX11_NeuralNetworks_Module
 *
 * \brief Applies a max-pooling over a multichannel input volume.
 *
 * The input parameter is expected to be a tensor with a rank of 5 (channels,
 * depth, height, width, others in col-major, and the reverse of that in
 * row-major).
 *
 * The result can be assigned to a tensor of rank equal to the rank of the
 * input. The dimensions of the result will be channels, depth, height, width,
 * and others (in col-major, and the reverse of that if the input was
 * row-major).
 *
 * The order of the depth, width and height dimensions can be swapped if
 * needed.
 *
 */
template <typename Input>
EIGEN_ALWAYS_INLINE static const TensorReshapingOp<
    const Eigen::DSizes<DenseIndex, internal::traits<Input>::NumDimensions>,
    const TensorReductionOp<
        internal::MaxReducer<float>,
        const Eigen::IndexList<Eigen::type2index<1> >,
        const TensorReshapingOp<
            const Eigen::DSizes<DenseIndex, 3>,
            const TensorVolumePatchOp<Dynamic, Dynamic, Dynamic,
                                      const Input> > > >
CuboidMaxPooling(const Input& input, DenseIndex patchPlanes,
                 DenseIndex patchRows, DenseIndex patchCols,
                 DenseIndex stridePlanes, DenseIndex strideRows,
                 DenseIndex strideCols, const PaddingType padding_type) {
  EIGEN_STATIC_ASSERT(internal::traits<Input>::NumDimensions == 5,
                      YOU_MADE_A_PROGRAMMING_MISTAKE);
  static const bool isColMajor = (internal::traits<Input>::Layout == ColMajor);

  typedef typename internal::traits<Input>::Index TensorIndex;
  TensorRef<Tensor<typename internal::traits<Input>::Scalar,
                   internal::traits<Input>::NumDimensions,
                   internal::traits<Input>::Layout, TensorIndex> >
      in(input);

  static const int idxPlanes = isColMajor ? 1 : 3;
  static const int idxRows = 2;
  static const int idxCols = isColMajor ? 3 : 1;

  // Molds the output of the reduction into the shape expected by the user
  // (assuming col-major):
  // - 1st dim: channels
  // - 2nd dim: output depth
  // - 3rd dim: output height
  // - 4th dim: output width
  // - 5th dim and beyond: everything else including batch size
  Eigen::DSizes<DenseIndex, internal::traits<Input>::NumDimensions>
      post_reduce_dims;
  post_reduce_dims[0] = in.dimension(0);
  if (padding_type == PADDING_VALID) {
    post_reduce_dims[idxPlanes] = Eigen::divup(
        static_cast<DenseIndex>(in.dimension(idxPlanes)) - patchPlanes + 1,
        stridePlanes);
    post_reduce_dims[idxRows] = Eigen::divup(
        static_cast<DenseIndex>(in.dimension(idxRows)) - patchRows + 1,
        strideRows);
    post_reduce_dims[idxCols] = Eigen::divup(
        static_cast<DenseIndex>(in.dimension(idxCols)) - patchCols + 1,
        strideCols);
  } else {
    post_reduce_dims[idxPlanes] = Eigen::divup(
        static_cast<DenseIndex>(in.dimension(idxPlanes)), stridePlanes);
    post_reduce_dims[idxRows] = Eigen::divup(
        static_cast<DenseIndex>(in.dimension(idxRows)), strideRows);
    post_reduce_dims[idxCols] = Eigen::divup(
        static_cast<DenseIndex>(in.dimension(idxCols)), strideCols);
  }
  post_reduce_dims[4] = in.dimension(4);

  Eigen::DSizes<DenseIndex, 3> pre_reduce_dims;
  pre_reduce_dims[1] = patchRows * patchCols * patchPlanes;
  if (isColMajor) {
    pre_reduce_dims[0] = post_reduce_dims[0];
    pre_reduce_dims[2] = post_reduce_dims[1] * post_reduce_dims[2] *
                         post_reduce_dims[3] * post_reduce_dims[4];
  } else {
    pre_reduce_dims[0] = post_reduce_dims[0] * post_reduce_dims[1] *
                         post_reduce_dims[2] * post_reduce_dims[3];
    pre_reduce_dims[2] = post_reduce_dims[4];
  }

  // Take advantage of cxx11 to give the compiler information it can use to
  // optimize the code.
  Eigen::IndexList<Eigen::type2index<1> > reduction_dims;
  return input
      .extract_volume_patches(patchPlanes, patchRows, patchCols, stridePlanes,
                              strideRows, strideCols, padding_type,
                              -Eigen::NumTraits<float>::highest())
      .reshape(pre_reduce_dims)
      .maximum(reduction_dims)
      .reshape(post_reduce_dims);
}
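
// Example (a minimal usage sketch, assuming a col-major float input of shape
// (channels, depth, height, width, batch)):
//
//   Eigen::Tensor<float, 5> input(3, 16, 32, 32, 4);
//   input.setRandom();
//   // 2x2x2 max pooling with stride 2 and VALID padding produces a
//   // (3, 8, 16, 16, 4) result.
//   Eigen::Tensor<float, 5> result = Eigen::CuboidMaxPooling(
//       input, /*patchPlanes=*/2, /*patchRows=*/2, /*patchCols=*/2,
//       /*stridePlanes=*/2, /*strideRows=*/2, /*strideCols=*/2,
//       Eigen::PADDING_VALID);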

/** SpatialAvgPooling
 * \ingroup CXX11_NeuralNetworks_Module
 *
 * \brief Applies an average pooling over a multichannel input image.
 *
 * The input parameter is expected to be a tensor with a rank of 4 (channels,
 * height, width, others in col-major, and the reverse of that in row-major).
 *
 * The result can be assigned to a tensor of rank equal to the rank of the
 * input. The dimensions of the result will be channels, height, width, and
 * others (in col-major, and the reverse of that if the input was row-major).
 *
 * The order of the width and height dimensions can be swapped if needed.
 *
 */
namespace internal {

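// AvgPoolMeanReducer computes the mean of the coefficients of a patch while
// ignoring padding. extract_image_patches/extract_volume_patches encode
// out-of-bounds coefficients as -Eigen::NumTraits<T>::highest(); the reducer
// skips those values so that they contribute neither to the sum nor to the
// divisor.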
template <typename T>
struct AvgPoolMeanReducer {
#if (EIGEN_ARCH_i386 || EIGEN_ARCH_x86_64) && !defined(__CUDACC__) && \
    !defined(__HIPCC__)
  // We only support packet access for floats.
  static constexpr bool PacketAccess = internal::is_same<T, float>::value;
#else
  static const bool PacketAccess = false;
#endif
  static constexpr bool IsStateful = true;

  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE AvgPoolMeanReducer() : scalarCount_(0) {
    typedef typename packet_traits<T>::type Packet;
#if defined(__HIPCC__)
    packetCount_ = 0;
#else
    packetCount_ = pset1<Packet>(T(0.0));
#endif
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reduce(const T t, T* accum) {
    if (t != -Eigen::NumTraits<T>::highest()) {
      (*accum) = (*accum) + t;
      scalarCount_++;
    }
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T initialize() const {
    return static_cast<T>(0);
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T finalize(const T accum) const {
    eigen_assert(scalarCount_ > 0);
    return accum / T(scalarCount_);
  }

#if (EIGEN_ARCH_i386 || EIGEN_ARCH_x86_64) && !defined(__CUDACC__) && \
    !defined(__HIPCC__)
  // pequal(a, b) builds a per-lane mask that is all-ones where a == b, and
  // psel(a, b, false_mask) selects b where the mask is set and a elsewhere.
#ifdef EIGEN_VECTORIZE_AVX512
#define pequal(a, b)   \
  _mm512_castsi512_ps( \
      _mm512_maskz_set1_epi32(_mm512_cmp_ps_mask(a, b, _CMP_EQ_UQ), -1))

  // The ternarylogic function immediate determines the values in the result
  // In the case below, 0xd8 implies (false_mask) ? (b) : (a)
  // For details, refer to the vpternlogd instruction table at
  // http://www.intel.com/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-vol-2c-manual.pdf

#define psel(a, b, false_mask)                        \
  _mm512_castsi512_ps(_mm512_ternarylogic_epi32(      \
      _mm512_castps_si512(a), _mm512_castps_si512(b), \
      _mm512_castps_si512(false_mask), 0xd8))
#elif defined EIGEN_VECTORIZE_AVX
#define pequal(a, b) _mm256_cmp_ps(a, b, _CMP_EQ_UQ)
#define psel(a, b, false_mask) _mm256_blendv_ps(a, b, false_mask)
#else
#define pequal(a, b) _mm_cmpeq_ps(a, b)
#define psel(a, b, false_mask) \
  _mm_or_ps(_mm_andnot_ps(false_mask, a), _mm_and_ps(false_mask, b))
#endif

  template <typename Packet>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reducePacket(const Packet& p,
                                                          Packet* accum) {
    reducePacketWithType(static_cast<T>(0), p, accum);
  }

  template <typename Packet>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reducePacketWithType(
      T, const Packet& p, Packet* accum) {
    Packet skip_mask =
        pequal(p, pset1<Packet>(-Eigen::NumTraits<T>::highest()));
    (*accum) = padd<Packet>(*accum, psel(p, pset1<Packet>(0), skip_mask));
    packetCount_ = padd<Packet>(
        packetCount_, psel(pset1<Packet>(1), pset1<Packet>(0), skip_mask));
  }

  template <typename Packet>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet initializePacket() const {
    return pset1<Packet>(0);
  }

  template <typename Packet>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet
  finalizePacket(const Packet& vaccum) const {
    return pdiv(vaccum, packetCount_);
  }
  template <typename Packet>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T
  finalizeBoth(const T saccum, const Packet& vaccum) const {
    return (saccum + predux(vaccum)) / (scalarCount_ + predux(packetCount_));
  }
#endif

 protected:
  typedef typename packet_traits<T>::type Packet;
  int scalarCount_;
#if defined(__HIPCC__)
  int packetCount_;
#else
  Packet packetCount_;
#endif
};

template <typename Device>
struct reducer_traits<AvgPoolMeanReducer<float>, Device> {
  enum {
    Cost = 1,
#if (EIGEN_ARCH_i386 || EIGEN_ARCH_x86_64) && !defined(__CUDACC__) && \
    !defined(__HIPCC__)
    // We only support packet access for floats.
    PacketAccess = true,
#else
    PacketAccess = false,
#endif
    IsStateful = true,
    IsExactlyAssociative = false
  };
};

template <>
struct reducer_traits<AvgPoolMeanReducer<float>, GpuDevice> {
  enum {
    Cost = 1,
    PacketAccess = false,
    IsStateful = true,
    IsExactlyAssociative = false
  };
};

}  // namespace internal

template <typename Input>
EIGEN_ALWAYS_INLINE static const TensorReshapingOp<
    const Eigen::DSizes<typename internal::traits<Input>::Index,
                        internal::traits<Input>::NumDimensions>,
    const TensorReductionOp<
        internal::AvgPoolMeanReducer<
            std::remove_const_t<typename internal::traits<Input>::Scalar>>,
        std::conditional_t<
            internal::traits<Input>::Layout == ColMajor,
            const Eigen::IndexList<Eigen::type2index<1>, Eigen::type2index<2>>,
            const Eigen::IndexList<Eigen::type2index<2>, Eigen::type2index<3>>>,
        const TensorImagePatchOp<Dynamic, Dynamic, const Input>>>
SpatialAvgPooling(const Input& input, DenseIndex patchRows,
                  DenseIndex patchCols, DenseIndex strideRows,
                  DenseIndex strideCols, const PaddingType padding_type,
                  DenseIndex in_strideRows = 1, DenseIndex in_strideCols = 1) {
  EIGEN_STATIC_ASSERT(internal::traits<Input>::NumDimensions == 4,
                      YOU_MADE_A_PROGRAMMING_MISTAKE);

  typedef typename internal::traits<Input>::Index TensorIndex;
  TensorRef<Tensor<typename internal::traits<Input>::Scalar,
                   internal::traits<Input>::NumDimensions,
                   internal::traits<Input>::Layout, TensorIndex> >
      in(input);

  const DenseIndex patchRowsEff =
      patchRows + (patchRows - 1) * (in_strideRows - 1);
  const DenseIndex patchColsEff =
      patchCols + (patchCols - 1) * (in_strideCols - 1);

  static const bool isColMajor = (internal::traits<Input>::Layout == ColMajor);
  static const int idxRows = isColMajor ? 1 : 2;
  static const int idxCols = isColMajor ? 2 : 1;

  // Molds the output of the reduction into the shape expected by the user
  // (assuming col-major):
  // - 1st dim: channels
  // - 2nd dim: output height
  // - 3rd dim: output width
  // - 4th dim and beyond: everything else including batch size
  Eigen::DSizes<TensorIndex, internal::traits<Input>::NumDimensions>
      post_reduce_dims;
  post_reduce_dims[0] = in.dimension(0);
  if (padding_type == PADDING_VALID) {
    post_reduce_dims[idxRows] = Eigen::divup(
        static_cast<DenseIndex>(in.dimension(idxRows)) - patchRowsEff + 1,
        strideRows);
    post_reduce_dims[idxCols] = Eigen::divup(
        static_cast<DenseIndex>(in.dimension(idxCols)) - patchColsEff + 1,
        strideCols);
  } else {
    post_reduce_dims[idxRows] = Eigen::divup(
        static_cast<DenseIndex>(in.dimension(idxRows)), strideRows);
    post_reduce_dims[idxCols] = Eigen::divup(
        static_cast<DenseIndex>(in.dimension(idxCols)), strideCols);
  }
  post_reduce_dims[3] = in.dimension(3);

  typedef std::remove_const_t<typename internal::traits<Input>::Scalar>
      CoeffReturnType;
  internal::AvgPoolMeanReducer<CoeffReturnType> mean_with_nan;

  // Take advantage of cxx11 to give the compiler information it can use to
  // optimize the code.
  std::conditional_t<
      internal::traits<Input>::Layout == ColMajor,
      const Eigen::IndexList<Eigen::type2index<1>, Eigen::type2index<2>>,
      const Eigen::IndexList<Eigen::type2index<2>, Eigen::type2index<3>>>
      reduction_dims;
  return input
      .extract_image_patches(patchRows, patchCols, strideRows, strideCols,
                             in_strideRows, in_strideCols, padding_type,
                             -Eigen::NumTraits<CoeffReturnType>::highest())
      .reduce(reduction_dims, mean_with_nan)
      .reshape(post_reduce_dims);
}
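
// Example (a minimal usage sketch, assuming a col-major float input of shape
// (channels, height, width, batch)):
//
//   Eigen::Tensor<float, 4> input(3, 32, 32, 8);
//   input.setRandom();
//   // 3x3 average pooling with stride 1 and SAME padding produces a
//   // (3, 32, 32, 8) result; along the borders only the coefficients that
//   // fall inside the image contribute to each average.
//   Eigen::Tensor<float, 4> result = Eigen::SpatialAvgPooling(
//       input, /*patchRows=*/3, /*patchCols=*/3, /*strideRows=*/1,
//       /*strideCols=*/1, Eigen::PADDING_SAME);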

/** CuboidAvgPooling
 * \ingroup CXX11_NeuralNetworks_Module
 *
 * \brief Applies an average pooling over a multichannel input volume.
 *
 * The input parameter is expected to be a tensor with a rank of 5 (channels,
 * depth, height, width, others in col-major, and the reverse of that in
 * row-major).
 *
 * The result can be assigned to a tensor of rank equal to the rank of the
 * input. The dimensions of the result will be channels, depth, height, width,
 * and others (in col-major, and the reverse of that if the input was
 * row-major).
 *
 * The order of the depth, width and height dimensions can be swapped if
 * needed.
 *
 */
template <typename Input>
EIGEN_ALWAYS_INLINE static const TensorReshapingOp<
    const Eigen::DSizes<DenseIndex, internal::traits<Input>::NumDimensions>,
    const TensorReductionOp<
        internal::AvgPoolMeanReducer<float>,
        const Eigen::IndexList<Eigen::type2index<1> >,
        const TensorReshapingOp<
            const Eigen::DSizes<DenseIndex, 3>,
            const TensorVolumePatchOp<Dynamic, Dynamic, Dynamic,
                                      const Input> > > >
CuboidAvgPooling(const Input& input, DenseIndex patchPlanes,
                 DenseIndex patchRows, DenseIndex patchCols,
                 DenseIndex stridePlanes, DenseIndex strideRows,
                 DenseIndex strideCols, const PaddingType padding_type) {
  EIGEN_STATIC_ASSERT(internal::traits<Input>::NumDimensions == 5,
                      YOU_MADE_A_PROGRAMMING_MISTAKE);
  static const bool isColMajor = (internal::traits<Input>::Layout == ColMajor);

  typedef typename internal::traits<Input>::Index TensorIndex;
  TensorRef<Tensor<typename internal::traits<Input>::Scalar,
                   internal::traits<Input>::NumDimensions,
                   internal::traits<Input>::Layout, TensorIndex> >
      in(input);

  static const int idxPlanes = isColMajor ? 1 : 3;
  static const int idxRows = 2;
  static const int idxCols = isColMajor ? 3 : 1;
  // Molds the output of the reduction into the shape expected by the user
  // (assuming col-major):
  // - 1st dim: channels
  // - 2nd dim: output depth
  // - 3rd dim: output height
  // - 4th dim: output width
  // - 5th dim and beyond: everything else including batch size
  Eigen::DSizes<DenseIndex, internal::traits<Input>::NumDimensions>
      post_reduce_dims;
  post_reduce_dims[0] = in.dimension(0);
  if (padding_type == PADDING_VALID) {
    post_reduce_dims[idxPlanes] = Eigen::divup(
        static_cast<DenseIndex>(in.dimension(idxPlanes)) - patchPlanes + 1,
        stridePlanes);
    post_reduce_dims[idxRows] = Eigen::divup(
        static_cast<DenseIndex>(in.dimension(idxRows)) - patchRows + 1,
        strideRows);
    post_reduce_dims[idxCols] = Eigen::divup(
        static_cast<DenseIndex>(in.dimension(idxCols)) - patchCols + 1,
        strideCols);
  } else {
    post_reduce_dims[idxPlanes] = Eigen::divup(
        static_cast<DenseIndex>(in.dimension(idxPlanes)), stridePlanes);
    post_reduce_dims[idxRows] = Eigen::divup(
        static_cast<DenseIndex>(in.dimension(idxRows)), strideRows);
    post_reduce_dims[idxCols] = Eigen::divup(
        static_cast<DenseIndex>(in.dimension(idxCols)), strideCols);
  }
  post_reduce_dims[4] = in.dimension(4);

  Eigen::DSizes<DenseIndex, 3> pre_reduce_dims;
  pre_reduce_dims[1] = patchRows * patchCols * patchPlanes;
  if (isColMajor) {
    pre_reduce_dims[0] = post_reduce_dims[0];
    pre_reduce_dims[2] = post_reduce_dims[1] * post_reduce_dims[2] *
                         post_reduce_dims[3] * post_reduce_dims[4];
  } else {
    pre_reduce_dims[0] = post_reduce_dims[0] * post_reduce_dims[1] *
                         post_reduce_dims[2] * post_reduce_dims[3];
    pre_reduce_dims[2] = post_reduce_dims[4];
  }

  typedef std::remove_const_t<typename internal::traits<Input>::Scalar>
      CoeffReturnType;
  internal::AvgPoolMeanReducer<CoeffReturnType> mean_with_nan;

  // Take advantage of cxx11 to give the compiler information it can use to
  // optimize the code.
  Eigen::IndexList<Eigen::type2index<1> > reduction_dims;
  return input
      .extract_volume_patches(patchPlanes, patchRows, patchCols, stridePlanes,
                              strideRows, strideCols, padding_type,
                              -Eigen::NumTraits<float>::highest())
      .reshape(pre_reduce_dims)
      .reduce(reduction_dims, mean_with_nan)
      .reshape(post_reduce_dims);
}
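
// Example (a minimal usage sketch, assuming a col-major float input of shape
// (channels, depth, height, width, batch)):
//
//   Eigen::Tensor<float, 5> input(3, 16, 32, 32, 4);
//   input.setRandom();
//   // 3x3x3 average pooling with stride 1 and SAME padding produces a
//   // (3, 16, 32, 32, 4) result; padded coefficients do not contribute to
//   // the averages.
//   Eigen::Tensor<float, 5> result = Eigen::CuboidAvgPooling(
//       input, /*patchPlanes=*/3, /*patchRows=*/3, /*patchCols=*/3,
//       /*stridePlanes=*/1, /*strideRows=*/1, /*strideCols=*/1,
//       Eigen::PADDING_SAME);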

}  // end namespace Eigen

#endif  // TENSORFLOW_CORE_KERNELS_EIGEN_POOLING_H_