/* Copyright 2016 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#define EIGEN_USE_THREADS

#include <algorithm>
#include <cmath>
#include <random>
#include <vector>

#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
#include "tensorflow/core/framework/numeric_op.h"
#include "tensorflow/core/framework/op_kernel.h"
#include "tensorflow/core/kernels/fractional_pool_common.h"
#include "tensorflow/core/lib/random/random.h"
#include "tensorflow/core/platform/logging.h"
#include "tensorflow/core/platform/mutex.h"
#include "tensorflow/core/util/guarded_philox_random.h"
#include "tensorflow/core/util/overflow.h"

namespace tensorflow {
typedef Eigen::ThreadPoolDevice CPUDevice;

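// FractionalAvgPool: for each pooling cell defined by the (pseudo-)random row
// and column sequences, the output is the average of the input values covered
// by that cell. Pooling is applied only along the row and col dimensions.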
template <typename T>
class FractionalAvgPoolOp : public OpKernel {
 public:
  explicit FractionalAvgPoolOp(OpKernelConstruction* context)
      : OpKernel(context) {
    OP_REQUIRES_OK(context, context->GetAttr("pooling_ratio", &pooling_ratio_));
    OP_REQUIRES_OK(context, context->GetAttr("pseudo_random", &pseudo_random_));
    OP_REQUIRES_OK(context, context->GetAttr("overlapping", &overlapping_));
    OP_REQUIRES(context, pooling_ratio_.size() == 4,
                errors::InvalidArgument(
                    "pooling_ratio field must specify 4 dimensions"));
    OP_REQUIRES(
        context, pooling_ratio_[0] == 1 || pooling_ratio_[3] == 1,
        errors::Unimplemented("Fractional average pooling is not yet "
                              "supported on the batch or channel dimension."));
    OP_REQUIRES_OK(context, context->GetAttr("deterministic", &deterministic_));
    OP_REQUIRES_OK(context, context->GetAttr("seed", &seed_));
    OP_REQUIRES_OK(context, context->GetAttr("seed2", &seed2_));
    if (deterministic_) {
      // If deterministic_ is true but neither seed is set, force-set the seeds.
      if ((seed_ == 0) && (seed2_ == 0)) {
        seed_ = random::New64();
        seed2_ = random::New64();
      }
    } else {
      OP_REQUIRES(
          context, (seed_ == 0) && (seed2_ == 0),
          errors::InvalidArgument(
              "Both seed and seed2 should be 0 if deterministic is false."));
    }
  }

  void Compute(OpKernelContext* context) override {
    typedef Eigen::Map<const Eigen::Matrix<T, Eigen::Dynamic, Eigen::Dynamic>>
        ConstEigenMatrixMap;
    typedef Eigen::Map<Eigen::Matrix<T, Eigen::Dynamic, Eigen::Dynamic>>
        EigenMatrixMap;

    constexpr int tensor_in_and_out_dims = 4;

    const Tensor& tensor_in = context->input(0);
    OP_REQUIRES(context, tensor_in.dims() == tensor_in_and_out_dims,
                errors::InvalidArgument("tensor_in must be 4-dimensional"));

    std::vector<int> input_size(tensor_in_and_out_dims);
    std::vector<int> output_size(tensor_in_and_out_dims);
    for (int i = 0; i < tensor_in_and_out_dims; ++i) {
      input_size[i] = tensor_in.dim_size(i);
      OP_REQUIRES(
          context, pooling_ratio_[i] <= input_size[i],
          errors::InvalidArgument(
              "Pooling ratio cannot be bigger than input tensor dim size."));
    }
    // Output size.
    for (int i = 0; i < tensor_in_and_out_dims; ++i) {
      output_size[i] =
          static_cast<int>(std::floor(input_size[i] / pooling_ratio_[i]));
      DCHECK_GT(output_size[i], 0);
    }

    // Generate pooling sequence.
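    // row_cum_seq and col_cum_seq hold cumulative pooling boundaries: entry i
    // is the input index where output row/col i starts, so each sequence has
    // output_size + 1 entries.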
    std::vector<int64_t> row_cum_seq;
    std::vector<int64_t> col_cum_seq;
    GuardedPhiloxRandom generator;
    generator.Init(seed_, seed2_);
    row_cum_seq = GeneratePoolingSequence(input_size[1], output_size[1],
                                          &generator, pseudo_random_);
    col_cum_seq = GeneratePoolingSequence(input_size[2], output_size[2],
                                          &generator, pseudo_random_);

    // Prepare output.
    Tensor* output_tensor = nullptr;
    OP_REQUIRES_OK(context, context->allocate_output(
                                0,
                                TensorShape({output_size[0], output_size[1],
                                             output_size[2], output_size[3]}),
                                &output_tensor));
    Tensor* output_row_seq_tensor = nullptr;
    OP_REQUIRES_OK(
        context, context->allocate_output(
                     1, TensorShape({static_cast<int64_t>(row_cum_seq.size())}),
                     &output_row_seq_tensor));
    Tensor* output_col_seq_tensor = nullptr;
    OP_REQUIRES_OK(
        context, context->allocate_output(
                     2, TensorShape({static_cast<int64_t>(col_cum_seq.size())}),
                     &output_col_seq_tensor));

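    // View the 4D tensors as 2D matrices: depth along the rows and one column
    // per (batch, row, col) position, so an entire pooling cell can be
    // accumulated with column operations.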
    ConstEigenMatrixMap in_mat(tensor_in.flat<T>().data(), input_size[3],
                               input_size[2] * input_size[1] * input_size[0]);

    EigenMatrixMap out_mat(output_tensor->flat<T>().data(), output_size[3],
                           output_size[2] * output_size[1] * output_size[0]);
    // out_count corresponds to number of elements in each pooling cell.
    Eigen::Matrix<T, Eigen::Dynamic, 1> out_count(out_mat.cols());

    // Initializes the output tensor and out_count with 0.
    out_mat.setZero();
    out_count.setZero();

    auto output_row_seq_flat = output_row_seq_tensor->flat<int64_t>();
    auto output_col_seq_flat = output_col_seq_tensor->flat<int64_t>();

    // Set output tensors.
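    // Outputs 1 and 2 expose the pooling sequences so FractionalAvgPoolGrad
    // can reconstruct the same pooling cells during backprop.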
    for (int i = 0; i < row_cum_seq.size(); ++i) {
      output_row_seq_flat(i) = row_cum_seq[i];
    }

    for (int i = 0; i < col_cum_seq.size(); ++i) {
      output_col_seq_flat(i) = col_cum_seq[i];
    }

    // For both input and output,
    // 0: batch
    // 1: row / row
    // 2: col / col
    // 3: depth / channel
    const int64_t row_max = input_size[1] - 1;
    const int64_t col_max = input_size[2] - 1;
    for (int64_t b = 0; b < input_size[0]; ++b) {
      // row sequence.
      for (int64_t hs = 0; hs < row_cum_seq.size() - 1; ++hs) {
        // row start and end.
        const int64_t row_start = row_cum_seq[hs];
        int64_t row_end =
            overlapping_ ? row_cum_seq[hs + 1] : row_cum_seq[hs + 1] - 1;
        row_end = std::min(row_end, row_max);

        // col sequence.
        for (int64_t ws = 0; ws < col_cum_seq.size() - 1; ++ws) {
          const int64_t out_offset =
              (b * output_size[1] + hs) * output_size[2] + ws;
          // col start and end.
          const int64_t col_start = col_cum_seq[ws];
          int64_t col_end =
              overlapping_ ? col_cum_seq[ws + 1] : col_cum_seq[ws + 1] - 1;
          col_end = std::min(col_end, col_max);
          for (int64_t h = row_start; h <= row_end; ++h) {
            for (int64_t w = col_start; w <= col_end; ++w) {
              const int64_t in_offset =
                  (b * input_size[1] + h) * input_size[2] + w;
              out_mat.col(out_offset) += in_mat.col(in_offset);
              out_count(out_offset)++;
            }
          }
        }
      }
    }
    DCHECK_GT(out_count.minCoeff(), 0);
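    // Divide each accumulated column by its cell's element count to turn the
    // sums into averages.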
    out_mat.array().rowwise() /= out_count.transpose().array();
  }

 private:
  bool deterministic_;
  int64_t seed_;
  int64_t seed2_;
  std::vector<float> pooling_ratio_;
  bool pseudo_random_;
  bool overlapping_;
};

#define REGISTER_FRACTIONALAVGPOOL(type)                                      \
  REGISTER_KERNEL_BUILDER(                                                    \
      Name("FractionalAvgPool").Device(DEVICE_CPU).TypeConstraint<type>("T"), \
      FractionalAvgPoolOp<type>)

REGISTER_FRACTIONALAVGPOOL(int32);
REGISTER_FRACTIONALAVGPOOL(int64_t);
REGISTER_FRACTIONALAVGPOOL(float);
REGISTER_FRACTIONALAVGPOOL(double);

#undef REGISTER_FRACTIONALAVGPOOL

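// FractionalAvgPoolGrad: given the gradient w.r.t. the pooled output and the
// pooling sequences produced by FractionalAvgPool, computes the gradient
// w.r.t. the original input by spreading each output gradient evenly over its
// pooling cell.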
template <class T>
class FractionalAvgPoolGradOp : public OpKernel {
 public:
  explicit FractionalAvgPoolGradOp(OpKernelConstruction* context)
      : OpKernel(context) {
    OP_REQUIRES_OK(context, context->GetAttr("overlapping", &overlapping_));
  }

  void Compute(OpKernelContext* context) override {
    // Here's the basic idea:
    // The batch and depth dimensions are independent of the row and col
    // dimensions, and FractionalAvgPool currently only supports pooling along
    // row and col, so we can treat this 4D tensor backpropagation as a series
    // of operations on 2D planes.
    //
    // For each element of a 'slice' (2D plane) of output_backprop, we need to
    // figure out which input elements contributed to it in the forward
    // FractionalAvgPool operation. This can be determined from
    // row_pooling_sequence, col_pooling_sequence and overlapping.
    // Once the contributors are known, we simply divide the element's value
    // evenly among them.
    //
    // Internally, the divided out_backprop values are accumulated in a
    // temporary tensor of double type and then cast back to type T.
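    // For example, if an out_backprop element is 6.0 and its pooling cell
    // covered a 2 x 3 input region in the forward pass, each of those 6 input
    // positions receives 1.0 of gradient.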
    typedef Eigen::Map<const Eigen::Matrix<T, Eigen::Dynamic, Eigen::Dynamic>>
        ConstEigenMatrixMap;
    typedef Eigen::Map<Eigen::Matrix<double, Eigen::Dynamic, Eigen::Dynamic>>
        EigenDoubleMatrixMap;

    // Grab the inputs.
    const Tensor& orig_input_tensor_shape = context->input(0);
    OP_REQUIRES(context,
                orig_input_tensor_shape.dims() == 1 &&
                    orig_input_tensor_shape.NumElements() == 4,
                errors::InvalidArgument("original input tensor shape must be "
                                        "1-dimensional with 4 elements"));
    int64_t num_elements = 1;
    for (int i = 0; i < orig_input_tensor_shape.dims(); i++) {
      OP_REQUIRES(context, orig_input_tensor_shape.dim_size(i) > 0,
                  errors::InvalidArgument(
                      "orig_input_tensor_shape must be positive, got: ",
                      orig_input_tensor_shape.dim_size(i)));
      num_elements = MultiplyWithoutOverflow(
          num_elements, orig_input_tensor_shape.dim_size(i));
      OP_REQUIRES(
          context, num_elements > 0,
          errors::InvalidArgument(
              "The total elements specified by orig_input_tensor_shape",
              " is too large. Encountered overflow after multiplying ",
              orig_input_tensor_shape.dim_size(i), ", result: ", num_elements));
    }

    const Tensor& out_backprop = context->input(1);
    OP_REQUIRES(context, out_backprop.dims() == 4,
                errors::InvalidArgument("out_backprop must be 4-dimensional"));
    for (int i = 0; i < out_backprop.dims(); i++) {
      OP_REQUIRES(context, out_backprop.dim_size(i) > 0,
                  errors::InvalidArgument(
                      "out_backprop must be positive for all dimensions, got: ",
                      out_backprop.dim_size(i)));
    }

    const Tensor& row_seq_tensor = context->input(2);
    const Tensor& col_seq_tensor = context->input(3);

    const int64_t out_batch = out_backprop.dim_size(0);
    const int64_t out_rows = out_backprop.dim_size(1);
    const int64_t out_cols = out_backprop.dim_size(2);
    const int64_t out_depth = out_backprop.dim_size(3);

    OP_REQUIRES(context, row_seq_tensor.NumElements() > out_rows,
                errors::InvalidArgument("Given out_backprop shape ",
                                        out_backprop.shape().DebugString(),
                                        ", row_seq_tensor must have at least ",
                                        out_rows + 1, " elements, but got ",
                                        row_seq_tensor.NumElements()));
    OP_REQUIRES(context, col_seq_tensor.NumElements() > out_cols,
                errors::InvalidArgument("Given out_backprop shape ",
                                        out_backprop.shape().DebugString(),
                                        ", col_seq_tensor must have at least ",
                                        out_cols + 1, " elements, but got ",
                                        col_seq_tensor.NumElements()));

    auto row_seq_tensor_flat = row_seq_tensor.flat<int64_t>();
    auto col_seq_tensor_flat = col_seq_tensor.flat<int64_t>();
    auto orig_input_tensor_shape_flat = orig_input_tensor_shape.flat<int64_t>();

    const int64_t in_batch = orig_input_tensor_shape_flat(0);
    const int64_t in_rows = orig_input_tensor_shape_flat(1);
    const int64_t in_cols = orig_input_tensor_shape_flat(2);
    const int64_t in_depth = orig_input_tensor_shape_flat(3);
    OP_REQUIRES(
        context, in_batch != 0,
        errors::InvalidArgument("Batch dimension of input must not be 0"));
    OP_REQUIRES(
        context, in_rows != 0,
        errors::InvalidArgument("Rows dimension of input must not be 0"));
    OP_REQUIRES(
        context, in_cols != 0,
        errors::InvalidArgument("Columns dimension of input must not be 0"));
    OP_REQUIRES(
        context, in_depth != 0,
        errors::InvalidArgument("Depth dimension of input must not be 0"));

    constexpr int tensor_in_and_out_dims = 4;
    // Transform orig_input_tensor_shape into TensorShape.
    TensorShape in_shape;
    for (auto i = 0; i < tensor_in_and_out_dims; ++i) {
      in_shape.AddDim(orig_input_tensor_shape_flat(i));
    }

    // Create intermediate in_backprop.
    Tensor in_backprop_tensor_temp;
    OP_REQUIRES_OK(context, context->forward_input_or_allocate_temp(
                                {0}, DataTypeToEnum<double>::v(), in_shape,
                                &in_backprop_tensor_temp));
    in_backprop_tensor_temp.flat<double>().setZero();
    // Transform 4D tensor to 2D matrix.
    EigenDoubleMatrixMap in_backprop_tensor_temp_mat(
        in_backprop_tensor_temp.flat<double>().data(), in_depth,
        in_cols * in_rows * in_batch);
    ConstEigenMatrixMap out_backprop_mat(out_backprop.flat<T>().data(),
                                         out_depth,
                                         out_cols * out_rows * out_batch);
    // Loop through each element of out_backprop and evenly distribute the
    // element to the corresponding pooling cell.
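    // As in the forward op, each column of the 2D maps corresponds to one
    // (batch, row, col) position with depth (channels) along the rows; the
    // innermost loop below walks the depth dimension explicitly.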
    const int64_t in_max_row_index = in_rows - 1;
    const int64_t in_max_col_index = in_cols - 1;
    for (int64_t b = 0; b < out_batch; ++b) {
      for (int64_t r = 0; r < out_rows; ++r) {
        const int64_t in_row_start = row_seq_tensor_flat(r);

        int64_t in_row_end = overlapping_ ? row_seq_tensor_flat(r + 1)
                                          : row_seq_tensor_flat(r + 1) - 1;
        in_row_end = std::min(in_row_end, in_max_row_index);
        OP_REQUIRES(context, in_row_start >= 0 && in_row_end >= 0,
                    errors::InvalidArgument(
                        "Row sequence tensor values must not be negative, got ",
                        row_seq_tensor_flat));

        for (int64_t c = 0; c < out_cols; ++c) {
          const int64_t in_col_start = col_seq_tensor_flat(c);
          int64_t in_col_end = overlapping_ ? col_seq_tensor_flat(c + 1)
                                            : col_seq_tensor_flat(c + 1) - 1;
          in_col_end = std::min(in_col_end, in_max_col_index);

          OP_REQUIRES(
              context, in_col_start >= 0 && in_col_end >= 0,
              errors::InvalidArgument(
                  "Column sequence tensor values must not be negative, got ",
                  col_seq_tensor_flat));
          const int64_t num_elements_in_pooling_cell =
              (in_row_end - in_row_start + 1) * (in_col_end - in_col_start + 1);
          const int64_t out_index = (b * out_rows + r) * out_cols + c;
          // Now we can evenly distribute out_backprop(b, h, w, *) to
          // in_backprop(b, hs:he, ws:we, *).
          for (int64_t in_r = in_row_start; in_r <= in_row_end; ++in_r) {
            for (int64_t in_c = in_col_start; in_c <= in_col_end; ++in_c) {
              const int64_t in_index = (b * in_rows + in_r) * in_cols + in_c;
              // Walk through each channel (depth).
              for (int64_t d = 0; d < out_depth; ++d) {
                const double out_backprop_element = static_cast<double>(
                    out_backprop_mat.coeffRef(d, out_index));
                double& in_backprop_ref =
                    in_backprop_tensor_temp_mat.coeffRef(d, in_index);
                in_backprop_ref +=
                    out_backprop_element / num_elements_in_pooling_cell;
              }
            }
          }
        }
      }
    }

    // Depending on the type, cast double to type T.
    Tensor* in_backprop_tensor = nullptr;
    OP_REQUIRES_OK(context, context->forward_input_or_allocate_output(
                                {0}, 0, in_shape, &in_backprop_tensor));
    auto in_backprop_tensor_flat = in_backprop_tensor->flat<T>();
    auto in_backprop_tensor_temp_flat = in_backprop_tensor_temp.flat<double>();
    for (int64_t i = 0; i < in_backprop_tensor_flat.size(); ++i) {
      in_backprop_tensor_flat(i) =
          static_cast<T>(in_backprop_tensor_temp_flat(i));
    }
  }

 private:
  bool overlapping_;
};

#define REGISTER_FRACTIONALAVGPOOLGRAD(type)              \
  REGISTER_KERNEL_BUILDER(Name("FractionalAvgPoolGrad")   \
                              .Device(DEVICE_CPU)         \
                              .TypeConstraint<type>("T"), \
                          FractionalAvgPoolGradOp<type>)

REGISTER_FRACTIONALAVGPOOLGRAD(int32);
REGISTER_FRACTIONALAVGPOOLGRAD(int64_t);
REGISTER_FRACTIONALAVGPOOLGRAD(float);
REGISTER_FRACTIONALAVGPOOLGRAD(double);

#undef REGISTER_FRACTIONALAVGPOOLGRAD
}  // namespace tensorflow