#ifdef USE_XNNPACK

#include <ATen/native/Pool.h>
#include <ATen/native/utils/Factory.h>
#include <ATen/native/xnnpack/Common.h>
#include <ATen/native/xnnpack/Engine.h>
#include <ATen/native/xnnpack/Pooling.h>

namespace at::native::xnnpack {

// Supports NHWC and NCHW FP32 max pooling with any
//  - kernel size
//  - padding
//  - stride
//  - dilation

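// Illustrative only: a minimal sketch of how callers are expected to gate the
// XNNPACK fast path, assuming a 4D CPU float `input`, a hypothetical `output`
// variable, and example values (2x2 kernel, zero padding, stride 2, dilation 1,
// ceil_mode off, unbounded output_min / output_max).  These values are not
// taken from this file; they simply satisfy the checks documented below.
//
//   if (use_max_pool2d(
//           input,
//           {2, 2},                                       // kernel
//           {0, 0},                                       // padding
//           {2, 2},                                       // stride
//           {1, 1},                                       // dilation
//           /*ceil_mode=*/false,
//           -std::numeric_limits<float>::infinity(),      // output_min
//           +std::numeric_limits<float>::infinity())) {   // output_max
//     output = max_pool2d(
//         input, {2, 2}, {0, 0}, {2, 2}, {1, 1},
//         /*ceil_mode=*/false,
//         -std::numeric_limits<float>::infinity(),
//         +std::numeric_limits<float>::infinity());
//   }
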
bool use_max_pool2d(
    const Tensor& input,
    const IntArrayRef kernel_,
    const IntArrayRef padding_,
    IntArrayRef stride_,
    const IntArrayRef dilation_,
    const bool ceil_mode,
    const float output_min,
    const float output_max) {
  using namespace internal;

  // Make sure we are not dealing with an unorthodox configuration.
  if (kernel_.empty() || padding_.empty() || dilation_.empty()) {
    return false;
  }

  // Stride can legitimately be empty, in which case it defaults to the kernel size.
  if (stride_.empty()) {
    stride_ = kernel_;
  }
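  // For example (hypothetical values): a kernel of {2, 2} with an empty stride
  // behaves the same as an explicit stride of {2, 2}.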

  // Normalize the parameters.
  const internal::pooling::Parameters parameters{
    kernel_,
    padding_,
    stride_,
    dilation_,
  };

  // Here is the list of conditions required for this code path to be taken:
  // * Input must be a 4D CPU float tensor with no gradients.
  // * Kernel must be a 2D IntArrayRef containing two positive numbers.
  //   Furthermore, 1x1 kernels are not valid as XNNPACK prohibits their use.
  // * Padding must be a 2D IntArrayRef containing two non-negative numbers.
  // * Stride must be a 2D IntArrayRef containing two positive numbers.
  // * Dilation must be a 2D IntArrayRef containing two positive numbers.
  // * Ceil mode is only allowed when it does not change the output shape,
  //   since XNNPACK itself always computes the floor-mode output shape.
  // * output_max must be greater than output_min.
  //   For instance, setting both output_min and output_max to 0 is not valid usage.
  // * Finally, application of this operator to the input tensor with the given
  //   max pool 2d parameters must result in an output tensor with a valid shape.
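
  // Compute the output shape both with the caller's ceil_mode and with ceil
  // mode forced off.  XNNPACK has no ceil-mode option, so this path is only
  // usable when the two shapes agree (see output_size_eq below).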
  const int64_t pt_outputHeight = pooling_output_shape(
      input.size(Layout::Activation4D::height),
      parameters.kernel[Layout::Parameter::height],
      parameters.padding[Layout::Parameter::height],
      parameters.stride[Layout::Parameter::height],
      parameters.dilation[Layout::Parameter::height],
      ceil_mode);
  const int64_t pt_outputWidth = pooling_output_shape(
      input.size(Layout::Activation4D::width),
      parameters.kernel[Layout::Parameter::width],
      parameters.padding[Layout::Parameter::width],
      parameters.stride[Layout::Parameter::width],
      parameters.dilation[Layout::Parameter::width],
      ceil_mode);
  const int64_t xnnpack_outputHeight = pooling_output_shape(
      input.size(Layout::Activation4D::height),
      parameters.kernel[Layout::Parameter::height],
      parameters.padding[Layout::Parameter::height],
      parameters.stride[Layout::Parameter::height],
      parameters.dilation[Layout::Parameter::height],
      false);
  const int64_t xnnpack_outputWidth = pooling_output_shape(
      input.size(Layout::Activation4D::width),
      parameters.kernel[Layout::Parameter::width],
      parameters.padding[Layout::Parameter::width],
      parameters.stride[Layout::Parameter::width],
      parameters.dilation[Layout::Parameter::width],
      false);

  const bool output_size_eq = (pt_outputHeight == xnnpack_outputHeight) &&
    (pt_outputWidth == xnnpack_outputWidth);

  return xnnpack::available() &&
      // Input
      (4 == input.dim()) &&
      (input.device().is_cpu()) &&
      (kFloat == input.scalar_type()) &&
      !input.requires_grad() &&
      // Kernel
      (2 == parameters.kernel.size()) &&
      (parameters.kernel[Layout::Parameter::height] > 0) &&
      (parameters.kernel[Layout::Parameter::width] > 0) &&
      ((parameters.kernel[Layout::Parameter::height] *
        parameters.kernel[Layout::Parameter::width]) > 1) &&
      // Padding
      (2 == parameters.padding.size()) &&
      (parameters.padding[Layout::Parameter::height] >= 0) &&
      (parameters.padding[Layout::Parameter::width] >= 0) &&
      // Stride
      (2 == parameters.stride.size()) &&
      (parameters.stride[Layout::Parameter::height] > 0) &&
      (parameters.stride[Layout::Parameter::width] > 0) &&
      // Dilation
      (2 == parameters.dilation.size()) &&
      (parameters.dilation[Layout::Parameter::height] > 0) &&
      (parameters.dilation[Layout::Parameter::width] > 0) &&
      // Ceil Mode
      (!ceil_mode || output_size_eq) &&
      // Output Min / Max
      (output_max > output_min) &&
      // Output
      (pooling_output_shape(
        input.size(Layout::Activation4D::height),
        parameters.kernel[Layout::Parameter::height],
        parameters.padding[Layout::Parameter::height],
        parameters.stride[Layout::Parameter::height],
        parameters.dilation[Layout::Parameter::height],
        ceil_mode) > 0) &&
      (pooling_output_shape(
        input.size(Layout::Activation4D::width),
        parameters.kernel[Layout::Parameter::width],
        parameters.padding[Layout::Parameter::width],
        parameters.stride[Layout::Parameter::width],
        parameters.dilation[Layout::Parameter::width],
        ceil_mode) > 0) &&
      true;
}

Tensor max_pool2d(
    const Tensor& input,
    const IntArrayRef kernel_,
    const IntArrayRef padding_,
    IntArrayRef stride_,
    const IntArrayRef dilation_,
    const bool ceil_mode,
    const float output_min,
    const float output_max) {
  using namespace internal;

  // A call to max_pool2d must have been gated by a call to use_max_pool2d, so
  // the parameters are guaranteed to be valid at this point.  Still, stride can
  // be empty, and the parameters may not yet be normalized.

  if (stride_.empty()) {
    stride_ = kernel_;
  }

  const internal::pooling::Parameters parameters{
    kernel_,
    padding_,
    stride_,
    dilation_,
  };

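  // xnn_create_max_pooling2d_nhwc_f32 below expects NHWC data, so the input is
  // converted to a ChannelsLast-contiguous tensor (padded if needed) and the
  // output is allocated in the same layout with tail padding.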
  const Tensor input_padded_contig_nhwc =
      mobile::allocate_padded_contiguous_if_needed(
          input,
          MemoryFormat::ChannelsLast);

  Tensor output_padded_contig_nhwc = mobile::empty_with_tail_padding(
      {
        input_padded_contig_nhwc.size(Layout::Activation4D::batch),
        input_padded_contig_nhwc.size(Layout::Activation4D::channels),
        pooling_output_shape(
            input_padded_contig_nhwc.size(Layout::Activation4D::height),
            parameters.kernel[Layout::Parameter::height],
            parameters.padding[Layout::Parameter::height],
            parameters.stride[Layout::Parameter::height],
            parameters.dilation[Layout::Parameter::height],
            ceil_mode),
        pooling_output_shape(
            input_padded_contig_nhwc.size(Layout::Activation4D::width),
            parameters.kernel[Layout::Parameter::width],
            parameters.padding[Layout::Parameter::width],
            parameters.stride[Layout::Parameter::width],
            parameters.dilation[Layout::Parameter::width],
            ceil_mode),
      },
      input_padded_contig_nhwc.options().dtype(),
      MemoryFormat::ChannelsLast,
      input_padded_contig_nhwc.opt_names());

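  // The XNNPACK operator goes through the usual create -> reshape -> setup ->
  // run sequence below; each step's status is verified with TORCH_CHECK /
  // TORCH_INTERNAL_ASSERT against xnn_status_success.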
  xnn_operator_t max_pool_op{};

  const xnn_status create_status = xnn_create_max_pooling2d_nhwc_f32(
      parameters.padding[Layout::Parameter::height],                  // input_padding_top
      parameters.padding[Layout::Parameter::width],                   // input_padding_right
      parameters.padding[Layout::Parameter::height],                  // input_padding_bottom
      parameters.padding[Layout::Parameter::width],                   // input_padding_left
      parameters.kernel[Layout::Parameter::height],                   // kernel_height
      parameters.kernel[Layout::Parameter::width],                    // kernel_width
      parameters.stride[Layout::Parameter::height],                   // subsampling_height
      parameters.stride[Layout::Parameter::width],                    // subsampling_width
      parameters.dilation[Layout::Parameter::height],                 // dilation_height
      parameters.dilation[Layout::Parameter::width],                  // dilation_width
      output_min,                                                     // output_min
      output_max,                                                     // output_max
      0u,                                                             // flags
      &max_pool_op);                                                  // operator

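  // Hand the raw xnn_operator_t to the scoped Operator RAII wrapper so the
  // operator is released even if one of the checks below throws.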
  Operator max_pool_scoped_op(max_pool_op);

  TORCH_CHECK(
      xnn_status_success == create_status,
      "xnn_create_max_pooling2d_nhwc_f32 failed!");

  const xnn_status reshape_status = xnn_reshape_max_pooling2d_nhwc_f32(
      max_pool_op,                                                    // operator
      input_padded_contig_nhwc.size(Layout::Activation4D::batch),     // batch_size
      input_padded_contig_nhwc.size(Layout::Activation4D::height),    // input_height
      input_padded_contig_nhwc.size(Layout::Activation4D::width),     // input_width
      input_padded_contig_nhwc.size(Layout::Activation4D::channels),  // channels
      input_padded_contig_nhwc.size(Layout::Activation4D::channels),  // input_pixel_stride - NHWC Contiguous
      output_padded_contig_nhwc.size(Layout::Activation4D::channels), // output_pixel_stride - NHWC Contiguous
      nullptr,                                                        // output_height_out
      nullptr,                                                        // output_width_out
      caffe2::pthreadpool_());                                        // threadpool

  TORCH_CHECK(
    xnn_status_success == reshape_status,
    "xnn_reshape_max_pooling2d_nhwc_f32 failed!");

  const xnn_status setup_status = xnn_setup_max_pooling2d_nhwc_f32(
      max_pool_op,                                                  // operator
      input_padded_contig_nhwc.data_ptr<float>(),                   // input
      output_padded_contig_nhwc.data_ptr<float>());                 // output

  TORCH_CHECK(
      xnn_status_success == setup_status,
      "xnn_setup_max_pooling2d_nhwc_f32 failed!");

  const xnn_status run_status = xnn_run_operator(
      max_pool_op,              // operator
      caffe2::pthreadpool_());  // threadpool

  TORCH_INTERNAL_ASSERT(
      xnn_status_success == run_status,
      "xnn_run_operator failed!");

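  // Convert the NHWC result back to the memory format suggested by the
  // original input before returning it to the caller.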
  return output_padded_contig_nhwc.contiguous(input.suggest_memory_format());
}

} // namespace at::native::xnnpack

#endif /* USE_XNNPACK */