#ifdef USE_XNNPACK

#include <ATen/native/Pool.h>
#include <ATen/native/utils/Factory.h>
#include <ATen/native/xnnpack/Common.h>
#include <ATen/native/xnnpack/Engine.h>
#include <ATen/native/xnnpack/Pooling.h>

namespace at::native::xnnpack {

// Supports NHWC and NCHW FP32 max pooling with any
// - kernel size
// - padding
// - stride
// - dilation

bool use_max_pool2d(
    const Tensor& input,
    const IntArrayRef kernel_,
    const IntArrayRef padding_,
    IntArrayRef stride_,
    const IntArrayRef dilation_,
    const bool ceil_mode,
    const float output_min,
    const float output_max) {
  using namespace internal;

  // Make sure we are not dealing with an unorthodox configuration.
  if (kernel_.empty() || padding_.empty() || dilation_.empty()) {
    return false;
  }

  // Stride may legitimately be empty, in which case it defaults to the
  // kernel size.
  if (stride_.empty()) {
    stride_ = kernel_;
  }

  // Normalize the parameters.
  const internal::pooling::Parameters parameters{
      kernel_,
      padding_,
      stride_,
      dilation_,
  };

  // Here is the list of conditions required for this code path to be taken:
  // * Input must be a 4D CPU float tensor with no gradients.
  // * Kernel must be a 2D IntArrayRef containing two positive numbers.
  //   Furthermore, 1x1 kernels are not valid as XNNPACK prohibits their use.
  // * Padding must be a 2D IntArrayRef containing two non-negative numbers.
  // * Stride must be a 2D IntArrayRef containing two positive numbers.
  // * Dilation must be a 2D IntArrayRef containing two positive numbers.
  // * Ceil mode is only usable when it yields the same output shape as floor
  //   mode, since XNNPACK itself has no ceil-mode support.
  // * output_max must be greater than output_min.
  //   For example, setting both output_min and output_max to 0 is not valid usage.
  // * Finally, application of this operator to the input tensor with the given
  //   max pool 2d parameters must result in an output tensor with a valid shape.
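  //
  // For reference, pooling_output_shape() from ATen/native/Pool.h follows the
  // usual PyTorch pooling arithmetic, sketched below (the actual helper also
  // clamps ceil-mode windows that would start entirely inside the padding):
  //
  //   output = (input + 2 * padding - dilation * (kernel - 1) - 1) / stride + 1
  //
  // with the division rounded down, or rounded up when ceil_mode is set.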
  const int64_t pt_outputHeight = pooling_output_shape(
      input.size(Layout::Activation4D::height),
      parameters.kernel[Layout::Parameter::height],
      parameters.padding[Layout::Parameter::height],
      parameters.stride[Layout::Parameter::height],
      parameters.dilation[Layout::Parameter::height],
      ceil_mode);
  const int64_t pt_outputWidth = pooling_output_shape(
      input.size(Layout::Activation4D::width),
      parameters.kernel[Layout::Parameter::width],
      parameters.padding[Layout::Parameter::width],
      parameters.stride[Layout::Parameter::width],
      parameters.dilation[Layout::Parameter::width],
      ceil_mode);
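  // XNNPACK has no native ceil-mode support, so the output shape is
  // recomputed here with ceil mode forced off; the two shapes are compared
  // below to decide whether ceil mode is harmless for these parameters.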
  const int64_t xnnpack_outputHeight = pooling_output_shape(
      input.size(Layout::Activation4D::height),
      parameters.kernel[Layout::Parameter::height],
      parameters.padding[Layout::Parameter::height],
      parameters.stride[Layout::Parameter::height],
      parameters.dilation[Layout::Parameter::height],
      false);
  const int64_t xnnpack_outputWidth = pooling_output_shape(
      input.size(Layout::Activation4D::width),
      parameters.kernel[Layout::Parameter::width],
      parameters.padding[Layout::Parameter::width],
      parameters.stride[Layout::Parameter::width],
      parameters.dilation[Layout::Parameter::width],
      false);

  const bool output_size_eq = (pt_outputHeight == xnnpack_outputHeight) &&
      (pt_outputWidth == xnnpack_outputWidth);

  return xnnpack::available() &&
      // Input
      (4 == input.dim()) &&
      (input.device().is_cpu()) &&
      (kFloat == input.scalar_type()) &&
      !input.requires_grad() &&
      // Kernel
      (2 == parameters.kernel.size()) &&
      (parameters.kernel[Layout::Parameter::height] > 0) &&
      (parameters.kernel[Layout::Parameter::width] > 0) &&
      ((parameters.kernel[Layout::Parameter::height] *
        parameters.kernel[Layout::Parameter::width]) > 1) &&
      // Padding
      (2 == parameters.padding.size()) &&
      (parameters.padding[Layout::Parameter::height] >= 0) &&
      (parameters.padding[Layout::Parameter::width] >= 0) &&
      // Stride
      (2 == parameters.stride.size()) &&
      (parameters.stride[Layout::Parameter::height] > 0) &&
      (parameters.stride[Layout::Parameter::width] > 0) &&
      // Dilation
      (2 == parameters.dilation.size()) &&
      (parameters.dilation[Layout::Parameter::height] > 0) &&
      (parameters.dilation[Layout::Parameter::width] > 0) &&
      // Ceil Mode
      (!ceil_mode || output_size_eq) &&
      // Output Min / Max
      (output_max > output_min) &&
      // Output
      (pooling_output_shape(
          input.size(Layout::Activation4D::height),
          parameters.kernel[Layout::Parameter::height],
          parameters.padding[Layout::Parameter::height],
          parameters.stride[Layout::Parameter::height],
          parameters.dilation[Layout::Parameter::height],
          ceil_mode) > 0) &&
      (pooling_output_shape(
          input.size(Layout::Activation4D::width),
          parameters.kernel[Layout::Parameter::width],
          parameters.padding[Layout::Parameter::width],
          parameters.stride[Layout::Parameter::width],
          parameters.dilation[Layout::Parameter::width],
          ceil_mode) > 0) &&
      true;
}

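// Callers are expected to gate on use_max_pool2d() before dispatching to
// max_pool2d() below. A minimal sketch of that pattern (illustrative only,
// not a verbatim call site):
//
//   if (xnnpack::use_max_pool2d(input, kernel, padding, stride,
//                               dilation, ceil_mode, output_min, output_max)) {
//     return xnnpack::max_pool2d(input, kernel, padding, stride,
//                                dilation, ceil_mode, output_min, output_max);
//   }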
Tensor max_pool2d(
    const Tensor& input,
    const IntArrayRef kernel_,
    const IntArrayRef padding_,
    IntArrayRef stride_,
    const IntArrayRef dilation_,
    const bool ceil_mode,
    const float output_min,
    const float output_max) {
  using namespace internal;

  // A call to max_pool2d must have been gated by a call to use_max_pool2d,
  // so the parameters are guaranteed to be valid at this point. Still, stride
  // may be empty, and the parameters not yet normalized.

  if (stride_.empty()) {
    stride_ = kernel_;
  }

  const internal::pooling::Parameters parameters{
      kernel_,
      padding_,
      stride_,
      dilation_,
  };

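  // XNNPACK operates on NHWC tensors, so obtain a ChannelsLast-contiguous
  // copy of the input; allocate_padded_contiguous_if_needed() is expected to
  // return the input as-is when it already satisfies that layout.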
  const Tensor input_padded_contig_nhwc =
      mobile::allocate_padded_contiguous_if_needed(
          input,
          MemoryFormat::ChannelsLast);

  Tensor output_padded_contig_nhwc = mobile::empty_with_tail_padding(
      {
          input_padded_contig_nhwc.size(Layout::Activation4D::batch),
          input_padded_contig_nhwc.size(Layout::Activation4D::channels),
          pooling_output_shape(
              input_padded_contig_nhwc.size(Layout::Activation4D::height),
              parameters.kernel[Layout::Parameter::height],
              parameters.padding[Layout::Parameter::height],
              parameters.stride[Layout::Parameter::height],
              parameters.dilation[Layout::Parameter::height],
              ceil_mode),
          pooling_output_shape(
              input_padded_contig_nhwc.size(Layout::Activation4D::width),
              parameters.kernel[Layout::Parameter::width],
              parameters.padding[Layout::Parameter::width],
              parameters.stride[Layout::Parameter::width],
              parameters.dilation[Layout::Parameter::width],
              ceil_mode),
      },
      input_padded_contig_nhwc.options().dtype(),
      MemoryFormat::ChannelsLast,
      input_padded_contig_nhwc.opt_names());

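  // XNNPACK operators follow a create / reshape / setup / run lifecycle:
  // create bakes in the pooling parameters, reshape binds the tensor shapes,
  // setup binds the data pointers, and run executes on the given threadpool.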
  xnn_operator_t max_pool_op{};

  const xnn_status create_status = xnn_create_max_pooling2d_nhwc_f32(
      parameters.padding[Layout::Parameter::height],  // input_padding_top
      parameters.padding[Layout::Parameter::width],   // input_padding_right
      parameters.padding[Layout::Parameter::height],  // input_padding_bottom
      parameters.padding[Layout::Parameter::width],   // input_padding_left
      parameters.kernel[Layout::Parameter::height],   // kernel_height
      parameters.kernel[Layout::Parameter::width],    // kernel_width
      parameters.stride[Layout::Parameter::height],   // subsampling_height
      parameters.stride[Layout::Parameter::width],    // subsampling_width
      parameters.dilation[Layout::Parameter::height], // dilation_height
      parameters.dilation[Layout::Parameter::width],  // dilation_width
      output_min,                                     // output_min
      output_max,                                     // output_max
      0u,                                             // flags
      &max_pool_op);                                  // operator

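  // Wrap the raw handle in an RAII guard so the operator is released even if
  // one of the TORCH_CHECKs below throws.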
  Operator max_pool_scoped_op(max_pool_op);

  TORCH_CHECK(
      xnn_status_success == create_status,
      "xnn_create_max_pooling2d_nhwc_f32 failed!");

  const xnn_status reshape_status = xnn_reshape_max_pooling2d_nhwc_f32(
      max_pool_op,                                                     // operator
      input_padded_contig_nhwc.size(Layout::Activation4D::batch),     // batch_size
      input_padded_contig_nhwc.size(Layout::Activation4D::height),    // input_height
      input_padded_contig_nhwc.size(Layout::Activation4D::width),     // input_width
      input_padded_contig_nhwc.size(Layout::Activation4D::channels),  // channels
      input_padded_contig_nhwc.size(Layout::Activation4D::channels),  // input_pixel_stride - NHWC contiguous
      output_padded_contig_nhwc.size(Layout::Activation4D::channels), // output_pixel_stride - NHWC contiguous
      nullptr,                 // output_height_out
      nullptr,                 // output_width_out
      caffe2::pthreadpool_()); // threadpool

  TORCH_CHECK(
      xnn_status_success == reshape_status,
      "xnn_reshape_max_pooling2d_nhwc_f32 failed!");

  const xnn_status setup_status = xnn_setup_max_pooling2d_nhwc_f32(
      max_pool_op,                                  // operator
      input_padded_contig_nhwc.data_ptr<float>(),   // input
      output_padded_contig_nhwc.data_ptr<float>()); // output

  TORCH_CHECK(
      xnn_status_success == setup_status,
      "xnn_setup_max_pooling2d_nhwc_f32 failed!");

  const xnn_status run_status = xnn_run_operator(
      max_pool_op,             // operator
      caffe2::pthreadpool_()); // threadpool

  TORCH_INTERNAL_ASSERT(
      xnn_status_success == run_status,
      "xnn_run_operator failed!");

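  // Hand the result back in the caller's preferred layout; when the suggested
  // memory format is already ChannelsLast this contiguous() call should be a
  // no-op, otherwise it copies back to NCHW.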
  return output_padded_contig_nhwc.contiguous(input.suggest_memory_format());
}

} // namespace at::native::xnnpack

#endif /* USE_XNNPACK */