#define TORCH_ASSERT_ONLY_METHOD_OPERATORS
#include <ATen/core/Tensor.h>
#include <ATen/Config.h>
#include <ATen/core/grad_mode.h>
#include <ATen/native/Resize.h>
#include <ATen/native/utils/ParamUtils.h>
#include <c10/util/irange.h>
#include <algorithm>
#include <tuple>

#ifndef AT_PER_OPERATOR_HEADERS
#include <ATen/NativeFunctions.h>
#else
#include <ATen/ops/adaptive_avg_pool2d_native.h>
#include <ATen/ops/avg_pool2d_backward_native.h>
#include <ATen/ops/avg_pool2d_native.h>
#include <ATen/ops/avg_pool3d_backward_native.h>
#include <ATen/ops/avg_pool3d_native.h>
#include <ATen/ops/mkldnn_adaptive_avg_pool2d_backward_native.h>
#include <ATen/ops/mkldnn_adaptive_avg_pool2d_native.h>
#include <ATen/ops/mkldnn_max_pool2d_backward_native.h>
#include <ATen/ops/mkldnn_max_pool2d_native.h>
#include <ATen/ops/mkldnn_max_pool3d_backward_native.h>
#include <ATen/ops/mkldnn_max_pool3d_native.h>
#endif


#if !AT_MKLDNN_ENABLED()

namespace at {
namespace native {

Tensor mkldnn_max_pool2d(
    const Tensor& self,
    IntArrayRef kernel_size,
    IntArrayRef stride,
    IntArrayRef padding,
    IntArrayRef dilation,
    bool ceil_mode) {
  TORCH_CHECK(false, "mkldnn_max_pool2d: ATen not compiled with MKLDNN support");
}

Tensor mkldnn_max_pool3d(
    const Tensor& self,
    IntArrayRef kernel_size,
    IntArrayRef stride,
    IntArrayRef padding,
    IntArrayRef dilation,
    bool ceil_mode) {
  TORCH_CHECK(false, "mkldnn_max_pool3d: ATen not compiled with MKLDNN support");
}

Tensor mkldnn_avg_pool2d(
    const Tensor& self,
    IntArrayRef kernel_size,
    IntArrayRef stride,
    IntArrayRef padding,
    bool ceil_mode,
    bool count_include_pad,
    std::optional<int64_t> divisor_override) {
  TORCH_CHECK(false, "mkldnn_avg_pool2d: ATen not compiled with MKLDNN support");
}

Tensor& mkldnn_avg_pool2d_out(const Tensor& self,
    IntArrayRef kernel_size,
    IntArrayRef stride,
    IntArrayRef padding,
    bool ceil_mode,
    bool count_include_pad,
    std::optional<int64_t> divisor_override,
    Tensor& output) {
  TORCH_CHECK(false, "mkldnn_avg_pool2d_out: ATen not compiled with MKLDNN support");
}

Tensor mkldnn_avg_pool3d(
    const Tensor& self,
    IntArrayRef kernel_size,
    IntArrayRef stride,
    IntArrayRef padding,
    bool ceil_mode,
    bool count_include_pad,
    std::optional<int64_t> divisor_override) {
  TORCH_CHECK(false, "mkldnn_avg_pool3d: ATen not compiled with MKLDNN support");
}

Tensor& mkldnn_avg_pool3d_out(const Tensor& self,
    IntArrayRef kernel_size,
    IntArrayRef stride,
    IntArrayRef padding,
    bool ceil_mode,
    bool count_include_pad,
    std::optional<int64_t> divisor_override,
    Tensor& output) {
  TORCH_CHECK(false, "mkldnn_avg_pool3d_out: ATen not compiled with MKLDNN support");
}

Tensor mkldnn_adaptive_avg_pool2d(Tensor const& input, IntArrayRef output_size) {
  TORCH_CHECK(false, "mkldnn_adaptive_avg_pool2d: ATen not compiled with MKLDNN support");
}

Tensor& mkldnn_adaptive_avg_pool2d_out_stub(const Tensor& input,
    IntArrayRef output_size,
    Tensor& output) {
  TORCH_CHECK(false, "mkldnn_adaptive_avg_pool2d_out_stub: ATen not compiled with MKLDNN support");
}

Tensor& mkldnn_adaptive_avg_pool2d_out(const Tensor& input,
    IntArrayRef output_size,
    Tensor& output) {
  TORCH_CHECK(false, "mkldnn_adaptive_avg_pool2d_out: ATen not compiled with MKLDNN support");
}

Tensor mkldnn_max_pool2d_backward(
    const Tensor& grad_output,
    const Tensor& output,
    const Tensor& input,
    IntArrayRef kernel_size,
    IntArrayRef stride,
    IntArrayRef padding,
    IntArrayRef dilation,
    bool ceil_mode) {
  TORCH_CHECK(false, "mkldnn_max_pool2d_backward: ATen not compiled with MKLDNN support");
}

Tensor mkldnn_max_pool3d_backward(
    const Tensor& grad_output,
    const Tensor& output,
    const Tensor& input,
    IntArrayRef kernel_size,
    IntArrayRef stride,
    IntArrayRef padding,
    IntArrayRef dilation,
    bool ceil_mode) {
  TORCH_CHECK(false, "mkldnn_max_pool3d_backward: ATen not compiled with MKLDNN support");
}

Tensor& mkldnn_avg_pool2d_backward_out(const Tensor & grad_output,
    const Tensor & input,
    IntArrayRef kernel_size,
    IntArrayRef stride,
    IntArrayRef padding,
    bool ceil_mode,
    bool count_include_pad,
    std::optional<int64_t> divisor_override,
    Tensor & grad_input) {
  TORCH_CHECK(false, "mkldnn_avg_pool2d_backward_out: ATen not compiled with MKLDNN support");
}

Tensor mkldnn_avg_pool2d_backward(
    const Tensor& grad_output,
    const Tensor& input,
    IntArrayRef kernel_size,
    IntArrayRef stride,
    IntArrayRef padding,
    bool ceil_mode,
    bool count_include_pad,
    std::optional<int64_t> divisor_override) {
  TORCH_CHECK(false, "mkldnn_avg_pool2d_backward: ATen not compiled with MKLDNN support");
}

Tensor& mkldnn_avg_pool3d_backward_out(const Tensor & grad_output,
    const Tensor & input,
    IntArrayRef kernel_size,
    IntArrayRef stride,
    IntArrayRef padding,
    bool ceil_mode,
    bool count_include_pad,
    std::optional<int64_t> divisor_override,
    Tensor & grad_input) {
  TORCH_CHECK(false, "mkldnn_avg_pool3d_backward_out: ATen not compiled with MKLDNN support");
}

Tensor mkldnn_avg_pool3d_backward(
    const Tensor& grad_output,
    const Tensor& input,
    IntArrayRef kernel_size,
    IntArrayRef stride,
    IntArrayRef padding,
    bool ceil_mode,
    bool count_include_pad,
    std::optional<int64_t> divisor_override) {
  TORCH_CHECK(false, "mkldnn_avg_pool3d_backward: ATen not compiled with MKLDNN support");
}

Tensor mkldnn_adaptive_avg_pool2d_backward(
    const Tensor& grad_output,
    const Tensor& input) {
  TORCH_CHECK(false, "mkldnn_adaptive_avg_pool2d_backward: ATen not compiled with MKLDNN support");
}

} // namespace native
} // namespace at

#else // AT_MKLDNN_ENABLED

#include <ATen/native/mkldnn/MKLDNNCommon.h>
#include <ATen/native/mkldnn/Utils.h>

namespace at {
namespace native {

static Tensor _mkldnn_pooling(
    const Tensor& input,
    IntArrayRef kernel_size,
    IntArrayRef stride,
    IntArrayRef padding,
    IntArrayRef dilation,
    bool ceil_mode,
    ideep::algorithm algo) {
  const int64_t dims = input.dim() - 2;
  auto kernel_size_vec = expand_param_if_needed(kernel_size, "kernel_size", dims);
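  // An empty stride means "use kernel_size as the stride", matching the
  // default of the ATen pooling operators.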
  if (stride.empty()) stride = kernel_size;
  auto stride_vec = expand_param_if_needed(stride, "stride", dims);
  auto padding_vec = expand_param_if_needed(padding, "padding", dims);
  // NOLINTNEXTLINE(performance-unnecessary-copy-initialization)
  auto padding_vec_l = padding_vec;
  auto padding_vec_r = padding_vec;
  auto dilation_vec = expand_param_if_needed(dilation, "dilation", dims);

  const ideep::tensor& x = itensor_from_mkldnn(input);
  std::vector<int64_t> output_sizes;

  if (ceil_mode) {
    // MKLDNN does not support ceil mode, so we adjust padding
    // on the right side to match behavior. Adjust output size
    // accordingly.
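    // Worked example (illustrative numbers): input width 7, kernel 2,
    // stride 2, padding 0 gives floor output 3 but ceil output 4; bumping
    // padding_vec_r to 1 makes (7 + 0 + 1 - 2) / 2 + 1 == 4, so the
    // floor-mode computation below reproduces the ceil-mode size.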
    const std::vector<int64_t> output_sizes_ceil = pool_output_sizes(
        input.sizes(),
        kernel_size_vec,
        stride_vec,
        padding_vec_l,
        padding_vec_r,
        dilation_vec,
        true /* ceil_mode */);

    // adjust padding until output sizes agree
    bool all_equal = false;
    while (!all_equal) {
      output_sizes = pool_output_sizes(
          input.sizes(),
          kernel_size_vec,
          stride_vec,
          padding_vec_l,
          padding_vec_r,
          dilation_vec,
          false /* ceil_mode */);

      all_equal = true;
      for (const auto i : c10::irange(2, input.sizes().size())) {
        if (output_sizes[i] < output_sizes_ceil[i]) {
           padding_vec_r[i - 2]++;
           all_equal = false;
        }
      }
    }
  } else {
    output_sizes = pool_output_sizes(
        input.sizes(),
        kernel_size_vec,
        stride_vec,
        padding_vec_l,
        padding_vec_r,
        dilation_vec,
        false /* ceil_mode */);
  }

  auto aprop_kind = ideep::prop_kind::forward;
  // For max_pool, prop_kind::forward saves the indices as a workspace for
  // backward use. Inference does not need the indices, so switching
  // aprop_kind to prop_kind::forward_inference reduces memory use.
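  // e.g. a forward pass run under torch.no_grad() (GradMode disabled) on an
  // input without forward-mode gradients takes the forward_inference path.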
  if (ideep::algorithm::pooling_max == algo
      && !((input.requires_grad() && at::GradMode::is_enabled()) || input._fw_grad(/*level */ 0).defined())) {
    aprop_kind = ideep::prop_kind::forward_inference;
  }

  ideep::tensor y;
  ideep::pooling_forward::compute(
      x,
      {output_sizes.cbegin(), output_sizes.cend()},
      y,
      {stride_vec.cbegin(), stride_vec.cend()},
      {kernel_size_vec.cbegin(), kernel_size_vec.cend()},
      {padding_vec_l.cbegin(), padding_vec_l.cend()},
      {padding_vec_r.cbegin(), padding_vec_r.cend()},
      algo,
      aprop_kind);

  return new_with_itensor_mkldnn(std::move(y), optTypeMetaToScalarType(input.options().dtype_opt()), input.options().device_opt());
}

static Tensor _mkldnn_pooling_backward(
    const Tensor& grad_output,
    const Tensor& output,
    const Tensor& input,
    IntArrayRef kernel_size,
    IntArrayRef stride,
    IntArrayRef padding,
    IntArrayRef dilation,
    bool ceil_mode,
    ideep::algorithm algo) {

  const int64_t dims = input.dim() - 2;
  auto kernel_size_vec = expand_param_if_needed(kernel_size, "kernel_size", dims);
  auto stride_vec = expand_param_if_needed(stride, "stride", dims);
  auto padding_vec = expand_param_if_needed(padding, "padding", dims);
  // NOLINTNEXTLINE(performance-unnecessary-copy-initialization)
  auto padding_vec_l = padding_vec;
  auto padding_vec_r = padding_vec;
  auto dilation_vec = expand_param_if_needed(dilation, "dilation", dims);

  if (ceil_mode) {
    // MKLDNN does not support ceil mode, so we adjust padding
    // on the right side to match behavior. Adjust output size
    // accordingly.
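    // Same right-padding adjustment as in _mkldnn_pooling above; see the
    // worked example there.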
    const std::vector<int64_t> output_sizes_ceil = pool_output_sizes(
        input.sizes(),
        kernel_size_vec,
        stride_vec,
        padding_vec_l,
        padding_vec_r,
        dilation_vec,
        true /* ceil_mode */);

    // adjust padding until output sizes agree
    bool all_equal = false;
    std::vector<int64_t> output_sizes;
    while (!all_equal) {
      output_sizes = pool_output_sizes(
          input.sizes(),
          kernel_size_vec,
          stride_vec,
          padding_vec_l,
          padding_vec_r,
          dilation_vec,
          false /* ceil_mode */);

      all_equal = true;
      for (const auto i : c10::irange(2, input.sizes().size())) {
        if (output_sizes[i] < output_sizes_ceil[i]) {
           padding_vec_r[i - 2]++;
           all_equal = false;
        }
      }
    }
  }

  const ideep::tensor& grady = itensor_from_mkldnn(grad_output);
  const ideep::tensor& y = itensor_from_mkldnn(output);
  const ideep::tensor& x = itensor_from_mkldnn(input);
  ideep::tensor gradx;
  ideep::pooling_backward::compute(
      grady,
      y,
      x,
      gradx,
      {stride_vec.cbegin(), stride_vec.cend()},
      {kernel_size_vec.cbegin(), kernel_size_vec.cend()},
      {padding_vec_l.cbegin(), padding_vec_l.cend()},
      {padding_vec_r.cbegin(), padding_vec_r.cend()},
      algo);

  return new_with_itensor_mkldnn(std::move(gradx),
                                 optTypeMetaToScalarType(grad_output.options().dtype_opt()),
                                 grad_output.options().device_opt());
}

Tensor mkldnn_max_pool2d(
    const Tensor& input,
    IntArrayRef kernel_size,
    IntArrayRef stride,
    IntArrayRef padding,
    IntArrayRef dilation,
    bool ceil_mode) {
  TORCH_CHECK(std::all_of(dilation.cbegin(), dilation.cend(), [](int64_t i) { return 1 == i; }),
      "mkldnn_max_pool2d does not support dilation");
  if (input.scalar_type() == ScalarType::BFloat16) {
    TORCH_CHECK(mkldnn_bf16_device_check(),
        "mkldnn_max_pool2d: bf16 path needs the CPU to support avx512bw, avx512vl and avx512dq");
  }

  return _mkldnn_pooling(
      input,
      kernel_size,
      stride,
      padding,
      dilation,
      ceil_mode,
      ideep::algorithm::pooling_max);
}

Tensor mkldnn_max_pool3d(
    const Tensor& input,
    IntArrayRef kernel_size,
    IntArrayRef stride,
    IntArrayRef padding,
    IntArrayRef dilation,
    bool ceil_mode) {
  TORCH_CHECK(std::all_of(dilation.cbegin(), dilation.cend(), [](int64_t i) { return 1 == i; }),
      "mkldnn_max_pool3d does not support dilation");
  if (input.scalar_type() == ScalarType::BFloat16) {
    TORCH_CHECK(mkldnn_bf16_device_check(),
        "mkldnn_max_pool3d: bf16 path needs the CPU to support avx512bw, avx512vl and avx512dq");
  }

  return _mkldnn_pooling(
      input,
      kernel_size,
      stride,
      padding,
      dilation,
      ceil_mode,
      ideep::algorithm::pooling_max);
}

Tensor mkldnn_avg_pool2d(
    const Tensor& input,
    IntArrayRef kernel_size,
    IntArrayRef stride,
    IntArrayRef padding,
    bool ceil_mode,
    bool count_include_pad,
    std::optional<int64_t> divisor_override) {
  TORCH_CHECK(!divisor_override.has_value(),
      "mkldnn_avg_pool2d operator does not support divisor_override");
  if (input.scalar_type() == ScalarType::BFloat16) {
    TORCH_CHECK(mkldnn_bf16_device_check(),
        "mkldnn_avg_pool2d: bf16 path needs the CPU to support avx512bw, avx512vl and avx512dq");
  }

  return _mkldnn_pooling(
      input,
      kernel_size,
      stride,
      padding,
      /*dilation*/ std::vector<int64_t>{1, 1},
      ceil_mode,
      count_include_pad ? ideep::algorithm::pooling_avg_include_padding
                        : ideep::algorithm::pooling_avg_exclude_padding);
}

Tensor& mkldnn_avg_pool2d_out(const Tensor& input,
    IntArrayRef kernel_size,
    IntArrayRef stride,
    IntArrayRef padding,
    bool ceil_mode,
    bool count_include_pad,
    std::optional<int64_t> divisor_override,
    Tensor& output) {
  TORCH_CHECK(false, "mkldnn_avg_pool2d_out: in-place mkldnn operations are not supported yet");
}

Tensor mkldnn_avg_pool3d(
    const Tensor& input,
    IntArrayRef kernel_size,
    IntArrayRef stride,
    IntArrayRef padding,
    bool ceil_mode,
    bool count_include_pad,
    std::optional<int64_t> divisor_override) {
  TORCH_CHECK(!divisor_override.has_value(), "mkldnn_avg_pool3d operator does not support divisor_override");
  if (input.scalar_type() == ScalarType::BFloat16) {
    TORCH_CHECK(mkldnn_bf16_device_check(),
        "mkldnn_avg_pool3d: bf16 path needs the CPU to support avx512bw, avx512vl and avx512dq");
  }

  return _mkldnn_pooling(
      input,
      kernel_size,
      stride,
      padding,
      /*dilation*/ std::vector<int64_t>{1, 1, 1},
      ceil_mode,
      count_include_pad ? ideep::algorithm::pooling_avg_include_padding
                        : ideep::algorithm::pooling_avg_exclude_padding);
}

Tensor& mkldnn_avg_pool3d_out(const Tensor& input,
    IntArrayRef kernel_size,
    IntArrayRef stride,
    IntArrayRef padding,
    bool ceil_mode,
    bool count_include_pad,
    std::optional<int64_t> divisor_override,
    Tensor& output) {
  TORCH_CHECK(false, "mkldnn_avg_pool3d_out: in-place mkldnn operations are not supported yet");
}

Tensor mkldnn_adaptive_avg_pool2d(
    Tensor const& input,
    IntArrayRef output_size) {
  TORCH_CHECK(input.dim() == 4, "mkldnn_adaptive_avg_pool2d: Expected a 4D input");
  if (input.scalar_type() == ScalarType::BFloat16) {
    TORCH_CHECK(mkldnn_bf16_device_check(),
        "mkldnn_adaptive_avg_pool2d: bf16 path needs the CPU to support avx512bw, avx512vl and avx512dq");
  }
  auto output_size_vec =
      expand_param_if_needed(output_size, "output_size", input.dim() - 2);
  std::vector<int64_t> kernel_size(input.dim() - 2);
  for (const auto i : c10::irange(2, input.dim())) {
    auto s1 = input.size(i);
    auto s2 = output_size_vec[i - 2];
    TORCH_CHECK(s2 != 0, "output size cannot be zero");
    TORCH_CHECK(
        s1 % s2 == 0,
        "an input size that is not divisible by the output size is not supported yet");
    kernel_size[i - 2] = s1 / s2;
  }
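  // e.g. (illustrative sizes) a 32x32 spatial input with output_size {8, 8}
  // reduces to a plain average pool with kernel 4x4 and stride 4x4.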
  return _mkldnn_pooling(
      input,
      kernel_size,
      /*stride*/ kernel_size,
      /*padding*/ {0, 0},
      /*dilation*/ {1, 1},
      /*ceil_mode*/ false,
      /*algo*/ ideep::algorithm::pooling_avg_exclude_padding);
}

Tensor& mkldnn_adaptive_avg_pool2d_out_stub(const Tensor& input,
    IntArrayRef output_size,
    Tensor& output) {
  TORCH_CHECK(false, "mkldnn_adaptive_avg_pool2d_out_stub: in-place mkldnn operations are not supported yet");
}

Tensor& mkldnn_adaptive_avg_pool2d_out(const Tensor& input,
    IntArrayRef output_size,
    Tensor& output) {
  auto tmp_output = at::native::mkldnn_adaptive_avg_pool2d(input, output_size);
  at::native::resize_output(output, tmp_output.sizes());
  output.copy_(tmp_output);
  return output;
}

Tensor mkldnn_max_pool2d_backward(
    const Tensor& grad_output,
    const Tensor& output,
    const Tensor& input,
    IntArrayRef kernel_size,
    IntArrayRef stride,
    IntArrayRef padding,
    IntArrayRef dilation,
    bool ceil_mode) {
  return _mkldnn_pooling_backward(
      grad_output,
      output,
      input,
      kernel_size,
      stride,
      padding,
      dilation,
      ceil_mode,
      ideep::algorithm::pooling_max);
}

Tensor mkldnn_max_pool3d_backward(
    const Tensor& grad_output,
    const Tensor& output,
    const Tensor& input,
    IntArrayRef kernel_size,
    IntArrayRef stride,
    IntArrayRef padding,
    IntArrayRef dilation,
    bool ceil_mode) {
  return _mkldnn_pooling_backward(
      grad_output,
      output,
      input,
      kernel_size,
      stride,
      padding,
      dilation,
      ceil_mode,
      ideep::algorithm::pooling_max);
}

Tensor mkldnn_avg_pool2d_backward(
    const Tensor& grad_output,
    const Tensor& input,
    IntArrayRef kernel_size,
    IntArrayRef stride,
    IntArrayRef padding,
    bool ceil_mode,
    bool count_include_pad,
    std::optional<int64_t> divisor_override) {
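  // Average-pooling backward does not use the forward output, so grad_output
  // is passed again as the `output` argument purely as a shape placeholder.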
  return _mkldnn_pooling_backward(
      grad_output,
      grad_output,
      input,
      kernel_size,
      stride,
      padding,
      /*dilation*/ std::vector<int64_t>{1, 1},
      ceil_mode,
      count_include_pad ? ideep::algorithm::pooling_avg_include_padding
                        : ideep::algorithm::pooling_avg_exclude_padding);
}

Tensor& mkldnn_avg_pool2d_backward_out(const Tensor & grad_output,
    const Tensor & input,
    IntArrayRef kernel_size,
    IntArrayRef stride,
    IntArrayRef padding,
    bool ceil_mode,
    bool count_include_pad,
    std::optional<int64_t> divisor_override,
    Tensor & grad_input) {
  TORCH_CHECK(false, "mkldnn_avg_pool2d_backward_out: in-place mkldnn operations are not supported yet");
}

Tensor mkldnn_avg_pool3d_backward(
    const Tensor& grad_output,
    const Tensor& input,
    IntArrayRef kernel_size,
    IntArrayRef stride,
    IntArrayRef padding,
    bool ceil_mode,
    bool count_include_pad,
    std::optional<int64_t> divisor_override) {
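  // As above, grad_output doubles as the unused `output` argument.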
  return _mkldnn_pooling_backward(
      grad_output,
      grad_output,
      input,
      kernel_size,
      stride,
      padding,
      /*dilation*/ std::vector<int64_t>{1, 1, 1},
      ceil_mode,
      count_include_pad ? ideep::algorithm::pooling_avg_include_padding
                        : ideep::algorithm::pooling_avg_exclude_padding);
}

Tensor& mkldnn_avg_pool3d_backward_out(const Tensor & grad_output,
    const Tensor & input,
    IntArrayRef kernel_size,
    IntArrayRef stride,
    IntArrayRef padding,
    bool ceil_mode,
    bool count_include_pad,
    std::optional<int64_t> divisor_override,
    Tensor & grad_input) {
  TORCH_CHECK(false, "mkldnn_avg_pool3d_backward_out: in-place mkldnn operations are not supported yet");
}

Tensor mkldnn_adaptive_avg_pool2d_backward(
    const Tensor& grad_output,
    const Tensor& input) {
  TORCH_CHECK(input.dim() == 4, "mkldnn_adaptive_avg_pool2d_backward: Expected a 4D input");

  auto output_size_vec = grad_output.sizes();
  std::vector<int64_t> kernel_size(input.dim() - 2);
  for (const auto i : c10::irange(2, input.dim())) {
    auto s1 = input.size(i);
    auto s2 = output_size_vec[i];
    TORCH_CHECK(s2 != 0, "output size cannot be zero");
    TORCH_CHECK(
        s1 % s2 == 0,
        "an input size that is not divisible by the output size is not supported yet");
    kernel_size[i - 2] = s1 / s2;
  }
  // As in mkldnn_avg_pool2d_backward, grad_output stands in for the unused
  // `output` argument.
  return _mkldnn_pooling_backward(
      grad_output,
      grad_output,
      input,
      kernel_size,
      /*stride*/ kernel_size,
      /*padding*/ {0, 0},
      /*dilation*/ {1, 1},
      false,
      /*algo*/ ideep::algorithm::pooling_avg_exclude_padding);
}

} // namespace native
} // namespace at

#endif // AT_MKLDNN_ENABLED