// Copyright (c) Facebook, Inc. and its affiliates.
// All rights reserved.
//
// Copyright 2019 Google LLC
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.

#include <assert.h>
#include <inttypes.h>
#include <math.h>
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>

#include <fp16.h>

#include <xnnpack.h>
#include <xnnpack/allocator.h>
#include <xnnpack/common.h>
#include <xnnpack/indirection.h>
#include <xnnpack/log.h>
#include <xnnpack/math.h>
#include <xnnpack/operator.h>
#include <xnnpack/microparams-init.h>
#include <xnnpack/params.h>


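// Computes the output dimension under TensorFlow SAME padding: ceil(input / stride).
// For example (illustration only), input_dimension = 7 with stride_dimension = 2 gives
// divide_round_up(7, 2) = 4 output elements.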
static inline size_t compute_output_dimension_with_tf_same_padding(
    size_t input_dimension,
    size_t stride_dimension)
{
  return divide_round_up(input_dimension, stride_dimension);
}

static enum xnn_status create_max_pooling2d_nhwc(
    uint32_t input_padding_top,
    uint32_t input_padding_right,
    uint32_t input_padding_bottom,
    uint32_t input_padding_left,
    uint32_t pooling_height,
    uint32_t pooling_width,
    uint32_t stride_height,
    uint32_t stride_width,
    uint32_t dilation_height,
    uint32_t dilation_width,
    size_t channels,
    size_t input_pixel_stride,
    size_t output_pixel_stride,
    uint32_t flags,
    const void* params,
    size_t params_size,
    uint32_t datatype_init_flags,
    enum xnn_operator_type operator_type,
    xnn_operator_t* max_pooling_op_out)
{
  xnn_operator_t max_pooling_op = NULL;
  enum xnn_status status = xnn_status_uninitialized;

  if ((xnn_params.init_flags & XNN_INIT_FLAG_XNNPACK) == 0) {
    xnn_log_error("failed to create %s operator: XNNPACK is not initialized",
      xnn_operator_type_to_string(operator_type));
    return xnn_status_uninitialized;
  }

  status = xnn_status_unsupported_hardware;

  if ((xnn_params.init_flags & datatype_init_flags) != datatype_init_flags) {
    xnn_log_error(
      "failed to create %s operator: operations on data type are not supported",
      xnn_operator_type_to_string(operator_type));
    goto error;
  }

  status = xnn_status_invalid_parameter;

  const uint32_t pooling_size = pooling_height * pooling_width;
  if (pooling_size == 0) {
    xnn_log_error(
      "failed to create %s operator with %" PRIu32 "x%" PRIu32 " pooling size: "
      "pooling size dimensions must be non-zero",
      xnn_operator_type_to_string(operator_type),
      pooling_width, pooling_height);
    goto error;
  }

  if (pooling_size == 1) {
    xnn_log_error(
      "failed to create %s operator with 1 pooling element: 1x1 pooling is meaningless",
      xnn_operator_type_to_string(operator_type));
    goto error;
  }

  if (stride_height == 0 || stride_width == 0) {
    xnn_log_error(
      "failed to create %s operator with %" PRIu32 "x%" PRIu32 " stride: stride dimensions must be non-zero",
      xnn_operator_type_to_string(operator_type), stride_width, stride_height);
    goto error;
  }

  if (dilation_height == 0 || dilation_width == 0) {
    xnn_log_error(
      "failed to create %s operator with %" PRIu32 "x%" PRIu32 " dilation: dilation dimensions must be non-zero",
      xnn_operator_type_to_string(operator_type), dilation_width, dilation_height);
    goto error;
  }

  if (stride_height > pooling_height) {
    xnn_log_error(
      "failed to create %s operator with %" PRIu32 " stride height: stride height must be no greater than pooling height %" PRIu32,
      xnn_operator_type_to_string(operator_type), stride_height, pooling_height);
    return xnn_status_invalid_parameter;
  }

  if (stride_width > pooling_width) {
    xnn_log_error(
      "failed to create %s operator with %" PRIu32 " stride width: stride width must be no greater than pooling width %" PRIu32,
      xnn_operator_type_to_string(operator_type), stride_width, pooling_width);
    return xnn_status_invalid_parameter;
  }

  if (channels == 0) {
    xnn_log_error(
      "failed to create %s operator with %zu channels: number of channels must be non-zero",
      xnn_operator_type_to_string(operator_type), channels);
    goto error;
  }

  if (input_pixel_stride < channels) {
    xnn_log_error(
      "failed to create %s operator with input pixel stride of %zu: "
      "stride must be at least as large as the number of channels (%zu)",
      xnn_operator_type_to_string(operator_type), input_pixel_stride, channels);
    goto error;
  }

  if (output_pixel_stride < channels) {
    xnn_log_error(
      "failed to create %s operator with output pixel stride of %zu: "
      "stride must be at least as large as the number of channels (%zu)",
      xnn_operator_type_to_string(operator_type), output_pixel_stride, channels);
    goto error;
  }

  const bool any_padding = (input_padding_left | input_padding_top | input_padding_right | input_padding_bottom) != 0;
  if ((flags & XNN_FLAG_TENSORFLOW_SAME_PADDING) != 0) {
    if (any_padding) {
      xnn_log_error(
        "failed to create %s operator with %" PRIu32 "+%" PRIu32 "x%" PRIu32 "+%" PRIu32 " padding: "
        "TensorFlow SAME padding can't be combined with explicit padding specification",
        xnn_operator_type_to_string(operator_type),
        input_padding_top, input_padding_left, input_padding_bottom, input_padding_right);
      goto error;
    }
  }

  status = xnn_status_out_of_memory;

  max_pooling_op = xnn_allocate_zero_simd_memory(sizeof(struct xnn_operator));
  if (max_pooling_op == NULL) {
    xnn_log_error(
      "failed to allocate %zu bytes for %s operator descriptor",
      sizeof(struct xnn_operator), xnn_operator_type_to_string(operator_type));
    goto error;
  }

  max_pooling_op->padding_top = input_padding_top;
  max_pooling_op->padding_right = input_padding_right;
  max_pooling_op->padding_bottom = input_padding_bottom;
  max_pooling_op->padding_left = input_padding_left;

  max_pooling_op->kernel_height = pooling_height;
  max_pooling_op->kernel_width = pooling_width;
  max_pooling_op->stride_height = stride_height;
  max_pooling_op->stride_width = stride_width;
  max_pooling_op->dilation_height = dilation_height;
  max_pooling_op->dilation_width = dilation_width;
  max_pooling_op->channels = channels;
  max_pooling_op->input_pixel_stride = input_pixel_stride;
  max_pooling_op->output_pixel_stride = output_pixel_stride;

  memcpy(&max_pooling_op->params, params, params_size);
  max_pooling_op->type = operator_type;
  max_pooling_op->flags = flags;

  max_pooling_op->state = xnn_run_state_invalid;

  *max_pooling_op_out = max_pooling_op;
  return xnn_status_success;

error:
  xnn_delete_operator(max_pooling_op);
  return status;
}

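// Shared setup path for all max-pooling data types: validates arguments, computes the
// output spatial dimensions (explicit padding or TensorFlow SAME), (re)builds the
// indirection buffer when the input shape changes, and fills in the compute context
// consumed by xnn_compute_max_pooling.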
static enum xnn_status setup_max_pooling2d_nhwc(
  xnn_operator_t max_pooling_op,
  enum xnn_operator_type expected_operator_type,
  size_t batch_size,
  size_t input_height,
  size_t input_width,
  const void* input,
  void* output,
  uint32_t log2_input_element_size,
  uint32_t log2_output_element_size,
  struct maxpool_parameters maxpool[restrict XNN_MIN_ELEMENTS(1)],
  const void* params,
  size_t params_size,
  size_t num_threads)
{
  if (max_pooling_op->type != expected_operator_type) {
    xnn_log_error("failed to setup operator: operator type mismatch (expected %s, got %s)",
      xnn_operator_type_to_string(expected_operator_type),
      xnn_operator_type_to_string(max_pooling_op->type));
    return xnn_status_invalid_parameter;
  }
  max_pooling_op->state = xnn_run_state_invalid;

  if ((xnn_params.init_flags & XNN_INIT_FLAG_XNNPACK) == 0) {
    xnn_log_error(
      "failed to setup %s operator: XNNPACK is not initialized",
      xnn_operator_type_to_string(max_pooling_op->type));
    return xnn_status_uninitialized;
  }

  if (input_width == 0 || input_height == 0) {
    xnn_log_error(
      "failed to setup %s operator with %zux%zu input: input dimensions must be non-zero",
      xnn_operator_type_to_string(max_pooling_op->type), input_width, input_height);
    return xnn_status_invalid_parameter;
  }

  if (batch_size == 0) {
    max_pooling_op->state = xnn_run_state_skip;
    return xnn_status_success;
  }

  max_pooling_op->input_height = input_height;
  max_pooling_op->input_width = input_width;
  max_pooling_op->input = input;

  if (max_pooling_op->flags & XNN_FLAG_TENSORFLOW_SAME_PADDING) {
    max_pooling_op->output_height = compute_output_dimension_with_tf_same_padding(
        input_height, max_pooling_op->stride_height);
    max_pooling_op->output_width = compute_output_dimension_with_tf_same_padding(
        input_width, max_pooling_op->stride_width);

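    // Convert the kernel size to its dilated extent, then derive the total SAME padding as
    // the amount needed to cover the last output position. The total is split so that any
    // odd remainder goes to the bottom/right edge, matching TensorFlow's convention.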
    const uint32_t effective_kernel_height = (max_pooling_op->kernel_height - 1) * max_pooling_op->dilation_height + 1;
    const uint32_t effective_kernel_width = (max_pooling_op->kernel_width - 1) * max_pooling_op->dilation_width + 1;
    const uint32_t total_padding_height =
      doz((max_pooling_op->output_height - 1) * max_pooling_op->stride_height + effective_kernel_height, input_height);
    const uint32_t total_padding_width =
      doz((max_pooling_op->output_width - 1) * max_pooling_op->stride_width + effective_kernel_width, input_width);
    max_pooling_op->padding_top = total_padding_height / 2;
    max_pooling_op->padding_left = total_padding_width / 2;
    max_pooling_op->padding_bottom = total_padding_height - max_pooling_op->padding_top;
    max_pooling_op->padding_right = total_padding_width - max_pooling_op->padding_left;
  } else {
    max_pooling_op->output_height = xnn_compute_convolution_output_dimension(
        max_pooling_op->padding_top + input_height + max_pooling_op->padding_bottom,
        max_pooling_op->kernel_height,
        max_pooling_op->dilation_height,
        max_pooling_op->stride_height);
    max_pooling_op->output_width = xnn_compute_convolution_output_dimension(
        max_pooling_op->padding_left + input_width + max_pooling_op->padding_right,
        max_pooling_op->kernel_width,
        max_pooling_op->dilation_width,
        max_pooling_op->stride_width);
  }

  const size_t pooling_height = max_pooling_op->kernel_height;
  const size_t pooling_width = max_pooling_op->kernel_width;
  const size_t pooling_size = pooling_height * pooling_width;
  const size_t output_height = max_pooling_op->output_height;
  const size_t output_width = max_pooling_op->output_width;
  const uint32_t mr = maxpool->mr;

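  // Indirection buffer layout: each output row starts with pooling_size input pointers for
  // its first output pixel, and every subsequent output pixel in the row adds step_width
  // columns of pooling_height pointers. Without dilation, adjacent windows share columns
  // whenever the stride is smaller than the pooling width, so step_width is capped by the
  // stride in that case.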
  const size_t step_width =
    max_pooling_op->dilation_width > 1 ? pooling_width : min(max_pooling_op->stride_width, pooling_width);
  const size_t step_height = pooling_size + (output_width - 1) * step_width * pooling_height;

  if (input_height != max_pooling_op->last_input_height ||
      input_width != max_pooling_op->last_input_width)
  {
    // Micro-kernel may read up to (mr - 1) elements after the end of indirection buffer.
    const size_t indirection_buffer_size = sizeof(void*) * ((mr - 1) + output_height * step_height);
    const void** indirection_buffer =
      (const void**) xnn_reallocate_memory(max_pooling_op->indirection_buffer, indirection_buffer_size);
    if (indirection_buffer == NULL) {
      xnn_log_error("failed to allocate %zu bytes for indirection buffer", indirection_buffer_size);
      return xnn_status_out_of_memory;
    }
    max_pooling_op->indirection_buffer = indirection_buffer;

    xnn_indirection_init_maxpool2d(max_pooling_op, step_height, step_width, log2_input_element_size);

    max_pooling_op->last_input = input;
    max_pooling_op->last_input_height = input_height;
    max_pooling_op->last_input_width = input_width;
  }

  const uint32_t qr = maxpool->qr;
  const size_t channels = max_pooling_op->channels;

  const size_t indirect_input_height_stride = step_height * sizeof(void*);
  const size_t output_width_stride = max_pooling_op->output_pixel_stride << log2_output_element_size;
  const size_t output_height_stride = output_width * output_width_stride;
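  // The max-pooling micro-kernel consumes mr pointers in its first pass over a window and
  // qr pointers in each subsequent pass; the adjustment below rounds the remaining elements
  // up to a whole number of qr-sized passes so that input_increment advances from the end
  // of one (possibly multi-pass) window to the start of the next output pixel's window.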
  const size_t multipass_adjustment = round_up(doz(pooling_size, mr), qr) + mr;

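  // Indirection pointers were built against last_input; input_offset corrects for a change
  // of the input base pointer alone, so the buffer does not need to be rebuilt when only
  // the tensor address (not its shape) changes between setups.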
  max_pooling_op->context.max_pooling = (struct max_pooling_context) {
    .indirect_input = max_pooling_op->indirection_buffer,
    .indirect_input_height_stride = indirect_input_height_stride,
    .input_offset = (size_t) ((uintptr_t) input - (uintptr_t) max_pooling_op->last_input),
    .input_batch_stride = (input_height * input_width * max_pooling_op->input_pixel_stride) << log2_input_element_size,
    .output = output,
    .output_batch_stride = output_height * output_height_stride,
    .output_height_stride = output_height_stride,
    .output_width = output_width,
    .pooling_size = pooling_size,
    .channels = channels,
    .input_increment = (pooling_height * step_width - multipass_adjustment) * sizeof(void*),
    .output_increment = output_width_stride - (channels << log2_output_element_size),
    .ukernel = maxpool->ukernel,
  };
  memcpy(&max_pooling_op->context.max_pooling.params, params, params_size);

  max_pooling_op->compute.type = xnn_parallelization_type_2d;
  max_pooling_op->compute.task_2d = (pthreadpool_task_2d_t) xnn_compute_max_pooling;
  max_pooling_op->compute.range[0] = batch_size;
  max_pooling_op->compute.range[1] = output_height;
  max_pooling_op->state = xnn_run_state_ready;

  return xnn_status_success;
}

enum xnn_status xnn_create_max_pooling2d_nhwc_s8(
    uint32_t input_padding_top,
    uint32_t input_padding_right,
    uint32_t input_padding_bottom,
    uint32_t input_padding_left,
    uint32_t pooling_height,
    uint32_t pooling_width,
    uint32_t stride_height,
    uint32_t stride_width,
    uint32_t dilation_height,
    uint32_t dilation_width,
    size_t channels,
    size_t input_pixel_stride,
    size_t output_pixel_stride,
    int8_t output_min,
    int8_t output_max,
    uint32_t flags,
    xnn_operator_t* max_pooling_op_out)
{
  if (output_min >= output_max) {
    xnn_log_error(
      "failed to create %s operator with [%" PRId8 ", %" PRId8 "] output range: range min must be below range max",
      xnn_operator_type_to_string(xnn_operator_type_max_pooling_nhwc_s8), output_min, output_max);
    return xnn_status_invalid_parameter;
  }

  union xnn_s8_minmax_params params;
  xnn_params.s8.maxpool.init.s8(&params, output_min, output_max);
  return create_max_pooling2d_nhwc(
    input_padding_top, input_padding_right, input_padding_bottom, input_padding_left,
    pooling_height, pooling_width,
    stride_height, stride_width,
    dilation_height, dilation_width,
    channels, input_pixel_stride, output_pixel_stride,
    flags,
    &params, sizeof(params), XNN_INIT_FLAG_S8,
    xnn_operator_type_max_pooling_nhwc_s8,
    max_pooling_op_out);
}

enum xnn_status xnn_create_max_pooling2d_nhwc_u8(
    uint32_t input_padding_top,
    uint32_t input_padding_right,
    uint32_t input_padding_bottom,
    uint32_t input_padding_left,
    uint32_t pooling_height,
    uint32_t pooling_width,
    uint32_t stride_height,
    uint32_t stride_width,
    uint32_t dilation_height,
    uint32_t dilation_width,
    size_t channels,
    size_t input_pixel_stride,
    size_t output_pixel_stride,
    uint8_t output_min,
    uint8_t output_max,
    uint32_t flags,
    xnn_operator_t* max_pooling_op_out)
{
  if (output_min >= output_max) {
    xnn_log_error(
      "failed to create %s operator with [%" PRIu8 ", %" PRIu8 "] output range: range min must be below range max",
      xnn_operator_type_to_string(xnn_operator_type_max_pooling_nhwc_u8), output_min, output_max);
    return xnn_status_invalid_parameter;
  }

  union xnn_u8_minmax_params params;
  xnn_params.u8.maxpool.init.u8(&params, output_min, output_max);
  return create_max_pooling2d_nhwc(
    input_padding_top, input_padding_right, input_padding_bottom, input_padding_left,
    pooling_height, pooling_width,
    stride_height, stride_width,
    dilation_height, dilation_width,
    channels, input_pixel_stride, output_pixel_stride,
    flags,
    &params, sizeof(params), XNN_INIT_FLAG_U8,
    xnn_operator_type_max_pooling_nhwc_u8,
    max_pooling_op_out);
}

enum xnn_status xnn_create_max_pooling2d_nhwc_f32(
    uint32_t input_padding_top,
    uint32_t input_padding_right,
    uint32_t input_padding_bottom,
    uint32_t input_padding_left,
    uint32_t pooling_height,
    uint32_t pooling_width,
    uint32_t stride_height,
    uint32_t stride_width,
    uint32_t dilation_height,
    uint32_t dilation_width,
    size_t channels,
    size_t input_pixel_stride,
    size_t output_pixel_stride,
    float output_min,
    float output_max,
    uint32_t flags,
    xnn_operator_t* max_pooling_op_out)
{
  if (isnan(output_min)) {
    xnn_log_error(
      "failed to create %s operator with NaN output lower bound: lower bound must be non-NaN",
      xnn_operator_type_to_string(xnn_operator_type_max_pooling_nhwc_f32));
    return xnn_status_invalid_parameter;
  }

  if (isnan(output_max)) {
    xnn_log_error(
      "failed to create %s operator with NaN output upper bound: upper bound must be non-NaN",
      xnn_operator_type_to_string(xnn_operator_type_max_pooling_nhwc_f32));
    return xnn_status_invalid_parameter;
  }

  if (output_min >= output_max) {
    xnn_log_error(
      "failed to create %s operator with [%.7g, %.7g] output range: lower bound must be below upper bound",
      xnn_operator_type_to_string(xnn_operator_type_max_pooling_nhwc_f32), output_min, output_max);
    return xnn_status_invalid_parameter;
  }

  union xnn_f32_minmax_params params;
  xnn_params.f32.maxpool.init.f32(&params, output_min, output_max);
  return create_max_pooling2d_nhwc(
    input_padding_top, input_padding_right, input_padding_bottom, input_padding_left,
    pooling_height, pooling_width,
    stride_height, stride_width,
    dilation_height, dilation_width,
    channels, input_pixel_stride, output_pixel_stride,
    flags,
    &params, sizeof(params), XNN_INIT_FLAG_F32,
    xnn_operator_type_max_pooling_nhwc_f32,
    max_pooling_op_out);
}

enum xnn_status xnn_create_max_pooling2d_nhwc_f16(
    uint32_t input_padding_top,
    uint32_t input_padding_right,
    uint32_t input_padding_bottom,
    uint32_t input_padding_left,
    uint32_t pooling_height,
    uint32_t pooling_width,
    uint32_t stride_height,
    uint32_t stride_width,
    uint32_t dilation_height,
    uint32_t dilation_width,
    size_t channels,
    size_t input_pixel_stride,
    size_t output_pixel_stride,
    float output_min,
    float output_max,
    uint32_t flags,
    xnn_operator_t* max_pooling_op_out)
{
  if (isnan(output_min)) {
    xnn_log_error(
      "failed to create %s operator with NaN output lower bound: lower bound must be non-NaN",
      xnn_operator_type_to_string(xnn_operator_type_max_pooling_nhwc_f16));
    return xnn_status_invalid_parameter;
  }

  if (isnan(output_max)) {
    xnn_log_error(
      "failed to create %s operator with NaN output upper bound: upper bound must be non-NaN",
      xnn_operator_type_to_string(xnn_operator_type_max_pooling_nhwc_f16));
    return xnn_status_invalid_parameter;
  }

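  // Round the requested bounds to half precision first, then validate the range on the
  // rounded values: these are the bounds the f16 micro-kernel will actually apply, and two
  // distinct float bounds can collapse to the same half-precision value.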
  const uint16_t output_min_as_half = fp16_ieee_from_fp32_value(output_min);
  const uint16_t output_max_as_half = fp16_ieee_from_fp32_value(output_max);
  output_min = fp16_ieee_to_fp32_value(output_min_as_half);
  output_max = fp16_ieee_to_fp32_value(output_max_as_half);
  if (output_min >= output_max) {
    xnn_log_error(
      "failed to create %s operator with [%.7g, %.7g] output range: lower bound must be below upper bound",
      xnn_operator_type_to_string(xnn_operator_type_max_pooling_nhwc_f16), output_min, output_max);
    return xnn_status_invalid_parameter;
  }

  union xnn_f16_minmax_params params;
  if (xnn_params.f16.maxpool.init.f16 != NULL) {
    xnn_params.f16.maxpool.init.f16(&params, output_min_as_half, output_max_as_half);
  }
  return create_max_pooling2d_nhwc(
    input_padding_top, input_padding_right, input_padding_bottom, input_padding_left,
    pooling_height, pooling_width,
    stride_height, stride_width,
    dilation_height, dilation_width,
    channels, input_pixel_stride, output_pixel_stride,
    flags,
    &params, sizeof(params), XNN_INIT_FLAG_F16,
    xnn_operator_type_max_pooling_nhwc_f16,
    max_pooling_op_out);
}

enum xnn_status xnn_setup_max_pooling2d_nhwc_s8(
    xnn_operator_t max_pooling_op,
    size_t batch_size,
    size_t input_height,
    size_t input_width,
    const int8_t* input,
    int8_t* output,
    pthreadpool_t threadpool)
{
  return setup_max_pooling2d_nhwc(
    max_pooling_op, xnn_operator_type_max_pooling_nhwc_s8,
    batch_size, input_height, input_width,
    input, output,
    0 /* log2(sizeof(input element)) = log2(sizeof(int8_t)) */,
    0 /* log2(sizeof(output element)) = log2(sizeof(int8_t)) */,
    &xnn_params.s8.maxpool,
    &max_pooling_op->params.s8_minmax, sizeof(max_pooling_op->params.s8_minmax),
    pthreadpool_get_threads_count(threadpool));
}

enum xnn_status xnn_setup_max_pooling2d_nhwc_u8(
    xnn_operator_t max_pooling_op,
    size_t batch_size,
    size_t input_height,
    size_t input_width,
    const uint8_t* input,
    uint8_t* output,
    pthreadpool_t threadpool)
{
  return setup_max_pooling2d_nhwc(
    max_pooling_op, xnn_operator_type_max_pooling_nhwc_u8,
    batch_size, input_height, input_width,
    input, output,
    0 /* log2(sizeof(input element)) = log2(sizeof(uint8_t)) */,
    0 /* log2(sizeof(output element)) = log2(sizeof(uint8_t)) */,
    &xnn_params.u8.maxpool,
    &max_pooling_op->params.u8_minmax, sizeof(max_pooling_op->params.u8_minmax),
    pthreadpool_get_threads_count(threadpool));
}

enum xnn_status xnn_setup_max_pooling2d_nhwc_f16(
    xnn_operator_t max_pooling_op,
    size_t batch_size,
    size_t input_height,
    size_t input_width,
    const void* input,
    void* output,
    pthreadpool_t threadpool)
{
  return setup_max_pooling2d_nhwc(
    max_pooling_op, xnn_operator_type_max_pooling_nhwc_f16,
    batch_size, input_height, input_width,
    input, output,
    1 /* log2(sizeof(input element)) = log2(sizeof(uint16_t)) */,
    1 /* log2(sizeof(output element)) = log2(sizeof(uint16_t)) */,
    &xnn_params.f16.maxpool,
    &max_pooling_op->params.f16_minmax, sizeof(max_pooling_op->params.f16_minmax),
    pthreadpool_get_threads_count(threadpool));
}

enum xnn_status xnn_setup_max_pooling2d_nhwc_f32(
    xnn_operator_t max_pooling_op,
    size_t batch_size,
    size_t input_height,
    size_t input_width,
    const float* input,
    float* output,
    pthreadpool_t threadpool)
{
  return setup_max_pooling2d_nhwc(
    max_pooling_op, xnn_operator_type_max_pooling_nhwc_f32,
    batch_size, input_height, input_width,
    input, output,
    2 /* log2(sizeof(input element)) = log2(sizeof(float)) */,
    2 /* log2(sizeof(output element)) = log2(sizeof(float)) */,
    &xnn_params.f32.maxpool,
    &max_pooling_op->params.f32_minmax, sizeof(max_pooling_op->params.f32_minmax),
    pthreadpool_get_threads_count(threadpool));
}
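
// Typical call sequence (illustrative sketch only, not part of this file): the caller
// initializes XNNPACK, creates the operator, sets it up for a concrete input shape, runs
// it through a threadpool (or NULL for single-threaded execution), and deletes it.
// The shapes and pointer names below are hypothetical.
//
//   xnn_initialize(NULL /* allocator */);
//
//   xnn_operator_t max_pool = NULL;
//   xnn_create_max_pooling2d_nhwc_f32(
//     /*padding top/right/bottom/left=*/0, 0, 0, 0,
//     /*pooling height/width=*/2, 2,
//     /*stride height/width=*/2, 2,
//     /*dilation height/width=*/1, 1,
//     /*channels=*/channels, /*input_pixel_stride=*/channels, /*output_pixel_stride=*/channels,
//     /*output_min=*/-INFINITY, /*output_max=*/INFINITY,
//     /*flags=*/0, &max_pool);
//
//   xnn_setup_max_pooling2d_nhwc_f32(
//     max_pool, batch_size, input_height, input_width, input, output, /*threadpool=*/NULL);
//   xnn_run_operator(max_pool, /*threadpool=*/NULL);
//   xnn_delete_operator(max_pool);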