1 // Copyright (c) Facebook, Inc. and its affiliates.
2 // All rights reserved.
3 //
4 // Copyright 2019 Google LLC
5 //
6 // This source code is licensed under the BSD-style license found in the
7 // LICENSE file in the root directory of this source tree.
8
9 #include <assert.h>
10 #include <math.h>
11 #include <stdbool.h>
12 #include <stddef.h>
13 #include <stdint.h>
14 #include <stdlib.h>
15 #include <string.h>
16
17 #include <fp16.h>
18
19 #include <xnnpack.h>
20 #include <xnnpack/allocator.h>
21 #include <xnnpack/common.h>
22 #include <xnnpack/indirection.h>
23 #include <xnnpack/log.h>
24 #include <xnnpack/math.h>
25 #include <xnnpack/operator.h>
26 #include <xnnpack/microparams-init.h>
27 #include <xnnpack/params.h>
28
29
// Output size under TensorFlow SAME padding: ceil(input / stride).
static inline size_t compute_output_dimension_with_tf_same_padding(
    size_t input_dimension,
    size_t stride_dimension)
{
  // Ceiling division written out explicitly (quotient plus one when there is
  // a remainder) so no overflowing intermediate sum is formed.
  const size_t quotient = input_dimension / stride_dimension;
  const size_t remainder = input_dimension % stride_dimension;
  return quotient + (size_t) (remainder != 0);
}
36
// Shared creation logic for all NHWC max-pooling operator datatypes.
//
// Validates the pooling geometry (kernel size, stride, dilation), channel
// count, pixel strides, and padding flags, then allocates the operator
// descriptor and copies the datatype-specific microkernel `params`
// (`params_size` bytes) into it. On success the new operator is returned
// through `max_pooling_op_out`; on failure any partially-constructed
// operator is destroyed and an error status is returned.
static enum xnn_status create_max_pooling2d_nhwc(
  uint32_t input_padding_top,
  uint32_t input_padding_right,
  uint32_t input_padding_bottom,
  uint32_t input_padding_left,
  uint32_t pooling_height,
  uint32_t pooling_width,
  uint32_t stride_height,
  uint32_t stride_width,
  uint32_t dilation_height,
  uint32_t dilation_width,
  size_t channels,
  size_t input_pixel_stride,
  size_t output_pixel_stride,
  uint32_t flags,
  const void* params,
  size_t params_size,
  uint32_t datatype_init_flags,
  enum xnn_operator_type operator_type,
  xnn_operator_t* max_pooling_op_out)
{
  xnn_operator_t max_pooling_op = NULL;
  enum xnn_status status = xnn_status_uninitialized;

  if ((xnn_params.init_flags & XNN_INIT_FLAG_XNNPACK) == 0) {
    // Note: message corrected from "failed to setup" — this is creation.
    xnn_log_error("failed to create %s operator: XNNPACK is not initialized",
      xnn_operator_type_to_string(operator_type));
    return xnn_status_uninitialized;
  }

  status = xnn_status_unsupported_hardware;

  // The microkernels for this datatype must have been initialized.
  if ((xnn_params.init_flags & datatype_init_flags) != datatype_init_flags) {
    xnn_log_error(
      "failed to create %s operator: operations on data type are not supported",
      xnn_operator_type_to_string(operator_type));
    goto error;
  }

  status = xnn_status_invalid_parameter;

  const uint32_t pooling_size = pooling_height * pooling_width;
  if (pooling_size == 0) {
    xnn_log_error(
      "failed to create %s operator with %" PRIu32 "x%" PRIu32 " pooling size: "
      "pooling size dimensions must be non-zero",
      xnn_operator_type_to_string(operator_type),
      pooling_width, pooling_height);
    goto error;
  }

  // A 1x1 max pool is the identity; reject it as a likely caller error.
  if (pooling_size == 1) {
    xnn_log_error(
      "failed to create %s operator with 1 pooling element: 1x1 pooling is meaningless",
      xnn_operator_type_to_string(operator_type));
    goto error;
  }

  if (stride_height == 0 || stride_width == 0) {
    xnn_log_error(
      "failed to create %s operator with %" PRIu32 "x%" PRIu32 " stride: stride dimensions must be non-zero",
      xnn_operator_type_to_string(operator_type), stride_width, stride_height);
    goto error;
  }

  if (dilation_height == 0 || dilation_width == 0) {
    xnn_log_error(
      "failed to create %s operator with %" PRIu32 "x%" PRIu32 " dilation: dilation dimensions must be non-zero",
      xnn_operator_type_to_string(operator_type), dilation_width, dilation_height);
    goto error;
  }

  // A stride larger than the pooling window would skip input elements
  // entirely. Equality is allowed (non-overlapping pooling).
  // Fixed: use the goto-error convention like every other check here
  // (status is already xnn_status_invalid_parameter), and say "create"
  // rather than "define"; "must not exceed" matches the actual check.
  if (stride_height > pooling_height) {
    xnn_log_error(
      "failed to create %s operator with %" PRIu32 " stride height: stride height must not exceed pooling height %" PRIu32,
      xnn_operator_type_to_string(operator_type), stride_height, pooling_height);
    goto error;
  }

  if (stride_width > pooling_width) {
    xnn_log_error(
      "failed to create %s operator with %" PRIu32 " stride width: stride width must not exceed pooling width %" PRIu32,
      xnn_operator_type_to_string(operator_type), stride_width, pooling_width);
    goto error;
  }

  if (channels == 0) {
    xnn_log_error(
      "failed to create %s operator with %zu channels: number of channels must be non-zero",
      xnn_operator_type_to_string(operator_type), channels);
    goto error;
  }

  if (input_pixel_stride < channels) {
    xnn_log_error(
      "failed to create %s operator with input pixel stride of %zu: "
      "stride must be at least as large as the number of channels (%zu)",
      xnn_operator_type_to_string(operator_type), input_pixel_stride, channels);
    goto error;
  }

  if (output_pixel_stride < channels) {
    xnn_log_error(
      "failed to create %s operator with output pixel stride of %zu: "
      "stride must be at least as large as the number of channels (%zu)",
      xnn_operator_type_to_string(operator_type), output_pixel_stride, channels);
    goto error;
  }

  // TF SAME padding is computed at setup time from the input size, so it is
  // mutually exclusive with explicit padding amounts.
  const bool any_padding = (input_padding_left | input_padding_top | input_padding_right | input_padding_bottom) != 0;
  if ((flags & XNN_FLAG_TENSORFLOW_SAME_PADDING) != 0) {
    if (any_padding) {
      xnn_log_error(
        "failed to create %s operator with %" PRIu32 "+%" PRIu32 "x%" PRIu32 "+%" PRIu32" padding: "
        "TensorFlow SAME padding can't be combined with explicit padding specification",
        xnn_operator_type_to_string(operator_type),
        input_padding_top, input_padding_left, input_padding_bottom, input_padding_right);
      goto error;
    }
  }

  status = xnn_status_out_of_memory;

  max_pooling_op = xnn_allocate_zero_simd_memory(sizeof(struct xnn_operator));
  if (max_pooling_op == NULL) {
    xnn_log_error(
      "failed to allocate %zu bytes for %s operator descriptor",
      sizeof(struct xnn_operator), xnn_operator_type_to_string(operator_type));
    goto error;
  }

  max_pooling_op->padding_top = input_padding_top;
  max_pooling_op->padding_right = input_padding_right;
  max_pooling_op->padding_bottom = input_padding_bottom;
  max_pooling_op->padding_left = input_padding_left;

  max_pooling_op->kernel_height = pooling_height;
  max_pooling_op->kernel_width = pooling_width;
  max_pooling_op->stride_height = stride_height;
  max_pooling_op->stride_width = stride_width;
  max_pooling_op->dilation_height = dilation_height;
  max_pooling_op->dilation_width = dilation_width;
  max_pooling_op->channels = channels;
  max_pooling_op->input_pixel_stride = input_pixel_stride;
  max_pooling_op->output_pixel_stride = output_pixel_stride;

  // Datatype-specific min/max microkernel parameters.
  memcpy(&max_pooling_op->params, params, params_size);
  max_pooling_op->type = operator_type;
  max_pooling_op->flags = flags;

  // The operator cannot be run until a setup call succeeds.
  max_pooling_op->state = xnn_run_state_invalid;

  *max_pooling_op_out = max_pooling_op;
  return xnn_status_success;

error:
  xnn_delete_operator(max_pooling_op);
  return status;
}
196
// Shared setup logic for all NHWC max-pooling operator datatypes.
//
// Computes the output shape (honoring TensorFlow SAME padding when the
// operator was created with XNN_FLAG_TENSORFLOW_SAME_PADDING), (re)builds the
// indirection buffer when the input shape changed, and fills in the compute
// context so the operator can be run. `maxpool` supplies the microkernel and
// its mr/qr tile parameters; `params` (`params_size` bytes) are the
// datatype-specific min/max parameters copied into the context.
// `num_threads` is accepted for parity with other setup functions but is not
// used in this body.
static enum xnn_status setup_max_pooling2d_nhwc(
  xnn_operator_t max_pooling_op,
  enum xnn_operator_type expected_operator_type,
  size_t batch_size,
  size_t input_height,
  size_t input_width,
  const void* input,
  void* output,
  uint32_t log2_input_element_size,
  uint32_t log2_output_element_size,
  struct maxpool_parameters maxpool[restrict XNN_MIN_ELEMENTS(1)],
  const void* params,
  size_t params_size,
  size_t num_threads)
{
  // Guard against a setup function being called on the wrong operator kind.
  if (max_pooling_op->type != expected_operator_type) {
    xnn_log_error("failed to setup operator: operator type mismatch (expected %s, got %s)",
      xnn_operator_type_to_string(expected_operator_type),
      xnn_operator_type_to_string(max_pooling_op->type));
    return xnn_status_invalid_parameter;
  }
  // Invalidate first so a failed setup leaves the operator non-runnable.
  max_pooling_op->state = xnn_run_state_invalid;

  if ((xnn_params.init_flags & XNN_INIT_FLAG_XNNPACK) == 0) {
    xnn_log_error(
      "failed to setup %s operator: XNNPACK is not initialized",
      xnn_operator_type_to_string(max_pooling_op->type));
    return xnn_status_uninitialized;
  }

  if (input_width == 0 || input_height == 0) {
    xnn_log_error(
      "failed to setup %s operator with %zux%zu input: input dimensions must be non-zero",
      xnn_operator_type_to_string(max_pooling_op->type), input_width, input_height);
    return xnn_status_invalid_parameter;
  }

  // An empty batch is a no-op: mark the operator to be skipped when run.
  if (batch_size == 0) {
    max_pooling_op->state = xnn_run_state_skip;
    return xnn_status_success;
  }

  max_pooling_op->input_height = input_height;
  max_pooling_op->input_width = input_width;
  max_pooling_op->input = input;

  if (max_pooling_op->flags & XNN_FLAG_TENSORFLOW_SAME_PADDING) {
    // TF SAME: output = ceil(input / stride); padding is then derived from
    // the output size and split as evenly as possible, with the extra element
    // (if total padding is odd) going to the bottom/right.
    max_pooling_op->output_height = compute_output_dimension_with_tf_same_padding(
      input_height, max_pooling_op->stride_height);
    max_pooling_op->output_width = compute_output_dimension_with_tf_same_padding(
      input_width, max_pooling_op->stride_width);

    // Effective kernel extent once dilation gaps are accounted for.
    const uint32_t effective_kernel_height = (max_pooling_op->kernel_height - 1) * max_pooling_op->dilation_height + 1;
    const uint32_t effective_kernel_width = (max_pooling_op->kernel_width - 1) * max_pooling_op->dilation_width + 1;
    // doz = difference-or-zero: clamps to 0 when the kernel already fits.
    const uint32_t total_padding_height =
      doz((max_pooling_op->output_height - 1) * max_pooling_op->stride_height + effective_kernel_height, input_height);
    const uint32_t total_padding_width =
      doz((max_pooling_op->output_width - 1) * max_pooling_op->stride_width + effective_kernel_width, input_width);
    max_pooling_op->padding_top = total_padding_height / 2;
    max_pooling_op->padding_left = total_padding_width / 2;
    max_pooling_op->padding_bottom = total_padding_height - max_pooling_op->padding_top;
    max_pooling_op->padding_right = total_padding_width - max_pooling_op->padding_left;
  } else {
    // Explicit padding: standard convolution output-size formula applied to
    // the padded input extent.
    max_pooling_op->output_height = xnn_compute_convolution_output_dimension(
      max_pooling_op->padding_top + input_height + max_pooling_op->padding_bottom,
      max_pooling_op->kernel_height,
      max_pooling_op->dilation_height,
      max_pooling_op->stride_height);
    max_pooling_op->output_width = xnn_compute_convolution_output_dimension(
      max_pooling_op->padding_left + input_width + max_pooling_op->padding_right,
      max_pooling_op->kernel_width,
      max_pooling_op->dilation_width,
      max_pooling_op->stride_width);
  }

  const size_t pooling_height = max_pooling_op->kernel_height;
  const size_t pooling_width = max_pooling_op->kernel_width;
  const size_t pooling_size = pooling_height * pooling_width;
  const size_t output_height = max_pooling_op->output_height;
  const size_t output_width = max_pooling_op->output_width;
  // mr = primary tile size of the max-pooling microkernel.
  const uint32_t mr = maxpool->mr;

  // Indirection-buffer layout parameters: step_width is the pointer advance
  // per output pixel (windows can share columns only when undilated and the
  // stride is smaller than the kernel); step_height is the number of pointers
  // per output row. NOTE(review): exact layout is defined by
  // xnn_indirection_init_maxpool2d — these must stay in sync with it.
  const size_t step_width =
    max_pooling_op->dilation_width > 1 ? pooling_width : min(max_pooling_op->stride_width, pooling_width);
  const size_t step_height = pooling_size + (output_width - 1) * step_width * pooling_height;

  // Rebuild the indirection buffer only when the input shape changed; a mere
  // change of the input pointer is handled via input_offset below.
  if (input_height != max_pooling_op->last_input_height ||
      input_width != max_pooling_op->last_input_width)
  {
    // Micro-kernel may read up to (mr - 1) elements after the end of indirection buffer.
    const size_t indirection_buffer_size = sizeof(void*) * ((mr - 1) + output_height * step_height);
    const void** indirection_buffer =
      (const void**) xnn_reallocate_memory(max_pooling_op->indirection_buffer, indirection_buffer_size);
    if (indirection_buffer == NULL) {
      xnn_log_error("failed to allocate %zu bytes for indirection buffer", indirection_buffer_size);
      return xnn_status_out_of_memory;
    }
    max_pooling_op->indirection_buffer = indirection_buffer;

    // Populate the buffer with pointers into the current input tensor.
    xnn_indirection_init_maxpool2d(max_pooling_op, step_height, step_width, log2_input_element_size);

    // Remember the input this buffer was built against.
    max_pooling_op->last_input = input;
    max_pooling_op->last_input_height = input_height;
    max_pooling_op->last_input_width = input_width;
  }

  // qr = incremental tile size for multipass processing of large windows.
  const uint32_t qr = maxpool->qr;
  const size_t channels = max_pooling_op->channels;

  const size_t indirect_input_height_stride = step_height * sizeof(void*);
  const size_t output_width_stride = max_pooling_op->output_pixel_stride << log2_output_element_size;
  const size_t output_height_stride = output_width * output_width_stride;
  // Pointers consumed per output pixel when a window larger than mr is
  // processed in qr-sized passes; used to compute input_increment below.
  const size_t multipass_adjustment = round_up(doz(pooling_size, mr), qr) + mr;

  max_pooling_op->context.max_pooling = (struct max_pooling_context) {
    .indirect_input = max_pooling_op->indirection_buffer,
    // The indirection buffer points into last_input; this byte offset
    // retargets it to the current input without rebuilding the buffer.
    .input_offset = (size_t) ((uintptr_t) input - (uintptr_t) max_pooling_op->last_input),
    .indirect_input_height_stride = indirect_input_height_stride,
    .input_batch_stride = (input_height * input_width * max_pooling_op->input_pixel_stride) << log2_input_element_size,
    .output = output,
    .output_batch_stride = output_height * output_height_stride,
    .output_height_stride = output_height_stride,
    .output_width = output_width,
    .pooling_size = pooling_size,
    .channels = channels,
    .input_increment = (pooling_height * step_width - multipass_adjustment) * sizeof(void*),
    .output_increment = output_width_stride - (channels << log2_output_element_size),
    .ukernel = maxpool->ukernel,
  };
  // Datatype-specific min/max params for the microkernel.
  memcpy(&max_pooling_op->context.max_pooling.params, params, params_size);

  // Parallelize over (batch, output row).
  max_pooling_op->compute.type = xnn_parallelization_type_2d;
  max_pooling_op->compute.task_2d = (pthreadpool_task_2d_t) xnn_compute_max_pooling;
  max_pooling_op->compute.range[0] = batch_size;
  max_pooling_op->compute.range[1] = output_height;
  max_pooling_op->state = xnn_run_state_ready;

  return xnn_status_success;
}
336
// Creates a max-pooling operator for NHWC tensors of signed 8-bit integers.
// Validates the [output_min, output_max] clamping range, initializes the S8
// min/max microkernel parameters, and delegates to the shared creation logic.
enum xnn_status xnn_create_max_pooling2d_nhwc_s8(
  uint32_t input_padding_top,
  uint32_t input_padding_right,
  uint32_t input_padding_bottom,
  uint32_t input_padding_left,
  uint32_t pooling_height,
  uint32_t pooling_width,
  uint32_t stride_height,
  uint32_t stride_width,
  uint32_t dilation_height,
  uint32_t dilation_width,
  size_t channels,
  size_t input_pixel_stride,
  size_t output_pixel_stride,
  int8_t output_min,
  int8_t output_max,
  uint32_t flags,
  xnn_operator_t* max_pooling_op_out)
{
  if (output_min >= output_max) {
    xnn_log_error(
      "failed to create %s operator with [%" PRId8 ", %" PRId8 "] output range: range min must be below range max",
      xnn_operator_type_to_string(xnn_operator_type_max_pooling_nhwc_s8), output_min, output_max);
    return xnn_status_invalid_parameter;
  }

  union xnn_s8_minmax_params params;
  // Fixed mis-encoded "&params" (was corrupted to a pilcrow character).
  xnn_params.s8.maxpool.init.s8(&params, output_min, output_max);
  return create_max_pooling2d_nhwc(
    input_padding_top, input_padding_right, input_padding_bottom, input_padding_left,
    pooling_height, pooling_width,
    stride_height, stride_width,
    dilation_height, dilation_width,
    channels, input_pixel_stride, output_pixel_stride,
    flags,
    &params, sizeof(params), XNN_INIT_FLAG_S8,
    xnn_operator_type_max_pooling_nhwc_s8,
    max_pooling_op_out);
}
376
// Creates a max-pooling operator for NHWC tensors of unsigned 8-bit integers.
// Validates the [output_min, output_max] clamping range, initializes the U8
// min/max microkernel parameters, and delegates to the shared creation logic.
enum xnn_status xnn_create_max_pooling2d_nhwc_u8(
  uint32_t input_padding_top,
  uint32_t input_padding_right,
  uint32_t input_padding_bottom,
  uint32_t input_padding_left,
  uint32_t pooling_height,
  uint32_t pooling_width,
  uint32_t stride_height,
  uint32_t stride_width,
  uint32_t dilation_height,
  uint32_t dilation_width,
  size_t channels,
  size_t input_pixel_stride,
  size_t output_pixel_stride,
  uint8_t output_min,
  uint8_t output_max,
  uint32_t flags,
  xnn_operator_t* max_pooling_op_out)
{
  if (output_min >= output_max) {
    xnn_log_error(
      "failed to create %s operator with [%" PRIu8 ", %" PRIu8 "] output range: range min must be below range max",
      xnn_operator_type_to_string(xnn_operator_type_max_pooling_nhwc_u8), output_min, output_max);
    return xnn_status_invalid_parameter;
  }

  union xnn_u8_minmax_params params;
  // Fixed mis-encoded "&params" (was corrupted to a pilcrow character).
  xnn_params.u8.maxpool.init.u8(&params, output_min, output_max);
  return create_max_pooling2d_nhwc(
    input_padding_top, input_padding_right, input_padding_bottom, input_padding_left,
    pooling_height, pooling_width,
    stride_height, stride_width,
    dilation_height, dilation_width,
    channels, input_pixel_stride, output_pixel_stride,
    flags,
    &params, sizeof(params), XNN_INIT_FLAG_U8,
    xnn_operator_type_max_pooling_nhwc_u8,
    max_pooling_op_out);
}
416
// Creates a max-pooling operator for NHWC tensors of 32-bit floats.
// Rejects NaN or inverted [output_min, output_max] clamping bounds,
// initializes the F32 min/max microkernel parameters, and delegates to the
// shared creation logic.
enum xnn_status xnn_create_max_pooling2d_nhwc_f32(
  uint32_t input_padding_top,
  uint32_t input_padding_right,
  uint32_t input_padding_bottom,
  uint32_t input_padding_left,
  uint32_t pooling_height,
  uint32_t pooling_width,
  uint32_t stride_height,
  uint32_t stride_width,
  uint32_t dilation_height,
  uint32_t dilation_width,
  size_t channels,
  size_t input_pixel_stride,
  size_t output_pixel_stride,
  float output_min,
  float output_max,
  uint32_t flags,
  xnn_operator_t* max_pooling_op_out)
{
  if (isnan(output_min)) {
    xnn_log_error(
      "failed to create %s with NaN output lower bound: lower bound must be non-NaN",
      xnn_operator_type_to_string(xnn_operator_type_max_pooling_nhwc_f32));
    return xnn_status_invalid_parameter;
  }

  if (isnan(output_max)) {
    xnn_log_error(
      "failed to create %s with NaN output upper bound: upper bound must be non-NaN",
      xnn_operator_type_to_string(xnn_operator_type_max_pooling_nhwc_f32));
    return xnn_status_invalid_parameter;
  }

  if (output_min >= output_max) {
    xnn_log_error(
      "failed to create %s with [%.7g, %.7g] output range: lower bound must be below upper bound",
      xnn_operator_type_to_string(xnn_operator_type_max_pooling_nhwc_f32), output_min, output_max);
    return xnn_status_invalid_parameter;
  }

  union xnn_f32_minmax_params params;
  // Fixed mis-encoded "&params" (was corrupted to a pilcrow character).
  xnn_params.f32.maxpool.init.f32(&params, output_min, output_max);
  return create_max_pooling2d_nhwc(
    input_padding_top, input_padding_right, input_padding_bottom, input_padding_left,
    pooling_height, pooling_width,
    stride_height, stride_width,
    dilation_height, dilation_width,
    channels, input_pixel_stride, output_pixel_stride,
    flags,
    &params, sizeof(params), XNN_INIT_FLAG_F32,
    xnn_operator_type_max_pooling_nhwc_f32,
    max_pooling_op_out);
}
470
// Creates a max-pooling operator for NHWC tensors of 16-bit (half) floats.
// Rejects NaN bounds, rounds the float clamping bounds through half
// precision (so the validated range matches what the microkernel will
// actually apply), then delegates to the shared creation logic.
enum xnn_status xnn_create_max_pooling2d_nhwc_f16(
  uint32_t input_padding_top,
  uint32_t input_padding_right,
  uint32_t input_padding_bottom,
  uint32_t input_padding_left,
  uint32_t pooling_height,
  uint32_t pooling_width,
  uint32_t stride_height,
  uint32_t stride_width,
  uint32_t dilation_height,
  uint32_t dilation_width,
  size_t channels,
  size_t input_pixel_stride,
  size_t output_pixel_stride,
  float output_min,
  float output_max,
  uint32_t flags,
  xnn_operator_t* max_pooling_op_out)
{
  if (isnan(output_min)) {
    xnn_log_error(
      "failed to create %s with NaN output lower bound: lower bound must be non-NaN",
      xnn_operator_type_to_string(xnn_operator_type_max_pooling_nhwc_f16));
    return xnn_status_invalid_parameter;
  }

  if (isnan(output_max)) {
    xnn_log_error(
      "failed to create %s with NaN output upper bound: upper bound must be non-NaN",
      xnn_operator_type_to_string(xnn_operator_type_max_pooling_nhwc_f16));
    return xnn_status_invalid_parameter;
  }

  // Round-trip the bounds through fp16 so the min<max check below is
  // performed on the values the kernel will really use.
  const uint16_t output_min_as_half = fp16_ieee_from_fp32_value(output_min);
  const uint16_t output_max_as_half = fp16_ieee_from_fp32_value(output_max);
  output_min = fp16_ieee_to_fp32_value(output_min_as_half);
  output_max = fp16_ieee_to_fp32_value(output_max_as_half);
  if (output_min >= output_max) {
    xnn_log_error(
      "failed to create %s operator with [%.7g, %.7g] output range: lower bound must be below upper bound",
      xnn_operator_type_to_string(xnn_operator_type_max_pooling_nhwc_f16), output_min, output_max);
    return xnn_status_invalid_parameter;
  }

  union xnn_f16_minmax_params params;
  // The F16 init function is NULL when F16 is unsupported on this platform;
  // create_max_pooling2d_nhwc then fails via its datatype_init_flags check.
  if (xnn_params.f16.maxpool.init.f16 != NULL) {
    // Fixed mis-encoded "&params" (was corrupted to a pilcrow character).
    xnn_params.f16.maxpool.init.f16(&params, output_min_as_half, output_max_as_half);
  }
  return create_max_pooling2d_nhwc(
    input_padding_top, input_padding_right, input_padding_bottom, input_padding_left,
    pooling_height, pooling_width,
    stride_height, stride_width,
    dilation_height, dilation_width,
    channels, input_pixel_stride, output_pixel_stride,
    flags,
    &params, sizeof(params), XNN_INIT_FLAG_F16,
    xnn_operator_type_max_pooling_nhwc_f16,
    max_pooling_op_out);
}
530
// Sets up an S8 NHWC max-pooling operator for the given shapes and buffers.
enum xnn_status xnn_setup_max_pooling2d_nhwc_s8(
  xnn_operator_t max_pooling_op,
  size_t batch_size,
  size_t input_height,
  size_t input_width,
  const int8_t* input,
  int8_t* output,
  pthreadpool_t threadpool)
{
  // int8_t elements: log2(sizeof(int8_t)) == 0 for both input and output.
  const uint32_t log2_element_size = 0;
  return setup_max_pooling2d_nhwc(
    max_pooling_op, xnn_operator_type_max_pooling_nhwc_s8,
    batch_size, input_height, input_width,
    input, output,
    log2_element_size, log2_element_size,
    &xnn_params.s8.maxpool,
    &max_pooling_op->params.s8_minmax, sizeof(max_pooling_op->params.s8_minmax),
    pthreadpool_get_threads_count(threadpool));
}
550
// Sets up a U8 NHWC max-pooling operator for the given shapes and buffers.
enum xnn_status xnn_setup_max_pooling2d_nhwc_u8(
  xnn_operator_t max_pooling_op,
  size_t batch_size,
  size_t input_height,
  size_t input_width,
  const uint8_t* input,
  uint8_t* output,
  pthreadpool_t threadpool)
{
  // uint8_t elements: log2(sizeof(uint8_t)) == 0 for both input and output.
  const uint32_t log2_element_size = 0;
  return setup_max_pooling2d_nhwc(
    max_pooling_op, xnn_operator_type_max_pooling_nhwc_u8,
    batch_size, input_height, input_width,
    input, output,
    log2_element_size, log2_element_size,
    &xnn_params.u8.maxpool,
    &max_pooling_op->params.u8_minmax, sizeof(max_pooling_op->params.u8_minmax),
    pthreadpool_get_threads_count(threadpool));
}
570
// Sets up an F16 NHWC max-pooling operator for the given shapes and buffers.
// Half-precision data is passed as void* and handled as 16-bit elements.
enum xnn_status xnn_setup_max_pooling2d_nhwc_f16(
  xnn_operator_t max_pooling_op,
  size_t batch_size,
  size_t input_height,
  size_t input_width,
  const void* input,
  void* output,
  pthreadpool_t threadpool)
{
  // fp16 elements: log2(sizeof(uint16_t)) == 1 for both input and output.
  const uint32_t log2_element_size = 1;
  return setup_max_pooling2d_nhwc(
    max_pooling_op, xnn_operator_type_max_pooling_nhwc_f16,
    batch_size, input_height, input_width,
    input, output,
    log2_element_size, log2_element_size,
    &xnn_params.f16.maxpool,
    &max_pooling_op->params.f16_minmax, sizeof(max_pooling_op->params.f16_minmax),
    pthreadpool_get_threads_count(threadpool));
}
590
// Sets up an F32 NHWC max-pooling operator for the given shapes and buffers.
enum xnn_status xnn_setup_max_pooling2d_nhwc_f32(
  xnn_operator_t max_pooling_op,
  size_t batch_size,
  size_t input_height,
  size_t input_width,
  const float* input,
  float* output,
  pthreadpool_t threadpool)
{
  // float elements: log2(sizeof(float)) == 2 for both input and output.
  const uint32_t log2_element_size = 2;
  return setup_max_pooling2d_nhwc(
    max_pooling_op, xnn_operator_type_max_pooling_nhwc_f32,
    batch_size, input_height, input_width,
    input, output,
    log2_element_size, log2_element_size,
    &xnn_params.f32.maxpool,
    &max_pooling_op->params.f32_minmax, sizeof(max_pooling_op->params.f32_minmax),
    pthreadpool_get_threads_count(threadpool));
}
610
611