xref: /aosp_15_r20/external/pytorch/aten/src/ATen/native/quantized/cpu/qnnpack/src/max-pooling.c (revision da0073e96a02ea20f0ac840b70461e3646d07c45)
1 /*
2  * Copyright (c) Facebook, Inc. and its affiliates.
3  * All rights reserved.
4  *
5  * This source code is licensed under the BSD-style license found in the
6  * LICENSE file in the root directory of this source tree.
7  */
8 
9 #include <assert.h>
10 #include <math.h>
11 #include <stdbool.h>
12 #include <stddef.h>
13 #include <stdint.h>
14 #include <stdlib.h>
15 #include <string.h>
16 
17 #include <pytorch_qnnpack.h>
18 #include <qnnpack/common.h>
19 #include <qnnpack/indirection.h>
20 #include <qnnpack/log.h>
21 #include <qnnpack/math.h>
22 #include <qnnpack/operator.h>
23 #include <qnnpack/params.h>
24 
/*
 * Computes how many pooling windows fit along one spatial dimension.
 *
 * padded_input_dimension: input extent with the padding of both sides already
 *                         added.
 * kernel_dimension:       pooling window extent in elements; must be non-zero.
 * dilation_dimension:     distance between neighbouring window elements.
 * stride_dimension:       step between successive windows; must be non-zero
 *                         (it is used as a divisor).
 *
 * Returns (padded_input - effective_kernel) / stride + 1, where
 * effective_kernel = (kernel - 1) * dilation + 1, or 0 when the dilated window
 * is larger than the padded input and therefore cannot be placed at all.
 */
static inline size_t compute_output_dimension(
    size_t padded_input_dimension,
    size_t kernel_dimension,
    size_t dilation_dimension,
    size_t stride_dimension) {
  const size_t effective_kernel_dimension =
      (kernel_dimension - 1) * dilation_dimension + 1;
  /* size_t arithmetic wraps around instead of going negative, so without this
   * guard a window that does not fit would yield an enormous bogus result. */
  if (padded_input_dimension < effective_kernel_dimension) {
    return 0;
  }
  return (padded_input_dimension - effective_kernel_dimension) /
      stride_dimension +
      1;
}
36 
/*
 * Creates a 2D max pooling operator for uint8 data.
 *
 * input_padding_height/width: padding stored in the operator (the setup
 *                             function in this file applies it on both sides).
 * pooling_height/width:       pooling window size; both must be non-zero and
 *                             the window must not be 1x1.
 * stride_height/width:        window step; both must be non-zero.
 * dilation_height/width:      window dilation; both must be non-zero.
 * channels:                   number of channels; must be non-zero.
 * output_min/output_max:      bounds passed to
 *                             pytorch_qnnp_compute_u8_clamping_params.
 * flags:                      not read by this function.
 * max_pooling_out:            receives the new operator on success; it is left
 *                             untouched on failure.
 *
 * Returns pytorch_qnnp_status_success, or
 *   pytorch_qnnp_status_uninitialized     if QNNPACK is not initialized,
 *   pytorch_qnnp_status_invalid_parameter if a parameter check fails,
 *   pytorch_qnnp_status_out_of_memory     if the operator cannot be allocated.
 */
enum pytorch_qnnp_status pytorch_qnnp_create_max_pooling2d_nhwc_u8(
    uint32_t input_padding_height,
    uint32_t input_padding_width,
    uint32_t pooling_height,
    uint32_t pooling_width,
    uint32_t stride_height,
    uint32_t stride_width,
    uint32_t dilation_height,
    uint32_t dilation_width,
    size_t channels,
    uint8_t output_min,
    uint8_t output_max,
    uint32_t flags,
    pytorch_qnnp_operator_t* max_pooling_out) {
  pytorch_qnnp_operator_t max_pooling = NULL;
  enum pytorch_qnnp_status status = pytorch_qnnp_status_uninitialized;

  if (!pytorch_qnnp_params.initialized) {
    pytorch_qnnp_log_error(
        "pytorch_qnnp_create_max_pooling2d_nhwc_u8 failed because QNNPACK is not properly initialized");
    goto error;
  }

  status = pytorch_qnnp_status_invalid_parameter;

  /* The dimensions are tested individually rather than through their 32-bit
   * product: the product can wrap around to 0 or 1 for large but valid window
   * sizes, which would reject them with a misleading message. */
  if (pooling_height == 0 || pooling_width == 0) {
    pytorch_qnnp_log_error(
        "failed to create max pooling with %" PRIu32 "x%" PRIu32
        " pooling size: "
        "pooling size dimensions must be non-zero",
        pooling_width,
        pooling_height);
    goto error;
  }

  if (pooling_height == 1 && pooling_width == 1) {
    pytorch_qnnp_log_error(
        "failed to create max pooling with 1 pooling element: "
        "1x1 pooling is meaningless");
    goto error;
  }

  if (stride_height == 0 || stride_width == 0) {
    pytorch_qnnp_log_error(
        "failed to create max pooling with %" PRIu32 "x%" PRIu32
        " stride: "
        "stride dimensions must be non-zero",
        stride_width,
        stride_height);
    goto error;
  }

  if (dilation_height == 0 || dilation_width == 0) {
    pytorch_qnnp_log_error(
        "failed to create max pooling with %" PRIu32 "x%" PRIu32
        " dilation: "
        "dilation dimensions must be non-zero",
        dilation_width,
        dilation_height);
    goto error;
  }

  if (channels == 0) {
    pytorch_qnnp_log_error(
        "failed to create max pooling with %zu channels: "
        "number of channels must be non-zero",
        channels);
    goto error;
  }

  status = pytorch_qnnp_status_out_of_memory;

  /* calloc zero-fills the structure, so every field that is not assigned
   * below starts out as zero / NULL. */
  max_pooling = calloc(1, sizeof(struct pytorch_qnnp_operator));
  if (max_pooling == NULL) {
    pytorch_qnnp_log_error(
        "failed to allocate %zu bytes for pytorch_qnnp_operator structure",
        sizeof(struct pytorch_qnnp_operator));
    goto error;
  }

  max_pooling->input_padding_height = input_padding_height;
  max_pooling->input_padding_width = input_padding_width;

  max_pooling->kernel_height = pooling_height;
  max_pooling->kernel_width = pooling_width;
  max_pooling->stride_height = stride_height;
  max_pooling->stride_width = stride_width;
  max_pooling->dilation_height = dilation_height;
  max_pooling->dilation_width = dilation_width;
  max_pooling->channels = channels;

  max_pooling->u8_clamping_params =
      pytorch_qnnp_compute_u8_clamping_params(output_min, output_max);

  max_pooling->ukernel_type = pytorch_qnnp_ukernel_type_max_pooling;
  max_pooling->format = pytorch_qnnp_format_quint8;

  *max_pooling_out = max_pooling;
  return pytorch_qnnp_status_success;

error:
  /* max_pooling is still NULL on every path that reaches this label before
   * the allocation succeeds. */
  pytorch_qnnp_delete_operator(max_pooling);
  return status;
}
142 
/*
 * Binds input/output buffers and shapes to a max pooling operator and, when
 * needed, (re)builds its indirection buffer.
 *
 * max_pooling:         operator to configure.
 * batch_size:          number of images; 0 only records an empty batch and
 *                      succeeds immediately.
 * input_height/width:  spatial input size; both must be non-zero, and the
 *                      padded input must be at least as large as the dilated
 *                      pooling window.
 * input:               input data pointer stored in the operator.
 * input_pixel_stride:  stored in the operator as the input pixel stride.
 * output:              output data pointer stored in the operator.
 * output_pixel_stride: stored in the operator as the output pixel stride.
 * threadpool:          not read by this function.
 *
 * Returns pytorch_qnnp_status_success, or
 *   pytorch_qnnp_status_uninitialized     if QNNPACK is not initialized,
 *   pytorch_qnnp_status_invalid_parameter if the input size is rejected,
 *   pytorch_qnnp_status_out_of_memory     if the indirection buffer cannot be
 *                                         (re)allocated.
 */
enum pytorch_qnnp_status pytorch_qnnp_setup_max_pooling2d_nhwc_u8(
    pytorch_qnnp_operator_t max_pooling,
    size_t batch_size,
    size_t input_height,
    size_t input_width,
    const uint8_t* input,
    size_t input_pixel_stride,
    uint8_t* output,
    size_t output_pixel_stride,
    pthreadpool_t threadpool) {
  if (!pytorch_qnnp_params.initialized) {
    pytorch_qnnp_log_error(
        "pytorch_qnnp_setup_max_pooling2d_nhwc_u8 failed because QNNPACK is not properly initialized");
    return pytorch_qnnp_status_uninitialized;
  }

  if (batch_size == 0) {
    max_pooling->batch_size = 0;
    return pytorch_qnnp_status_success;
  }

  if (input_width == 0 || input_height == 0) {
    pytorch_qnnp_log_error(
        "failed to setup max pooling with %zux%zu input: input dimensions must be non-zero",
        input_width,
        input_height);
    return pytorch_qnnp_status_invalid_parameter;
  }

  /* Operands are widened to size_t before the arithmetic so that it is not
   * carried out in a narrower type should the operator fields be narrower
   * than size_t. */
  const size_t padded_input_height =
      input_height + (size_t)max_pooling->input_padding_height * 2;
  const size_t padded_input_width =
      input_width + (size_t)max_pooling->input_padding_width * 2;
  const size_t effective_kernel_height =
      ((size_t)max_pooling->kernel_height - 1) *
          (size_t)max_pooling->dilation_height +
      1;
  const size_t effective_kernel_width =
      ((size_t)max_pooling->kernel_width - 1) *
          (size_t)max_pooling->dilation_width +
      1;

  /* A window larger than the padded input has no valid position. Reject it
   * here, before touching the operator, instead of letting the unsigned
   * output-size computation wrap around and feed a bogus size into the
   * indirection buffer allocation. */
  if (padded_input_height < effective_kernel_height ||
      padded_input_width < effective_kernel_width) {
    pytorch_qnnp_log_error(
        "failed to setup max pooling with %zux%zu padded input: "
        "input must not be smaller than the %zux%zu dilated pooling window",
        padded_input_width,
        padded_input_height,
        effective_kernel_width,
        effective_kernel_height);
    return pytorch_qnnp_status_invalid_parameter;
  }

  max_pooling->batch_size = batch_size;
  max_pooling->input_height = input_height;
  max_pooling->input_width = input_width;
  max_pooling->input = input;
  max_pooling->input_pixel_stride = input_pixel_stride;

  max_pooling->output_height = compute_output_dimension(
      padded_input_height,
      max_pooling->kernel_height,
      max_pooling->dilation_height,
      max_pooling->stride_height);
  max_pooling->output_width = compute_output_dimension(
      padded_input_width,
      max_pooling->kernel_width,
      max_pooling->dilation_width,
      max_pooling->stride_width);
  max_pooling->output = output;
  max_pooling->output_pixel_stride = output_pixel_stride;

  /* If the previous setup used the same input pointer and spatial size, the
   * indirection entries for the first valid_batch_size images are reused; when
   * they already cover the requested batch there is nothing left to do. */
  size_t valid_batch_size = 0;
  if (input == max_pooling->last_input &&
      input_height == max_pooling->last_input_height &&
      input_width == max_pooling->last_input_width) {
    valid_batch_size = max_pooling->valid_batch_size;
    if (batch_size <= valid_batch_size) {
      return pytorch_qnnp_status_success;
    }
  }

  /* Micro-kernel may read up to (mr - 1) elements after the end of indirection
   * buffer */
  const uint32_t mr = pytorch_qnnp_params.u8maxpool.mr;

  pytorch_qnnp_indirection_set_step_dimensions(max_pooling);
  const size_t indirection_buffer_size = sizeof(void*) *
      ((mr - 1) +
       batch_size * max_pooling->output_height * max_pooling->step_height);

  /* The realloc result goes into a temporary first so the operator keeps its
   * existing buffer pointer if the reallocation fails. */
  const void** indirection_buffer = (const void**)realloc(
      max_pooling->indirection_buffer, indirection_buffer_size);
  if (indirection_buffer == NULL) {
    pytorch_qnnp_log_error(
        "failed to allocate %zu bytes for indirection buffer",
        indirection_buffer_size);
    return pytorch_qnnp_status_out_of_memory;
  }
  max_pooling->indirection_buffer = indirection_buffer;

  pytorch_qnnp_indirection_init_maxpool2d(max_pooling, valid_batch_size);

  /* Remember what the buffer was built for so the next setup can reuse it. */
  max_pooling->last_input = input;
  max_pooling->last_input_height = input_height;
  max_pooling->last_input_width = input_width;
  max_pooling->valid_batch_size = max(valid_batch_size, batch_size);

  return pytorch_qnnp_status_success;
}
229