xref: /aosp_15_r20/external/pytorch/aten/src/ATen/native/quantized/cpu/qnnpack/src/average-pooling.c (revision da0073e96a02ea20f0ac840b70461e3646d07c45)
1 /*
2  * Copyright (c) Facebook, Inc. and its affiliates.
3  * All rights reserved.
4  *
5  * This source code is licensed under the BSD-style license found in the
6  * LICENSE file in the root directory of this source tree.
7  */
8 
9 #include <assert.h>
10 #include <math.h>
11 #include <stdbool.h>
12 #include <stddef.h>
13 #include <stdint.h>
14 #include <stdlib.h>
15 #include <string.h>
16 
17 #include <pytorch_qnnpack.h>
18 #include <qnnpack/common.h>
19 #include <qnnpack/indirection.h>
20 #include <qnnpack/log.h>
21 #include <qnnpack/math.h>
22 #include <qnnpack/operator.h>
23 #include <qnnpack/params.h>
24 
/*
 * Computes the number of pooling window positions along one dimension.
 *
 * padded_input_dimension - input extent including padding on both sides.
 * pooling_dimension      - extent of the pooling window.
 * stride_dimension       - step between consecutive windows; must be non-zero
 *                          (division by zero otherwise).
 *
 * Returns (padded_input_dimension - pooling_dimension) / stride_dimension + 1,
 * or 0 when the window does not fit into the padded input at all.
 */
static inline size_t compute_output_dimension(
    size_t padded_input_dimension,
    size_t pooling_dimension,
    size_t stride_dimension) {
  /* The subtraction below is unsigned: without this guard a window larger
   * than the padded input would wrap around to a huge output extent. */
  if (padded_input_dimension < pooling_dimension) {
    return 0;
  }
  return (padded_input_dimension - pooling_dimension) / stride_dimension + 1;
}
31 
/*
 * Creates a 2D average pooling operator for quantized 8-bit NHWC data.
 *
 * Validation is performed in stages, and the returned status reflects the
 * stage that failed:
 *   - pytorch_qnnp_status_uninitialized: QNNPACK parameters are not
 *     initialized.
 *   - pytorch_qnnp_status_invalid_parameter: zero-sized or 1x1 pooling window,
 *     zero stride, zero channels, or a scale that is not positive and normal.
 *   - pytorch_qnnp_status_unsupported_parameter: input-to-output scale ratio
 *     outside [2**-8, 2**8), or 2**24 or more pooling elements.
 *   - pytorch_qnnp_status_out_of_memory: allocation of the operator structure
 *     or of the zero buffer failed.
 *
 * On success the new operator is stored in *average_pooling_out and
 * pytorch_qnnp_status_success is returned. On failure the partially built
 * operator (possibly NULL) is handed to pytorch_qnnp_delete_operator and
 * *average_pooling_out is left untouched.
 *
 * The flags argument is accepted but not read by this function.
 */
enum pytorch_qnnp_status pytorch_qnnp_create_average_pooling2d_nhwc_q8(
    uint32_t input_padding_height,
    uint32_t input_padding_width,
    uint32_t pooling_height,
    uint32_t pooling_width,
    uint32_t stride_height,
    uint32_t stride_width,
    size_t channels,
    uint8_t input_zero_point,
    float input_scale,
    uint8_t output_zero_point,
    float output_scale,
    uint8_t output_min,
    uint8_t output_max,
    uint32_t flags,
    pytorch_qnnp_operator_t* average_pooling_out) {
  pytorch_qnnp_operator_t average_pooling = NULL;
  enum pytorch_qnnp_status status = pytorch_qnnp_status_uninitialized;

  if (!pytorch_qnnp_params.initialized) {
    pytorch_qnnp_log_error(
        "pytorch_qnnp_create_average_pooling2d_nhwc_q8 failed because QNNPACK is not properly initialized");
    goto error;
  }

  status = pytorch_qnnp_status_invalid_parameter;

  /* Multiply in 64 bits: a 32-bit product of two uint32_t values can wrap
   * around (e.g. 65536 x 65537 wraps to 65536), which would let an oversized
   * window slip past the range check below or be misreported as zero-sized. */
  const uint64_t pooling_elements =
      (uint64_t)pooling_height * (uint64_t)pooling_width;
  if (pooling_elements == 0) {
    pytorch_qnnp_log_error(
        "failed to create average pooling with %" PRIu32 "x%" PRIu32
        " pooling size: "
        "pooling size dimensions must be non-zero",
        pooling_width,
        pooling_height);
    goto error;
  }

  if (pooling_elements == 1) {
    pytorch_qnnp_log_error(
        "failed to create average pooling with 1 pooling element: "
        "1x1 pooling is meaningless");
    goto error;
  }

  if (stride_height == 0 || stride_width == 0) {
    pytorch_qnnp_log_error(
        "failed to create average pooling with %" PRIu32 "x%" PRIu32
        " stride: "
        "stride dimensions must be non-zero",
        stride_width,
        stride_height);
    goto error;
  }

  if (channels == 0) {
    pytorch_qnnp_log_error(
        "failed to create average pooling with %zu channels: "
        "number of channels must be non-zero",
        channels);
    goto error;
  }

  if (input_scale <= 0.0f || !isnormal(input_scale)) {
    pytorch_qnnp_log_error(
        "failed to create average pooling with %.7g input scale: "
        "scale must be finite and positive",
        input_scale);
    goto error;
  }

  if (output_scale <= 0.0f || !isnormal(output_scale)) {
    pytorch_qnnp_log_error(
        "failed to create average pooling with %.7g output scale: "
        "scale must be finite and positive",
        output_scale);
    goto error;
  }

  status = pytorch_qnnp_status_unsupported_parameter;

  const float input_output_scale = input_scale / output_scale;
  if (input_output_scale < 0x1.0p-8f || input_output_scale >= 0x1.0p+8f) {
    pytorch_qnnp_log_error(
        "failed to create average pooling with %.7g input scale and %.7g output scale: "
        "input-to-output scale ratio (%.7f) must be in [2**-8, 2**8) range",
        input_scale,
        output_scale,
        input_output_scale);
    goto error;
  }

  if (pooling_elements >= 16777216) {
    pytorch_qnnp_log_error(
        "failed to create average pooling with %" PRIu64 " (%" PRIu32
        "x%" PRIu32
        ") pooling elements: "
        "the number of elements in the pooling area must be below 2**24",
        pooling_elements,
        pooling_width,
        pooling_height);
    goto error;
  }

  /* Known to be in [2, 2**24) at this point, so narrowing is lossless. */
  const uint32_t pooling_size = (uint32_t)pooling_elements;

  status = pytorch_qnnp_status_out_of_memory;

  average_pooling = calloc(1, sizeof(struct pytorch_qnnp_operator));
  if (average_pooling == NULL) {
    pytorch_qnnp_log_error(
        "failed to allocate %zu bytes for pytorch_qnnp_operator structure",
        sizeof(struct pytorch_qnnp_operator));
    goto error;
  }

  const bool any_padding = (input_padding_width | input_padding_height) != 0;
  const uint32_t kr = pytorch_qnnp_params.q8avgpool.kr;
  const uint32_t mr = pytorch_qnnp_params.q8avgpool.mr;
  const uint32_t qr = pytorch_qnnp_params.q8avgpool.qr;
  if (any_padding || (channels >= kr || (pooling_size - mr) % qr != 0)) {
    /* One byte per channel, every byte set to the input zero point. */
    void* zero_buffer = malloc(channels);
    if (zero_buffer == NULL) {
      pytorch_qnnp_log_error(
          "failed to allocate %zu bytes for zero padding", channels);
      goto error;
    }
    memset(zero_buffer, input_zero_point, channels);
    average_pooling->zero_buffer = zero_buffer;
    average_pooling->zero_pointer = zero_buffer;
  }

  average_pooling->input_padding_depth = 0;
  average_pooling->input_padding_height = input_padding_height;
  average_pooling->input_padding_width = input_padding_width;
  average_pooling->kernel_depth = 1;
  average_pooling->kernel_height = pooling_height;
  average_pooling->kernel_width = pooling_width;
  average_pooling->stride_depth = 1;
  average_pooling->stride_height = stride_height;
  average_pooling->stride_width = stride_width;
  average_pooling->dilation_depth = 1;
  average_pooling->dilation_height = 1;
  average_pooling->dilation_width = 1;
  average_pooling->channels = channels;

  /* Row count used for the zero-point term below. When channels >= kr it is
   * raised to mr, or to mr plus a multiple of qr, whichever first covers the
   * pooling window. NOTE(review): presumably this mirrors the number of rows
   * the micro-kernel accumulates, including zero-buffer rows - confirm. */
  size_t nrows = pooling_size;
  if (channels >= kr) {
    if (nrows <= mr) {
      nrows = mr;
    } else {
      nrows = round_up(nrows - mr, qr) + mr;
    }
  }

  average_pooling->avgpool_quantization_params =
      pytorch_qnnp_compute_avgpool_quantization_params(
          (int32_t) - ((uint32_t)input_zero_point * (uint32_t)nrows),
          input_scale / (output_scale * (float)pooling_size),
          output_zero_point,
          output_min,
          output_max);

  average_pooling->ukernel_type = pytorch_qnnp_ukernel_type_average_pooling;
  average_pooling->format = pytorch_qnnp_format_quint8;

  *average_pooling_out = average_pooling;
  return pytorch_qnnp_status_success;

error:
  /* average_pooling may still be NULL here; presumably the delete routine
   * tolerates that - verify against pytorch_qnnp_delete_operator. */
  pytorch_qnnp_delete_operator(average_pooling);
  return status;
}
203 
/*
 * Binds input/output tensors and their geometry to an average pooling
 * operator and (re)builds its indirection buffer when needed.
 *
 * Returns:
 *   - pytorch_qnnp_status_uninitialized if QNNPACK parameters are not
 *     initialized;
 *   - pytorch_qnnp_status_success immediately for batch_size == 0 (only the
 *     operator's batch_size field is updated);
 *   - pytorch_qnnp_status_invalid_parameter if an input dimension is zero or
 *     the padded input is smaller than the pooling window;
 *   - pytorch_qnnp_status_out_of_memory if the indirection buffer cannot be
 *     (re)allocated; the previously stored buffer pointer is left in place;
 *   - pytorch_qnnp_status_success otherwise.
 *
 * The threadpool argument is accepted but not read by this function.
 */
enum pytorch_qnnp_status pytorch_qnnp_setup_average_pooling2d_nhwc_q8(
    pytorch_qnnp_operator_t average_pooling,
    size_t batch_size,
    size_t input_height,
    size_t input_width,
    const uint8_t* input,
    size_t input_pixel_stride,
    uint8_t* output,
    size_t output_pixel_stride,
    pthreadpool_t threadpool) {
  if (!pytorch_qnnp_params.initialized) {
    pytorch_qnnp_log_error(
        "pytorch_qnnp_setup_average_pooling2d_nhwc_q8 failed because QNNPACK is not properly initialized");
    return pytorch_qnnp_status_uninitialized;
  }

  if (batch_size == 0) {
    average_pooling->batch_size = 0;
    return pytorch_qnnp_status_success;
  }

  if (input_width == 0 || input_height == 0) {
    pytorch_qnnp_log_error(
        "failed to setup average pooling with %zux%zu input: input dimensions must be non-zero",
        input_width,
        input_height);
    return pytorch_qnnp_status_invalid_parameter;
  }

  const size_t padded_input_height =
      input_height + average_pooling->input_padding_height * 2;
  const size_t padded_input_width =
      input_width + average_pooling->input_padding_width * 2;

  /* Reject inputs that cannot hold a single pooling window. The output
   * dimension formula subtracts the window size from the padded input size
   * using unsigned arithmetic, so a too-small input would otherwise wrap and
   * yield an absurd output size and indirection buffer request. Checked
   * before the operator is modified so a failed setup leaves it unchanged. */
  if (padded_input_height < average_pooling->kernel_height ||
      padded_input_width < average_pooling->kernel_width) {
    pytorch_qnnp_log_error(
        "failed to setup average pooling with %zux%zu input: "
        "padded input dimensions must not be smaller than the pooling window",
        input_width,
        input_height);
    return pytorch_qnnp_status_invalid_parameter;
  }

  average_pooling->batch_size = batch_size;
  average_pooling->input_depth = 1;
  average_pooling->input_height = input_height;
  average_pooling->input_width = input_width;
  average_pooling->input = input;
  average_pooling->input_pixel_stride = input_pixel_stride;

  average_pooling->output_height = compute_output_dimension(
      padded_input_height,
      average_pooling->kernel_height,
      average_pooling->stride_height);
  average_pooling->output_width = compute_output_dimension(
      padded_input_width,
      average_pooling->kernel_width,
      average_pooling->stride_width);
  average_pooling->output_depth = 1;
  average_pooling->output = output;
  average_pooling->output_pixel_stride = output_pixel_stride;

  /* If the input pointer and spatial size match the previous setup, the
   * batches already covered by the indirection buffer stay valid; when they
   * cover the requested batch size there is nothing left to do. */
  size_t valid_batch_size = 0;
  if (input == average_pooling->last_input &&
      input_height == average_pooling->last_input_height &&
      input_width == average_pooling->last_input_width) {
    valid_batch_size = average_pooling->valid_batch_size;
    if (batch_size <= valid_batch_size) {
      return pytorch_qnnp_status_success;
    }
  }

  /* Micro-kernel may read up to (mr - 1) elements after the end of indirection
   * buffer */
  const uint32_t mr = pytorch_qnnp_params.q8avgpool.mr;

  pytorch_qnnp_indirection_set_step_dimensions(average_pooling);
  const size_t indirection_buffer_size = sizeof(void*) *
      ((mr - 1) +
       batch_size * average_pooling->output_height *
           average_pooling->step_height);

  /* Store the realloc result only after it succeeds, so the operator's
   * existing buffer pointer is not overwritten with NULL on failure. */
  const void** indirection_buffer = (const void**)realloc(
      average_pooling->indirection_buffer, indirection_buffer_size);
  if (indirection_buffer == NULL) {
    pytorch_qnnp_log_error(
        "failed to allocate %zu bytes for indirection buffer",
        indirection_buffer_size);
    return pytorch_qnnp_status_out_of_memory;
  }
  average_pooling->indirection_buffer = indirection_buffer;

  /* valid_batch_size is passed through to the indirection initializer;
   * presumably it skips batches that are already populated - confirm against
   * pytorch_qnnp_indirection_init_dwconv. */
  pytorch_qnnp_indirection_init_dwconv(average_pooling, valid_batch_size);

  average_pooling->last_input = input;
  average_pooling->last_input_height = input_height;
  average_pooling->last_input_width = input_width;
  average_pooling->valid_batch_size = max(valid_batch_size, batch_size);

  return pytorch_qnnp_status_success;
}
291