1 /*
2 * Copyright (c) Facebook, Inc. and its affiliates.
3 * All rights reserved.
4 *
5 * This source code is licensed under the BSD-style license found in the
6 * LICENSE file in the root directory of this source tree.
7 */
8
9 #include <assert.h>
10 #include <math.h>
11 #include <stdbool.h>
12 #include <stddef.h>
13 #include <stdint.h>
14 #include <stdlib.h>
15 #include <string.h>
16
17 #include <pytorch_qnnpack.h>
18 #include <qnnpack/common.h>
19 #include <qnnpack/indirection.h>
20 #include <qnnpack/log.h>
21 #include <qnnpack/math.h>
22 #include <qnnpack/operator.h>
23 #include <qnnpack/params.h>
24
/*
 * Number of pooling window positions along one spatial dimension.
 *
 * Evaluates (padded_input_dimension - pooling_dimension) / stride_dimension + 1
 * in size_t arithmetic.
 *
 * Preconditions (not checked here):
 *   - padded_input_dimension >= pooling_dimension; otherwise the unsigned
 *     subtraction wraps around;
 *   - stride_dimension != 0.
 */
static inline size_t compute_output_dimension(
    size_t padded_input_dimension,
    size_t pooling_dimension,
    size_t stride_dimension) {
  /* Offset of the last position at which a whole window still fits. */
  const size_t last_window_offset = padded_input_dimension - pooling_dimension;
  /* Whole strides that fit before that offset, plus the window at offset 0. */
  const size_t whole_strides = last_window_offset / stride_dimension;
  return whole_strides + 1;
}
31
/*
 * Creates a quantized (uint8) 2D average pooling operator.
 *
 * The function validates the pooling geometry and quantization parameters,
 * allocates a zero-initialized operator structure, optionally allocates a
 * buffer filled with input_zero_point, records the configuration in the
 * operator and precomputes the average-pooling quantization parameters.
 *
 * Parameters:
 *   input_padding_height/width - padding stored in the operator
 *   pooling_height/width       - pooling window size; both must be non-zero
 *                                and the window must hold more than one and
 *                                fewer than 2**24 elements
 *   stride_height/width        - window strides; both must be non-zero
 *   channels                   - number of channels; must be non-zero
 *   input_zero_point/scale     - input quantization parameters
 *   output_zero_point/scale    - output quantization parameters
 *   output_min/output_max      - passed to the quantization parameters
 *   flags                      - not read by this function
 *   average_pooling_out        - receives the new operator on success only
 *
 * Returns:
 *   pytorch_qnnp_status_success               on success
 *   pytorch_qnnp_status_uninitialized         if pytorch_qnnp_params is not
 *                                             initialized
 *   pytorch_qnnp_status_invalid_parameter     for a zero pooling dimension,
 *                                             a 1-element window, a zero
 *                                             stride, zero channels, or a
 *                                             scale that is not positive and
 *                                             normal
 *   pytorch_qnnp_status_unsupported_parameter if input_scale / output_scale
 *                                             is outside [2**-8, 2**8) or the
 *                                             window has >= 2**24 elements
 *   pytorch_qnnp_status_out_of_memory         if an allocation fails
 */
enum pytorch_qnnp_status pytorch_qnnp_create_average_pooling2d_nhwc_q8(
    uint32_t input_padding_height,
    uint32_t input_padding_width,
    uint32_t pooling_height,
    uint32_t pooling_width,
    uint32_t stride_height,
    uint32_t stride_width,
    size_t channels,
    uint8_t input_zero_point,
    float input_scale,
    uint8_t output_zero_point,
    float output_scale,
    uint8_t output_min,
    uint8_t output_max,
    uint32_t flags,
    pytorch_qnnp_operator_t* average_pooling_out) {
  pytorch_qnnp_operator_t average_pooling = NULL;
  enum pytorch_qnnp_status status = pytorch_qnnp_status_uninitialized;

  if (!pytorch_qnnp_params.initialized) {
    pytorch_qnnp_log_error(
        "pytorch_qnnp_create_average_pooling2d_nhwc_q8 failed because QNNPACK is not properly initialized");
    goto error;
  }

  status = pytorch_qnnp_status_invalid_parameter;

  /*
   * Form the element count in 64 bits. A 32-bit product can wrap around:
   * 65536x65536 would wrap to 0 and be misreported as a zero-sized window,
   * and 65537x65537 would wrap to 131073 and slip past the 2**24 limit below.
   */
  const uint64_t pooling_elements =
      (uint64_t)pooling_height * (uint64_t)pooling_width;
  if (pooling_elements == 0) {
    pytorch_qnnp_log_error(
        "failed to create average pooling with %" PRIu32 "x%" PRIu32
        " pooling size: "
        "pooling size dimensions must be non-zero",
        pooling_width,
        pooling_height);
    goto error;
  }

  if (pooling_elements == 1) {
    pytorch_qnnp_log_error(
        "failed to create average pooling with 1 pooling element: "
        "1x1 pooling is meaningless");
    goto error;
  }

  if (stride_height == 0 || stride_width == 0) {
    pytorch_qnnp_log_error(
        "failed to create average pooling with %" PRIu32 "x%" PRIu32
        " stride: "
        "stride dimensions must be non-zero",
        stride_width,
        stride_height);
    goto error;
  }

  if (channels == 0) {
    pytorch_qnnp_log_error(
        "failed to create average pooling with %zu channels: "
        "number of channels must be non-zero",
        channels);
    goto error;
  }

  /* isnormal() also rejects NaN, infinities, zero and subnormal values. */
  if (input_scale <= 0.0f || !isnormal(input_scale)) {
    pytorch_qnnp_log_error(
        "failed to create average pooling with %.7g input scale: "
        "scale must be finite and positive",
        input_scale);
    goto error;
  }

  if (output_scale <= 0.0f || !isnormal(output_scale)) {
    pytorch_qnnp_log_error(
        "failed to create average pooling with %.7g output scale: "
        "scale must be finite and positive",
        output_scale);
    goto error;
  }

  status = pytorch_qnnp_status_unsupported_parameter;

  const float input_output_scale = input_scale / output_scale;
  if (input_output_scale < 0x1.0p-8f || input_output_scale >= 0x1.0p+8f) {
    pytorch_qnnp_log_error(
        "failed to create average pooling with %.7g input scale and %.7g output scale: "
        "input-to-output scale ratio (%.7f) must be in [2**-8, 2**8) range",
        input_scale,
        output_scale,
        input_output_scale);
    goto error;
  }

  if (pooling_elements >= 16777216) {
    pytorch_qnnp_log_error(
        "failed to create average pooling with %" PRIu64 " (%" PRIu32
        "x%" PRIu32
        ") pooling elements: "
        "the number of elements in the pooling area must be below 2**24",
        pooling_elements,
        pooling_width,
        pooling_height);
    goto error;
  }

  /* Below 2**24 at this point, so narrowing to 32 bits is lossless. */
  const uint32_t pooling_size = (uint32_t)pooling_elements;

  status = pytorch_qnnp_status_out_of_memory;

  average_pooling = calloc(1, sizeof(struct pytorch_qnnp_operator));
  if (average_pooling == NULL) {
    pytorch_qnnp_log_error(
        "failed to allocate %zu bytes for pytorch_qnnp_operator structure",
        sizeof(struct pytorch_qnnp_operator));
    goto error;
  }

  const bool any_padding = (input_padding_width | input_padding_height) != 0;
  const uint32_t kr = pytorch_qnnp_params.q8avgpool.kr;
  const uint32_t mr = pytorch_qnnp_params.q8avgpool.mr;
  const uint32_t qr = pytorch_qnnp_params.q8avgpool.qr;
  /*
   * Allocate one pixel worth of bytes set to input_zero_point when padding is
   * present, or when the window rows do not line up with the micro-kernel
   * tile (mr rows first, then qr rows at a time).
   * NOTE(review): presumably the indirection setup points padded / surplus
   * rows at this buffer -- confirm against the indirection code.
   */
  if (any_padding || (channels >= kr || (pooling_size - mr) % qr != 0)) {
    void* zero_buffer = malloc(channels);
    if (zero_buffer == NULL) {
      pytorch_qnnp_log_error(
          "failed to allocate %zu bytes for zero padding", channels);
      goto error;
    }
    memset(zero_buffer, input_zero_point, channels);
    average_pooling->zero_buffer = zero_buffer;
    average_pooling->zero_pointer = zero_buffer;
  }

  average_pooling->input_padding_depth = 0;
  average_pooling->input_padding_height = input_padding_height;
  average_pooling->input_padding_width = input_padding_width;
  average_pooling->kernel_depth = 1;
  average_pooling->kernel_height = pooling_height;
  average_pooling->kernel_width = pooling_width;
  average_pooling->stride_depth = 1;
  average_pooling->stride_height = stride_height;
  average_pooling->stride_width = stride_width;
  average_pooling->dilation_depth = 1;
  average_pooling->dilation_height = 1;
  average_pooling->dilation_width = 1;
  average_pooling->channels = channels;

  /*
   * Row count used for the accumulator bias. With at least kr channels it is
   * rounded up to mr plus a multiple of qr.
   * NOTE(review): presumably the surplus rows read the zero buffer, each
   * contributing input_zero_point to the sum -- confirm against the
   * micro-kernels.
   */
  size_t nrows = pooling_size;
  if (channels >= kr) {
    if (nrows <= mr) {
      nrows = mr;
    } else {
      nrows = round_up(nrows - mr, qr) + mr;
    }
  }

  /*
   * The bias is -(input_zero_point * nrows); the scale folds the division by
   * the true element count into the requantization multiplier.
   */
  average_pooling->avgpool_quantization_params =
      pytorch_qnnp_compute_avgpool_quantization_params(
          (int32_t) - ((uint32_t)input_zero_point * (uint32_t)nrows),
          input_scale / (output_scale * (float)pooling_size),
          output_zero_point,
          output_min,
          output_max);

  average_pooling->ukernel_type = pytorch_qnnp_ukernel_type_average_pooling;
  average_pooling->format = pytorch_qnnp_format_quint8;

  *average_pooling_out = average_pooling;
  return pytorch_qnnp_status_success;

error:
  /* average_pooling is still NULL if the failure preceded the allocation. */
  pytorch_qnnp_delete_operator(average_pooling);
  return status;
}
203
/*
 * Binds input/output tensors and shape to an average pooling operator and
 * (re)builds its indirection buffer.
 *
 * Parameters:
 *   average_pooling     - operator to configure
 *   batch_size          - number of images; 0 is accepted and only records
 *                         the empty batch
 *   input_height/width  - input spatial size; must be non-zero and, after
 *                         adding the operator's padding on both sides, at
 *                         least as large as the pooling window
 *   input               - input data pointer
 *   input_pixel_stride  - stored in the operator as the input pixel stride
 *   output              - output data pointer
 *   output_pixel_stride - stored in the operator as the output pixel stride
 *   threadpool          - not read by this function
 *
 * Returns:
 *   pytorch_qnnp_status_success           on success
 *   pytorch_qnnp_status_uninitialized     if pytorch_qnnp_params is not
 *                                         initialized
 *   pytorch_qnnp_status_invalid_parameter for a zero input dimension or a
 *                                         padded input smaller than the
 *                                         pooling window
 *   pytorch_qnnp_status_out_of_memory     if the indirection buffer cannot be
 *                                         (re)allocated
 */
enum pytorch_qnnp_status pytorch_qnnp_setup_average_pooling2d_nhwc_q8(
    pytorch_qnnp_operator_t average_pooling,
    size_t batch_size,
    size_t input_height,
    size_t input_width,
    const uint8_t* input,
    size_t input_pixel_stride,
    uint8_t* output,
    size_t output_pixel_stride,
    pthreadpool_t threadpool) {
  if (!pytorch_qnnp_params.initialized) {
    pytorch_qnnp_log_error(
        "pytorch_qnnp_setup_average_pooling2d_nhwc_q8 failed because QNNPACK is not properly initialized");
    return pytorch_qnnp_status_uninitialized;
  }

  if (batch_size == 0) {
    average_pooling->batch_size = 0;
    return pytorch_qnnp_status_success;
  }

  if (input_width == 0 || input_height == 0) {
    pytorch_qnnp_log_error(
        "failed to setup average pooling with %zux%zu input: input dimensions must be non-zero",
        input_width,
        input_height);
    return pytorch_qnnp_status_invalid_parameter;
  }

  /*
   * Widen the padding before doubling it, in case the operator field is
   * narrower than size_t.
   */
  const size_t padded_input_height =
      input_height + (size_t)average_pooling->input_padding_height * 2;
  const size_t padded_input_width =
      input_width + (size_t)average_pooling->input_padding_width * 2;

  /*
   * compute_output_dimension() subtracts the window size from the padded
   * input size in unsigned arithmetic. A window larger than the padded input
   * would wrap around and produce a bogus output extent and indirection
   * buffer size, so reject it before touching the operator state.
   */
  if (padded_input_height < average_pooling->kernel_height ||
      padded_input_width < average_pooling->kernel_width) {
    pytorch_qnnp_log_error(
        "failed to setup average pooling with %zux%zu padded input: "
        "padded input dimensions must not be smaller than the pooling dimensions",
        padded_input_width,
        padded_input_height);
    return pytorch_qnnp_status_invalid_parameter;
  }

  average_pooling->batch_size = batch_size;
  average_pooling->input_depth = 1;
  average_pooling->input_height = input_height;
  average_pooling->input_width = input_width;
  average_pooling->input = input;
  average_pooling->input_pixel_stride = input_pixel_stride;

  average_pooling->output_height = compute_output_dimension(
      padded_input_height,
      average_pooling->kernel_height,
      average_pooling->stride_height);
  average_pooling->output_width = compute_output_dimension(
      padded_input_width,
      average_pooling->kernel_width,
      average_pooling->stride_width);
  average_pooling->output_depth = 1;
  average_pooling->output = output;
  average_pooling->output_pixel_stride = output_pixel_stride;

  /*
   * If the input pointer and spatial size match the previous setup, the
   * indirection entries for the first valid_batch_size images are kept; when
   * they already cover the requested batch there is nothing left to do.
   * NOTE(review): input_pixel_stride is not part of this comparison; if the
   * indirection entries depend on it, a stride-only change would reuse stale
   * entries -- confirm against the indirection code.
   */
  size_t valid_batch_size = 0;
  if (input == average_pooling->last_input &&
      input_height == average_pooling->last_input_height &&
      input_width == average_pooling->last_input_width) {
    valid_batch_size = average_pooling->valid_batch_size;
    if (batch_size <= valid_batch_size) {
      return pytorch_qnnp_status_success;
    }
  }

  /* Micro-kernel may read up to (mr - 1) elements after the end of indirection
   * buffer */
  const uint32_t mr = pytorch_qnnp_params.q8avgpool.mr;

  pytorch_qnnp_indirection_set_step_dimensions(average_pooling);
  const size_t indirection_buffer_size = sizeof(void*) *
      ((mr - 1) +
       batch_size * average_pooling->output_height *
           average_pooling->step_height);

  /*
   * Keep the realloc result in a temporary so that the operator still holds
   * its previous buffer if the reallocation fails.
   */
  const void** indirection_buffer = (const void**)realloc(
      average_pooling->indirection_buffer, indirection_buffer_size);
  if (indirection_buffer == NULL) {
    pytorch_qnnp_log_error(
        "failed to allocate %zu bytes for indirection buffer",
        indirection_buffer_size);
    return pytorch_qnnp_status_out_of_memory;
  }
  average_pooling->indirection_buffer = indirection_buffer;

  pytorch_qnnp_indirection_init_dwconv(average_pooling, valid_batch_size);

  /* Remember what the indirection buffer now describes for the next call. */
  average_pooling->last_input = input;
  average_pooling->last_input_height = input_height;
  average_pooling->last_input_width = input_width;
  average_pooling->valid_batch_size = max(valid_batch_size, batch_size);

  return pytorch_qnnp_status_success;
}
291