1 /*
2 * Copyright (c) Facebook, Inc. and its affiliates.
3 * All rights reserved.
4 *
5 * This source code is licensed under the BSD-style license found in the
6 * LICENSE file in the root directory of this source tree.
7 */
8
9 #include <assert.h>
10 #include <math.h>
11 #include <stdbool.h>
12 #include <stddef.h>
13 #include <stdint.h>
14 #include <stdlib.h>
15 #include <string.h>
16
17 #include <pytorch_qnnpack.h>
18 #include <qnnpack/common.h>
19 #include <qnnpack/indirection.h>
20 #include <qnnpack/log.h>
21 #include <qnnpack/math.h>
22 #include <qnnpack/operator.h>
23 #include <qnnpack/params.h>
24
/*
 * Number of output pixels produced along one spatial axis:
 * floor((padded_input - effective_kernel) / stride) + 1, where the
 * effective (dilated) kernel spans dilation*(kernel-1)+1 input pixels.
 */
static inline size_t compute_output_dimension(
    size_t padded_input_dimension,
    size_t kernel_dimension,
    size_t dilation_dimension,
    size_t stride_dimension) {
  /* Dilation inflates the kernel footprint without adding taps. */
  const size_t dilated_kernel_dimension =
      dilation_dimension * (kernel_dimension - 1) + 1;
  const size_t sliding_range = padded_input_dimension - dilated_kernel_dimension;
  return sliding_range / stride_dimension + 1;
}
36
/*
 * Allocates and configures a 2D max-pooling operator for NHWC uint8 data.
 *
 * Validates the pooling geometry (non-zero pooling/stride/dilation
 * dimensions, non-trivial pooling window, non-zero channel count), then
 * zero-allocates the operator structure, records the geometry and the
 * precomputed u8 clamping parameters, and hands ownership of the operator
 * to the caller via *max_pooling_out.
 *
 * Returns pytorch_qnnp_status_success on success; on failure returns the
 * matching status code and writes nothing to *max_pooling_out.
 */
enum pytorch_qnnp_status pytorch_qnnp_create_max_pooling2d_nhwc_u8(
    uint32_t input_padding_height,
    uint32_t input_padding_width,
    uint32_t pooling_height,
    uint32_t pooling_width,
    uint32_t stride_height,
    uint32_t stride_width,
    uint32_t dilation_height,
    uint32_t dilation_width,
    size_t channels,
    uint8_t output_min,
    uint8_t output_max,
    uint32_t flags,
    pytorch_qnnp_operator_t* max_pooling_out) {
  enum pytorch_qnnp_status status = pytorch_qnnp_status_uninitialized;
  pytorch_qnnp_operator_t op = NULL;

  if (!pytorch_qnnp_params.initialized) {
    pytorch_qnnp_log_error(
        "pytorch_qnnp_create_max_pooling2d_nhwc_u8 failed because QNNPACK is not properly initialized");
    goto error;
  }

  /* All checks below are parameter-validation failures. */
  status = pytorch_qnnp_status_invalid_parameter;

  const uint32_t pooling_size = pooling_height * pooling_width;
  if (pooling_size == 0) {
    pytorch_qnnp_log_error(
        "failed to create max pooling with %" PRIu32 "x%" PRIu32
        " pooling size: "
        "pooling size dimensions must be non-zero",
        pooling_width,
        pooling_height);
    goto error;
  }
  if (pooling_size == 1) {
    /* A 1x1 max pool is an identity copy; reject it explicitly. */
    pytorch_qnnp_log_error(
        "failed to create max pooling with 1 pooling element: "
        "1x1 pooling is meaningless");
    goto error;
  }
  if (stride_height == 0 || stride_width == 0) {
    pytorch_qnnp_log_error(
        "failed to create max pooling with %" PRIu32 "x%" PRIu32
        " stride: "
        "stride dimensions must be non-zero",
        stride_width,
        stride_height);
    goto error;
  }
  if (dilation_height == 0 || dilation_width == 0) {
    pytorch_qnnp_log_error(
        "failed to create max pooling with %" PRIu32 "x%" PRIu32
        " dilation: "
        "dilation dimensions must be non-zero",
        dilation_width,
        dilation_height);
    goto error;
  }
  if (channels == 0) {
    pytorch_qnnp_log_error(
        "failed to create max pooling with %zu channels: "
        "number of channels must be non-zero",
        channels);
    goto error;
  }

  status = pytorch_qnnp_status_out_of_memory;

  /* calloc zero-fills, so all cached setup state starts cleared. */
  op = calloc(1, sizeof *op);
  if (op == NULL) {
    pytorch_qnnp_log_error(
        "failed to allocate %zu bytes for pytorch_qnnp_operator structure",
        sizeof(struct pytorch_qnnp_operator));
    goto error;
  }

  /* Record the pooling geometry. */
  op->input_padding_height = input_padding_height;
  op->input_padding_width = input_padding_width;
  op->kernel_height = pooling_height;
  op->kernel_width = pooling_width;
  op->stride_height = stride_height;
  op->stride_width = stride_width;
  op->dilation_height = dilation_height;
  op->dilation_width = dilation_width;
  op->channels = channels;

  /* Precompute output clamping for the micro-kernel. */
  op->u8_clamping_params =
      pytorch_qnnp_compute_u8_clamping_params(output_min, output_max);

  op->ukernel_type = pytorch_qnnp_ukernel_type_max_pooling;
  op->format = pytorch_qnnp_format_quint8;

  *max_pooling_out = op;
  return pytorch_qnnp_status_success;

error:
  /* op is NULL on validation failures; delete handles that. */
  pytorch_qnnp_delete_operator(op);
  return status;
}
142
/*
 * Binds concrete tensor geometry and data pointers to a previously created
 * max-pooling operator, and (re)builds the indirection buffer of input-row
 * pointers that the micro-kernel consumes.
 *
 * The indirection buffer is cached: if the input pointer and spatial
 * dimensions match the previous setup call, only batches beyond the
 * previously initialized count are (re)initialized, and the call is a
 * no-op when the cached buffer already covers batch_size.
 *
 * Returns pytorch_qnnp_status_success, or an error status if QNNPACK is
 * uninitialized, the input dimensions are zero, or allocation fails.
 *
 * NOTE(review): the threadpool parameter is not used in this function;
 * presumably kept for API uniformity with other setup functions — confirm.
 */
enum pytorch_qnnp_status pytorch_qnnp_setup_max_pooling2d_nhwc_u8(
    pytorch_qnnp_operator_t max_pooling,
    size_t batch_size,
    size_t input_height,
    size_t input_width,
    const uint8_t* input,
    size_t input_pixel_stride,
    uint8_t* output,
    size_t output_pixel_stride,
    pthreadpool_t threadpool) {
  if (!pytorch_qnnp_params.initialized) {
    pytorch_qnnp_log_error(
        "pytorch_qnnp_setup_max_pooling2d_nhwc_u8 failed because QNNPACK is not properly initialized");
    return pytorch_qnnp_status_uninitialized;
  }

  /* Empty batch: record it and succeed; run will have nothing to do. */
  if (batch_size == 0) {
    max_pooling->batch_size = 0;
    return pytorch_qnnp_status_success;
  }

  if (input_width == 0 || input_height == 0) {
    pytorch_qnnp_log_error(
        "failed to setup max pooling with %zux%zu input: input dimensions must be non-zero",
        input_width,
        input_height);
    return pytorch_qnnp_status_invalid_parameter;
  }

  max_pooling->batch_size = batch_size;
  max_pooling->input_height = input_height;
  max_pooling->input_width = input_width;
  max_pooling->input = input;
  max_pooling->input_pixel_stride = input_pixel_stride;

  /* Output size from the symmetrically padded input (padding on both sides). */
  max_pooling->output_height = compute_output_dimension(
      input_height + max_pooling->input_padding_height * 2,
      max_pooling->kernel_height,
      max_pooling->dilation_height,
      max_pooling->stride_height);
  max_pooling->output_width = compute_output_dimension(
      input_width + max_pooling->input_padding_width * 2,
      max_pooling->kernel_width,
      max_pooling->dilation_width,
      max_pooling->stride_width);
  max_pooling->output = output;
  max_pooling->output_pixel_stride = output_pixel_stride;

  /* Reuse the cached indirection buffer when the same input tensor (pointer
   * and spatial size) was set up before; only batches past valid_batch_size
   * need initialization below. */
  size_t valid_batch_size = 0;
  if (input == max_pooling->last_input &&
      input_height == max_pooling->last_input_height &&
      input_width == max_pooling->last_input_width) {
    valid_batch_size = max_pooling->valid_batch_size;
    if (batch_size <= valid_batch_size) {
      return pytorch_qnnp_status_success;
    }
  }

  /* Micro-kernel may read up to (mr - 1) elements after the end of indirection
   * buffer */
  const uint32_t mr = pytorch_qnnp_params.u8maxpool.mr;

  /* Must precede the size computation: fills in step_height/step_width. */
  pytorch_qnnp_indirection_set_step_dimensions(max_pooling);
  const size_t indirection_buffer_size = sizeof(void*) *
      ((mr - 1) +
       batch_size * max_pooling->output_height * max_pooling->step_height);

  /* realloc result goes to a temporary first: on failure the operator still
   * owns its old (valid) buffer and no memory is leaked. */
  const void** indirection_buffer = (const void**)realloc(
      max_pooling->indirection_buffer, indirection_buffer_size);
  if (indirection_buffer == NULL) {
    pytorch_qnnp_log_error(
        "failed to allocate %zu bytes for indirection buffer",
        indirection_buffer_size);
    return pytorch_qnnp_status_out_of_memory;
  }
  max_pooling->indirection_buffer = indirection_buffer;

  /* Populate pointers starting from the first batch not covered by cache. */
  pytorch_qnnp_indirection_init_maxpool2d(max_pooling, valid_batch_size);

  /* Update the cache key for subsequent setup calls. */
  max_pooling->last_input = input;
  max_pooling->last_input_height = input_height;
  max_pooling->last_input_width = input_width;
  max_pooling->valid_batch_size = max(valid_batch_size, batch_size);

  return pytorch_qnnp_status_success;
}
229