1 // Copyright 2020 Google LLC
2 //
3 // This source code is licensed under the BSD-style license found in the
4 // LICENSE file in the root directory of this source tree.
5
6 #include <assert.h>
7 #include <math.h>
8 #include <stdbool.h>
9 #include <stddef.h>
10 #include <stdint.h>
11 #include <stdlib.h>
12 #include <string.h>
13
14 #include <xnnpack.h>
15 #include <xnnpack/allocator.h>
16 #include <xnnpack/operator.h>
17 #include <xnnpack/log.h>
18 #include <xnnpack/common.h>
19 #include <xnnpack/math.h>
20 #include <xnnpack/params.h>
21 #include <xnnpack/indirection.h>
22
23
// Creates an f32 bilinear-resize operator for NCHW layout.
//
// channels            - number of channels; must be non-zero.
// input_pixel_stride  - stride (in elements) between pixels of the input;
//                       must be >= channels.
// output_pixel_stride - stride (in elements) between pixels of the output;
//                       must be >= channels.
// flags               - XNN_FLAG_ALIGN_CORNERS / XNN_FLAG_TENSORFLOW_LEGACY_MODE
//                       bits, consumed later at setup time.
// resize_op_out       - receives the new operator on success.
//
// Returns xnn_status_success, or an error status with *resize_op_out untouched.
enum xnn_status xnn_create_resize_bilinear2d_nchw_f32(
    size_t channels,
    size_t input_pixel_stride,
    size_t output_pixel_stride,
    uint32_t flags,
    xnn_operator_t* resize_op_out)
{
  // Stays NULL until allocation succeeds, so the error path can hand it to
  // xnn_delete_operator unconditionally.
  xnn_operator_t op = NULL;
  enum xnn_status status = xnn_status_uninitialized;

  if ((xnn_params.init_flags & XNN_INIT_FLAG_XNNPACK) == 0) {
    xnn_log_error("failed to create %s operator: XNNPACK is not initialized",
      xnn_operator_type_to_string(xnn_operator_type_resize_bilinear_nchw_f32));
    goto error;
  }

  // Parameter validation: any failure below is an invalid-parameter error.
  status = xnn_status_invalid_parameter;

  if (channels == 0) {
    xnn_log_error(
      "failed to create %s operator with %zu channels: number of channels must be non-zero",
      xnn_operator_type_to_string(xnn_operator_type_resize_bilinear_nchw_f32), channels);
    goto error;
  }

  if (input_pixel_stride < channels) {
    xnn_log_error(
      "failed to create %s operator with input pixel stride of %zu: "
      "stride must be at least as large as the number of channels (%zu)",
      xnn_operator_type_to_string(xnn_operator_type_resize_bilinear_nchw_f32), input_pixel_stride, channels);
    goto error;
  }

  if (output_pixel_stride < channels) {
    xnn_log_error(
      "failed to create %s operator with output pixel stride of %zu: "
      "stride must be at least as large as the number of channels (%zu)",
      xnn_operator_type_to_string(xnn_operator_type_resize_bilinear_nchw_f32), output_pixel_stride, channels);
    goto error;
  }

  // Allocation phase.
  status = xnn_status_out_of_memory;

  op = xnn_allocate_zero_simd_memory(sizeof(struct xnn_operator));
  if (op == NULL) {
    xnn_log_error(
      "failed to allocate %zu bytes for %s operator descriptor",
      sizeof(struct xnn_operator), xnn_operator_type_to_string(xnn_operator_type_resize_bilinear_nchw_f32));
    goto error;
  }

  // Record the shape-independent configuration; problem dimensions are
  // provided later via the setup call.
  op->channels = channels;
  op->input_pixel_stride = input_pixel_stride;
  op->output_pixel_stride = output_pixel_stride;
  op->type = xnn_operator_type_resize_bilinear_nchw_f32;
  op->flags = flags;
  op->state = xnn_run_state_invalid;

  *resize_op_out = op;
  return xnn_status_success;

error:
  xnn_delete_operator(op);
  return status;
}
91
// Sets up an f32 NCHW bilinear-resize operator for a concrete problem size
// and concrete input/output pointers.
//
// The indirection buffer and packed interpolation weights are reused across
// calls: they are reallocated only when the number of output pixels changes,
// and their contents are recomputed only when any input/output dimension
// changes. A change of the input pointer alone is handled cheaply via
// input_offset (see below).
//
// Returns xnn_status_success and marks the operator ready (or skip, for
// batch_size == 0); on failure returns an error status with the operator
// left in the invalid state.
enum xnn_status xnn_setup_resize_bilinear2d_nchw_f32(
    xnn_operator_t resize_op,
    size_t batch_size,
    size_t input_height,
    size_t input_width,
    size_t output_height,
    size_t output_width,
    const float* input,
    float* output,
    pthreadpool_t threadpool)
{
  if (resize_op->type != xnn_operator_type_resize_bilinear_nchw_f32) {
    xnn_log_error("failed to setup operator: operator type mismatch (expected %s, got %s)",
      xnn_operator_type_to_string(xnn_operator_type_resize_bilinear_nchw_f32),
      xnn_operator_type_to_string(resize_op->type));
    return xnn_status_invalid_parameter;
  }
  // Invalidate up front so any early return below leaves the operator
  // unusable until a successful setup completes.
  resize_op->state = xnn_run_state_invalid;

  if ((xnn_params.init_flags & XNN_INIT_FLAG_XNNPACK) == 0) {
    xnn_log_error("failed to setup %s operator: XNNPACK is not initialized",
      xnn_operator_type_to_string(xnn_operator_type_resize_bilinear_nchw_f32));
    return xnn_status_uninitialized;
  }

  // Both input dimensions must exceed 1 (per the error message below).
  if (input_width <= 1 || input_height <= 1) {
    xnn_log_error(
      "failed to setup %s operator with %zux%zu input: input dimensions must be greater than 1",
      xnn_operator_type_to_string(xnn_operator_type_resize_bilinear_nchw_f32), input_width, input_height);
    return xnn_status_invalid_parameter;
  }

  // Dimensions are capped below 2**24; presumably so pixel coordinates are
  // exactly representable in a float's 24-bit significand -- TODO confirm.
  if (max(input_width, input_height) >= 16777216) {
    xnn_log_error(
      "failed to setup %s operator with %zux%zu input: input dimensions must be below 2**24",
      xnn_operator_type_to_string(xnn_operator_type_resize_bilinear_nchw_f32), input_width, input_height);
    return xnn_status_unsupported_parameter;
  }

  if (output_width == 0 || output_height == 0) {
    xnn_log_error(
      "failed to setup %s operator with %zux%zu output: output dimensions must be non-zero",
      xnn_operator_type_to_string(xnn_operator_type_resize_bilinear_nchw_f32), output_width, output_height);
    return xnn_status_invalid_parameter;
  }

  if (max(output_width, output_height) >= 16777216) {
    xnn_log_error(
      "failed to setup %s operator with %zux%zu output: output dimensions must be below 2**24",
      xnn_operator_type_to_string(xnn_operator_type_resize_bilinear_nchw_f32), output_width, output_height);
    return xnn_status_unsupported_parameter;
  }

  // An empty batch is a no-op: mark the operator to skip computation.
  if (batch_size == 0) {
    resize_op->state = xnn_run_state_skip;
    return xnn_status_success;
  }

  // Both scratch buffers are sized by the output pixel count only, so they
  // are reallocated only when that count changes (not on every dim change).
  if (output_height * output_width != resize_op->last_output_height * resize_op->last_output_width) {
    // 4 input pointers per output pixel (the four bilinear taps) and
    // 2 packed float weights per output pixel.
    const size_t indirection_buffer_size = sizeof(void*) * (output_height * output_width * 4);
    const size_t packed_weights_size = sizeof(float) * (output_height * output_width * 2);

    const void** indirection_buffer = (const void**) xnn_reallocate_memory(resize_op->indirection_buffer, indirection_buffer_size);
    if (indirection_buffer == NULL) {
      xnn_log_error(
        "failed to allocate %zu bytes for %s operator indirection buffer",
        indirection_buffer_size, xnn_operator_type_to_string(xnn_operator_type_resize_bilinear_nchw_f32));
      return xnn_status_out_of_memory;
    }
    resize_op->indirection_buffer = indirection_buffer;

    // Note: packed weights must be SIMD-aligned, so we can't use xnn_reallocate_memory
    xnn_release_simd_memory(resize_op->packed_weights.pointer);
    resize_op->packed_weights.pointer = xnn_allocate_simd_memory(packed_weights_size);
    if (resize_op->packed_weights.pointer == NULL) {
      xnn_log_error(
        "failed to allocate %zu bytes for %s operator packed weights",
        packed_weights_size, xnn_operator_type_to_string(xnn_operator_type_resize_bilinear_nchw_f32));
      return xnn_status_out_of_memory;
    }
  }

  // In CHW layout, adjacent pixels of one channel are contiguous, so the
  // per-pixel stride fed to the indirection initializer is one float.
  const size_t input_pixel_stride_in_bytes = sizeof(float); // Since the layout in CHW the pixels
  // Recompute indirection pointers and weights only when the geometry
  // changed; a mere change of the input pointer is absorbed by input_offset.
  if (input_height != resize_op->last_input_height ||
      input_width != resize_op->last_input_width ||
      output_height != resize_op->last_output_height ||
      output_width != resize_op->last_output_width)
  {
    const uint32_t flags = resize_op->flags;
    xnn_indirection_init_resize_bilinear2d_chw_f32(
      input_pixel_stride_in_bytes,
      input_height, input_width,
      output_height, output_width,
      input, resize_op->indirection_buffer, resize_op->packed_weights.pointer,
      !!(flags & XNN_FLAG_ALIGN_CORNERS),
      !!(flags & XNN_FLAG_TENSORFLOW_LEGACY_MODE));

    // Remember the input pointer the indirection buffer was built against,
    // plus the geometry, to decide whether future setups can reuse it.
    resize_op->last_input = input;
    resize_op->last_input_height = input_height;
    resize_op->last_input_width = input_width;
    resize_op->last_output_height = output_height;
    resize_op->last_output_width = output_width;
  }

  // Resize bilinear packed weights can change when the operator is resized, we will not use weights cache.
  assert(resize_op->weights_cache == NULL);
  resize_op->context.resize_bilinear_chw = (struct resize_bilinear_chw_context) {
    .output_pixels = output_height * output_width,
    .channels = resize_op->channels,
    .input_channel_stride = input_height * input_width * sizeof(float),
    .indirect_input = resize_op->indirection_buffer,
    // Byte delta between the current input and the input the indirection
    // buffer was initialized with; lets the kernel reuse stale pointers.
    .input_offset = (size_t) ((uintptr_t) input - (uintptr_t) resize_op->last_input),
    .input_batch_stride = resize_op->input_pixel_stride * input_height * input_width * sizeof(float),
    .packed_weights = resize_op->packed_weights.pointer,
    .output = output,
    .output_batch_stride = resize_op->output_pixel_stride * output_height * output_width * sizeof(float),
    .output_channel_stride = output_height * output_width * sizeof(float),
    .ukernel = xnn_params.f32.ibilinear_chw.ukernel,
  };

#if XNN_TEST_MODE
  const size_t output_channel_tile = xnn_params.f32.ibilinear_chw.channel_tile;
#else
  // Pick a channel tile: with multiple threads, shrink the tile (toward the
  // ukernel's channel_tile granularity) aiming for ~4 tiles per thread.
  const size_t num_threads = pthreadpool_get_threads_count(threadpool);
  size_t output_channel_tile = resize_op->channels;
  if (num_threads > 1) {
    const size_t target_tiles_per_thread = 4;
    const size_t max_channel_tile = divide_round_up(output_channel_tile, num_threads * target_tiles_per_thread);
    if (max_channel_tile < output_channel_tile) {
      const uint32_t output_channel_subtile = xnn_params.f32.ibilinear_chw.channel_tile;
      output_channel_tile =
        min(output_channel_tile,
            divide_round_up(output_channel_tile, max_channel_tile * output_channel_subtile) * output_channel_subtile);
    }
  }
#endif
  // Parallelize 2D over (batch, channels), tiling the channel dimension.
  resize_op->compute.type = xnn_parallelization_type_2d_tile_1d;
  resize_op->compute.task_2d_tile_1d = (pthreadpool_task_2d_tile_1d_t) xnn_compute_resize_bilinear_chw;
  resize_op->compute.range[0] = batch_size;
  resize_op->compute.range[1] = resize_op->channels;
  resize_op->compute.tile[0] = output_channel_tile;
  resize_op->state = xnn_run_state_ready;

  return xnn_status_success;
}
237