xref: /aosp_15_r20/external/XNNPACK/src/operators/resize-bilinear-nchw.c (revision 4bdc94577ba0e567308109d787f7fec7b531ce36)
1 // Copyright 2020 Google LLC
2 //
3 // This source code is licensed under the BSD-style license found in the
4 // LICENSE file in the root directory of this source tree.
5 
6 #include <assert.h>
7 #include <math.h>
8 #include <stdbool.h>
9 #include <stddef.h>
10 #include <stdint.h>
11 #include <stdlib.h>
12 #include <string.h>
13 
14 #include <xnnpack.h>
15 #include <xnnpack/allocator.h>
16 #include <xnnpack/operator.h>
17 #include <xnnpack/log.h>
18 #include <xnnpack/common.h>
19 #include <xnnpack/math.h>
20 #include <xnnpack/params.h>
21 #include <xnnpack/indirection.h>
22 
23 
enum xnn_status xnn_create_resize_bilinear2d_nchw_f32(
    size_t channels,
    size_t input_pixel_stride,
    size_t output_pixel_stride,
    uint32_t flags,
    xnn_operator_t* resize_op_out)
{
  // Creates a bilinear 2D resize operator for F32 data in NCHW layout.
  // On success the caller owns *resize_op_out and releases it with
  // xnn_delete_operator; on failure any partially built operator is
  // released here and an error status is returned.
  enum xnn_status status = xnn_status_uninitialized;
  xnn_operator_t op = NULL;

  // XNNPACK must be initialized before any operator can be created.
  if ((xnn_params.init_flags & XNN_INIT_FLAG_XNNPACK) == 0) {
    xnn_log_error("failed to create %s operator: XNNPACK is not initialized",
      xnn_operator_type_to_string(xnn_operator_type_resize_bilinear_nchw_f32));
    goto error;
  }

  // Validate shape parameters: a pixel stride smaller than the channel
  // count could not address a complete pixel.
  status = xnn_status_invalid_parameter;

  if (channels == 0) {
    xnn_log_error(
      "failed to create %s operator with %zu channels: number of channels must be non-zero",
      xnn_operator_type_to_string(xnn_operator_type_resize_bilinear_nchw_f32), channels);
    goto error;
  }

  if (input_pixel_stride < channels) {
    xnn_log_error(
      "failed to create %s operator with input pixel stride of %zu: "
      "stride must be at least as large as the number of channels (%zu)",
      xnn_operator_type_to_string(xnn_operator_type_resize_bilinear_nchw_f32), input_pixel_stride, channels);
    goto error;
  }

  if (output_pixel_stride < channels) {
    xnn_log_error(
      "failed to create %s operator with output pixel stride of %zu: "
      "stride must be at least as large as the number of channels (%zu)",
      xnn_operator_type_to_string(xnn_operator_type_resize_bilinear_nchw_f32), output_pixel_stride, channels);
    goto error;
  }

  // Allocate the zero-initialized, SIMD-aligned operator descriptor.
  status = xnn_status_out_of_memory;

  op = xnn_allocate_zero_simd_memory(sizeof(struct xnn_operator));
  if (op == NULL) {
    xnn_log_error(
      "failed to allocate %zu bytes for %s operator descriptor",
      sizeof(struct xnn_operator), xnn_operator_type_to_string(xnn_operator_type_resize_bilinear_nchw_f32));
    goto error;
  }

  op->type = xnn_operator_type_resize_bilinear_nchw_f32;
  op->flags = flags;

  op->channels = channels;
  op->input_pixel_stride = input_pixel_stride;
  op->output_pixel_stride = output_pixel_stride;

  // Not runnable until xnn_setup_resize_bilinear2d_nchw_f32 supplies
  // a concrete geometry.
  op->state = xnn_run_state_invalid;

  *resize_op_out = op;
  return xnn_status_success;

error:
  // Safe on all paths: op is NULL until allocation succeeds.
  xnn_delete_operator(op);
  return status;
}
91 
// Configures a created NCHW F32 bilinear-resize operator for a concrete
// batch size and input/output geometry, and prepares its parallelization
// plan. The indirection buffer and packed interpolation weights are
// rebuilt only when the geometry changed since the previous setup.
enum xnn_status xnn_setup_resize_bilinear2d_nchw_f32(
    xnn_operator_t resize_op,
    size_t batch_size,
    size_t input_height,
    size_t input_width,
    size_t output_height,
    size_t output_width,
    const float* input,
    float* output,
    pthreadpool_t threadpool)
{
  // Setup functions are type-specific: reject operators of any other type.
  if (resize_op->type != xnn_operator_type_resize_bilinear_nchw_f32) {
    xnn_log_error("failed to setup operator: operator type mismatch (expected %s, got %s)",
      xnn_operator_type_to_string(xnn_operator_type_resize_bilinear_nchw_f32),
      xnn_operator_type_to_string(resize_op->type));
    return xnn_status_invalid_parameter;
  }
  // Invalidate up front so any early error below leaves the operator
  // in a non-runnable state.
  resize_op->state = xnn_run_state_invalid;

  if ((xnn_params.init_flags & XNN_INIT_FLAG_XNNPACK) == 0) {
    xnn_log_error("failed to setup %s operator: XNNPACK is not initialized",
      xnn_operator_type_to_string(xnn_operator_type_resize_bilinear_nchw_f32));
    return xnn_status_uninitialized;
  }

  // Bilinear interpolation needs at least two pixels along each axis.
  if (input_width <= 1 || input_height <= 1) {
    xnn_log_error(
      "failed to setup %s operator with %zux%zu input: input dimensions must be greater than 1",
      xnn_operator_type_to_string(xnn_operator_type_resize_bilinear_nchw_f32), input_width, input_height);
    return xnn_status_invalid_parameter;
  }

  // NOTE(review): the 2**24 (16777216) cap presumably exists because
  // interpolation positions are computed in fp32, which represents
  // integers exactly only below 2**24 -- confirm against
  // xnn_indirection_init_resize_bilinear2d_chw_f32.
  if (max(input_width, input_height) >= 16777216) {
    xnn_log_error(
      "failed to setup %s operator with %zux%zu input: input dimensions must be below 2**24",
      xnn_operator_type_to_string(xnn_operator_type_resize_bilinear_nchw_f32), input_width, input_height);
    return xnn_status_unsupported_parameter;
  }

  if (output_width == 0 || output_height == 0) {
    xnn_log_error(
      "failed to setup %s operator with %zux%zu output: output dimensions must be non-zero",
      xnn_operator_type_to_string(xnn_operator_type_resize_bilinear_nchw_f32), output_width, output_height);
    return xnn_status_invalid_parameter;
  }

  // Same 2**24 limit applies to the output geometry.
  if (max(output_width, output_height) >= 16777216) {
    xnn_log_error(
      "failed to setup %s operator with %zux%zu output: output dimensions must be below 2**24",
      xnn_operator_type_to_string(xnn_operator_type_resize_bilinear_nchw_f32), output_width, output_height);
    return xnn_status_unsupported_parameter;
  }

  // An empty batch is a successful no-op: mark the operator to skip compute.
  if (batch_size == 0) {
    resize_op->state = xnn_run_state_skip;
    return xnn_status_success;
  }

  // Both scratch buffers are sized by the output pixel count only: the
  // indirection buffer holds 4 input-pixel pointers (the interpolation
  // corners) per output pixel, and the packed weights hold 2 floats per
  // output pixel. Equal pixel counts mean equal sizes, so reallocation is
  // keyed on the product rather than the individual dimensions.
  if (output_height * output_width != resize_op->last_output_height * resize_op->last_output_width) {
    const size_t indirection_buffer_size = sizeof(void*) * (output_height * output_width * 4);
    const size_t packed_weights_size = sizeof(float) * (output_height * output_width * 2);

    const void** indirection_buffer = (const void**) xnn_reallocate_memory(resize_op->indirection_buffer, indirection_buffer_size);
    if (indirection_buffer == NULL) {
      xnn_log_error(
        "failed to allocate %zu bytes for %s operator indirection buffer",
        indirection_buffer_size, xnn_operator_type_to_string(xnn_operator_type_resize_bilinear_nchw_f32));
      return xnn_status_out_of_memory;
    }
    resize_op->indirection_buffer = indirection_buffer;

    // Note: packed weights must be SIMD-aligned, so we can't use
    // xnn_reallocate_memory; release the old block and allocate afresh
    // (the contents are recomputed below anyway).
    xnn_release_simd_memory(resize_op->packed_weights.pointer);
    resize_op->packed_weights.pointer = xnn_allocate_simd_memory(packed_weights_size);
    if (resize_op->packed_weights.pointer == NULL) {
      xnn_log_error(
        "failed to allocate %zu bytes for %s operator packed weights",
        packed_weights_size, xnn_operator_type_to_string(xnn_operator_type_resize_bilinear_nchw_f32));
      return xnn_status_out_of_memory;
    }
  }

  // In CHW layout each channel plane is a contiguous array of floats, so
  // horizontally adjacent pixels of one channel are sizeof(float) apart.
  const size_t input_pixel_stride_in_bytes = sizeof(float); // Since the layout in CHW the pixels
  // Recompute the indirection pointers and interpolation weights only when
  // any of the four geometry dimensions changed; a mere change of the input
  // pointer is compensated via input_offset in the context below.
  if (input_height != resize_op->last_input_height ||
      input_width != resize_op->last_input_width ||
      output_height != resize_op->last_output_height ||
      output_width != resize_op->last_output_width)
  {
    const uint32_t flags = resize_op->flags;
    xnn_indirection_init_resize_bilinear2d_chw_f32(
        input_pixel_stride_in_bytes,
        input_height, input_width,
        output_height, output_width,
        input, resize_op->indirection_buffer, resize_op->packed_weights.pointer,
        !!(flags & XNN_FLAG_ALIGN_CORNERS),
        !!(flags & XNN_FLAG_TENSORFLOW_LEGACY_MODE));

    // Cache the geometry and the base pointer the indirection buffer was
    // built against, so later setups can detect what changed.
    resize_op->last_input = input;
    resize_op->last_input_height = input_height;
    resize_op->last_input_width = input_width;
    resize_op->last_output_height = output_height;
    resize_op->last_output_width = output_width;
  }

  // Resize bilinear packed weights can change when the operator is resized, we will not use weights cache.
  assert(resize_op->weights_cache == NULL);
  resize_op->context.resize_bilinear_chw = (struct resize_bilinear_chw_context) {
    .output_pixels = output_height * output_width,
    .channels = resize_op->channels,
    .input_channel_stride =  input_height * input_width * sizeof(float),
    .indirect_input = resize_op->indirection_buffer,
    // Byte delta between the current input and the pointer the indirection
    // buffer was built from; lets the kernel reuse stale pointers when only
    // the input address moved.
    .input_offset = (size_t) ((uintptr_t) input - (uintptr_t) resize_op->last_input),
    .input_batch_stride = resize_op->input_pixel_stride * input_height * input_width * sizeof(float),
    .packed_weights = resize_op->packed_weights.pointer,
    .output = output,
    .output_batch_stride = resize_op->output_pixel_stride * output_height * output_width * sizeof(float),
    .output_channel_stride = output_height * output_width * sizeof(float),
    .ukernel = xnn_params.f32.ibilinear_chw.ukernel,
  };

  #if XNN_TEST_MODE
    // In test mode, always use the microkernel's native channel tile for
    // reproducible behavior.
    const size_t output_channel_tile = xnn_params.f32.ibilinear_chw.channel_tile;
  #else
    // Heuristic tile selection: start with all channels in one tile, then,
    // when multithreaded, shrink toward ~4 tiles per thread while keeping
    // the tile a multiple of the microkernel's channel subtile.
    const size_t num_threads = pthreadpool_get_threads_count(threadpool);
    size_t output_channel_tile = resize_op->channels;
    if (num_threads > 1) {
      const size_t target_tiles_per_thread = 4;
      const size_t max_channel_tile = divide_round_up(output_channel_tile, num_threads * target_tiles_per_thread);
      if (max_channel_tile < output_channel_tile) {
        const uint32_t output_channel_subtile = xnn_params.f32.ibilinear_chw.channel_tile;
        output_channel_tile =
          min(output_channel_tile,
            divide_round_up(output_channel_tile, max_channel_tile * output_channel_subtile) * output_channel_subtile);
      }
    }
  #endif
  // Parallelize 2D over (batch, channels), tiling the channel dimension.
  resize_op->compute.type = xnn_parallelization_type_2d_tile_1d;
  resize_op->compute.task_2d_tile_1d = (pthreadpool_task_2d_tile_1d_t) xnn_compute_resize_bilinear_chw;
  resize_op->compute.range[0] = batch_size;
  resize_op->compute.range[1] = resize_op->channels;
  resize_op->compute.tile[0] = output_channel_tile;
  resize_op->state = xnn_run_state_ready;

  return xnn_status_success;
}
237