/*
 * Copyright (c) 2022, Alliance for Open Media. All rights reserved.
 *
 * This source code is subject to the terms of the BSD 2 Clause License and
 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
 * was not distributed with this source code in the LICENSE file, you can
 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
 * Media Patent License 1.0 was not distributed with this source code in the
 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
 */

#include "aom_dsp/pyramid.h"
#include "aom_mem/aom_mem.h"
#include "aom_ports/bitops.h"
#include "aom_util/aom_pthread.h"

// TODO(rachelbarker): Move needed code from av1/ to aom_dsp/
#include "av1/common/resize.h"

#include <assert.h>
#include <string.h>
// Lifecycle:
// * Frame buffer alloc code calls aom_get_pyramid_alloc_size()
//   to work out how much space is needed for a given number of pyramid
//   levels. This size is included in the total which is checked against
//   the max allocation limit.
// * Then calls aom_alloc_pyramid() to actually create the pyramid
// * Pyramid is initially marked as containing no valid data
// * Each pyramid layer is computed on demand, the first time it is requested
// * Whenever a frame buffer is reused, reset the counter of filled levels.
//   This invalidates all of the existing pyramid levels.
// * Whenever a frame buffer is resized, reallocate the pyramid
// (See the usage sketch below.)
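//
// A minimal usage sketch of that lifecycle (illustrative only; the frame
// buffer bookkeeping and the `total_allocated` / `kMaxAllocLimit` names are
// hypothetical caller-side details, not part of this file):
//
//   size_t sz = aom_get_pyramid_alloc_size(width, height, image_is_16bit);
//   if (total_allocated + sz > kMaxAllocLimit) return AOM_CODEC_MEM_ERROR;
//   ImagePyramid *pyr = aom_alloc_pyramid(width, height, image_is_16bit);
//   if (!pyr) return AOM_CODEC_MEM_ERROR;
//   ...
//   // Compute the first n_levels levels on demand:
//   int filled = aom_compute_pyramid(frame, bit_depth, n_levels, pyr);
//   ...
//   aom_invalidate_pyramid(pyr);  // when the frame buffer is reused
//   aom_free_pyramid(pyr);        // when the frame buffer is freed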

size_t aom_get_pyramid_alloc_size(int width, int height, bool image_is_16bit) {
  // Allocate the maximum possible number of layers for this width and height
  const int msb = get_msb(AOMMIN(width, height));
  const int n_levels = AOMMAX(msb - MIN_PYRAMID_SIZE_LOG2, 1);

  size_t alloc_size = 0;
  alloc_size += sizeof(ImagePyramid);
  alloc_size += n_levels * sizeof(PyramidLayer);

  // Calculate how much memory is needed for downscaled frame buffers
  size_t buffer_size = 0;

  // Work out if we need to allocate a few extra bytes for alignment.
  // aom_memalign() will ensure that the start of the allocation is aligned
  // to a multiple of PYRAMID_ALIGNMENT. But we want the first image pixel
  // to be aligned, not the first byte of the allocation.
  //
  // In the loop below, we ensure that the stride of every image is a multiple
  // of PYRAMID_ALIGNMENT. Thus the allocated size of each pyramid level will
  // also be a multiple of PYRAMID_ALIGNMENT. So, as long as we can get the
  // first pixel in the first pyramid layer aligned properly, the first pixel
  // of every row of every layer will automatically be properly aligned too.
  //
  // Thus all we need to consider is the first pixel in the first layer.
  // This is located at offset
  //   extra_bytes + level_stride * PYRAMID_PADDING + PYRAMID_PADDING
  // bytes into the buffer. Since level_stride is a multiple of
  // PYRAMID_ALIGNMENT, we can ignore that term. So we need
  //   extra_bytes + PYRAMID_PADDING = multiple of PYRAMID_ALIGNMENT
  //
  // To solve this, we can round PYRAMID_PADDING up to the next multiple
  // of PYRAMID_ALIGNMENT, then subtract the original value to calculate
  // how many extra bytes are needed.
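  //
  // As a worked example (using illustrative values, not necessarily the
  // real constants): if PYRAMID_PADDING were 16 and PYRAMID_ALIGNMENT were
  // 32, then first_px_offset = (16 + 31) & ~31 = 32, and
  // extra_bytes = 32 - 16 = 16.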
  size_t first_px_offset =
      (PYRAMID_PADDING + PYRAMID_ALIGNMENT - 1) & ~(PYRAMID_ALIGNMENT - 1);
  size_t extra_bytes = first_px_offset - PYRAMID_PADDING;
  buffer_size += extra_bytes;

  // If the original image is stored in an 8-bit buffer, then we can point the
  // lowest pyramid level at that buffer rather than allocating a new one.
  int first_allocated_level = image_is_16bit ? 0 : 1;

  for (int level = first_allocated_level; level < n_levels; level++) {
    int level_width = width >> level;
    int level_height = height >> level;

    // Allocate padding for each layer
    int padded_width = level_width + 2 * PYRAMID_PADDING;
    int padded_height = level_height + 2 * PYRAMID_PADDING;

    // Align the layer stride to be a multiple of PYRAMID_ALIGNMENT.
    // This ensures that, as long as the top-left pixel in this pyramid level
    // is properly aligned, the leftmost pixel in every row of the pyramid
    // level will be too.
    int level_stride =
        (padded_width + PYRAMID_ALIGNMENT - 1) & ~(PYRAMID_ALIGNMENT - 1);

    buffer_size += level_stride * padded_height;
  }

  alloc_size += buffer_size;

  return alloc_size;
}

ImagePyramid *aom_alloc_pyramid(int width, int height, bool image_is_16bit) {
  // Allocate the maximum possible number of layers for this width and height
  const int msb = get_msb(AOMMIN(width, height));
  const int n_levels = AOMMAX(msb - MIN_PYRAMID_SIZE_LOG2, 1);

  ImagePyramid *pyr = aom_calloc(1, sizeof(*pyr));
  if (!pyr) {
    return NULL;
  }

  pyr->layers = aom_calloc(n_levels, sizeof(*pyr->layers));
  if (!pyr->layers) {
    aom_free(pyr);
    return NULL;
  }

  pyr->max_levels = n_levels;
  pyr->filled_levels = 0;

  // Compute sizes and offsets for each pyramid level
  // These are gathered up first, so that we can allocate all pyramid levels
  // in a single buffer
  size_t buffer_size = 0;
  size_t *layer_offsets = aom_calloc(n_levels, sizeof(*layer_offsets));
  if (!layer_offsets) {
    aom_free(pyr->layers);
    aom_free(pyr);
    return NULL;
  }

  // Work out if we need to allocate a few extra bytes for alignment.
  // aom_memalign() will ensure that the start of the allocation is aligned
  // to a multiple of PYRAMID_ALIGNMENT. But we want the first image pixel
  // to be aligned, not the first byte of the allocation.
  //
  // In the loop below, we ensure that the stride of every image is a multiple
  // of PYRAMID_ALIGNMENT. Thus the allocated size of each pyramid level will
  // also be a multiple of PYRAMID_ALIGNMENT. So, as long as we can get the
  // first pixel in the first pyramid layer aligned properly, the first pixel
  // of every row of every layer will automatically be properly aligned too.
  //
  // Thus all we need to consider is the first pixel in the first layer.
  // This is located at offset
  //   extra_bytes + level_stride * PYRAMID_PADDING + PYRAMID_PADDING
  // bytes into the buffer. Since level_stride is a multiple of
  // PYRAMID_ALIGNMENT, we can ignore that term. So we need
  //   extra_bytes + PYRAMID_PADDING = multiple of PYRAMID_ALIGNMENT
  //
  // To solve this, we can round PYRAMID_PADDING up to the next multiple
  // of PYRAMID_ALIGNMENT, then subtract the original value to calculate
  // how many extra bytes are needed.
  size_t first_px_offset =
      (PYRAMID_PADDING + PYRAMID_ALIGNMENT - 1) & ~(PYRAMID_ALIGNMENT - 1);
  size_t extra_bytes = first_px_offset - PYRAMID_PADDING;
  buffer_size += extra_bytes;

  // If the original image is stored in an 8-bit buffer, then we can point the
  // lowest pyramid level at that buffer rather than allocating a new one.
  int first_allocated_level = image_is_16bit ? 0 : 1;

  for (int level = first_allocated_level; level < n_levels; level++) {
    PyramidLayer *layer = &pyr->layers[level];

    int level_width = width >> level;
    int level_height = height >> level;

    // Allocate padding for each layer
    int padded_width = level_width + 2 * PYRAMID_PADDING;
    int padded_height = level_height + 2 * PYRAMID_PADDING;

    // Align the layer stride to be a multiple of PYRAMID_ALIGNMENT.
    // This ensures that, as long as the top-left pixel in this pyramid level
    // is properly aligned, the leftmost pixel in every row of the pyramid
    // level will be too.
    int level_stride =
        (padded_width + PYRAMID_ALIGNMENT - 1) & ~(PYRAMID_ALIGNMENT - 1);

    size_t level_alloc_start = buffer_size;
    size_t level_start =
        level_alloc_start + PYRAMID_PADDING * level_stride + PYRAMID_PADDING;

    buffer_size += level_stride * padded_height;

    layer_offsets[level] = level_start;
    layer->width = level_width;
    layer->height = level_height;
    layer->stride = level_stride;
  }

  pyr->buffer_alloc =
      aom_memalign(PYRAMID_ALIGNMENT, buffer_size * sizeof(*pyr->buffer_alloc));
  if (!pyr->buffer_alloc) {
    aom_free(pyr->layers);
    aom_free(pyr);
    aom_free(layer_offsets);
    return NULL;
  }

  // Fill in pointers for each level
  // If image is 8-bit, then the lowest level is left unconfigured for now,
  // and will be set up properly when the pyramid is filled in
  for (int level = first_allocated_level; level < n_levels; level++) {
    PyramidLayer *layer = &pyr->layers[level];
    layer->buffer = pyr->buffer_alloc + layer_offsets[level];
  }

#if CONFIG_MULTITHREAD
  pthread_mutex_init(&pyr->mutex, NULL);
#endif  // CONFIG_MULTITHREAD

  aom_free(layer_offsets);
  return pyr;
}

// Fill the border region of a pyramid frame.
// This must be called after the main image area is filled out.
// `img_buf` should point to the first pixel in the image area,
// i.e. it should be pyr->layers[level].buffer.
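//
// As an illustration (with a hypothetical PYRAMID_PADDING of 2 and a 3x2
// image whose rows are "abc" and "def"), the padded layer looks like:
//
//   a a | a b c | c c
//   a a | a b c | c c   <- top padding: copies of the first padded row
//   ----+-------+----
//   a a | a b c | c c
//   d d | d e f | f f
//   ----+-------+----
//   d d | d e f | f f   <- bottom padding: copies of the last padded row
//   d d | d e f | f f
//
// The left and right padding replicate each row's edge pixel; because they
// are filled first, the top and bottom padding can simply copy whole
// padded rows.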
static inline void fill_border(uint8_t *img_buf, const int width,
                               const int height, const int stride) {
  // Fill left and right areas
  for (int row = 0; row < height; row++) {
    uint8_t *row_start = &img_buf[row * stride];
    uint8_t left_pixel = row_start[0];
    memset(row_start - PYRAMID_PADDING, left_pixel, PYRAMID_PADDING);
    uint8_t right_pixel = row_start[width - 1];
    memset(row_start + width, right_pixel, PYRAMID_PADDING);
  }

  // Fill top area
  for (int row = -PYRAMID_PADDING; row < 0; row++) {
    uint8_t *row_start = &img_buf[row * stride];
    memcpy(row_start - PYRAMID_PADDING, img_buf - PYRAMID_PADDING,
           width + 2 * PYRAMID_PADDING);
  }

  // Fill bottom area
  uint8_t *last_row_start = &img_buf[(height - 1) * stride];
  for (int row = height; row < height + PYRAMID_PADDING; row++) {
    uint8_t *row_start = &img_buf[row * stride];
    memcpy(row_start - PYRAMID_PADDING, last_row_start - PYRAMID_PADDING,
           width + 2 * PYRAMID_PADDING);
  }
}

// Compute downsampling pyramid for a frame
//
// This function will ensure that the first `n_levels` levels of the pyramid
// are filled, unless the frame is too small to have this many levels.
// In that case, we will fill all available levels and then stop.
//
// Returns the actual number of levels filled, capped at n_levels,
// or -1 on error.
//
// This must only be called while holding frame_pyr->mutex
static inline int fill_pyramid(const YV12_BUFFER_CONFIG *frame, int bit_depth,
                               int n_levels, ImagePyramid *frame_pyr) {
  int already_filled_levels = frame_pyr->filled_levels;

  // This condition should already be enforced by aom_compute_pyramid
  assert(n_levels <= frame_pyr->max_levels);

  if (already_filled_levels >= n_levels) {
    return n_levels;
  }

  const int frame_width = frame->y_crop_width;
  const int frame_height = frame->y_crop_height;
  const int frame_stride = frame->y_stride;
  assert((frame_width >> n_levels) >= 0);
  assert((frame_height >> n_levels) >= 0);

  if (already_filled_levels == 0) {
    // Fill in largest level from the original image
    PyramidLayer *first_layer = &frame_pyr->layers[0];
    if (frame->flags & YV12_FLAG_HIGHBITDEPTH) {
      // For frames stored in a 16-bit buffer, we need to downconvert to 8 bits
      assert(first_layer->width == frame_width);
      assert(first_layer->height == frame_height);

      uint16_t *frame_buffer = CONVERT_TO_SHORTPTR(frame->y_buffer);
      uint8_t *pyr_buffer = first_layer->buffer;
      int pyr_stride = first_layer->stride;
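      // Downshift each sample by (bit_depth - 8) to produce an 8-bit value;
      // e.g. for a 10-bit source, each sample is shifted right by 2 bits.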
      for (int y = 0; y < frame_height; y++) {
        uint16_t *frame_row = frame_buffer + y * frame_stride;
        uint8_t *pyr_row = pyr_buffer + y * pyr_stride;
        for (int x = 0; x < frame_width; x++) {
          pyr_row[x] = frame_row[x] >> (bit_depth - 8);
        }
      }

      fill_border(pyr_buffer, frame_width, frame_height, pyr_stride);
    } else {
      // For frames stored in an 8-bit buffer, we don't need to copy anything -
      // we can just reference the original image buffer
      first_layer->buffer = frame->y_buffer;
      first_layer->width = frame_width;
      first_layer->height = frame_height;
      first_layer->stride = frame_stride;
    }

    already_filled_levels = 1;
  }

  // Fill in the remaining levels through progressive downsampling
  for (int level = already_filled_levels; level < n_levels; ++level) {
    bool mem_status = false;
    PyramidLayer *prev_layer = &frame_pyr->layers[level - 1];
    uint8_t *prev_buffer = prev_layer->buffer;
    int prev_stride = prev_layer->stride;

    PyramidLayer *this_layer = &frame_pyr->layers[level];
    uint8_t *this_buffer = this_layer->buffer;
    int this_width = this_layer->width;
    int this_height = this_layer->height;
    int this_stride = this_layer->stride;

    // The width and height of the previous layer that are used to derive
    // the current layer.
    const int input_layer_width = this_width << 1;
    const int input_layer_height = this_height << 1;

    // Compute this pyramid level by downsampling the previous level.
    //
    // We downsample by a factor of exactly 2, clipping the rightmost and
    // bottommost pixel off of the previous level if needed. We do this for
    // two main reasons:
    //
    // 1) In the disflow code, when stepping from a higher pyramid level to a
    //    lower pyramid level, we need to not just interpolate the flow field
    //    but also to scale each flow vector by the upsampling ratio.
    //    So it is much more convenient if this ratio is simply 2.
    //
    // 2) Up/downsampling by a factor of 2 can be implemented much more
    //    efficiently than up/downsampling by a generic ratio.
    //    TODO(rachelbarker): Use optimized downsample-by-2 function

    // SIMD support has been added specifically for cases where the downsample
    // factor is exactly 2. In such instances, horizontal and vertical resizing
    // are performed using the down2_symeven() function, which requires the
    // input layer to have even dimensions.
    if (should_resize_by_half(input_layer_height, input_layer_width,
                              this_height, this_width)) {
      assert(input_layer_height % 2 == 0 && input_layer_width % 2 == 0 &&
             "Input width or height cannot be odd.");
      mem_status = av1_resize_plane_to_half(
          prev_buffer, input_layer_height, input_layer_width, prev_stride,
          this_buffer, this_height, this_width, this_stride);
    } else {
      mem_status = av1_resize_plane(prev_buffer, input_layer_height,
                                    input_layer_width, prev_stride, this_buffer,
                                    this_height, this_width, this_stride);
    }

    // Terminate early in cases of memory allocation failure.
    if (!mem_status) {
      frame_pyr->filled_levels = n_levels;
      return -1;
    }

    fill_border(this_buffer, this_width, this_height, this_stride);
  }

  frame_pyr->filled_levels = n_levels;
  return n_levels;
}

// Fill out a downsampling pyramid for a given frame.
//
// The top level (index 0) will always be an 8-bit copy of the input frame,
// regardless of the input bit depth. Additional levels are then downscaled
// by powers of 2.
//
// This function will ensure that the first `n_levels` levels of the pyramid
// are filled, unless the frame is too small to have this many levels.
// In that case, we will fill all available levels and then stop.
// No matter how small the frame is, at least one level is guaranteed
// to be filled.
//
// Returns the actual number of levels filled, capped at n_levels,
// or -1 on error.
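//
// A minimal calling sketch (illustrative; `frame`, `bit_depth` and `pyr` are
// assumed to have been set up as described above):
//
//   int filled = aom_compute_pyramid(frame, bit_depth, 4, pyr);
//   if (filled < 0) {
//     // Allocation failure during downsampling
//   } else {
//     // Levels 0 .. filled-1 are valid and, per the comment in the function
//     // body, can now be read without holding pyr->mutex
//   }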
int aom_compute_pyramid(const YV12_BUFFER_CONFIG *frame, int bit_depth,
                        int n_levels, ImagePyramid *pyr) {
  assert(pyr);

  // Per the comments in the ImagePyramid struct, we must take this mutex
  // before reading or writing the filled_levels field, and hold it while
  // computing any additional pyramid levels, to ensure proper behaviour
  // when multithreading is used
#if CONFIG_MULTITHREAD
  pthread_mutex_lock(&pyr->mutex);
#endif  // CONFIG_MULTITHREAD

  n_levels = AOMMIN(n_levels, pyr->max_levels);
  int result = n_levels;
  if (pyr->filled_levels < n_levels) {
    // Compute any missing levels that we need
    result = fill_pyramid(frame, bit_depth, n_levels, pyr);
  }

  // At this point, as long as result >= 0, the requested number of pyramid
  // levels are guaranteed to be valid, and can be safely read from without
  // holding the mutex any further
  assert(IMPLIES(result >= 0, pyr->filled_levels >= n_levels));
#if CONFIG_MULTITHREAD
  pthread_mutex_unlock(&pyr->mutex);
#endif  // CONFIG_MULTITHREAD
  return result;
}

#ifndef NDEBUG
// Check if a pyramid has already been computed to at least n levels.
// This is mostly a debug helper: as it is necessary to hold pyr->mutex
// while reading the number of already-computed levels, we cannot just write:
//   assert(pyr->filled_levels >= n_levels);
// This function allows the check to be correctly written as:
//   assert(aom_is_pyramid_valid(pyr, n_levels));
//
// Note: This deliberately does not restrict n_levels based on the maximum
// number of permitted levels for the frame size. This allows the check to
// catch cases where the caller forgets to handle the case where
// max_levels is less than the requested number of levels
bool aom_is_pyramid_valid(ImagePyramid *pyr, int n_levels) {
  assert(pyr);

  // Per the comments in the ImagePyramid struct, we must take this mutex
  // before reading or writing the filled_levels field, to ensure proper
  // behaviour when multithreading is used
#if CONFIG_MULTITHREAD
  pthread_mutex_lock(&pyr->mutex);
#endif  // CONFIG_MULTITHREAD

  bool result = (pyr->filled_levels >= n_levels);

#if CONFIG_MULTITHREAD
  pthread_mutex_unlock(&pyr->mutex);
#endif  // CONFIG_MULTITHREAD

  return result;
}
#endif

// Mark a pyramid as no longer containing valid data.
// This must be done whenever the corresponding frame buffer is reused
void aom_invalidate_pyramid(ImagePyramid *pyr) {
  if (pyr) {
#if CONFIG_MULTITHREAD
    pthread_mutex_lock(&pyr->mutex);
#endif  // CONFIG_MULTITHREAD
    pyr->filled_levels = 0;
#if CONFIG_MULTITHREAD
    pthread_mutex_unlock(&pyr->mutex);
#endif  // CONFIG_MULTITHREAD
  }
}

// Release the memory associated with a pyramid
void aom_free_pyramid(ImagePyramid *pyr) {
  if (pyr) {
#if CONFIG_MULTITHREAD
    pthread_mutex_destroy(&pyr->mutex);
#endif  // CONFIG_MULTITHREAD
    aom_free(pyr->buffer_alloc);
    aom_free(pyr->layers);
    aom_free(pyr);
  }
}