xref: /aosp_15_r20/external/libaom/av1/common/restoration.c (revision 77c1e3ccc04c968bd2bc212e87364f250e820521)
1*77c1e3ccSAndroid Build Coastguard Worker /*
2*77c1e3ccSAndroid Build Coastguard Worker  * Copyright (c) 2016, Alliance for Open Media. All rights reserved.
3*77c1e3ccSAndroid Build Coastguard Worker  *
4*77c1e3ccSAndroid Build Coastguard Worker  * This source code is subject to the terms of the BSD 2 Clause License and
5*77c1e3ccSAndroid Build Coastguard Worker  * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
6*77c1e3ccSAndroid Build Coastguard Worker  * was not distributed with this source code in the LICENSE file, you can
7*77c1e3ccSAndroid Build Coastguard Worker  * obtain it at www.aomedia.org/license/software. If the Alliance for Open
8*77c1e3ccSAndroid Build Coastguard Worker  * Media Patent License 1.0 was not distributed with this source code in the
9*77c1e3ccSAndroid Build Coastguard Worker  * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
10*77c1e3ccSAndroid Build Coastguard Worker  *
11*77c1e3ccSAndroid Build Coastguard Worker  */
12*77c1e3ccSAndroid Build Coastguard Worker 
13*77c1e3ccSAndroid Build Coastguard Worker #include <math.h>
14*77c1e3ccSAndroid Build Coastguard Worker #include <stddef.h>
15*77c1e3ccSAndroid Build Coastguard Worker 
16*77c1e3ccSAndroid Build Coastguard Worker #include "config/aom_config.h"
17*77c1e3ccSAndroid Build Coastguard Worker #include "config/aom_scale_rtcd.h"
18*77c1e3ccSAndroid Build Coastguard Worker 
19*77c1e3ccSAndroid Build Coastguard Worker #include "aom/internal/aom_codec_internal.h"
20*77c1e3ccSAndroid Build Coastguard Worker #include "aom_mem/aom_mem.h"
21*77c1e3ccSAndroid Build Coastguard Worker #include "aom_dsp/aom_dsp_common.h"
22*77c1e3ccSAndroid Build Coastguard Worker #include "aom_mem/aom_mem.h"
23*77c1e3ccSAndroid Build Coastguard Worker #include "aom_ports/mem.h"
24*77c1e3ccSAndroid Build Coastguard Worker #include "aom_util/aom_pthread.h"
25*77c1e3ccSAndroid Build Coastguard Worker 
26*77c1e3ccSAndroid Build Coastguard Worker #include "av1/common/av1_common_int.h"
27*77c1e3ccSAndroid Build Coastguard Worker #include "av1/common/convolve.h"
28*77c1e3ccSAndroid Build Coastguard Worker #include "av1/common/enums.h"
29*77c1e3ccSAndroid Build Coastguard Worker #include "av1/common/resize.h"
30*77c1e3ccSAndroid Build Coastguard Worker #include "av1/common/restoration.h"
31*77c1e3ccSAndroid Build Coastguard Worker #include "av1/common/thread_common.h"
32*77c1e3ccSAndroid Build Coastguard Worker 
33*77c1e3ccSAndroid Build Coastguard Worker // The 's' values are calculated based on original 'r' and 'e' values in the
34*77c1e3ccSAndroid Build Coastguard Worker // spec using GenSgrprojVtable().
35*77c1e3ccSAndroid Build Coastguard Worker // Note: Setting r = 0 skips the filter; with corresponding s = -1 (invalid).
36*77c1e3ccSAndroid Build Coastguard Worker const sgr_params_type av1_sgr_params[SGRPROJ_PARAMS] = {
37*77c1e3ccSAndroid Build Coastguard Worker   { { 2, 1 }, { 140, 3236 } }, { { 2, 1 }, { 112, 2158 } },
38*77c1e3ccSAndroid Build Coastguard Worker   { { 2, 1 }, { 93, 1618 } },  { { 2, 1 }, { 80, 1438 } },
39*77c1e3ccSAndroid Build Coastguard Worker   { { 2, 1 }, { 70, 1295 } },  { { 2, 1 }, { 58, 1177 } },
40*77c1e3ccSAndroid Build Coastguard Worker   { { 2, 1 }, { 47, 1079 } },  { { 2, 1 }, { 37, 996 } },
41*77c1e3ccSAndroid Build Coastguard Worker   { { 2, 1 }, { 30, 925 } },   { { 2, 1 }, { 25, 863 } },
42*77c1e3ccSAndroid Build Coastguard Worker   { { 0, 1 }, { -1, 2589 } },  { { 0, 1 }, { -1, 1618 } },
43*77c1e3ccSAndroid Build Coastguard Worker   { { 0, 1 }, { -1, 1177 } },  { { 0, 1 }, { -1, 925 } },
44*77c1e3ccSAndroid Build Coastguard Worker   { { 2, 0 }, { 56, -1 } },    { { 2, 0 }, { 22, -1 } },
45*77c1e3ccSAndroid Build Coastguard Worker };
46*77c1e3ccSAndroid Build Coastguard Worker 
av1_get_upsampled_plane_size(const AV1_COMMON * cm,int is_uv,int * plane_w,int * plane_h)47*77c1e3ccSAndroid Build Coastguard Worker void av1_get_upsampled_plane_size(const AV1_COMMON *cm, int is_uv, int *plane_w,
48*77c1e3ccSAndroid Build Coastguard Worker                                   int *plane_h) {
49*77c1e3ccSAndroid Build Coastguard Worker   int ss_x = is_uv && cm->seq_params->subsampling_x;
50*77c1e3ccSAndroid Build Coastguard Worker   int ss_y = is_uv && cm->seq_params->subsampling_y;
51*77c1e3ccSAndroid Build Coastguard Worker   *plane_w = ROUND_POWER_OF_TWO(cm->superres_upscaled_width, ss_x);
52*77c1e3ccSAndroid Build Coastguard Worker   *plane_h = ROUND_POWER_OF_TWO(cm->height, ss_y);
53*77c1e3ccSAndroid Build Coastguard Worker }
54*77c1e3ccSAndroid Build Coastguard Worker 
// Returns the number of restoration units along one dimension of a plane.
// Pass the plane width to count horizontal units, or the plane height to
// count vertical units.
//
// The plane size is divided by the unit size and rounded to nearest rather
// than up, which models the way a right or bottom restoration unit can extend
// to up to 150% of its normal width or height.
//
// The result is clamped to at least 1 so that small frames, which may be
// smaller than half of an LR unit, still get one unit.
int av1_lr_count_units(int unit_size, int plane_size) {
  const int half_unit = unit_size >> 1;
  const int rounded_units = (plane_size + half_unit) / unit_size;
  return rounded_units > 1 ? rounded_units : 1;
}
66*77c1e3ccSAndroid Build Coastguard Worker 
// (Re)allocates the per-unit info array of rsi for one plane.
//
// The unit grid is derived from the plane size reported by
// av1_get_upsampled_plane_size() and rsi->restoration_unit_size. The counts
// are stored in rsi->horz_units, rsi->vert_units and rsi->num_rest_units.
// Any previous rsi->unit_info is freed before the new 16-byte aligned array
// is allocated through CHECK_MEM_ERROR.
//
// cm:    common state, used for the plane size and by CHECK_MEM_ERROR.
// rsi:   restoration info to update; restoration_unit_size must already be
//        set.
// is_uv: forwarded to av1_get_upsampled_plane_size().
void av1_alloc_restoration_struct(AV1_COMMON *cm, RestorationInfo *rsi,
                                  int is_uv) {
  int plane_w, plane_h;
  av1_get_upsampled_plane_size(cm, is_uv, &plane_w, &plane_h);

  const int unit_size = rsi->restoration_unit_size;
  const int units_across = av1_lr_count_units(unit_size, plane_w);
  const int units_down = av1_lr_count_units(unit_size, plane_h);

  rsi->num_rest_units = units_across * units_down;
  rsi->horz_units = units_across;
  rsi->vert_units = units_down;

  // Drop the old array first; its contents are not carried over.
  aom_free(rsi->unit_info);
  CHECK_MEM_ERROR(cm, rsi->unit_info,
                  (RestorationUnitInfo *)aom_memalign(
                      16, sizeof(*rsi->unit_info) * rsi->num_rest_units));
}
85*77c1e3ccSAndroid Build Coastguard Worker 
// Releases the unit info array owned by rst_info and clears the pointer so
// that a later free or reallocation does not see a stale address.
void av1_free_restoration_struct(RestorationInfo *rst_info) {
  RestorationUnitInfo *const old_units = rst_info->unit_info;
  rst_info->unit_info = NULL;
  aom_free(old_units);
}
90*77c1e3ccSAndroid Build Coastguard Worker 
#if 0
// This whole region is compiled out; it is kept as a record of how the
// table values were generated.
//
// Pair of values for each sgrproj parameter:
// Index 0 corresponds to r[0], e[0]
// Index 1 corresponds to r[1], e[1]
int sgrproj_mtable[SGRPROJ_PARAMS][2];

// Fills sgrproj_mtable[i][j] from av1_sgr_params[i]:
//   * -1 when r[j] is 0 (filter disabled),
//   * otherwise (1 << SGRPROJ_MTABLE_BITS) divided by n * n * e[j], rounded
//     to nearest, where n = (2 * r[j] + 1)^2.
// NOTE(review): this reads params->e, while the comment above av1_sgr_params
// describes the stored values as 's' — confirm the member names before
// re-enabling this code.
static void GenSgrprojVtable(void) {
  for (int i = 0; i < SGRPROJ_PARAMS; ++i) {
    const sgr_params_type *const params = &av1_sgr_params[i];
    for (int j = 0; j < 2; ++j) {
      const int e = params->e[j];
      const int r = params->r[j];
      if (r == 0) {                 // filter is disabled
        sgrproj_mtable[i][j] = -1;  // mark invalid
      } else {                      // filter is enabled
        // n is the number of pixels in the (2r+1) x (2r+1) window.
        const int n = (2 * r + 1) * (2 * r + 1);
        const int n2e = n * n * e;
        assert(n2e != 0);
        // Adding n2e / 2 before dividing rounds to nearest.
        sgrproj_mtable[i][j] = (((1 << SGRPROJ_MTABLE_BITS) + n2e / 2) / n2e);
      }
    }
  }
}
#endif
115*77c1e3ccSAndroid Build Coastguard Worker 
// Precalculation hook for loop restoration. It is currently a no-op: its only
// statement, a call to GenSgrprojVtable(), is compiled out with #if 0.
void av1_loop_restoration_precal(void) {
#if 0
  GenSgrprojVtable();
#endif
}
121*77c1e3ccSAndroid Build Coastguard Worker 
// Pads an 8-bit plane in place by replicating its edge pixels.
//
// data:        pointer to the top-left pixel of the width x height image.
// stride:      distance in pixels between the starts of consecutive rows.
// border_horz: number of pixels written to the left and right of every row.
// border_vert: number of rows written above and below the image; each is a
//              copy of the (already widened) first or last image row.
//
// The caller must own border_horz columns and border_vert rows of writable
// memory around the image.
static void extend_frame_lowbd(uint8_t *data, int width, int height,
                               ptrdiff_t stride, int border_horz,
                               int border_vert) {
  // Horizontal pass: widen every image row with its leftmost and rightmost
  // pixel.
  for (int row = 0; row < height; ++row) {
    uint8_t *const line = data + row * stride;
    memset(line - border_horz, line[0], border_horz);
    memset(line + width, line[width - 1], border_horz);
  }

  // Vertical pass: rows are copied at their widened length so the corners
  // get filled as well.
  uint8_t *const top_row = data - border_horz;
  const int padded_width = width + 2 * border_horz;

  for (int above = border_vert; above > 0; --above) {
    memcpy(top_row - above * stride, top_row, padded_width);
  }
  for (int below = 0; below < border_vert; ++below) {
    memcpy(top_row + (height + below) * stride,
           top_row + (height - 1) * stride, padded_width);
  }
}
141*77c1e3ccSAndroid Build Coastguard Worker 
142*77c1e3ccSAndroid Build Coastguard Worker #if CONFIG_AV1_HIGHBITDEPTH
// Pads a 16-bit plane in place by replicating its edge pixels.
//
// data:        pointer to the top-left pixel of the width x height image.
// stride:      distance in pixels between the starts of consecutive rows.
// border_horz: number of pixels written to the left and right of every row.
// border_vert: number of rows written above and below the image; each is a
//              copy of the (already widened) first or last image row.
//
// The caller must own border_horz columns and border_vert rows of writable
// memory around the image.
static void extend_frame_highbd(uint16_t *data, int width, int height,
                                ptrdiff_t stride, int border_horz,
                                int border_vert) {
  // Horizontal pass: widen every image row with its leftmost and rightmost
  // pixel.
  for (int row = 0; row < height; ++row) {
    uint16_t *const line = data + row * stride;
    for (int k = border_horz; k > 0; --k) line[-k] = line[0];
    for (int k = 0; k < border_horz; ++k) line[width + k] = line[width - 1];
  }

  // Vertical pass: rows are copied at their widened length so the corners
  // get filled as well.
  uint16_t *const top_row = data - border_horz;
  const size_t padded_bytes = (width + 2 * border_horz) * sizeof(uint16_t);

  for (int above = border_vert; above > 0; --above) {
    memcpy(top_row - above * stride, top_row, padded_bytes);
  }
  for (int below = 0; below < border_vert; ++below) {
    memcpy(top_row + (height + below) * stride,
           top_row + (height - 1) * stride, padded_bytes);
  }
}
163*77c1e3ccSAndroid Build Coastguard Worker 
// Copies a width x height rectangle of 16-bit pixels from src to dst.
// Strides are in pixels. Rows are copied with memcpy, so the source and
// destination rectangles must not overlap.
static void copy_rest_unit_highbd(int width, int height, const uint16_t *src,
                                  int src_stride, uint16_t *dst,
                                  int dst_stride) {
  const size_t row_bytes = width * sizeof(*dst);
  for (int row = 0; row < height; ++row) {
    const uint16_t *const src_row = &src[row * src_stride];
    uint16_t *const dst_row = &dst[row * dst_stride];
    memcpy(dst_row, src_row, row_bytes);
  }
}
170*77c1e3ccSAndroid Build Coastguard Worker #endif
171*77c1e3ccSAndroid Build Coastguard Worker 
// Pads a plane in place by replicating its edge pixels: border_horz columns
// on each side and border_vert rows above and below.
//
// When built with CONFIG_AV1_HIGHBITDEPTH and highbd is nonzero, data is
// converted with CONVERT_TO_SHORTPTR and handled as 16-bit pixels; otherwise
// it is handled as 8-bit pixels. Without high bit depth support the highbd
// argument is ignored.
void av1_extend_frame(uint8_t *data, int width, int height, int stride,
                      int border_horz, int border_vert, int highbd) {
#if CONFIG_AV1_HIGHBITDEPTH
  if (!highbd) {
    extend_frame_lowbd(data, width, height, stride, border_horz, border_vert);
  } else {
    extend_frame_highbd(CONVERT_TO_SHORTPTR(data), width, height, stride,
                        border_horz, border_vert);
  }
#else
  (void)highbd;
  extend_frame_lowbd(data, width, height, stride, border_horz, border_vert);
#endif
}
184*77c1e3ccSAndroid Build Coastguard Worker 
// Copies a width x height rectangle of 8-bit pixels from src to dst.
// Strides are in pixels. Rows are copied with memcpy, so the source and
// destination rectangles must not overlap.
static void copy_rest_unit_lowbd(int width, int height, const uint8_t *src,
                                 int src_stride, uint8_t *dst, int dst_stride) {
  for (int row = 0; row < height; ++row) {
    const uint8_t *const src_row = &src[row * src_stride];
    uint8_t *const dst_row = &dst[row * dst_stride];
    memcpy(dst_row, src_row, width);
  }
}
190*77c1e3ccSAndroid Build Coastguard Worker 
// Copies a width x height rectangle of pixels from src to dst.
//
// When built with CONFIG_AV1_HIGHBITDEPTH and highbd is nonzero, both
// pointers are converted with CONVERT_TO_SHORTPTR and the 16-bit copy is
// used; otherwise the 8-bit copy is used. Without high bit depth support the
// highbd argument is ignored.
static void copy_rest_unit(int width, int height, const uint8_t *src,
                           int src_stride, uint8_t *dst, int dst_stride,
                           int highbd) {
#if CONFIG_AV1_HIGHBITDEPTH
  if (!highbd) {
    copy_rest_unit_lowbd(width, height, src, src_stride, dst, dst_stride);
  } else {
    copy_rest_unit_highbd(width, height, CONVERT_TO_SHORTPTR(src), src_stride,
                          CONVERT_TO_SHORTPTR(dst), dst_stride);
  }
#else
  (void)highbd;
  copy_rest_unit_lowbd(width, height, src, src_stride, dst, dst_stride);
#endif
}
204*77c1e3ccSAndroid Build Coastguard Worker 
// Yields a uint8_t pointer suitable for memcpy: when hbd is nonzero, d is
// passed through CONVERT_TO_SHORTPTR and cast back to uint8_t *; otherwise d
// is used unchanged.
#define REAL_PTR(hbd, d) ((hbd) ? (uint8_t *)CONVERT_TO_SHORTPTR(d) : (d))
206*77c1e3ccSAndroid Build Coastguard Worker 
207*77c1e3ccSAndroid Build Coastguard Worker // With striped loop restoration, the filtering for each 64-pixel stripe gets
208*77c1e3ccSAndroid Build Coastguard Worker // most of its input from the output of CDEF (stored in data8), but we need to
209*77c1e3ccSAndroid Build Coastguard Worker // fill out a border of 3 pixels above/below the stripe according to the
210*77c1e3ccSAndroid Build Coastguard Worker // following rules:
211*77c1e3ccSAndroid Build Coastguard Worker //
212*77c1e3ccSAndroid Build Coastguard Worker // * At the top and bottom of the frame, we copy the outermost row of CDEF
213*77c1e3ccSAndroid Build Coastguard Worker //   pixels three times. This extension is done by a call to av1_extend_frame()
214*77c1e3ccSAndroid Build Coastguard Worker //   at the start of the loop restoration process, so the value of
215*77c1e3ccSAndroid Build Coastguard Worker //   copy_above/copy_below doesn't strictly matter.
216*77c1e3ccSAndroid Build Coastguard Worker //
217*77c1e3ccSAndroid Build Coastguard Worker // * All other boundaries are stripe boundaries within the frame. In that case,
218*77c1e3ccSAndroid Build Coastguard Worker //   we take 2 rows of deblocked pixels and extend them to 3 rows of context.
get_stripe_boundary_info(const RestorationTileLimits * limits,int plane_w,int plane_h,int ss_y,int * copy_above,int * copy_below)219*77c1e3ccSAndroid Build Coastguard Worker static void get_stripe_boundary_info(const RestorationTileLimits *limits,
220*77c1e3ccSAndroid Build Coastguard Worker                                      int plane_w, int plane_h, int ss_y,
221*77c1e3ccSAndroid Build Coastguard Worker                                      int *copy_above, int *copy_below) {
222*77c1e3ccSAndroid Build Coastguard Worker   (void)plane_w;
223*77c1e3ccSAndroid Build Coastguard Worker 
224*77c1e3ccSAndroid Build Coastguard Worker   *copy_above = 1;
225*77c1e3ccSAndroid Build Coastguard Worker   *copy_below = 1;
226*77c1e3ccSAndroid Build Coastguard Worker 
227*77c1e3ccSAndroid Build Coastguard Worker   const int full_stripe_height = RESTORATION_PROC_UNIT_SIZE >> ss_y;
228*77c1e3ccSAndroid Build Coastguard Worker   const int runit_offset = RESTORATION_UNIT_OFFSET >> ss_y;
229*77c1e3ccSAndroid Build Coastguard Worker 
230*77c1e3ccSAndroid Build Coastguard Worker   const int first_stripe_in_plane = (limits->v_start == 0);
231*77c1e3ccSAndroid Build Coastguard Worker   const int this_stripe_height =
232*77c1e3ccSAndroid Build Coastguard Worker       full_stripe_height - (first_stripe_in_plane ? runit_offset : 0);
233*77c1e3ccSAndroid Build Coastguard Worker   const int last_stripe_in_plane =
234*77c1e3ccSAndroid Build Coastguard Worker       (limits->v_start + this_stripe_height >= plane_h);
235*77c1e3ccSAndroid Build Coastguard Worker 
236*77c1e3ccSAndroid Build Coastguard Worker   if (first_stripe_in_plane) *copy_above = 0;
237*77c1e3ccSAndroid Build Coastguard Worker   if (last_stripe_in_plane) *copy_below = 0;
238*77c1e3ccSAndroid Build Coastguard Worker }
239*77c1e3ccSAndroid Build Coastguard Worker 
240*77c1e3ccSAndroid Build Coastguard Worker // Overwrite the border pixels around a processing stripe so that the conditions
241*77c1e3ccSAndroid Build Coastguard Worker // listed above get_stripe_boundary_info() are preserved.
242*77c1e3ccSAndroid Build Coastguard Worker // We save the pixels which get overwritten into a temporary buffer, so that
243*77c1e3ccSAndroid Build Coastguard Worker // they can be restored by restore_processing_stripe_boundary() after we've
244*77c1e3ccSAndroid Build Coastguard Worker // processed the stripe.
245*77c1e3ccSAndroid Build Coastguard Worker //
246*77c1e3ccSAndroid Build Coastguard Worker // limits gives the rectangular limits of the remaining stripes for the current
247*77c1e3ccSAndroid Build Coastguard Worker // restoration unit. rsb is the stored stripe boundaries (taken from either
248*77c1e3ccSAndroid Build Coastguard Worker // deblock or CDEF output as necessary).
setup_processing_stripe_boundary(const RestorationTileLimits * limits,const RestorationStripeBoundaries * rsb,int rsb_row,int use_highbd,int h,uint8_t * data8,int data_stride,RestorationLineBuffers * rlbs,int copy_above,int copy_below,int opt)249*77c1e3ccSAndroid Build Coastguard Worker static void setup_processing_stripe_boundary(
250*77c1e3ccSAndroid Build Coastguard Worker     const RestorationTileLimits *limits, const RestorationStripeBoundaries *rsb,
251*77c1e3ccSAndroid Build Coastguard Worker     int rsb_row, int use_highbd, int h, uint8_t *data8, int data_stride,
252*77c1e3ccSAndroid Build Coastguard Worker     RestorationLineBuffers *rlbs, int copy_above, int copy_below, int opt) {
253*77c1e3ccSAndroid Build Coastguard Worker   // Offsets within the line buffers. The buffer logically starts at column
254*77c1e3ccSAndroid Build Coastguard Worker   // -RESTORATION_EXTRA_HORZ so the 1st column (at x0 - RESTORATION_EXTRA_HORZ)
255*77c1e3ccSAndroid Build Coastguard Worker   // has column x0 in the buffer.
256*77c1e3ccSAndroid Build Coastguard Worker   const int buf_stride = rsb->stripe_boundary_stride;
257*77c1e3ccSAndroid Build Coastguard Worker   const int buf_x0_off = limits->h_start;
258*77c1e3ccSAndroid Build Coastguard Worker   const int line_width =
259*77c1e3ccSAndroid Build Coastguard Worker       (limits->h_end - limits->h_start) + 2 * RESTORATION_EXTRA_HORZ;
260*77c1e3ccSAndroid Build Coastguard Worker   const int line_size = line_width << use_highbd;
261*77c1e3ccSAndroid Build Coastguard Worker 
262*77c1e3ccSAndroid Build Coastguard Worker   const int data_x0 = limits->h_start - RESTORATION_EXTRA_HORZ;
263*77c1e3ccSAndroid Build Coastguard Worker 
264*77c1e3ccSAndroid Build Coastguard Worker   // Replace RESTORATION_BORDER pixels above the top of the stripe
265*77c1e3ccSAndroid Build Coastguard Worker   // We expand RESTORATION_CTX_VERT=2 lines from rsb->stripe_boundary_above
266*77c1e3ccSAndroid Build Coastguard Worker   // to fill RESTORATION_BORDER=3 lines of above pixels. This is done by
267*77c1e3ccSAndroid Build Coastguard Worker   // duplicating the topmost of the 2 lines (see the AOMMAX call when
268*77c1e3ccSAndroid Build Coastguard Worker   // calculating src_row, which gets the values 0, 0, 1 for i = -3, -2, -1).
269*77c1e3ccSAndroid Build Coastguard Worker   if (!opt) {
270*77c1e3ccSAndroid Build Coastguard Worker     if (copy_above) {
271*77c1e3ccSAndroid Build Coastguard Worker       uint8_t *data8_tl = data8 + data_x0 + limits->v_start * data_stride;
272*77c1e3ccSAndroid Build Coastguard Worker 
273*77c1e3ccSAndroid Build Coastguard Worker       for (int i = -RESTORATION_BORDER; i < 0; ++i) {
274*77c1e3ccSAndroid Build Coastguard Worker         const int buf_row = rsb_row + AOMMAX(i + RESTORATION_CTX_VERT, 0);
275*77c1e3ccSAndroid Build Coastguard Worker         const int buf_off = buf_x0_off + buf_row * buf_stride;
276*77c1e3ccSAndroid Build Coastguard Worker         const uint8_t *buf =
277*77c1e3ccSAndroid Build Coastguard Worker             rsb->stripe_boundary_above + (buf_off << use_highbd);
278*77c1e3ccSAndroid Build Coastguard Worker         uint8_t *dst8 = data8_tl + i * data_stride;
279*77c1e3ccSAndroid Build Coastguard Worker         // Save old pixels, then replace with data from stripe_boundary_above
280*77c1e3ccSAndroid Build Coastguard Worker         memcpy(rlbs->tmp_save_above[i + RESTORATION_BORDER],
281*77c1e3ccSAndroid Build Coastguard Worker                REAL_PTR(use_highbd, dst8), line_size);
282*77c1e3ccSAndroid Build Coastguard Worker         memcpy(REAL_PTR(use_highbd, dst8), buf, line_size);
283*77c1e3ccSAndroid Build Coastguard Worker       }
284*77c1e3ccSAndroid Build Coastguard Worker     }
285*77c1e3ccSAndroid Build Coastguard Worker 
286*77c1e3ccSAndroid Build Coastguard Worker     // Replace RESTORATION_BORDER pixels below the bottom of the stripe.
287*77c1e3ccSAndroid Build Coastguard Worker     // The second buffer row is repeated, so src_row gets the values 0, 1, 1
288*77c1e3ccSAndroid Build Coastguard Worker     // for i = 0, 1, 2.
289*77c1e3ccSAndroid Build Coastguard Worker     if (copy_below) {
290*77c1e3ccSAndroid Build Coastguard Worker       const int stripe_end = limits->v_start + h;
291*77c1e3ccSAndroid Build Coastguard Worker       uint8_t *data8_bl = data8 + data_x0 + stripe_end * data_stride;
292*77c1e3ccSAndroid Build Coastguard Worker 
293*77c1e3ccSAndroid Build Coastguard Worker       for (int i = 0; i < RESTORATION_BORDER; ++i) {
294*77c1e3ccSAndroid Build Coastguard Worker         const int buf_row = rsb_row + AOMMIN(i, RESTORATION_CTX_VERT - 1);
295*77c1e3ccSAndroid Build Coastguard Worker         const int buf_off = buf_x0_off + buf_row * buf_stride;
296*77c1e3ccSAndroid Build Coastguard Worker         const uint8_t *src =
297*77c1e3ccSAndroid Build Coastguard Worker             rsb->stripe_boundary_below + (buf_off << use_highbd);
298*77c1e3ccSAndroid Build Coastguard Worker 
299*77c1e3ccSAndroid Build Coastguard Worker         uint8_t *dst8 = data8_bl + i * data_stride;
300*77c1e3ccSAndroid Build Coastguard Worker         // Save old pixels, then replace with data from stripe_boundary_below
301*77c1e3ccSAndroid Build Coastguard Worker         memcpy(rlbs->tmp_save_below[i], REAL_PTR(use_highbd, dst8), line_size);
302*77c1e3ccSAndroid Build Coastguard Worker         memcpy(REAL_PTR(use_highbd, dst8), src, line_size);
303*77c1e3ccSAndroid Build Coastguard Worker       }
304*77c1e3ccSAndroid Build Coastguard Worker     }
305*77c1e3ccSAndroid Build Coastguard Worker   } else {
306*77c1e3ccSAndroid Build Coastguard Worker     if (copy_above) {
307*77c1e3ccSAndroid Build Coastguard Worker       uint8_t *data8_tl = data8 + data_x0 + limits->v_start * data_stride;
308*77c1e3ccSAndroid Build Coastguard Worker 
309*77c1e3ccSAndroid Build Coastguard Worker       // Only save and overwrite i=-RESTORATION_BORDER line.
310*77c1e3ccSAndroid Build Coastguard Worker       uint8_t *dst8 = data8_tl + (-RESTORATION_BORDER) * data_stride;
311*77c1e3ccSAndroid Build Coastguard Worker       // Save old pixels, then replace with data from stripe_boundary_above
312*77c1e3ccSAndroid Build Coastguard Worker       memcpy(rlbs->tmp_save_above[0], REAL_PTR(use_highbd, dst8), line_size);
313*77c1e3ccSAndroid Build Coastguard Worker       memcpy(REAL_PTR(use_highbd, dst8),
314*77c1e3ccSAndroid Build Coastguard Worker              REAL_PTR(use_highbd,
315*77c1e3ccSAndroid Build Coastguard Worker                       data8_tl + (-RESTORATION_BORDER + 1) * data_stride),
316*77c1e3ccSAndroid Build Coastguard Worker              line_size);
317*77c1e3ccSAndroid Build Coastguard Worker     }
318*77c1e3ccSAndroid Build Coastguard Worker 
319*77c1e3ccSAndroid Build Coastguard Worker     if (copy_below) {
320*77c1e3ccSAndroid Build Coastguard Worker       const int stripe_end = limits->v_start + h;
321*77c1e3ccSAndroid Build Coastguard Worker       uint8_t *data8_bl = data8 + data_x0 + stripe_end * data_stride;
322*77c1e3ccSAndroid Build Coastguard Worker 
323*77c1e3ccSAndroid Build Coastguard Worker       // Only save and overwrite i=2 line.
324*77c1e3ccSAndroid Build Coastguard Worker       uint8_t *dst8 = data8_bl + 2 * data_stride;
325*77c1e3ccSAndroid Build Coastguard Worker       // Save old pixels, then replace with data from stripe_boundary_below
326*77c1e3ccSAndroid Build Coastguard Worker       memcpy(rlbs->tmp_save_below[2], REAL_PTR(use_highbd, dst8), line_size);
327*77c1e3ccSAndroid Build Coastguard Worker       memcpy(REAL_PTR(use_highbd, dst8),
328*77c1e3ccSAndroid Build Coastguard Worker              REAL_PTR(use_highbd, data8_bl + (2 - 1) * data_stride), line_size);
329*77c1e3ccSAndroid Build Coastguard Worker     }
330*77c1e3ccSAndroid Build Coastguard Worker   }
331*77c1e3ccSAndroid Build Coastguard Worker }
332*77c1e3ccSAndroid Build Coastguard Worker 
333*77c1e3ccSAndroid Build Coastguard Worker // Once a processing stripe is finished, this function sets the boundary
334*77c1e3ccSAndroid Build Coastguard Worker // pixels which were overwritten by setup_processing_stripe_boundary()
335*77c1e3ccSAndroid Build Coastguard Worker // back to their original values
restore_processing_stripe_boundary(const RestorationTileLimits * limits,const RestorationLineBuffers * rlbs,int use_highbd,int h,uint8_t * data8,int data_stride,int copy_above,int copy_below,int opt)336*77c1e3ccSAndroid Build Coastguard Worker static void restore_processing_stripe_boundary(
337*77c1e3ccSAndroid Build Coastguard Worker     const RestorationTileLimits *limits, const RestorationLineBuffers *rlbs,
338*77c1e3ccSAndroid Build Coastguard Worker     int use_highbd, int h, uint8_t *data8, int data_stride, int copy_above,
339*77c1e3ccSAndroid Build Coastguard Worker     int copy_below, int opt) {
340*77c1e3ccSAndroid Build Coastguard Worker   const int line_width =
341*77c1e3ccSAndroid Build Coastguard Worker       (limits->h_end - limits->h_start) + 2 * RESTORATION_EXTRA_HORZ;
342*77c1e3ccSAndroid Build Coastguard Worker   const int line_size = line_width << use_highbd;
343*77c1e3ccSAndroid Build Coastguard Worker 
344*77c1e3ccSAndroid Build Coastguard Worker   const int data_x0 = limits->h_start - RESTORATION_EXTRA_HORZ;
345*77c1e3ccSAndroid Build Coastguard Worker 
346*77c1e3ccSAndroid Build Coastguard Worker   if (!opt) {
347*77c1e3ccSAndroid Build Coastguard Worker     if (copy_above) {
348*77c1e3ccSAndroid Build Coastguard Worker       uint8_t *data8_tl = data8 + data_x0 + limits->v_start * data_stride;
349*77c1e3ccSAndroid Build Coastguard Worker       for (int i = -RESTORATION_BORDER; i < 0; ++i) {
350*77c1e3ccSAndroid Build Coastguard Worker         uint8_t *dst8 = data8_tl + i * data_stride;
351*77c1e3ccSAndroid Build Coastguard Worker         memcpy(REAL_PTR(use_highbd, dst8),
352*77c1e3ccSAndroid Build Coastguard Worker                rlbs->tmp_save_above[i + RESTORATION_BORDER], line_size);
353*77c1e3ccSAndroid Build Coastguard Worker       }
354*77c1e3ccSAndroid Build Coastguard Worker     }
355*77c1e3ccSAndroid Build Coastguard Worker 
356*77c1e3ccSAndroid Build Coastguard Worker     if (copy_below) {
357*77c1e3ccSAndroid Build Coastguard Worker       const int stripe_bottom = limits->v_start + h;
358*77c1e3ccSAndroid Build Coastguard Worker       uint8_t *data8_bl = data8 + data_x0 + stripe_bottom * data_stride;
359*77c1e3ccSAndroid Build Coastguard Worker 
360*77c1e3ccSAndroid Build Coastguard Worker       for (int i = 0; i < RESTORATION_BORDER; ++i) {
361*77c1e3ccSAndroid Build Coastguard Worker         if (stripe_bottom + i >= limits->v_end + RESTORATION_BORDER) break;
362*77c1e3ccSAndroid Build Coastguard Worker 
363*77c1e3ccSAndroid Build Coastguard Worker         uint8_t *dst8 = data8_bl + i * data_stride;
364*77c1e3ccSAndroid Build Coastguard Worker         memcpy(REAL_PTR(use_highbd, dst8), rlbs->tmp_save_below[i], line_size);
365*77c1e3ccSAndroid Build Coastguard Worker       }
366*77c1e3ccSAndroid Build Coastguard Worker     }
367*77c1e3ccSAndroid Build Coastguard Worker   } else {
368*77c1e3ccSAndroid Build Coastguard Worker     if (copy_above) {
369*77c1e3ccSAndroid Build Coastguard Worker       uint8_t *data8_tl = data8 + data_x0 + limits->v_start * data_stride;
370*77c1e3ccSAndroid Build Coastguard Worker 
371*77c1e3ccSAndroid Build Coastguard Worker       // Only restore i=-RESTORATION_BORDER line.
372*77c1e3ccSAndroid Build Coastguard Worker       uint8_t *dst8 = data8_tl + (-RESTORATION_BORDER) * data_stride;
373*77c1e3ccSAndroid Build Coastguard Worker       memcpy(REAL_PTR(use_highbd, dst8), rlbs->tmp_save_above[0], line_size);
374*77c1e3ccSAndroid Build Coastguard Worker     }
375*77c1e3ccSAndroid Build Coastguard Worker 
376*77c1e3ccSAndroid Build Coastguard Worker     if (copy_below) {
377*77c1e3ccSAndroid Build Coastguard Worker       const int stripe_bottom = limits->v_start + h;
378*77c1e3ccSAndroid Build Coastguard Worker       uint8_t *data8_bl = data8 + data_x0 + stripe_bottom * data_stride;
379*77c1e3ccSAndroid Build Coastguard Worker 
380*77c1e3ccSAndroid Build Coastguard Worker       // Only restore i=2 line.
381*77c1e3ccSAndroid Build Coastguard Worker       if (stripe_bottom + 2 < limits->v_end + RESTORATION_BORDER) {
382*77c1e3ccSAndroid Build Coastguard Worker         uint8_t *dst8 = data8_bl + 2 * data_stride;
383*77c1e3ccSAndroid Build Coastguard Worker         memcpy(REAL_PTR(use_highbd, dst8), rlbs->tmp_save_below[2], line_size);
384*77c1e3ccSAndroid Build Coastguard Worker       }
385*77c1e3ccSAndroid Build Coastguard Worker     }
386*77c1e3ccSAndroid Build Coastguard Worker   }
387*77c1e3ccSAndroid Build Coastguard Worker }
388*77c1e3ccSAndroid Build Coastguard Worker 
wiener_filter_stripe(const RestorationUnitInfo * rui,int stripe_width,int stripe_height,int procunit_width,const uint8_t * src,int src_stride,uint8_t * dst,int dst_stride,int32_t * tmpbuf,int bit_depth,struct aom_internal_error_info * error_info)389*77c1e3ccSAndroid Build Coastguard Worker static void wiener_filter_stripe(const RestorationUnitInfo *rui,
390*77c1e3ccSAndroid Build Coastguard Worker                                  int stripe_width, int stripe_height,
391*77c1e3ccSAndroid Build Coastguard Worker                                  int procunit_width, const uint8_t *src,
392*77c1e3ccSAndroid Build Coastguard Worker                                  int src_stride, uint8_t *dst, int dst_stride,
393*77c1e3ccSAndroid Build Coastguard Worker                                  int32_t *tmpbuf, int bit_depth,
394*77c1e3ccSAndroid Build Coastguard Worker                                  struct aom_internal_error_info *error_info) {
395*77c1e3ccSAndroid Build Coastguard Worker   (void)tmpbuf;
396*77c1e3ccSAndroid Build Coastguard Worker   (void)bit_depth;
397*77c1e3ccSAndroid Build Coastguard Worker   (void)error_info;
398*77c1e3ccSAndroid Build Coastguard Worker   assert(bit_depth == 8);
399*77c1e3ccSAndroid Build Coastguard Worker   const WienerConvolveParams conv_params = get_conv_params_wiener(8);
400*77c1e3ccSAndroid Build Coastguard Worker 
401*77c1e3ccSAndroid Build Coastguard Worker   for (int j = 0; j < stripe_width; j += procunit_width) {
402*77c1e3ccSAndroid Build Coastguard Worker     int w = AOMMIN(procunit_width, (stripe_width - j + 15) & ~15);
403*77c1e3ccSAndroid Build Coastguard Worker     const uint8_t *src_p = src + j;
404*77c1e3ccSAndroid Build Coastguard Worker     uint8_t *dst_p = dst + j;
405*77c1e3ccSAndroid Build Coastguard Worker     av1_wiener_convolve_add_src(
406*77c1e3ccSAndroid Build Coastguard Worker         src_p, src_stride, dst_p, dst_stride, rui->wiener_info.hfilter, 16,
407*77c1e3ccSAndroid Build Coastguard Worker         rui->wiener_info.vfilter, 16, w, stripe_height, &conv_params);
408*77c1e3ccSAndroid Build Coastguard Worker   }
409*77c1e3ccSAndroid Build Coastguard Worker }
410*77c1e3ccSAndroid Build Coastguard Worker 
411*77c1e3ccSAndroid Build Coastguard Worker /* Calculate windowed sums (if sqr=0) or sums of squares (if sqr=1)
412*77c1e3ccSAndroid Build Coastguard Worker    over the input. The window is of size (2r + 1)x(2r + 1), and we
413*77c1e3ccSAndroid Build Coastguard Worker    specialize to r = 1 and r = 2; boxsum() asserts on any other radius.
414*77c1e3ccSAndroid Build Coastguard Worker 
415*77c1e3ccSAndroid Build Coastguard Worker    Each loop follows the same format: We keep a window's worth of input
416*77c1e3ccSAndroid Build Coastguard Worker    in individual variables and select data out of that as appropriate.
417*77c1e3ccSAndroid Build Coastguard Worker */
boxsum1(int32_t * src,int width,int height,int src_stride,int sqr,int32_t * dst,int dst_stride)418*77c1e3ccSAndroid Build Coastguard Worker static void boxsum1(int32_t *src, int width, int height, int src_stride,
419*77c1e3ccSAndroid Build Coastguard Worker                     int sqr, int32_t *dst, int dst_stride) {
420*77c1e3ccSAndroid Build Coastguard Worker   int i, j, a, b, c;
421*77c1e3ccSAndroid Build Coastguard Worker   assert(width > 2 * SGRPROJ_BORDER_HORZ);
422*77c1e3ccSAndroid Build Coastguard Worker   assert(height > 2 * SGRPROJ_BORDER_VERT);
423*77c1e3ccSAndroid Build Coastguard Worker 
424*77c1e3ccSAndroid Build Coastguard Worker   // Vertical sum over 3-pixel regions, from src into dst.
425*77c1e3ccSAndroid Build Coastguard Worker   if (!sqr) {
426*77c1e3ccSAndroid Build Coastguard Worker     for (j = 0; j < width; ++j) {
427*77c1e3ccSAndroid Build Coastguard Worker       a = src[j];
428*77c1e3ccSAndroid Build Coastguard Worker       b = src[src_stride + j];
429*77c1e3ccSAndroid Build Coastguard Worker       c = src[2 * src_stride + j];
430*77c1e3ccSAndroid Build Coastguard Worker 
431*77c1e3ccSAndroid Build Coastguard Worker       dst[j] = a + b;
432*77c1e3ccSAndroid Build Coastguard Worker       for (i = 1; i < height - 2; ++i) {
433*77c1e3ccSAndroid Build Coastguard Worker         // Loop invariant: At the start of each iteration,
434*77c1e3ccSAndroid Build Coastguard Worker         // a = src[(i - 1) * src_stride + j]
435*77c1e3ccSAndroid Build Coastguard Worker         // b = src[(i    ) * src_stride + j]
436*77c1e3ccSAndroid Build Coastguard Worker         // c = src[(i + 1) * src_stride + j]
437*77c1e3ccSAndroid Build Coastguard Worker         dst[i * dst_stride + j] = a + b + c;
438*77c1e3ccSAndroid Build Coastguard Worker         a = b;
439*77c1e3ccSAndroid Build Coastguard Worker         b = c;
440*77c1e3ccSAndroid Build Coastguard Worker         c = src[(i + 2) * src_stride + j];
441*77c1e3ccSAndroid Build Coastguard Worker       }
442*77c1e3ccSAndroid Build Coastguard Worker       dst[i * dst_stride + j] = a + b + c;
443*77c1e3ccSAndroid Build Coastguard Worker       dst[(i + 1) * dst_stride + j] = b + c;
444*77c1e3ccSAndroid Build Coastguard Worker     }
445*77c1e3ccSAndroid Build Coastguard Worker   } else {
446*77c1e3ccSAndroid Build Coastguard Worker     for (j = 0; j < width; ++j) {
447*77c1e3ccSAndroid Build Coastguard Worker       a = src[j] * src[j];
448*77c1e3ccSAndroid Build Coastguard Worker       b = src[src_stride + j] * src[src_stride + j];
449*77c1e3ccSAndroid Build Coastguard Worker       c = src[2 * src_stride + j] * src[2 * src_stride + j];
450*77c1e3ccSAndroid Build Coastguard Worker 
451*77c1e3ccSAndroid Build Coastguard Worker       dst[j] = a + b;
452*77c1e3ccSAndroid Build Coastguard Worker       for (i = 1; i < height - 2; ++i) {
453*77c1e3ccSAndroid Build Coastguard Worker         dst[i * dst_stride + j] = a + b + c;
454*77c1e3ccSAndroid Build Coastguard Worker         a = b;
455*77c1e3ccSAndroid Build Coastguard Worker         b = c;
456*77c1e3ccSAndroid Build Coastguard Worker         c = src[(i + 2) * src_stride + j] * src[(i + 2) * src_stride + j];
457*77c1e3ccSAndroid Build Coastguard Worker       }
458*77c1e3ccSAndroid Build Coastguard Worker       dst[i * dst_stride + j] = a + b + c;
459*77c1e3ccSAndroid Build Coastguard Worker       dst[(i + 1) * dst_stride + j] = b + c;
460*77c1e3ccSAndroid Build Coastguard Worker     }
461*77c1e3ccSAndroid Build Coastguard Worker   }
462*77c1e3ccSAndroid Build Coastguard Worker 
463*77c1e3ccSAndroid Build Coastguard Worker   // Horizontal sum over 3-pixel regions of dst
464*77c1e3ccSAndroid Build Coastguard Worker   for (i = 0; i < height; ++i) {
465*77c1e3ccSAndroid Build Coastguard Worker     a = dst[i * dst_stride];
466*77c1e3ccSAndroid Build Coastguard Worker     b = dst[i * dst_stride + 1];
467*77c1e3ccSAndroid Build Coastguard Worker     c = dst[i * dst_stride + 2];
468*77c1e3ccSAndroid Build Coastguard Worker 
469*77c1e3ccSAndroid Build Coastguard Worker     dst[i * dst_stride] = a + b;
470*77c1e3ccSAndroid Build Coastguard Worker     for (j = 1; j < width - 2; ++j) {
471*77c1e3ccSAndroid Build Coastguard Worker       // Loop invariant: At the start of each iteration,
472*77c1e3ccSAndroid Build Coastguard Worker       // a = src[i * src_stride + (j - 1)]
473*77c1e3ccSAndroid Build Coastguard Worker       // b = src[i * src_stride + (j    )]
474*77c1e3ccSAndroid Build Coastguard Worker       // c = src[i * src_stride + (j + 1)]
475*77c1e3ccSAndroid Build Coastguard Worker       dst[i * dst_stride + j] = a + b + c;
476*77c1e3ccSAndroid Build Coastguard Worker       a = b;
477*77c1e3ccSAndroid Build Coastguard Worker       b = c;
478*77c1e3ccSAndroid Build Coastguard Worker       c = dst[i * dst_stride + (j + 2)];
479*77c1e3ccSAndroid Build Coastguard Worker     }
480*77c1e3ccSAndroid Build Coastguard Worker     dst[i * dst_stride + j] = a + b + c;
481*77c1e3ccSAndroid Build Coastguard Worker     dst[i * dst_stride + (j + 1)] = b + c;
482*77c1e3ccSAndroid Build Coastguard Worker   }
483*77c1e3ccSAndroid Build Coastguard Worker }
484*77c1e3ccSAndroid Build Coastguard Worker 
boxsum2(int32_t * src,int width,int height,int src_stride,int sqr,int32_t * dst,int dst_stride)485*77c1e3ccSAndroid Build Coastguard Worker static void boxsum2(int32_t *src, int width, int height, int src_stride,
486*77c1e3ccSAndroid Build Coastguard Worker                     int sqr, int32_t *dst, int dst_stride) {
487*77c1e3ccSAndroid Build Coastguard Worker   int i, j, a, b, c, d, e;
488*77c1e3ccSAndroid Build Coastguard Worker   assert(width > 2 * SGRPROJ_BORDER_HORZ);
489*77c1e3ccSAndroid Build Coastguard Worker   assert(height > 2 * SGRPROJ_BORDER_VERT);
490*77c1e3ccSAndroid Build Coastguard Worker 
491*77c1e3ccSAndroid Build Coastguard Worker   // Vertical sum over 5-pixel regions, from src into dst.
492*77c1e3ccSAndroid Build Coastguard Worker   if (!sqr) {
493*77c1e3ccSAndroid Build Coastguard Worker     for (j = 0; j < width; ++j) {
494*77c1e3ccSAndroid Build Coastguard Worker       a = src[j];
495*77c1e3ccSAndroid Build Coastguard Worker       b = src[src_stride + j];
496*77c1e3ccSAndroid Build Coastguard Worker       c = src[2 * src_stride + j];
497*77c1e3ccSAndroid Build Coastguard Worker       d = src[3 * src_stride + j];
498*77c1e3ccSAndroid Build Coastguard Worker       e = src[4 * src_stride + j];
499*77c1e3ccSAndroid Build Coastguard Worker 
500*77c1e3ccSAndroid Build Coastguard Worker       dst[j] = a + b + c;
501*77c1e3ccSAndroid Build Coastguard Worker       dst[dst_stride + j] = a + b + c + d;
502*77c1e3ccSAndroid Build Coastguard Worker       for (i = 2; i < height - 3; ++i) {
503*77c1e3ccSAndroid Build Coastguard Worker         // Loop invariant: At the start of each iteration,
504*77c1e3ccSAndroid Build Coastguard Worker         // a = src[(i - 2) * src_stride + j]
505*77c1e3ccSAndroid Build Coastguard Worker         // b = src[(i - 1) * src_stride + j]
506*77c1e3ccSAndroid Build Coastguard Worker         // c = src[(i    ) * src_stride + j]
507*77c1e3ccSAndroid Build Coastguard Worker         // d = src[(i + 1) * src_stride + j]
508*77c1e3ccSAndroid Build Coastguard Worker         // e = src[(i + 2) * src_stride + j]
509*77c1e3ccSAndroid Build Coastguard Worker         dst[i * dst_stride + j] = a + b + c + d + e;
510*77c1e3ccSAndroid Build Coastguard Worker         a = b;
511*77c1e3ccSAndroid Build Coastguard Worker         b = c;
512*77c1e3ccSAndroid Build Coastguard Worker         c = d;
513*77c1e3ccSAndroid Build Coastguard Worker         d = e;
514*77c1e3ccSAndroid Build Coastguard Worker         e = src[(i + 3) * src_stride + j];
515*77c1e3ccSAndroid Build Coastguard Worker       }
516*77c1e3ccSAndroid Build Coastguard Worker       dst[i * dst_stride + j] = a + b + c + d + e;
517*77c1e3ccSAndroid Build Coastguard Worker       dst[(i + 1) * dst_stride + j] = b + c + d + e;
518*77c1e3ccSAndroid Build Coastguard Worker       dst[(i + 2) * dst_stride + j] = c + d + e;
519*77c1e3ccSAndroid Build Coastguard Worker     }
520*77c1e3ccSAndroid Build Coastguard Worker   } else {
521*77c1e3ccSAndroid Build Coastguard Worker     for (j = 0; j < width; ++j) {
522*77c1e3ccSAndroid Build Coastguard Worker       a = src[j] * src[j];
523*77c1e3ccSAndroid Build Coastguard Worker       b = src[src_stride + j] * src[src_stride + j];
524*77c1e3ccSAndroid Build Coastguard Worker       c = src[2 * src_stride + j] * src[2 * src_stride + j];
525*77c1e3ccSAndroid Build Coastguard Worker       d = src[3 * src_stride + j] * src[3 * src_stride + j];
526*77c1e3ccSAndroid Build Coastguard Worker       e = src[4 * src_stride + j] * src[4 * src_stride + j];
527*77c1e3ccSAndroid Build Coastguard Worker 
528*77c1e3ccSAndroid Build Coastguard Worker       dst[j] = a + b + c;
529*77c1e3ccSAndroid Build Coastguard Worker       dst[dst_stride + j] = a + b + c + d;
530*77c1e3ccSAndroid Build Coastguard Worker       for (i = 2; i < height - 3; ++i) {
531*77c1e3ccSAndroid Build Coastguard Worker         dst[i * dst_stride + j] = a + b + c + d + e;
532*77c1e3ccSAndroid Build Coastguard Worker         a = b;
533*77c1e3ccSAndroid Build Coastguard Worker         b = c;
534*77c1e3ccSAndroid Build Coastguard Worker         c = d;
535*77c1e3ccSAndroid Build Coastguard Worker         d = e;
536*77c1e3ccSAndroid Build Coastguard Worker         e = src[(i + 3) * src_stride + j] * src[(i + 3) * src_stride + j];
537*77c1e3ccSAndroid Build Coastguard Worker       }
538*77c1e3ccSAndroid Build Coastguard Worker       dst[i * dst_stride + j] = a + b + c + d + e;
539*77c1e3ccSAndroid Build Coastguard Worker       dst[(i + 1) * dst_stride + j] = b + c + d + e;
540*77c1e3ccSAndroid Build Coastguard Worker       dst[(i + 2) * dst_stride + j] = c + d + e;
541*77c1e3ccSAndroid Build Coastguard Worker     }
542*77c1e3ccSAndroid Build Coastguard Worker   }
543*77c1e3ccSAndroid Build Coastguard Worker 
544*77c1e3ccSAndroid Build Coastguard Worker   // Horizontal sum over 5-pixel regions of dst
545*77c1e3ccSAndroid Build Coastguard Worker   for (i = 0; i < height; ++i) {
546*77c1e3ccSAndroid Build Coastguard Worker     a = dst[i * dst_stride];
547*77c1e3ccSAndroid Build Coastguard Worker     b = dst[i * dst_stride + 1];
548*77c1e3ccSAndroid Build Coastguard Worker     c = dst[i * dst_stride + 2];
549*77c1e3ccSAndroid Build Coastguard Worker     d = dst[i * dst_stride + 3];
550*77c1e3ccSAndroid Build Coastguard Worker     e = dst[i * dst_stride + 4];
551*77c1e3ccSAndroid Build Coastguard Worker 
552*77c1e3ccSAndroid Build Coastguard Worker     dst[i * dst_stride] = a + b + c;
553*77c1e3ccSAndroid Build Coastguard Worker     dst[i * dst_stride + 1] = a + b + c + d;
554*77c1e3ccSAndroid Build Coastguard Worker     for (j = 2; j < width - 3; ++j) {
555*77c1e3ccSAndroid Build Coastguard Worker       // Loop invariant: At the start of each iteration,
556*77c1e3ccSAndroid Build Coastguard Worker       // a = src[i * src_stride + (j - 2)]
557*77c1e3ccSAndroid Build Coastguard Worker       // b = src[i * src_stride + (j - 1)]
558*77c1e3ccSAndroid Build Coastguard Worker       // c = src[i * src_stride + (j    )]
559*77c1e3ccSAndroid Build Coastguard Worker       // d = src[i * src_stride + (j + 1)]
560*77c1e3ccSAndroid Build Coastguard Worker       // e = src[i * src_stride + (j + 2)]
561*77c1e3ccSAndroid Build Coastguard Worker       dst[i * dst_stride + j] = a + b + c + d + e;
562*77c1e3ccSAndroid Build Coastguard Worker       a = b;
563*77c1e3ccSAndroid Build Coastguard Worker       b = c;
564*77c1e3ccSAndroid Build Coastguard Worker       c = d;
565*77c1e3ccSAndroid Build Coastguard Worker       d = e;
566*77c1e3ccSAndroid Build Coastguard Worker       e = dst[i * dst_stride + (j + 3)];
567*77c1e3ccSAndroid Build Coastguard Worker     }
568*77c1e3ccSAndroid Build Coastguard Worker     dst[i * dst_stride + j] = a + b + c + d + e;
569*77c1e3ccSAndroid Build Coastguard Worker     dst[i * dst_stride + (j + 1)] = b + c + d + e;
570*77c1e3ccSAndroid Build Coastguard Worker     dst[i * dst_stride + (j + 2)] = c + d + e;
571*77c1e3ccSAndroid Build Coastguard Worker   }
572*77c1e3ccSAndroid Build Coastguard Worker }
573*77c1e3ccSAndroid Build Coastguard Worker 
// Dispatches to the box-sum routine specialised for radius `r`. Only r == 1
// (3x3 window) and r == 2 (5x5 window) are implemented; any other radius
// trips the assert and, when asserts are compiled out, leaves `dst` untouched.
static void boxsum(int32_t *src, int width, int height, int src_stride, int r,
                   int sqr, int32_t *dst, int dst_stride) {
  switch (r) {
    case 1:
      boxsum1(src, width, height, src_stride, sqr, dst, dst_stride);
      break;
    case 2:
      boxsum2(src, width, height, src_stride, sqr, dst, dst_stride);
      break;
    default: assert(0 && "Invalid value of r in self-guided filter"); break;
  }
}
583*77c1e3ccSAndroid Build Coastguard Worker 
av1_decode_xq(const int * xqd,int * xq,const sgr_params_type * params)584*77c1e3ccSAndroid Build Coastguard Worker void av1_decode_xq(const int *xqd, int *xq, const sgr_params_type *params) {
585*77c1e3ccSAndroid Build Coastguard Worker   if (params->r[0] == 0) {
586*77c1e3ccSAndroid Build Coastguard Worker     xq[0] = 0;
587*77c1e3ccSAndroid Build Coastguard Worker     xq[1] = (1 << SGRPROJ_PRJ_BITS) - xqd[1];
588*77c1e3ccSAndroid Build Coastguard Worker   } else if (params->r[1] == 0) {
589*77c1e3ccSAndroid Build Coastguard Worker     xq[0] = xqd[0];
590*77c1e3ccSAndroid Build Coastguard Worker     xq[1] = 0;
591*77c1e3ccSAndroid Build Coastguard Worker   } else {
592*77c1e3ccSAndroid Build Coastguard Worker     xq[0] = xqd[0];
593*77c1e3ccSAndroid Build Coastguard Worker     xq[1] = (1 << SGRPROJ_PRJ_BITS) - xq[0] - xqd[1];
594*77c1e3ccSAndroid Build Coastguard Worker   }
595*77c1e3ccSAndroid Build Coastguard Worker }
596*77c1e3ccSAndroid Build Coastguard Worker 
// Lookup table indexed by z in [0, 255]; calculate_intermediate_result()
// clamps its index with AOMMIN(z, 255) before reading it. The entries are
// approximately 256 * z / (z + 1), with two exceptions: index 0 is forced to
// 1 and the final entry (reached by every clamped z >= 255) is 256, so the
// values span [1, 256].
// NOTE(review): the rationale referenced inside the table is written next to
// the A[k] assignment in calculate_intermediate_result(); confirm whether
// selfguided_restoration_internal() still carries it as well.
597*77c1e3ccSAndroid Build Coastguard Worker const int32_t av1_x_by_xplus1[256] = {
598*77c1e3ccSAndroid Build Coastguard Worker   // Special case: Map 0 -> 1 (corresponding to a value of 1/256)
599*77c1e3ccSAndroid Build Coastguard Worker   // instead of 0. See comments in selfguided_restoration_internal() for why
600*77c1e3ccSAndroid Build Coastguard Worker   1,   128, 171, 192, 205, 213, 219, 224, 228, 230, 233, 235, 236, 238, 239,
601*77c1e3ccSAndroid Build Coastguard Worker   240, 241, 242, 243, 243, 244, 244, 245, 245, 246, 246, 247, 247, 247, 247,
602*77c1e3ccSAndroid Build Coastguard Worker   248, 248, 248, 248, 249, 249, 249, 249, 249, 250, 250, 250, 250, 250, 250,
603*77c1e3ccSAndroid Build Coastguard Worker   250, 251, 251, 251, 251, 251, 251, 251, 251, 251, 251, 252, 252, 252, 252,
604*77c1e3ccSAndroid Build Coastguard Worker   252, 252, 252, 252, 252, 252, 252, 252, 252, 252, 252, 252, 252, 253, 253,
605*77c1e3ccSAndroid Build Coastguard Worker   253, 253, 253, 253, 253, 253, 253, 253, 253, 253, 253, 253, 253, 253, 253,
606*77c1e3ccSAndroid Build Coastguard Worker   253, 253, 253, 253, 253, 253, 253, 253, 253, 253, 253, 253, 254, 254, 254,
607*77c1e3ccSAndroid Build Coastguard Worker   254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254,
608*77c1e3ccSAndroid Build Coastguard Worker   254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254,
609*77c1e3ccSAndroid Build Coastguard Worker   254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254,
610*77c1e3ccSAndroid Build Coastguard Worker   254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254,
611*77c1e3ccSAndroid Build Coastguard Worker   254, 254, 254, 254, 254, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
612*77c1e3ccSAndroid Build Coastguard Worker   255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
613*77c1e3ccSAndroid Build Coastguard Worker   255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
614*77c1e3ccSAndroid Build Coastguard Worker   255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
615*77c1e3ccSAndroid Build Coastguard Worker   255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
616*77c1e3ccSAndroid Build Coastguard Worker   255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
617*77c1e3ccSAndroid Build Coastguard Worker   256,
618*77c1e3ccSAndroid Build Coastguard Worker };
619*77c1e3ccSAndroid Build Coastguard Worker 
// Fixed-point reciprocals: av1_one_by_x[n - 1] = round(2^12 / n). The
// initializer lists 25 values (n = 1..25); calculate_intermediate_result()
// indexes it with n = (2r + 1)^2, the number of samples in the box window.
// NOTE(review): MAX_NELEM is declared elsewhere and is presumably 25 —
// confirm.
620*77c1e3ccSAndroid Build Coastguard Worker const int32_t av1_one_by_x[MAX_NELEM] = {
621*77c1e3ccSAndroid Build Coastguard Worker   4096, 2048, 1365, 1024, 819, 683, 585, 512, 455, 410, 372, 341, 315,
622*77c1e3ccSAndroid Build Coastguard Worker   293,  273,  256,  241,  228, 216, 205, 195, 186, 178, 171, 164,
623*77c1e3ccSAndroid Build Coastguard Worker };
624*77c1e3ccSAndroid Build Coastguard Worker 
calculate_intermediate_result(int32_t * dgd,int width,int height,int dgd_stride,int bit_depth,int sgr_params_idx,int radius_idx,int pass,int32_t * A,int32_t * B)625*77c1e3ccSAndroid Build Coastguard Worker static void calculate_intermediate_result(int32_t *dgd, int width, int height,
626*77c1e3ccSAndroid Build Coastguard Worker                                           int dgd_stride, int bit_depth,
627*77c1e3ccSAndroid Build Coastguard Worker                                           int sgr_params_idx, int radius_idx,
628*77c1e3ccSAndroid Build Coastguard Worker                                           int pass, int32_t *A, int32_t *B) {
629*77c1e3ccSAndroid Build Coastguard Worker   const sgr_params_type *const params = &av1_sgr_params[sgr_params_idx];
630*77c1e3ccSAndroid Build Coastguard Worker   const int r = params->r[radius_idx];
631*77c1e3ccSAndroid Build Coastguard Worker   const int width_ext = width + 2 * SGRPROJ_BORDER_HORZ;
632*77c1e3ccSAndroid Build Coastguard Worker   const int height_ext = height + 2 * SGRPROJ_BORDER_VERT;
633*77c1e3ccSAndroid Build Coastguard Worker   // Adjusting the stride of A and B here appears to avoid bad cache effects,
634*77c1e3ccSAndroid Build Coastguard Worker   // leading to a significant speed improvement.
635*77c1e3ccSAndroid Build Coastguard Worker   // We also align the stride to a multiple of 16 bytes, for consistency
636*77c1e3ccSAndroid Build Coastguard Worker   // with the SIMD version of this function.
637*77c1e3ccSAndroid Build Coastguard Worker   int buf_stride = ((width_ext + 3) & ~3) + 16;
638*77c1e3ccSAndroid Build Coastguard Worker   const int step = pass == 0 ? 1 : 2;
639*77c1e3ccSAndroid Build Coastguard Worker   int i, j;
640*77c1e3ccSAndroid Build Coastguard Worker 
641*77c1e3ccSAndroid Build Coastguard Worker   assert(r <= MAX_RADIUS && "Need MAX_RADIUS >= r");
642*77c1e3ccSAndroid Build Coastguard Worker   assert(r <= SGRPROJ_BORDER_VERT - 1 && r <= SGRPROJ_BORDER_HORZ - 1 &&
643*77c1e3ccSAndroid Build Coastguard Worker          "Need SGRPROJ_BORDER_* >= r+1");
644*77c1e3ccSAndroid Build Coastguard Worker 
645*77c1e3ccSAndroid Build Coastguard Worker   boxsum(dgd - dgd_stride * SGRPROJ_BORDER_VERT - SGRPROJ_BORDER_HORZ,
646*77c1e3ccSAndroid Build Coastguard Worker          width_ext, height_ext, dgd_stride, r, 0, B, buf_stride);
647*77c1e3ccSAndroid Build Coastguard Worker   boxsum(dgd - dgd_stride * SGRPROJ_BORDER_VERT - SGRPROJ_BORDER_HORZ,
648*77c1e3ccSAndroid Build Coastguard Worker          width_ext, height_ext, dgd_stride, r, 1, A, buf_stride);
649*77c1e3ccSAndroid Build Coastguard Worker   A += SGRPROJ_BORDER_VERT * buf_stride + SGRPROJ_BORDER_HORZ;
650*77c1e3ccSAndroid Build Coastguard Worker   B += SGRPROJ_BORDER_VERT * buf_stride + SGRPROJ_BORDER_HORZ;
651*77c1e3ccSAndroid Build Coastguard Worker   // Calculate the eventual A[] and B[] arrays. Include a 1-pixel border - ie,
652*77c1e3ccSAndroid Build Coastguard Worker   // for a 64x64 processing unit, we calculate 66x66 pixels of A[] and B[].
653*77c1e3ccSAndroid Build Coastguard Worker   for (i = -1; i < height + 1; i += step) {
654*77c1e3ccSAndroid Build Coastguard Worker     for (j = -1; j < width + 1; ++j) {
655*77c1e3ccSAndroid Build Coastguard Worker       const int k = i * buf_stride + j;
656*77c1e3ccSAndroid Build Coastguard Worker       const int n = (2 * r + 1) * (2 * r + 1);
657*77c1e3ccSAndroid Build Coastguard Worker 
658*77c1e3ccSAndroid Build Coastguard Worker       // a < 2^16 * n < 2^22 regardless of bit depth
659*77c1e3ccSAndroid Build Coastguard Worker       uint32_t a = ROUND_POWER_OF_TWO(A[k], 2 * (bit_depth - 8));
660*77c1e3ccSAndroid Build Coastguard Worker       // b < 2^8 * n < 2^14 regardless of bit depth
661*77c1e3ccSAndroid Build Coastguard Worker       uint32_t b = ROUND_POWER_OF_TWO(B[k], bit_depth - 8);
662*77c1e3ccSAndroid Build Coastguard Worker 
663*77c1e3ccSAndroid Build Coastguard Worker       // Each term in calculating p = a * n - b * b is < 2^16 * n^2 < 2^28,
664*77c1e3ccSAndroid Build Coastguard Worker       // and p itself satisfies p < 2^14 * n^2 < 2^26.
665*77c1e3ccSAndroid Build Coastguard Worker       // This bound on p is due to:
666*77c1e3ccSAndroid Build Coastguard Worker       // https://en.wikipedia.org/wiki/Popoviciu's_inequality_on_variances
667*77c1e3ccSAndroid Build Coastguard Worker       //
668*77c1e3ccSAndroid Build Coastguard Worker       // Note: Sometimes, in high bit depth, we can end up with a*n < b*b.
669*77c1e3ccSAndroid Build Coastguard Worker       // This is an artefact of rounding, and can only happen if all pixels
670*77c1e3ccSAndroid Build Coastguard Worker       // are (almost) identical, so in this case we saturate to p=0.
671*77c1e3ccSAndroid Build Coastguard Worker       uint32_t p = (a * n < b * b) ? 0 : a * n - b * b;
672*77c1e3ccSAndroid Build Coastguard Worker 
673*77c1e3ccSAndroid Build Coastguard Worker       const uint32_t s = params->s[radius_idx];
674*77c1e3ccSAndroid Build Coastguard Worker 
675*77c1e3ccSAndroid Build Coastguard Worker       // p * s < (2^14 * n^2) * round(2^20 / n^2 eps) < 2^34 / eps < 2^32
676*77c1e3ccSAndroid Build Coastguard Worker       // as long as eps >= 4. So p * s fits into a uint32_t, and z < 2^12
677*77c1e3ccSAndroid Build Coastguard Worker       // (this holds even after accounting for the rounding in s)
678*77c1e3ccSAndroid Build Coastguard Worker       const uint32_t z = ROUND_POWER_OF_TWO(p * s, SGRPROJ_MTABLE_BITS);
679*77c1e3ccSAndroid Build Coastguard Worker 
680*77c1e3ccSAndroid Build Coastguard Worker       // Note: We have to be quite careful about the value of A[k].
681*77c1e3ccSAndroid Build Coastguard Worker       // This is used as a blend factor between individual pixel values and the
682*77c1e3ccSAndroid Build Coastguard Worker       // local mean. So it logically has a range of [0, 256], including both
683*77c1e3ccSAndroid Build Coastguard Worker       // endpoints.
684*77c1e3ccSAndroid Build Coastguard Worker       //
685*77c1e3ccSAndroid Build Coastguard Worker       // This is a pain for hardware, as we'd like something which can be stored
686*77c1e3ccSAndroid Build Coastguard Worker       // in exactly 8 bits.
687*77c1e3ccSAndroid Build Coastguard Worker       // Further, in the calculation of B[k] below, if z == 0 and r == 2,
688*77c1e3ccSAndroid Build Coastguard Worker       // then A[k] "should be" 0. But then we can end up setting B[k] to a value
689*77c1e3ccSAndroid Build Coastguard Worker       // slightly above 2^(8 + bit depth), due to rounding in the value of
690*77c1e3ccSAndroid Build Coastguard Worker       // av1_one_by_x[25-1].
691*77c1e3ccSAndroid Build Coastguard Worker       //
692*77c1e3ccSAndroid Build Coastguard Worker       // Thus we saturate so that, when z == 0, A[k] is set to 1 instead of 0.
693*77c1e3ccSAndroid Build Coastguard Worker       // This fixes the above issues (256 - A[k] fits in a uint8, and we can't
694*77c1e3ccSAndroid Build Coastguard Worker       // overflow), without significantly affecting the final result: z == 0
695*77c1e3ccSAndroid Build Coastguard Worker       // implies that the image is essentially "flat", so the local mean and
696*77c1e3ccSAndroid Build Coastguard Worker       // individual pixel values are very similar.
697*77c1e3ccSAndroid Build Coastguard Worker       //
698*77c1e3ccSAndroid Build Coastguard Worker       // Note that saturating on the other side, ie. requring A[k] <= 255,
699*77c1e3ccSAndroid Build Coastguard Worker       // would be a bad idea, as that corresponds to the case where the image
700*77c1e3ccSAndroid Build Coastguard Worker       // is very variable, when we want to preserve the local pixel value as
701*77c1e3ccSAndroid Build Coastguard Worker       // much as possible.
702*77c1e3ccSAndroid Build Coastguard Worker       A[k] = av1_x_by_xplus1[AOMMIN(z, 255)];  // in range [1, 256]
703*77c1e3ccSAndroid Build Coastguard Worker 
704*77c1e3ccSAndroid Build Coastguard Worker       // SGRPROJ_SGR - A[k] < 2^8 (from above), B[k] < 2^(bit_depth) * n,
705*77c1e3ccSAndroid Build Coastguard Worker       // av1_one_by_x[n - 1] = round(2^12 / n)
706*77c1e3ccSAndroid Build Coastguard Worker       // => the product here is < 2^(20 + bit_depth) <= 2^32,
707*77c1e3ccSAndroid Build Coastguard Worker       // and B[k] is set to a value < 2^(8 + bit depth)
708*77c1e3ccSAndroid Build Coastguard Worker       // This holds even with the rounding in av1_one_by_x and in the overall
709*77c1e3ccSAndroid Build Coastguard Worker       // result, as long as SGRPROJ_SGR - A[k] is strictly less than 2^8.
710*77c1e3ccSAndroid Build Coastguard Worker       B[k] = (int32_t)ROUND_POWER_OF_TWO((uint32_t)(SGRPROJ_SGR - A[k]) *
711*77c1e3ccSAndroid Build Coastguard Worker                                              (uint32_t)B[k] *
712*77c1e3ccSAndroid Build Coastguard Worker                                              (uint32_t)av1_one_by_x[n - 1],
713*77c1e3ccSAndroid Build Coastguard Worker                                          SGRPROJ_RECIP_BITS);
714*77c1e3ccSAndroid Build Coastguard Worker     }
715*77c1e3ccSAndroid Build Coastguard Worker   }
716*77c1e3ccSAndroid Build Coastguard Worker }
717*77c1e3ccSAndroid Build Coastguard Worker 
selfguided_restoration_fast_internal(int32_t * dgd,int width,int height,int dgd_stride,int32_t * dst,int dst_stride,int bit_depth,int sgr_params_idx,int radius_idx)718*77c1e3ccSAndroid Build Coastguard Worker static void selfguided_restoration_fast_internal(
719*77c1e3ccSAndroid Build Coastguard Worker     int32_t *dgd, int width, int height, int dgd_stride, int32_t *dst,
720*77c1e3ccSAndroid Build Coastguard Worker     int dst_stride, int bit_depth, int sgr_params_idx, int radius_idx) {
721*77c1e3ccSAndroid Build Coastguard Worker   const sgr_params_type *const params = &av1_sgr_params[sgr_params_idx];
722*77c1e3ccSAndroid Build Coastguard Worker   const int r = params->r[radius_idx];
723*77c1e3ccSAndroid Build Coastguard Worker   const int width_ext = width + 2 * SGRPROJ_BORDER_HORZ;
724*77c1e3ccSAndroid Build Coastguard Worker   // Adjusting the stride of A and B here appears to avoid bad cache effects,
725*77c1e3ccSAndroid Build Coastguard Worker   // leading to a significant speed improvement.
726*77c1e3ccSAndroid Build Coastguard Worker   // We also align the stride to a multiple of 16 bytes, for consistency
727*77c1e3ccSAndroid Build Coastguard Worker   // with the SIMD version of this function.
728*77c1e3ccSAndroid Build Coastguard Worker   int buf_stride = ((width_ext + 3) & ~3) + 16;
729*77c1e3ccSAndroid Build Coastguard Worker   int32_t A_[RESTORATION_PROC_UNIT_PELS];
730*77c1e3ccSAndroid Build Coastguard Worker   int32_t B_[RESTORATION_PROC_UNIT_PELS];
731*77c1e3ccSAndroid Build Coastguard Worker   int32_t *A = A_;
732*77c1e3ccSAndroid Build Coastguard Worker   int32_t *B = B_;
733*77c1e3ccSAndroid Build Coastguard Worker   int i, j;
734*77c1e3ccSAndroid Build Coastguard Worker   calculate_intermediate_result(dgd, width, height, dgd_stride, bit_depth,
735*77c1e3ccSAndroid Build Coastguard Worker                                 sgr_params_idx, radius_idx, 1, A, B);
736*77c1e3ccSAndroid Build Coastguard Worker   A += SGRPROJ_BORDER_VERT * buf_stride + SGRPROJ_BORDER_HORZ;
737*77c1e3ccSAndroid Build Coastguard Worker   B += SGRPROJ_BORDER_VERT * buf_stride + SGRPROJ_BORDER_HORZ;
738*77c1e3ccSAndroid Build Coastguard Worker 
739*77c1e3ccSAndroid Build Coastguard Worker   // Use the A[] and B[] arrays to calculate the filtered image
740*77c1e3ccSAndroid Build Coastguard Worker   (void)r;
741*77c1e3ccSAndroid Build Coastguard Worker   assert(r == 2);
742*77c1e3ccSAndroid Build Coastguard Worker   for (i = 0; i < height; ++i) {
743*77c1e3ccSAndroid Build Coastguard Worker     if (!(i & 1)) {  // even row
744*77c1e3ccSAndroid Build Coastguard Worker       for (j = 0; j < width; ++j) {
745*77c1e3ccSAndroid Build Coastguard Worker         const int k = i * buf_stride + j;
746*77c1e3ccSAndroid Build Coastguard Worker         const int l = i * dgd_stride + j;
747*77c1e3ccSAndroid Build Coastguard Worker         const int m = i * dst_stride + j;
748*77c1e3ccSAndroid Build Coastguard Worker         const int nb = 5;
749*77c1e3ccSAndroid Build Coastguard Worker         const int32_t a = (A[k - buf_stride] + A[k + buf_stride]) * 6 +
750*77c1e3ccSAndroid Build Coastguard Worker                           (A[k - 1 - buf_stride] + A[k - 1 + buf_stride] +
751*77c1e3ccSAndroid Build Coastguard Worker                            A[k + 1 - buf_stride] + A[k + 1 + buf_stride]) *
752*77c1e3ccSAndroid Build Coastguard Worker                               5;
753*77c1e3ccSAndroid Build Coastguard Worker         const int32_t b = (B[k - buf_stride] + B[k + buf_stride]) * 6 +
754*77c1e3ccSAndroid Build Coastguard Worker                           (B[k - 1 - buf_stride] + B[k - 1 + buf_stride] +
755*77c1e3ccSAndroid Build Coastguard Worker                            B[k + 1 - buf_stride] + B[k + 1 + buf_stride]) *
756*77c1e3ccSAndroid Build Coastguard Worker                               5;
757*77c1e3ccSAndroid Build Coastguard Worker         const int32_t v = a * dgd[l] + b;
758*77c1e3ccSAndroid Build Coastguard Worker         dst[m] =
759*77c1e3ccSAndroid Build Coastguard Worker             ROUND_POWER_OF_TWO(v, SGRPROJ_SGR_BITS + nb - SGRPROJ_RST_BITS);
760*77c1e3ccSAndroid Build Coastguard Worker       }
761*77c1e3ccSAndroid Build Coastguard Worker     } else {  // odd row
762*77c1e3ccSAndroid Build Coastguard Worker       for (j = 0; j < width; ++j) {
763*77c1e3ccSAndroid Build Coastguard Worker         const int k = i * buf_stride + j;
764*77c1e3ccSAndroid Build Coastguard Worker         const int l = i * dgd_stride + j;
765*77c1e3ccSAndroid Build Coastguard Worker         const int m = i * dst_stride + j;
766*77c1e3ccSAndroid Build Coastguard Worker         const int nb = 4;
767*77c1e3ccSAndroid Build Coastguard Worker         const int32_t a = A[k] * 6 + (A[k - 1] + A[k + 1]) * 5;
768*77c1e3ccSAndroid Build Coastguard Worker         const int32_t b = B[k] * 6 + (B[k - 1] + B[k + 1]) * 5;
769*77c1e3ccSAndroid Build Coastguard Worker         const int32_t v = a * dgd[l] + b;
770*77c1e3ccSAndroid Build Coastguard Worker         dst[m] =
771*77c1e3ccSAndroid Build Coastguard Worker             ROUND_POWER_OF_TWO(v, SGRPROJ_SGR_BITS + nb - SGRPROJ_RST_BITS);
772*77c1e3ccSAndroid Build Coastguard Worker       }
773*77c1e3ccSAndroid Build Coastguard Worker     }
774*77c1e3ccSAndroid Build Coastguard Worker   }
775*77c1e3ccSAndroid Build Coastguard Worker }
776*77c1e3ccSAndroid Build Coastguard Worker 
selfguided_restoration_internal(int32_t * dgd,int width,int height,int dgd_stride,int32_t * dst,int dst_stride,int bit_depth,int sgr_params_idx,int radius_idx)777*77c1e3ccSAndroid Build Coastguard Worker static void selfguided_restoration_internal(int32_t *dgd, int width, int height,
778*77c1e3ccSAndroid Build Coastguard Worker                                             int dgd_stride, int32_t *dst,
779*77c1e3ccSAndroid Build Coastguard Worker                                             int dst_stride, int bit_depth,
780*77c1e3ccSAndroid Build Coastguard Worker                                             int sgr_params_idx,
781*77c1e3ccSAndroid Build Coastguard Worker                                             int radius_idx) {
782*77c1e3ccSAndroid Build Coastguard Worker   const int width_ext = width + 2 * SGRPROJ_BORDER_HORZ;
783*77c1e3ccSAndroid Build Coastguard Worker   // Adjusting the stride of A and B here appears to avoid bad cache effects,
784*77c1e3ccSAndroid Build Coastguard Worker   // leading to a significant speed improvement.
785*77c1e3ccSAndroid Build Coastguard Worker   // We also align the stride to a multiple of 16 bytes, for consistency
786*77c1e3ccSAndroid Build Coastguard Worker   // with the SIMD version of this function.
787*77c1e3ccSAndroid Build Coastguard Worker   int buf_stride = ((width_ext + 3) & ~3) + 16;
788*77c1e3ccSAndroid Build Coastguard Worker   int32_t A_[RESTORATION_PROC_UNIT_PELS];
789*77c1e3ccSAndroid Build Coastguard Worker   int32_t B_[RESTORATION_PROC_UNIT_PELS];
790*77c1e3ccSAndroid Build Coastguard Worker   int32_t *A = A_;
791*77c1e3ccSAndroid Build Coastguard Worker   int32_t *B = B_;
792*77c1e3ccSAndroid Build Coastguard Worker   int i, j;
793*77c1e3ccSAndroid Build Coastguard Worker   calculate_intermediate_result(dgd, width, height, dgd_stride, bit_depth,
794*77c1e3ccSAndroid Build Coastguard Worker                                 sgr_params_idx, radius_idx, 0, A, B);
795*77c1e3ccSAndroid Build Coastguard Worker   A += SGRPROJ_BORDER_VERT * buf_stride + SGRPROJ_BORDER_HORZ;
796*77c1e3ccSAndroid Build Coastguard Worker   B += SGRPROJ_BORDER_VERT * buf_stride + SGRPROJ_BORDER_HORZ;
797*77c1e3ccSAndroid Build Coastguard Worker 
798*77c1e3ccSAndroid Build Coastguard Worker   // Use the A[] and B[] arrays to calculate the filtered image
799*77c1e3ccSAndroid Build Coastguard Worker   for (i = 0; i < height; ++i) {
800*77c1e3ccSAndroid Build Coastguard Worker     for (j = 0; j < width; ++j) {
801*77c1e3ccSAndroid Build Coastguard Worker       const int k = i * buf_stride + j;
802*77c1e3ccSAndroid Build Coastguard Worker       const int l = i * dgd_stride + j;
803*77c1e3ccSAndroid Build Coastguard Worker       const int m = i * dst_stride + j;
804*77c1e3ccSAndroid Build Coastguard Worker       const int nb = 5;
805*77c1e3ccSAndroid Build Coastguard Worker       const int32_t a =
806*77c1e3ccSAndroid Build Coastguard Worker           (A[k] + A[k - 1] + A[k + 1] + A[k - buf_stride] + A[k + buf_stride]) *
807*77c1e3ccSAndroid Build Coastguard Worker               4 +
808*77c1e3ccSAndroid Build Coastguard Worker           (A[k - 1 - buf_stride] + A[k - 1 + buf_stride] +
809*77c1e3ccSAndroid Build Coastguard Worker            A[k + 1 - buf_stride] + A[k + 1 + buf_stride]) *
810*77c1e3ccSAndroid Build Coastguard Worker               3;
811*77c1e3ccSAndroid Build Coastguard Worker       const int32_t b =
812*77c1e3ccSAndroid Build Coastguard Worker           (B[k] + B[k - 1] + B[k + 1] + B[k - buf_stride] + B[k + buf_stride]) *
813*77c1e3ccSAndroid Build Coastguard Worker               4 +
814*77c1e3ccSAndroid Build Coastguard Worker           (B[k - 1 - buf_stride] + B[k - 1 + buf_stride] +
815*77c1e3ccSAndroid Build Coastguard Worker            B[k + 1 - buf_stride] + B[k + 1 + buf_stride]) *
816*77c1e3ccSAndroid Build Coastguard Worker               3;
817*77c1e3ccSAndroid Build Coastguard Worker       const int32_t v = a * dgd[l] + b;
818*77c1e3ccSAndroid Build Coastguard Worker       dst[m] = ROUND_POWER_OF_TWO(v, SGRPROJ_SGR_BITS + nb - SGRPROJ_RST_BITS);
819*77c1e3ccSAndroid Build Coastguard Worker     }
820*77c1e3ccSAndroid Build Coastguard Worker   }
821*77c1e3ccSAndroid Build Coastguard Worker }
822*77c1e3ccSAndroid Build Coastguard Worker 
av1_selfguided_restoration_c(const uint8_t * dgd8,int width,int height,int dgd_stride,int32_t * flt0,int32_t * flt1,int flt_stride,int sgr_params_idx,int bit_depth,int highbd)823*77c1e3ccSAndroid Build Coastguard Worker int av1_selfguided_restoration_c(const uint8_t *dgd8, int width, int height,
824*77c1e3ccSAndroid Build Coastguard Worker                                  int dgd_stride, int32_t *flt0, int32_t *flt1,
825*77c1e3ccSAndroid Build Coastguard Worker                                  int flt_stride, int sgr_params_idx,
826*77c1e3ccSAndroid Build Coastguard Worker                                  int bit_depth, int highbd) {
827*77c1e3ccSAndroid Build Coastguard Worker   int32_t dgd32_[RESTORATION_PROC_UNIT_PELS];
828*77c1e3ccSAndroid Build Coastguard Worker   const int dgd32_stride = width + 2 * SGRPROJ_BORDER_HORZ;
829*77c1e3ccSAndroid Build Coastguard Worker   int32_t *dgd32 =
830*77c1e3ccSAndroid Build Coastguard Worker       dgd32_ + dgd32_stride * SGRPROJ_BORDER_VERT + SGRPROJ_BORDER_HORZ;
831*77c1e3ccSAndroid Build Coastguard Worker 
832*77c1e3ccSAndroid Build Coastguard Worker   if (highbd) {
833*77c1e3ccSAndroid Build Coastguard Worker     const uint16_t *dgd16 = CONVERT_TO_SHORTPTR(dgd8);
834*77c1e3ccSAndroid Build Coastguard Worker     for (int i = -SGRPROJ_BORDER_VERT; i < height + SGRPROJ_BORDER_VERT; ++i) {
835*77c1e3ccSAndroid Build Coastguard Worker       for (int j = -SGRPROJ_BORDER_HORZ; j < width + SGRPROJ_BORDER_HORZ; ++j) {
836*77c1e3ccSAndroid Build Coastguard Worker         dgd32[i * dgd32_stride + j] = dgd16[i * dgd_stride + j];
837*77c1e3ccSAndroid Build Coastguard Worker       }
838*77c1e3ccSAndroid Build Coastguard Worker     }
839*77c1e3ccSAndroid Build Coastguard Worker   } else {
840*77c1e3ccSAndroid Build Coastguard Worker     for (int i = -SGRPROJ_BORDER_VERT; i < height + SGRPROJ_BORDER_VERT; ++i) {
841*77c1e3ccSAndroid Build Coastguard Worker       for (int j = -SGRPROJ_BORDER_HORZ; j < width + SGRPROJ_BORDER_HORZ; ++j) {
842*77c1e3ccSAndroid Build Coastguard Worker         dgd32[i * dgd32_stride + j] = dgd8[i * dgd_stride + j];
843*77c1e3ccSAndroid Build Coastguard Worker       }
844*77c1e3ccSAndroid Build Coastguard Worker     }
845*77c1e3ccSAndroid Build Coastguard Worker   }
846*77c1e3ccSAndroid Build Coastguard Worker 
847*77c1e3ccSAndroid Build Coastguard Worker   const sgr_params_type *const params = &av1_sgr_params[sgr_params_idx];
848*77c1e3ccSAndroid Build Coastguard Worker   // If params->r == 0 we skip the corresponding filter. We only allow one of
849*77c1e3ccSAndroid Build Coastguard Worker   // the radii to be 0, as having both equal to 0 would be equivalent to
850*77c1e3ccSAndroid Build Coastguard Worker   // skipping SGR entirely.
851*77c1e3ccSAndroid Build Coastguard Worker   assert(!(params->r[0] == 0 && params->r[1] == 0));
852*77c1e3ccSAndroid Build Coastguard Worker 
853*77c1e3ccSAndroid Build Coastguard Worker   if (params->r[0] > 0)
854*77c1e3ccSAndroid Build Coastguard Worker     selfguided_restoration_fast_internal(dgd32, width, height, dgd32_stride,
855*77c1e3ccSAndroid Build Coastguard Worker                                          flt0, flt_stride, bit_depth,
856*77c1e3ccSAndroid Build Coastguard Worker                                          sgr_params_idx, 0);
857*77c1e3ccSAndroid Build Coastguard Worker   if (params->r[1] > 0)
858*77c1e3ccSAndroid Build Coastguard Worker     selfguided_restoration_internal(dgd32, width, height, dgd32_stride, flt1,
859*77c1e3ccSAndroid Build Coastguard Worker                                     flt_stride, bit_depth, sgr_params_idx, 1);
860*77c1e3ccSAndroid Build Coastguard Worker   return 0;
861*77c1e3ccSAndroid Build Coastguard Worker }
862*77c1e3ccSAndroid Build Coastguard Worker 
av1_apply_selfguided_restoration_c(const uint8_t * dat8,int width,int height,int stride,int eps,const int * xqd,uint8_t * dst8,int dst_stride,int32_t * tmpbuf,int bit_depth,int highbd)863*77c1e3ccSAndroid Build Coastguard Worker int av1_apply_selfguided_restoration_c(const uint8_t *dat8, int width,
864*77c1e3ccSAndroid Build Coastguard Worker                                        int height, int stride, int eps,
865*77c1e3ccSAndroid Build Coastguard Worker                                        const int *xqd, uint8_t *dst8,
866*77c1e3ccSAndroid Build Coastguard Worker                                        int dst_stride, int32_t *tmpbuf,
867*77c1e3ccSAndroid Build Coastguard Worker                                        int bit_depth, int highbd) {
868*77c1e3ccSAndroid Build Coastguard Worker   int32_t *flt0 = tmpbuf;
869*77c1e3ccSAndroid Build Coastguard Worker   int32_t *flt1 = flt0 + RESTORATION_UNITPELS_MAX;
870*77c1e3ccSAndroid Build Coastguard Worker   assert(width * height <= RESTORATION_UNITPELS_MAX);
871*77c1e3ccSAndroid Build Coastguard Worker 
872*77c1e3ccSAndroid Build Coastguard Worker   const int ret = av1_selfguided_restoration_c(
873*77c1e3ccSAndroid Build Coastguard Worker       dat8, width, height, stride, flt0, flt1, width, eps, bit_depth, highbd);
874*77c1e3ccSAndroid Build Coastguard Worker   if (ret != 0) return ret;
875*77c1e3ccSAndroid Build Coastguard Worker   const sgr_params_type *const params = &av1_sgr_params[eps];
876*77c1e3ccSAndroid Build Coastguard Worker   int xq[2];
877*77c1e3ccSAndroid Build Coastguard Worker   av1_decode_xq(xqd, xq, params);
878*77c1e3ccSAndroid Build Coastguard Worker   for (int i = 0; i < height; ++i) {
879*77c1e3ccSAndroid Build Coastguard Worker     for (int j = 0; j < width; ++j) {
880*77c1e3ccSAndroid Build Coastguard Worker       const int k = i * width + j;
881*77c1e3ccSAndroid Build Coastguard Worker       uint8_t *dst8ij = dst8 + i * dst_stride + j;
882*77c1e3ccSAndroid Build Coastguard Worker       const uint8_t *dat8ij = dat8 + i * stride + j;
883*77c1e3ccSAndroid Build Coastguard Worker 
884*77c1e3ccSAndroid Build Coastguard Worker       const uint16_t pre_u = highbd ? *CONVERT_TO_SHORTPTR(dat8ij) : *dat8ij;
885*77c1e3ccSAndroid Build Coastguard Worker       const int32_t u = (int32_t)pre_u << SGRPROJ_RST_BITS;
886*77c1e3ccSAndroid Build Coastguard Worker       int32_t v = u << SGRPROJ_PRJ_BITS;
887*77c1e3ccSAndroid Build Coastguard Worker       // If params->r == 0 then we skipped the filtering in
888*77c1e3ccSAndroid Build Coastguard Worker       // av1_selfguided_restoration_c, i.e. flt[k] == u
889*77c1e3ccSAndroid Build Coastguard Worker       if (params->r[0] > 0) v += xq[0] * (flt0[k] - u);
890*77c1e3ccSAndroid Build Coastguard Worker       if (params->r[1] > 0) v += xq[1] * (flt1[k] - u);
891*77c1e3ccSAndroid Build Coastguard Worker       const int16_t w =
892*77c1e3ccSAndroid Build Coastguard Worker           (int16_t)ROUND_POWER_OF_TWO(v, SGRPROJ_PRJ_BITS + SGRPROJ_RST_BITS);
893*77c1e3ccSAndroid Build Coastguard Worker 
894*77c1e3ccSAndroid Build Coastguard Worker       const uint16_t out = clip_pixel_highbd(w, bit_depth);
895*77c1e3ccSAndroid Build Coastguard Worker       if (highbd)
896*77c1e3ccSAndroid Build Coastguard Worker         *CONVERT_TO_SHORTPTR(dst8ij) = out;
897*77c1e3ccSAndroid Build Coastguard Worker       else
898*77c1e3ccSAndroid Build Coastguard Worker         *dst8ij = (uint8_t)out;
899*77c1e3ccSAndroid Build Coastguard Worker     }
900*77c1e3ccSAndroid Build Coastguard Worker   }
901*77c1e3ccSAndroid Build Coastguard Worker   return 0;
902*77c1e3ccSAndroid Build Coastguard Worker }
903*77c1e3ccSAndroid Build Coastguard Worker 
sgrproj_filter_stripe(const RestorationUnitInfo * rui,int stripe_width,int stripe_height,int procunit_width,const uint8_t * src,int src_stride,uint8_t * dst,int dst_stride,int32_t * tmpbuf,int bit_depth,struct aom_internal_error_info * error_info)904*77c1e3ccSAndroid Build Coastguard Worker static void sgrproj_filter_stripe(const RestorationUnitInfo *rui,
905*77c1e3ccSAndroid Build Coastguard Worker                                   int stripe_width, int stripe_height,
906*77c1e3ccSAndroid Build Coastguard Worker                                   int procunit_width, const uint8_t *src,
907*77c1e3ccSAndroid Build Coastguard Worker                                   int src_stride, uint8_t *dst, int dst_stride,
908*77c1e3ccSAndroid Build Coastguard Worker                                   int32_t *tmpbuf, int bit_depth,
909*77c1e3ccSAndroid Build Coastguard Worker                                   struct aom_internal_error_info *error_info) {
910*77c1e3ccSAndroid Build Coastguard Worker   (void)bit_depth;
911*77c1e3ccSAndroid Build Coastguard Worker   assert(bit_depth == 8);
912*77c1e3ccSAndroid Build Coastguard Worker 
913*77c1e3ccSAndroid Build Coastguard Worker   for (int j = 0; j < stripe_width; j += procunit_width) {
914*77c1e3ccSAndroid Build Coastguard Worker     int w = AOMMIN(procunit_width, stripe_width - j);
915*77c1e3ccSAndroid Build Coastguard Worker     if (av1_apply_selfguided_restoration(
916*77c1e3ccSAndroid Build Coastguard Worker             src + j, w, stripe_height, src_stride, rui->sgrproj_info.ep,
917*77c1e3ccSAndroid Build Coastguard Worker             rui->sgrproj_info.xqd, dst + j, dst_stride, tmpbuf, bit_depth,
918*77c1e3ccSAndroid Build Coastguard Worker             0) != 0) {
919*77c1e3ccSAndroid Build Coastguard Worker       aom_internal_error(
920*77c1e3ccSAndroid Build Coastguard Worker           error_info, AOM_CODEC_MEM_ERROR,
921*77c1e3ccSAndroid Build Coastguard Worker           "Error allocating buffer in av1_apply_selfguided_restoration");
922*77c1e3ccSAndroid Build Coastguard Worker     }
923*77c1e3ccSAndroid Build Coastguard Worker   }
924*77c1e3ccSAndroid Build Coastguard Worker }
925*77c1e3ccSAndroid Build Coastguard Worker 
926*77c1e3ccSAndroid Build Coastguard Worker #if CONFIG_AV1_HIGHBITDEPTH
wiener_filter_stripe_highbd(const RestorationUnitInfo * rui,int stripe_width,int stripe_height,int procunit_width,const uint8_t * src8,int src_stride,uint8_t * dst8,int dst_stride,int32_t * tmpbuf,int bit_depth,struct aom_internal_error_info * error_info)927*77c1e3ccSAndroid Build Coastguard Worker static void wiener_filter_stripe_highbd(
928*77c1e3ccSAndroid Build Coastguard Worker     const RestorationUnitInfo *rui, int stripe_width, int stripe_height,
929*77c1e3ccSAndroid Build Coastguard Worker     int procunit_width, const uint8_t *src8, int src_stride, uint8_t *dst8,
930*77c1e3ccSAndroid Build Coastguard Worker     int dst_stride, int32_t *tmpbuf, int bit_depth,
931*77c1e3ccSAndroid Build Coastguard Worker     struct aom_internal_error_info *error_info) {
932*77c1e3ccSAndroid Build Coastguard Worker   (void)tmpbuf;
933*77c1e3ccSAndroid Build Coastguard Worker   (void)error_info;
934*77c1e3ccSAndroid Build Coastguard Worker   const WienerConvolveParams conv_params = get_conv_params_wiener(bit_depth);
935*77c1e3ccSAndroid Build Coastguard Worker 
936*77c1e3ccSAndroid Build Coastguard Worker   for (int j = 0; j < stripe_width; j += procunit_width) {
937*77c1e3ccSAndroid Build Coastguard Worker     int w = AOMMIN(procunit_width, (stripe_width - j + 15) & ~15);
938*77c1e3ccSAndroid Build Coastguard Worker     const uint8_t *src8_p = src8 + j;
939*77c1e3ccSAndroid Build Coastguard Worker     uint8_t *dst8_p = dst8 + j;
940*77c1e3ccSAndroid Build Coastguard Worker     av1_highbd_wiener_convolve_add_src(src8_p, src_stride, dst8_p, dst_stride,
941*77c1e3ccSAndroid Build Coastguard Worker                                        rui->wiener_info.hfilter, 16,
942*77c1e3ccSAndroid Build Coastguard Worker                                        rui->wiener_info.vfilter, 16, w,
943*77c1e3ccSAndroid Build Coastguard Worker                                        stripe_height, &conv_params, bit_depth);
944*77c1e3ccSAndroid Build Coastguard Worker   }
945*77c1e3ccSAndroid Build Coastguard Worker }
946*77c1e3ccSAndroid Build Coastguard Worker 
sgrproj_filter_stripe_highbd(const RestorationUnitInfo * rui,int stripe_width,int stripe_height,int procunit_width,const uint8_t * src8,int src_stride,uint8_t * dst8,int dst_stride,int32_t * tmpbuf,int bit_depth,struct aom_internal_error_info * error_info)947*77c1e3ccSAndroid Build Coastguard Worker static void sgrproj_filter_stripe_highbd(
948*77c1e3ccSAndroid Build Coastguard Worker     const RestorationUnitInfo *rui, int stripe_width, int stripe_height,
949*77c1e3ccSAndroid Build Coastguard Worker     int procunit_width, const uint8_t *src8, int src_stride, uint8_t *dst8,
950*77c1e3ccSAndroid Build Coastguard Worker     int dst_stride, int32_t *tmpbuf, int bit_depth,
951*77c1e3ccSAndroid Build Coastguard Worker     struct aom_internal_error_info *error_info) {
952*77c1e3ccSAndroid Build Coastguard Worker   for (int j = 0; j < stripe_width; j += procunit_width) {
953*77c1e3ccSAndroid Build Coastguard Worker     int w = AOMMIN(procunit_width, stripe_width - j);
954*77c1e3ccSAndroid Build Coastguard Worker     if (av1_apply_selfguided_restoration(
955*77c1e3ccSAndroid Build Coastguard Worker             src8 + j, w, stripe_height, src_stride, rui->sgrproj_info.ep,
956*77c1e3ccSAndroid Build Coastguard Worker             rui->sgrproj_info.xqd, dst8 + j, dst_stride, tmpbuf, bit_depth,
957*77c1e3ccSAndroid Build Coastguard Worker             1) != 0) {
958*77c1e3ccSAndroid Build Coastguard Worker       aom_internal_error(
959*77c1e3ccSAndroid Build Coastguard Worker           error_info, AOM_CODEC_MEM_ERROR,
960*77c1e3ccSAndroid Build Coastguard Worker           "Error allocating buffer in av1_apply_selfguided_restoration");
961*77c1e3ccSAndroid Build Coastguard Worker     }
962*77c1e3ccSAndroid Build Coastguard Worker   }
963*77c1e3ccSAndroid Build Coastguard Worker }
964*77c1e3ccSAndroid Build Coastguard Worker #endif  // CONFIG_AV1_HIGHBITDEPTH
965*77c1e3ccSAndroid Build Coastguard Worker 
966*77c1e3ccSAndroid Build Coastguard Worker typedef void (*stripe_filter_fun)(const RestorationUnitInfo *rui,
967*77c1e3ccSAndroid Build Coastguard Worker                                   int stripe_width, int stripe_height,
968*77c1e3ccSAndroid Build Coastguard Worker                                   int procunit_width, const uint8_t *src,
969*77c1e3ccSAndroid Build Coastguard Worker                                   int src_stride, uint8_t *dst, int dst_stride,
970*77c1e3ccSAndroid Build Coastguard Worker                                   int32_t *tmpbuf, int bit_depth,
971*77c1e3ccSAndroid Build Coastguard Worker                                   struct aom_internal_error_info *error_info);
972*77c1e3ccSAndroid Build Coastguard Worker 
973*77c1e3ccSAndroid Build Coastguard Worker #if CONFIG_AV1_HIGHBITDEPTH
974*77c1e3ccSAndroid Build Coastguard Worker #define NUM_STRIPE_FILTERS 4
975*77c1e3ccSAndroid Build Coastguard Worker static const stripe_filter_fun stripe_filters[NUM_STRIPE_FILTERS] = {
976*77c1e3ccSAndroid Build Coastguard Worker   wiener_filter_stripe, sgrproj_filter_stripe, wiener_filter_stripe_highbd,
977*77c1e3ccSAndroid Build Coastguard Worker   sgrproj_filter_stripe_highbd
978*77c1e3ccSAndroid Build Coastguard Worker };
979*77c1e3ccSAndroid Build Coastguard Worker #else
980*77c1e3ccSAndroid Build Coastguard Worker #define NUM_STRIPE_FILTERS 2
981*77c1e3ccSAndroid Build Coastguard Worker static const stripe_filter_fun stripe_filters[NUM_STRIPE_FILTERS] = {
982*77c1e3ccSAndroid Build Coastguard Worker   wiener_filter_stripe, sgrproj_filter_stripe
983*77c1e3ccSAndroid Build Coastguard Worker };
984*77c1e3ccSAndroid Build Coastguard Worker #endif  // CONFIG_AV1_HIGHBITDEPTH
985*77c1e3ccSAndroid Build Coastguard Worker 
986*77c1e3ccSAndroid Build Coastguard Worker // Filter one restoration unit
av1_loop_restoration_filter_unit(const RestorationTileLimits * limits,const RestorationUnitInfo * rui,const RestorationStripeBoundaries * rsb,RestorationLineBuffers * rlbs,int plane_w,int plane_h,int ss_x,int ss_y,int highbd,int bit_depth,uint8_t * data8,int stride,uint8_t * dst8,int dst_stride,int32_t * tmpbuf,int optimized_lr,struct aom_internal_error_info * error_info)987*77c1e3ccSAndroid Build Coastguard Worker void av1_loop_restoration_filter_unit(
988*77c1e3ccSAndroid Build Coastguard Worker     const RestorationTileLimits *limits, const RestorationUnitInfo *rui,
989*77c1e3ccSAndroid Build Coastguard Worker     const RestorationStripeBoundaries *rsb, RestorationLineBuffers *rlbs,
990*77c1e3ccSAndroid Build Coastguard Worker     int plane_w, int plane_h, int ss_x, int ss_y, int highbd, int bit_depth,
991*77c1e3ccSAndroid Build Coastguard Worker     uint8_t *data8, int stride, uint8_t *dst8, int dst_stride, int32_t *tmpbuf,
992*77c1e3ccSAndroid Build Coastguard Worker     int optimized_lr, struct aom_internal_error_info *error_info) {
993*77c1e3ccSAndroid Build Coastguard Worker   RestorationType unit_rtype = rui->restoration_type;
994*77c1e3ccSAndroid Build Coastguard Worker 
995*77c1e3ccSAndroid Build Coastguard Worker   int unit_h = limits->v_end - limits->v_start;
996*77c1e3ccSAndroid Build Coastguard Worker   int unit_w = limits->h_end - limits->h_start;
997*77c1e3ccSAndroid Build Coastguard Worker   uint8_t *data8_tl =
998*77c1e3ccSAndroid Build Coastguard Worker       data8 + limits->v_start * (ptrdiff_t)stride + limits->h_start;
999*77c1e3ccSAndroid Build Coastguard Worker   uint8_t *dst8_tl =
1000*77c1e3ccSAndroid Build Coastguard Worker       dst8 + limits->v_start * (ptrdiff_t)dst_stride + limits->h_start;
1001*77c1e3ccSAndroid Build Coastguard Worker 
1002*77c1e3ccSAndroid Build Coastguard Worker   if (unit_rtype == RESTORE_NONE) {
1003*77c1e3ccSAndroid Build Coastguard Worker     copy_rest_unit(unit_w, unit_h, data8_tl, stride, dst8_tl, dst_stride,
1004*77c1e3ccSAndroid Build Coastguard Worker                    highbd);
1005*77c1e3ccSAndroid Build Coastguard Worker     return;
1006*77c1e3ccSAndroid Build Coastguard Worker   }
1007*77c1e3ccSAndroid Build Coastguard Worker 
1008*77c1e3ccSAndroid Build Coastguard Worker   const int filter_idx = 2 * highbd + (unit_rtype == RESTORE_SGRPROJ);
1009*77c1e3ccSAndroid Build Coastguard Worker   assert(filter_idx < NUM_STRIPE_FILTERS);
1010*77c1e3ccSAndroid Build Coastguard Worker   const stripe_filter_fun stripe_filter = stripe_filters[filter_idx];
1011*77c1e3ccSAndroid Build Coastguard Worker 
1012*77c1e3ccSAndroid Build Coastguard Worker   const int procunit_width = RESTORATION_PROC_UNIT_SIZE >> ss_x;
1013*77c1e3ccSAndroid Build Coastguard Worker 
1014*77c1e3ccSAndroid Build Coastguard Worker   // Filter the whole image one stripe at a time
1015*77c1e3ccSAndroid Build Coastguard Worker   RestorationTileLimits remaining_stripes = *limits;
1016*77c1e3ccSAndroid Build Coastguard Worker   int i = 0;
1017*77c1e3ccSAndroid Build Coastguard Worker   while (i < unit_h) {
1018*77c1e3ccSAndroid Build Coastguard Worker     int copy_above, copy_below;
1019*77c1e3ccSAndroid Build Coastguard Worker     remaining_stripes.v_start = limits->v_start + i;
1020*77c1e3ccSAndroid Build Coastguard Worker 
1021*77c1e3ccSAndroid Build Coastguard Worker     get_stripe_boundary_info(&remaining_stripes, plane_w, plane_h, ss_y,
1022*77c1e3ccSAndroid Build Coastguard Worker                              &copy_above, &copy_below);
1023*77c1e3ccSAndroid Build Coastguard Worker 
1024*77c1e3ccSAndroid Build Coastguard Worker     const int full_stripe_height = RESTORATION_PROC_UNIT_SIZE >> ss_y;
1025*77c1e3ccSAndroid Build Coastguard Worker     const int runit_offset = RESTORATION_UNIT_OFFSET >> ss_y;
1026*77c1e3ccSAndroid Build Coastguard Worker 
1027*77c1e3ccSAndroid Build Coastguard Worker     // Work out where this stripe's boundaries are within
1028*77c1e3ccSAndroid Build Coastguard Worker     // rsb->stripe_boundary_{above,below}
1029*77c1e3ccSAndroid Build Coastguard Worker     const int frame_stripe =
1030*77c1e3ccSAndroid Build Coastguard Worker         (remaining_stripes.v_start + runit_offset) / full_stripe_height;
1031*77c1e3ccSAndroid Build Coastguard Worker     const int rsb_row = RESTORATION_CTX_VERT * frame_stripe;
1032*77c1e3ccSAndroid Build Coastguard Worker 
1033*77c1e3ccSAndroid Build Coastguard Worker     // Calculate this stripe's height, based on two rules:
1034*77c1e3ccSAndroid Build Coastguard Worker     // * The topmost stripe in the frame is 8 luma pixels shorter than usual.
1035*77c1e3ccSAndroid Build Coastguard Worker     // * We can't extend past the end of the current restoration unit
1036*77c1e3ccSAndroid Build Coastguard Worker     const int nominal_stripe_height =
1037*77c1e3ccSAndroid Build Coastguard Worker         full_stripe_height - ((frame_stripe == 0) ? runit_offset : 0);
1038*77c1e3ccSAndroid Build Coastguard Worker     const int h = AOMMIN(nominal_stripe_height,
1039*77c1e3ccSAndroid Build Coastguard Worker                          remaining_stripes.v_end - remaining_stripes.v_start);
1040*77c1e3ccSAndroid Build Coastguard Worker 
1041*77c1e3ccSAndroid Build Coastguard Worker     setup_processing_stripe_boundary(&remaining_stripes, rsb, rsb_row, highbd,
1042*77c1e3ccSAndroid Build Coastguard Worker                                      h, data8, stride, rlbs, copy_above,
1043*77c1e3ccSAndroid Build Coastguard Worker                                      copy_below, optimized_lr);
1044*77c1e3ccSAndroid Build Coastguard Worker 
1045*77c1e3ccSAndroid Build Coastguard Worker     stripe_filter(rui, unit_w, h, procunit_width, data8_tl + i * stride, stride,
1046*77c1e3ccSAndroid Build Coastguard Worker                   dst8_tl + i * dst_stride, dst_stride, tmpbuf, bit_depth,
1047*77c1e3ccSAndroid Build Coastguard Worker                   error_info);
1048*77c1e3ccSAndroid Build Coastguard Worker 
1049*77c1e3ccSAndroid Build Coastguard Worker     restore_processing_stripe_boundary(&remaining_stripes, rlbs, highbd, h,
1050*77c1e3ccSAndroid Build Coastguard Worker                                        data8, stride, copy_above, copy_below,
1051*77c1e3ccSAndroid Build Coastguard Worker                                        optimized_lr);
1052*77c1e3ccSAndroid Build Coastguard Worker 
1053*77c1e3ccSAndroid Build Coastguard Worker     i += h;
1054*77c1e3ccSAndroid Build Coastguard Worker   }
1055*77c1e3ccSAndroid Build Coastguard Worker }
1056*77c1e3ccSAndroid Build Coastguard Worker 
// Per-unit callback: filters restoration unit `rest_unit_idx` of a plane.
//
// `priv` is a FilterFrameCtxt holding the plane's restoration info, geometry
// and source/destination buffers; everything is unpacked from it and handed
// to av1_loop_restoration_filter_unit().
static void filter_frame_on_unit(const RestorationTileLimits *limits,
                                 int rest_unit_idx, void *priv, int32_t *tmpbuf,
                                 RestorationLineBuffers *rlbs,
                                 struct aom_internal_error_info *error_info) {
  FilterFrameCtxt *const ctxt = (FilterFrameCtxt *)priv;
  const RestorationInfo *const rsi = ctxt->rsi;
  const RestorationUnitInfo *const unit = &rsi->unit_info[rest_unit_idx];

  av1_loop_restoration_filter_unit(
      limits, unit, &rsi->boundaries, rlbs, ctxt->plane_w, ctxt->plane_h,
      ctxt->ss_x, ctxt->ss_y, ctxt->highbd, ctxt->bit_depth, ctxt->data8,
      ctxt->data_stride, ctxt->dst8, ctxt->dst_stride, tmpbuf,
      rsi->optimized_lr, error_info);
}
1070*77c1e3ccSAndroid Build Coastguard Worker 
// Prepares `lr_ctxt` for loop-restoration filtering of `frame`.
//
// Steps performed:
//  - cm->rst_frame becomes the destination buffer and is (re)allocated using
//    the luma crop dimensions of `frame`; an allocation failure is reported
//    through aom_internal_error() as AOM_CODEC_MEM_ERROR.
//  - filter_frame_on_unit is installed as the per-unit visitor.
//  - For each of the first `num_planes` planes, the plane's RestorationInfo
//    gets `optimized_lr` stored in it and is attached to the plane context.
//    Planes whose frame_restoration_type is RESTORE_NONE get nothing else;
//    for the others the source plane is passed to av1_extend_frame() with
//    RESTORATION_BORDER and the plane geometry, buffers and strides are
//    recorded in the plane context.
void av1_loop_restoration_filter_frame_init(AV1LrStruct *lr_ctxt,
                                            YV12_BUFFER_CONFIG *frame,
                                            AV1_COMMON *cm, int optimized_lr,
                                            int num_planes) {
  const SequenceHeader *const seq = cm->seq_params;
  const int use_highbd = seq->use_highbitdepth;
  const int depth = seq->bit_depth;

  // Filtered output is written to cm->rst_frame.
  lr_ctxt->dst = &cm->rst_frame;

  const int luma_w = frame->crop_widths[0];
  const int luma_h = frame->crop_heights[0];
  const int alloc_failed =
      aom_realloc_frame_buffer(lr_ctxt->dst, luma_w, luma_h,
                               seq->subsampling_x, seq->subsampling_y,
                               use_highbd, AOM_RESTORATION_FRAME_BORDER,
                               cm->features.byte_alignment, NULL, NULL, NULL,
                               false, 0) != AOM_CODEC_OK;
  if (alloc_failed) {
    aom_internal_error(cm->error, AOM_CODEC_MEM_ERROR,
                       "Failed to allocate restoration dst buffer");
  }

  lr_ctxt->on_rest_unit = filter_frame_on_unit;
  lr_ctxt->frame = frame;

  for (int plane = 0; plane < num_planes; ++plane) {
    RestorationInfo *const plane_rsi = &cm->rst_info[plane];
    FilterFrameCtxt *const plane_ctxt = &lr_ctxt->ctxt[plane];

    plane_rsi->optimized_lr = optimized_lr;
    plane_ctxt->rsi = plane_rsi;

    // Planes that are not restored need no further setup.
    if (plane_rsi->frame_restoration_type == RESTORE_NONE) continue;

    const int is_uv = plane > 0;
    const int src_stride = frame->strides[is_uv];

    int width, height;
    av1_get_upsampled_plane_size(cm, is_uv, &width, &height);
    assert(width == frame->crop_widths[is_uv]);
    assert(height == frame->crop_heights[is_uv]);

    av1_extend_frame(frame->buffers[plane], width, height, src_stride,
                     RESTORATION_BORDER, RESTORATION_BORDER, use_highbd);

    // Source and destination buffers with their strides.
    plane_ctxt->data8 = frame->buffers[plane];
    plane_ctxt->data_stride = src_stride;
    plane_ctxt->dst8 = lr_ctxt->dst->buffers[plane];
    plane_ctxt->dst_stride = lr_ctxt->dst->strides[is_uv];

    // Plane geometry and sample format. Subsampling only applies to chroma.
    plane_ctxt->plane_w = width;
    plane_ctxt->plane_h = height;
    plane_ctxt->ss_x = is_uv ? (seq->subsampling_x != 0) : 0;
    plane_ctxt->ss_y = is_uv ? (seq->subsampling_y != 0) : 0;
    plane_ctxt->highbd = use_highbd;
    plane_ctxt->bit_depth = depth;
  }
}
1125*77c1e3ccSAndroid Build Coastguard Worker 
// Copies the filtered planes from loop_rest_ctxt->dst to
// loop_rest_ctxt->frame.
//
// Only planes whose frame_restoration_type is not RESTORE_NONE are copied, and
// the copied region is [0, plane_w) x [0, plane_h) as recorded in the plane's
// FilterFrameCtxt. `num_planes` must not exceed 3.
void av1_loop_restoration_copy_planes(AV1LrStruct *loop_rest_ctxt,
                                      AV1_COMMON *cm, int num_planes) {
  typedef void (*plane_copy_fn)(const YV12_BUFFER_CONFIG *src_ybc,
                                YV12_BUFFER_CONFIG *dst_ybc, int hstart,
                                int hend, int vstart, int vend);
  // One copy routine per plane index (Y, U, V).
  static const plane_copy_fn plane_copiers[3] = {
    aom_yv12_partial_coloc_copy_y,
    aom_yv12_partial_coloc_copy_u,
    aom_yv12_partial_coloc_copy_v,
  };
  assert(num_planes <= 3);

  for (int plane = 0; plane < num_planes; ++plane) {
    if (cm->rst_info[plane].frame_restoration_type != RESTORE_NONE) {
      FilterFrameCtxt *const plane_ctxt = &loop_rest_ctxt->ctxt[plane];
      plane_copiers[plane](loop_rest_ctxt->dst, loop_rest_ctxt->frame, 0,
                           plane_ctxt->plane_w, 0, plane_ctxt->plane_h);
    }
  }
}
1142*77c1e3ccSAndroid Build Coastguard Worker 
1143*77c1e3ccSAndroid Build Coastguard Worker // Call on_rest_unit for each loop restoration unit in the plane.
foreach_rest_unit_in_plane(const struct AV1Common * cm,int plane,rest_unit_visitor_t on_rest_unit,void * priv,int32_t * tmpbuf,RestorationLineBuffers * rlbs)1144*77c1e3ccSAndroid Build Coastguard Worker static void foreach_rest_unit_in_plane(const struct AV1Common *cm, int plane,
1145*77c1e3ccSAndroid Build Coastguard Worker                                        rest_unit_visitor_t on_rest_unit,
1146*77c1e3ccSAndroid Build Coastguard Worker                                        void *priv, int32_t *tmpbuf,
1147*77c1e3ccSAndroid Build Coastguard Worker                                        RestorationLineBuffers *rlbs) {
1148*77c1e3ccSAndroid Build Coastguard Worker   const RestorationInfo *rsi = &cm->rst_info[plane];
1149*77c1e3ccSAndroid Build Coastguard Worker   const int hnum_rest_units = rsi->horz_units;
1150*77c1e3ccSAndroid Build Coastguard Worker   const int vnum_rest_units = rsi->vert_units;
1151*77c1e3ccSAndroid Build Coastguard Worker   const int unit_size = rsi->restoration_unit_size;
1152*77c1e3ccSAndroid Build Coastguard Worker 
1153*77c1e3ccSAndroid Build Coastguard Worker   const int is_uv = plane > 0;
1154*77c1e3ccSAndroid Build Coastguard Worker   const int ss_y = is_uv && cm->seq_params->subsampling_y;
1155*77c1e3ccSAndroid Build Coastguard Worker   const int ext_size = unit_size * 3 / 2;
1156*77c1e3ccSAndroid Build Coastguard Worker   int plane_w, plane_h;
1157*77c1e3ccSAndroid Build Coastguard Worker   av1_get_upsampled_plane_size(cm, is_uv, &plane_w, &plane_h);
1158*77c1e3ccSAndroid Build Coastguard Worker 
1159*77c1e3ccSAndroid Build Coastguard Worker   int y0 = 0, i = 0;
1160*77c1e3ccSAndroid Build Coastguard Worker   while (y0 < plane_h) {
1161*77c1e3ccSAndroid Build Coastguard Worker     int remaining_h = plane_h - y0;
1162*77c1e3ccSAndroid Build Coastguard Worker     int h = (remaining_h < ext_size) ? remaining_h : unit_size;
1163*77c1e3ccSAndroid Build Coastguard Worker 
1164*77c1e3ccSAndroid Build Coastguard Worker     RestorationTileLimits limits;
1165*77c1e3ccSAndroid Build Coastguard Worker     limits.v_start = y0;
1166*77c1e3ccSAndroid Build Coastguard Worker     limits.v_end = y0 + h;
1167*77c1e3ccSAndroid Build Coastguard Worker     assert(limits.v_end <= plane_h);
1168*77c1e3ccSAndroid Build Coastguard Worker     // Offset upwards to align with the restoration processing stripe
1169*77c1e3ccSAndroid Build Coastguard Worker     const int voffset = RESTORATION_UNIT_OFFSET >> ss_y;
1170*77c1e3ccSAndroid Build Coastguard Worker     limits.v_start = AOMMAX(0, limits.v_start - voffset);
1171*77c1e3ccSAndroid Build Coastguard Worker     if (limits.v_end < plane_h) limits.v_end -= voffset;
1172*77c1e3ccSAndroid Build Coastguard Worker 
1173*77c1e3ccSAndroid Build Coastguard Worker     av1_foreach_rest_unit_in_row(&limits, plane_w, on_rest_unit, i, unit_size,
1174*77c1e3ccSAndroid Build Coastguard Worker                                  hnum_rest_units, vnum_rest_units, plane, priv,
1175*77c1e3ccSAndroid Build Coastguard Worker                                  tmpbuf, rlbs, av1_lr_sync_read_dummy,
1176*77c1e3ccSAndroid Build Coastguard Worker                                  av1_lr_sync_write_dummy, NULL, cm->error);
1177*77c1e3ccSAndroid Build Coastguard Worker 
1178*77c1e3ccSAndroid Build Coastguard Worker     y0 += h;
1179*77c1e3ccSAndroid Build Coastguard Worker     ++i;
1180*77c1e3ccSAndroid Build Coastguard Worker   }
1181*77c1e3ccSAndroid Build Coastguard Worker }
1182*77c1e3ccSAndroid Build Coastguard Worker 
// Runs lr_ctxt->on_rest_unit over every restoration unit of each plane whose
// frame_restoration_type is not RESTORE_NONE, using the matching per-plane
// context as the visitor's private data and cm->rst_tmpbuf / cm->rlbs as
// scratch storage.
static void foreach_rest_unit_in_planes(AV1LrStruct *lr_ctxt, AV1_COMMON *cm,
                                        int num_planes) {
  FilterFrameCtxt *const plane_ctxts = lr_ctxt->ctxt;

  for (int plane = 0; plane < num_planes; ++plane) {
    if (cm->rst_info[plane].frame_restoration_type != RESTORE_NONE) {
      foreach_rest_unit_in_plane(cm, plane, lr_ctxt->on_rest_unit,
                                 &plane_ctxts[plane], cm->rst_tmpbuf,
                                 cm->rlbs);
    }
  }
}
1196*77c1e3ccSAndroid Build Coastguard Worker 
// Applies loop restoration to `frame`.
//
// `lr_ctxt` must point at an AV1LrStruct. The work happens in three stages:
// initialise the context, filter every restoration unit of every restored
// plane into the destination buffer, then copy the filtered planes back
// into `frame`. Must not be called when cm->features.all_lossless is set.
void av1_loop_restoration_filter_frame(YV12_BUFFER_CONFIG *frame,
                                       AV1_COMMON *cm, int optimized_lr,
                                       void *lr_ctxt) {
  assert(!cm->features.all_lossless);

  AV1LrStruct *const lr = (AV1LrStruct *)lr_ctxt;
  const int plane_count = av1_num_planes(cm);

  av1_loop_restoration_filter_frame_init(lr, frame, cm, optimized_lr,
                                         plane_count);
  foreach_rest_unit_in_planes(lr, cm, plane_count);
  av1_loop_restoration_copy_planes(lr, cm, plane_count);
}
1212*77c1e3ccSAndroid Build Coastguard Worker 
// Visits every restoration unit in row `row_number` of a plane, left to right.
//
// Units are `unit_size` wide, except that a final remainder narrower than 1.5
// units is handled as a single unit. For each unit the horizontal range in
// `limits` is updated (the vertical range is left as supplied by the caller),
// the sync-read callbacks are invoked, `on_rest_unit` is called with unit
// index row_number * hnum_rest_units + column, and finally `on_sync_write` is
// invoked. With CONFIG_MULTITHREAD and more than one worker, the walk stops
// early once lr_sync->lr_mt_exit has been set.
void av1_foreach_rest_unit_in_row(
    RestorationTileLimits *limits, int plane_w,
    rest_unit_visitor_t on_rest_unit, int row_number, int unit_size,
    int hnum_rest_units, int vnum_rest_units, int plane, void *priv,
    int32_t *tmpbuf, RestorationLineBuffers *rlbs, sync_read_fn_t on_sync_read,
    sync_write_fn_t on_sync_write, struct AV1LrSyncData *const lr_sync,
    struct aom_internal_error_info *error_info) {
  const int ext_size = unit_size * 3 / 2;
  const int first_unit_idx = row_number * hnum_rest_units;
  const int has_row_below = (row_number + 1) < vnum_rest_units;

  for (int col = 0, left = 0; left < plane_w; ++col) {
    const int cols_left = plane_w - left;
    const int width = (cols_left >= ext_size) ? unit_size : cols_left;

    limits->h_start = left;
    limits->h_end = left + width;
    assert(limits->h_end <= plane_w);

    // Even-numbered rows need no sync. For odd-numbered rows, loop
    // restoration of the current unit requires the top-right and
    // bottom-right units to have been restored already.
    on_sync_read(lr_sync, row_number, col, plane);  // top-right sync
    if (has_row_below) {
      on_sync_read(lr_sync, row_number + 2, col, plane);  // bottom-right sync
    }

#if CONFIG_MULTITHREAD
    if (lr_sync && lr_sync->num_workers > 1) {
      // Read the exit flag under the job mutex; stop if any worker has hit
      // an error.
      pthread_mutex_lock(lr_sync->job_mutex);
      const bool lr_mt_exit = lr_sync->lr_mt_exit;
      pthread_mutex_unlock(lr_sync->job_mutex);
      if (lr_mt_exit) return;
    }
#endif

    on_rest_unit(limits, first_unit_idx + col, priv, tmpbuf, rlbs, error_info);

    on_sync_write(lr_sync, row_number, col, hnum_rest_units, plane);

    left += width;
  }
}
1260*77c1e3ccSAndroid Build Coastguard Worker 
// No-op sync-read callback for callers that do not need row synchronisation.
// All arguments are ignored.
void av1_lr_sync_read_dummy(void *const lr_sync, int r, int c, int plane) {
  (void)lr_sync, (void)r, (void)c, (void)plane;
}
1267*77c1e3ccSAndroid Build Coastguard Worker 
// No-op sync-write callback for callers that do not need row synchronisation.
// All arguments are ignored.
void av1_lr_sync_write_dummy(void *const lr_sync, int r, int c,
                             const int sb_cols, int plane) {
  (void)lr_sync, (void)r, (void)c, (void)sb_cols, (void)plane;
}
1276*77c1e3ccSAndroid Build Coastguard Worker 
// Computes the half-open range of restoration units,
// [*rcol0, *rcol1) x [*rrow0, *rrow1), associated with the superblock at
// (mi_row, mi_col) in `plane`.
//
// Returns 0 without touching the outputs when `bsize` is not the sequence's
// superblock size. Otherwise the outputs are written and the return value is
// nonzero iff the range is non-empty. All four output pointers must be
// non-NULL.
int av1_loop_restoration_corners_in_sb(const struct AV1Common *cm, int plane,
                                       int mi_row, int mi_col, BLOCK_SIZE bsize,
                                       int *rcol0, int *rcol1, int *rrow0,
                                       int *rrow1) {
  assert(rcol0 && rcol1 && rrow0 && rrow1);

  if (bsize != cm->seq_params->sb_size) return 0;

  assert(!cm->features.all_lossless);

  const int is_uv = plane > 0;
  const RestorationInfo *const plane_rsi = &cm->rst_info[plane];
  const int unit_size = plane_rsi->restoration_unit_size;

  // Bottom/right edge of the superblock in mi units (i.e. the start of the
  // superblock below-right of this one).
  const int mi_row_end = mi_row + mi_size_high[bsize];
  const int mi_col_end = mi_col + mi_size_wide[bsize];

  // Size of one mi unit in this plane, in pixels.
  const int ss_x = is_uv && cm->seq_params->subsampling_x;
  const int ss_y = is_uv && cm->seq_params->subsampling_y;

  // unit index = ceil(mi * num / den). Vertically that is simply
  // mi * mi_size / unit_size. Horizontally, superres has to be accounted for:
  // with m the mi column, D the superres denominator, N the superres
  // numerator and u the upscaled pixel offset, the downscaled pixel offset is
  //
  //   MI_SIZE * m = N / D u
  //
  // hence u = D * MI_SIZE * m / N.
  const int num_y = MI_SIZE >> ss_y;
  const int den_y = unit_size;
  int num_x = MI_SIZE >> ss_x;
  int den_x = unit_size;
  if (av1_superres_scaled(cm)) {
    num_x *= cm->superres_scale_denominator;
    den_x *= SCALE_NUMERATOR;
  }

  // First unit column/row that does not start left of / above the
  // superblock: round the division up (a superblock starting at unit column
  // 10.1 first matches unit column 11).
  *rcol0 = (mi_col * num_x + (den_x - 1)) / den_x;
  *rrow0 = (mi_row * num_y + (den_y - 1)) / den_y;

  // Same computation for the superblock below-right. At the bottom or right
  // frame edge that unit may not exist, so clamp to the unit counts.
  *rcol1 = AOMMIN((mi_col_end * num_x + (den_x - 1)) / den_x,
                  plane_rsi->horz_units);
  *rrow1 = AOMMIN((mi_row_end * num_y + (den_y - 1)) / den_y,
                  plane_rsi->vert_units);

  return *rcol0 < *rcol1 && *rrow0 < *rrow1;
}
1338*77c1e3ccSAndroid Build Coastguard Worker 
// Pads `height` rows to the left and right by `extend` samples each,
// replicating the first sample of a row into the left padding and the sample
// at index width - 1 into the right padding.
//
// `buf` points at the first sample of the first row and is advanced by
// `stride` as a uint8_t pointer between rows in both modes. When
// `use_highbitdepth` is set each row is accessed as uint16_t samples.
static void extend_lines(uint8_t *buf, int width, int height, int stride,
                         int extend, int use_highbitdepth) {
  uint8_t *row = buf;

  if (use_highbitdepth) {
    for (int y = 0; y < height; ++y, row += stride) {
      uint16_t *const row16 = (uint16_t *)row;
      aom_memset16(row16 - extend, row16[0], extend);
      aom_memset16(row16 + width, row16[width - 1], extend);
    }
    return;
  }

  for (int y = 0; y < height; ++y, row += stride) {
    memset(row - extend, row[0], extend);
    memset(row + width, row[width - 1], extend);
  }
}
1354*77c1e3ccSAndroid Build Coastguard Worker 
// Saves RESTORATION_CTX_VERT boundary rows of `plane`, starting at `row` of
// `frame`, into slot `stripe` of the above (is_above != 0) or below stripe
// boundary buffer.
//
// When superres is active the rows are upscaled with
// av1_upscale_normative_rows() while being stored; otherwise they are copied
// verbatim. If only one row is available above the crop border it is
// duplicated into the second slot. Finally every stored row is padded left and
// right by RESTORATION_EXTRA_HORZ samples.
static void save_deblock_boundary_lines(
    const YV12_BUFFER_CONFIG *frame, const AV1_COMMON *cm, int plane, int row,
    int stripe, int use_highbd, int is_above,
    RestorationStripeBoundaries *boundaries) {
  const int is_uv = plane > 0;
  const int superres = av1_superres_scaled(cm);

  // Source rows, addressed in bytes.
  const int src_stride = frame->strides[is_uv] << use_highbd;
  const uint8_t *const src_rows =
      REAL_PTR(use_highbd, frame->buffers[plane]) + row * (ptrdiff_t)src_stride;

  // Destination rows: skip the left padding of the boundary buffer, then
  // advance to this stripe's slot.
  uint8_t *bdry_buf;
  if (is_above) {
    bdry_buf = boundaries->stripe_boundary_above;
  } else {
    bdry_buf = boundaries->stripe_boundary_below;
  }
  const int bdry_stride = boundaries->stripe_boundary_stride << use_highbd;
  uint8_t *const bdry_rows = bdry_buf + (RESTORATION_EXTRA_HORZ << use_highbd) +
                             RESTORATION_CTX_VERT * stripe * bdry_stride;

  // In rare cases a processing stripe ends 1px above the crop border. The
  // deblocked pixels below the stripe are still wanted, but only one "below"
  // row exists; it is fetched once and duplicated further down, which is
  // equivalent to clamping the sample locations against the crop border.
  const int lines_to_save =
      AOMMIN(RESTORATION_CTX_VERT, frame->crop_heights[is_uv] - row);
  assert(lines_to_save == 1 || lines_to_save == 2);

  int upscaled_width;
  if (superres) {
    const int ss_x = is_uv && cm->seq_params->subsampling_x;
    upscaled_width = (cm->superres_upscaled_width + ss_x) >> ss_x;
  } else {
    upscaled_width = frame->crop_widths[is_uv];
  }
  const int line_bytes = upscaled_width << use_highbd;

  if (!superres) {
    for (int i = 0; i < lines_to_save; i++) {
      memcpy(bdry_rows + i * bdry_stride, src_rows + i * src_stride,
             line_bytes);
    }
  } else if (use_highbd) {
    av1_upscale_normative_rows(
        cm, CONVERT_TO_BYTEPTR(src_rows), frame->strides[is_uv],
        CONVERT_TO_BYTEPTR(bdry_rows), boundaries->stripe_boundary_stride,
        plane, lines_to_save);
  } else {
    av1_upscale_normative_rows(cm, src_rows, frame->strides[is_uv], bdry_rows,
                               boundaries->stripe_boundary_stride, plane,
                               lines_to_save);
  }

  // Only one row was available: replicate it into the second row slot.
  if (lines_to_save == 1) {
    memcpy(bdry_rows + bdry_stride, bdry_rows, line_bytes);
  }

  extend_lines(bdry_rows, upscaled_width, RESTORATION_CTX_VERT, bdry_stride,
               RESTORATION_EXTRA_HORZ, use_highbd);
}
1409*77c1e3ccSAndroid Build Coastguard Worker 
// Stores CDEF-stage context for one edge of a stripe: the single row 'row' of
// 'plane' is replicated into every one of the RESTORATION_CTX_VERT boundary
// rows reserved for 'stripe', after which those rows are handed to
// extend_lines() together with RESTORATION_EXTRA_HORZ.
//
//   frame      - frame whose plane buffers, strides and crop widths are read
//   cm         - supplies the superres state and the chroma subsampling
//   plane      - plane index; any value above 0 is treated as chroma
//   row        - source row inside the plane
//   stripe     - index of the stripe whose boundary slot is written
//   use_highbd - used as a shift amount on strides, offsets and byte counts
//   is_above   - non-zero writes into stripe_boundary_above, zero writes into
//                stripe_boundary_below
//   boundaries - destination buffers and their stride
static void save_cdef_boundary_lines(const YV12_BUFFER_CONFIG *frame,
                                     const AV1_COMMON *cm, int plane, int row,
                                     int stripe, int use_highbd, int is_above,
                                     RestorationStripeBoundaries *boundaries) {
  const int is_uv = plane > 0;

  // Source: the requested row of the plane. The stride is converted to bytes
  // by shifting with use_highbd; the ptrdiff_t cast keeps the row offset wide.
  const int src_stride = frame->strides[is_uv] << use_highbd;
  const uint8_t *plane_base = REAL_PTR(use_highbd, frame->buffers[plane]);
  const uint8_t *src_line = plane_base + row * (ptrdiff_t)src_stride;

  // Destination: skip the left border of RESTORATION_EXTRA_HORZ samples, then
  // step to this stripe's group of RESTORATION_CTX_VERT rows.
  uint8_t *bdry_buf;
  if (is_above) {
    bdry_buf = boundaries->stripe_boundary_above;
  } else {
    bdry_buf = boundaries->stripe_boundary_below;
  }
  const int bdry_stride = boundaries->stripe_boundary_stride << use_highbd;
  uint8_t *stripe_rows = bdry_buf + (RESTORATION_EXTRA_HORZ << use_highbd) +
                         RESTORATION_CTX_VERT * stripe * bdry_stride;

  // Superres has already been applied by the time this function runs, so the
  // row can be taken straight from the upscaled frame at its upscaled width.
  // Without superres the plane's crop width is used unchanged.
  int upscaled_width = frame->crop_widths[is_uv];
  if (av1_superres_scaled(cm)) {
    const int ss_x = is_uv && cm->seq_params->subsampling_x;
    upscaled_width = (cm->superres_upscaled_width + ss_x) >> ss_x;
  }
  const int line_bytes = upscaled_width << use_highbd;

  // The same source row fills every context row of this stripe.
  for (int line = 0; line < RESTORATION_CTX_VERT; ++line) {
    uint8_t *dst_line = stripe_rows + line * bdry_stride;
    memcpy(dst_line, src_line, line_bytes);
  }

  extend_lines(stripe_rows, upscaled_width, RESTORATION_CTX_VERT, bdry_stride,
               RESTORATION_EXTRA_HORZ, use_highbd);
}
1441*77c1e3ccSAndroid Build Coastguard Worker 
// Walks every restoration stripe of 'plane' and saves the context rows that
// border it. The function is meant to be run in two passes selected by
// 'after_cdef':
//   - after_cdef == 0: deblocked rows are saved at stripe boundaries that lie
//     inside the frame (above every stripe but the first, and below every
//     stripe whose bottom is still above the frame bottom).
//   - after_cdef != 0: CDEF rows are saved at the remaining edges, i.e. the
//     top of the first stripe and the bottom of a stripe reaching the frame
//     bottom.
//
//   frame      - frame to read rows from
//   use_highbd - forwarded unchanged to the row-saving helpers
//   plane      - plane index; any value above 0 is treated as chroma
//   cm         - supplies plane dimensions, subsampling and the per-plane
//                boundary storage (cm->rst_info[plane].boundaries)
static void save_boundary_lines(const YV12_BUFFER_CONFIG *frame, int use_highbd,
                                int plane, AV1_COMMON *cm, int after_cdef) {
  const int is_uv = plane > 0;
  const int ss_y = is_uv && cm->seq_params->subsampling_y;

  // Stripe geometry, scaled down for vertically subsampled planes.
  const int stripe_height = RESTORATION_PROC_UNIT_SIZE >> ss_y;
  const int stripe_off = RESTORATION_UNIT_OFFSET >> ss_y;

  // Only the height is consumed below; the width is a required out-parameter.
  int plane_w, plane_h;
  av1_get_upsampled_plane_size(cm, is_uv, &plane_w, &plane_h);

  RestorationStripeBoundaries *boundaries = &cm->rst_info[plane].boundaries;

  const int plane_height = ROUND_POWER_OF_TWO(cm->height, ss_y);

  for (int idx = 0;; ++idx) {
    // Stripes are shifted up by stripe_off, so the first one is clamped to
    // start at row 0 and is shorter than the others.
    const int y0 = AOMMAX(0, idx * stripe_height - stripe_off);
    if (y0 >= plane_h) {
      break;
    }
    const int y1 = AOMMIN((idx + 1) * stripe_height - stripe_off, plane_h);

    // Frame edges take CDEF pixels; every other stripe boundary takes
    // deblocked pixels.
    const int at_frame_top = (idx == 0);
    const int at_frame_bottom = (y1 >= plane_height);

    if (after_cdef) {
      // CDEF pass: only the frame edges are written.
      if (at_frame_top) {
        save_cdef_boundary_lines(frame, cm, plane, y0, idx, use_highbd, 1,
                                 boundaries);
      }
      if (at_frame_bottom) {
        save_cdef_boundary_lines(frame, cm, plane, y1 - 1, idx, use_highbd, 0,
                                 boundaries);
      }
    } else {
      // Deblock pass: only internal stripe boundaries are written.
      if (!at_frame_top) {
        save_deblock_boundary_lines(frame, cm, plane, y0 - RESTORATION_CTX_VERT,
                                    idx, use_highbd, 1, boundaries);
      }
      if (!at_frame_bottom) {
        save_deblock_boundary_lines(frame, cm, plane, y1, idx, use_highbd, 0,
                                    boundaries);
      }
    }
  }
}
1493*77c1e3ccSAndroid Build Coastguard Worker 
1494*77c1e3ccSAndroid Build Coastguard Worker // For each RESTORATION_PROC_UNIT_SIZE pixel high stripe, save 4 scan
1495*77c1e3ccSAndroid Build Coastguard Worker // lines to be used as boundary in the loop restoration process. The
1496*77c1e3ccSAndroid Build Coastguard Worker // lines are saved in rst_internal.stripe_boundary_lines
av1_loop_restoration_save_boundary_lines(const YV12_BUFFER_CONFIG * frame,AV1_COMMON * cm,int after_cdef)1497*77c1e3ccSAndroid Build Coastguard Worker void av1_loop_restoration_save_boundary_lines(const YV12_BUFFER_CONFIG *frame,
1498*77c1e3ccSAndroid Build Coastguard Worker                                               AV1_COMMON *cm, int after_cdef) {
1499*77c1e3ccSAndroid Build Coastguard Worker   const int num_planes = av1_num_planes(cm);
1500*77c1e3ccSAndroid Build Coastguard Worker   const int use_highbd = cm->seq_params->use_highbitdepth;
1501*77c1e3ccSAndroid Build Coastguard Worker   for (int p = 0; p < num_planes; ++p) {
1502*77c1e3ccSAndroid Build Coastguard Worker     save_boundary_lines(frame, use_highbd, p, cm, after_cdef);
1503*77c1e3ccSAndroid Build Coastguard Worker   }
1504*77c1e3ccSAndroid Build Coastguard Worker }
1505