xref: /aosp_15_r20/external/libdav1d/src/cdef_apply_tmpl.c (revision c09093415860a1c2373dacd84c4fde00c507cdfd)
/*
 * Copyright © 2018, VideoLAN and dav1d authors
 * Copyright © 2018, Two Orioles, LLC
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice, this
 *    list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright notice,
 *    this list of conditions and the following disclaimer in the documentation
 *    and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
27 
28 #include "config.h"
29 
30 #include <string.h>
31 
32 #include "common/intops.h"
33 
34 #include "src/cdef_apply.h"
35 
/*
 * Bitmask telling backup2x8() which planes to save: bit 0 selects the luma
 * plane, bit 1 selects both chroma planes. The two bits may be OR-ed together.
 */
enum Backup2x8Flags {
    BACKUP_2X8_Y = 1 << 0,
    BACKUP_2X8_UV = 1 << 1,
};
40 
/*
 * Copy the last two pixel rows of an 8-luma-row band from src into dst, for
 * every plane present in `layout`.
 *
 *   dst    - destination row buffers, one per plane
 *   src    - plane pointers addressing the first row of the band
 *   stride - byte strides: [0] for luma, [1] for both chroma planes
 *   layout - pixel layout; I400 has no chroma, I420 has a 4-row chroma band,
 *            every other layout an 8-row chroma band
 *
 * Two whole strides are copied per plane (rows N-2 and N-1 of an N-row band).
 * With a negative stride the later row sits at the lower address, so the copy
 * starts at row N-1 on the source side and one stride below dst[] on the
 * destination side, and the length is negated to stay positive.
 */
static void backup2lines(pixel *const dst[3], /*const*/ pixel *const src[3],
                         const ptrdiff_t stride[2],
                         const enum Dav1dPixelLayout layout)
{
    const ptrdiff_t y_stride = PXSTRIDE(stride[0]);
    if (y_stride < 0)
        pixel_copy(dst[0] + y_stride, src[0] + 7 * y_stride, -2 * y_stride);
    else
        pixel_copy(dst[0], src[0] + 6 * y_stride, 2 * y_stride);

    if (layout == DAV1D_PIXEL_LAYOUT_I400)
        return;

    const ptrdiff_t uv_stride = PXSTRIDE(stride[1]);
    // number of chroma rows in the band: halved when vertically subsampled
    const int uv_rows = layout == DAV1D_PIXEL_LAYOUT_I420 ? 4 : 8;
    for (int pl = 1; pl <= 2; pl++) {
        if (uv_stride < 0)
            pixel_copy(dst[pl] + uv_stride,
                       src[pl] + (uv_rows - 1) * uv_stride, -2 * uv_stride);
        else
            pixel_copy(dst[pl],
                       src[pl] + (uv_rows - 2) * uv_stride, 2 * uv_stride);
    }
}
64 
/*
 * Save a 2-pixel-wide column strip from each selected plane into dst.
 *
 *   dst        - output, indexed [plane][row][x]
 *   src        - plane pointers addressing the top-left pixel of the block
 *   src_stride - byte strides: [0] for luma, [1] for both chroma planes
 *   x_off      - luma column offset; the two pixels immediately to the left
 *                of this column (x_off - 2 and x_off - 1) are copied
 *   layout     - pixel layout, determines chroma subsampling
 *   flag       - BACKUP_2X8_Y and/or BACKUP_2X8_UV: which planes to save
 *
 * Luma always saves 8 rows. Chroma saves 8 >> ss_ver rows and uses the column
 * offset x_off >> ss_hor; nothing is done for chroma when the layout is I400.
 */
static void backup2x8(pixel dst[3][8][2],
                      /*const*/ pixel *const src[3],
                      const ptrdiff_t src_stride[2], int x_off,
                      const enum Dav1dPixelLayout layout,
                      const enum Backup2x8Flags flag)
{
    ptrdiff_t y_off = 0;
    if (flag & BACKUP_2X8_Y) {
        for (int y = 0; y < 8; y++, y_off += PXSTRIDE(src_stride[0]))
            pixel_copy(dst[0][y], &src[0][y_off + x_off - 2], 2);
    }

    if (layout == DAV1D_PIXEL_LAYOUT_I400 || !(flag & BACKUP_2X8_UV))
        return;

    const int ss_ver = layout == DAV1D_PIXEL_LAYOUT_I420;
    const int ss_hor = layout != DAV1D_PIXEL_LAYOUT_I444;

    // translate the luma column into chroma coordinates
    x_off >>= ss_hor;
    y_off = 0;
    for (int y = 0; y < (8 >> ss_ver); y++, y_off += PXSTRIDE(src_stride[1])) {
        pixel_copy(dst[1][y], &src[1][y_off + x_off - 2], 2);
        pixel_copy(dst[2][y], &src[2][y_off + x_off - 2], 2);
    }
}
90 
/*
 * Scale a primary filter strength by the block variance.
 *
 * Returns 0 when var is 0. Otherwise returns
 * (strength * (4 + i) + 8) >> 4, where i is 0 for var < 64 and
 * min(ulog2(var >> 6), 12) for larger values, so the multiplier ranges from
 * 4/16 to 16/16 with rounding.
 * NOTE(review): ulog2() is assumed to be floor(log2(x)) - confirm against
 * common/intops.h.
 */
static int adjust_strength(const int strength, const unsigned var) {
    if (!var) return 0;
    const int i = var >> 6 ? imin(ulog2(var >> 6), 12) : 0;
    return (strength * (4 + i) + 8) >> 4;
}
96 
bytefn(dav1d_cdef_brow)97 void bytefn(dav1d_cdef_brow)(Dav1dTaskContext *const tc,
98                              pixel *const p[3],
99                              const Av1Filter *const lflvl,
100                              const int by_start, const int by_end,
101                              const int sbrow_start, const int sby)
102 {
103     Dav1dFrameContext *const f = (Dav1dFrameContext *)tc->f;
104     const int bitdepth_min_8 = BITDEPTH == 8 ? 0 : f->cur.p.bpc - 8;
105     const Dav1dDSPContext *const dsp = f->dsp;
106     enum CdefEdgeFlags edges = CDEF_HAVE_BOTTOM | (by_start > 0 ? CDEF_HAVE_TOP : 0);
107     pixel *ptrs[3] = { p[0], p[1], p[2] };
108     const int sbsz = 16;
109     const int sb64w = f->sb128w << 1;
110     const int damping = f->frame_hdr->cdef.damping + bitdepth_min_8;
111     const enum Dav1dPixelLayout layout = f->cur.p.layout;
112     const int uv_idx = DAV1D_PIXEL_LAYOUT_I444 - layout;
113     const int ss_ver = layout == DAV1D_PIXEL_LAYOUT_I420;
114     const int ss_hor = layout != DAV1D_PIXEL_LAYOUT_I444;
115     static const uint8_t uv_dirs[2][8] = { { 0, 1, 2, 3, 4, 5, 6, 7 },
116                                            { 7, 0, 2, 4, 5, 6, 6, 6 } };
117     const uint8_t *uv_dir = uv_dirs[layout == DAV1D_PIXEL_LAYOUT_I422];
118     const int have_tt = f->c->n_tc > 1;
119     const int sb128 = f->seq_hdr->sb128;
120     const int resize = f->frame_hdr->width[0] != f->frame_hdr->width[1];
121     const ptrdiff_t y_stride = PXSTRIDE(f->cur.stride[0]);
122     const ptrdiff_t uv_stride = PXSTRIDE(f->cur.stride[1]);
123 
124     for (int bit = 0, by = by_start; by < by_end; by += 2, edges |= CDEF_HAVE_TOP) {
125         const int tf = tc->top_pre_cdef_toggle;
126         const int by_idx = (by & 30) >> 1;
127         if (by + 2 >= f->bh) edges &= ~CDEF_HAVE_BOTTOM;
128 
129         if ((!have_tt || sbrow_start || by + 2 < by_end) &&
130             edges & CDEF_HAVE_BOTTOM)
131         {
132             // backup pre-filter data for next iteration
133             pixel *const cdef_top_bak[3] = {
134                 f->lf.cdef_line[!tf][0] + have_tt * sby * 4 * y_stride,
135                 f->lf.cdef_line[!tf][1] + have_tt * sby * 8 * uv_stride,
136                 f->lf.cdef_line[!tf][2] + have_tt * sby * 8 * uv_stride
137             };
138             backup2lines(cdef_top_bak, ptrs, f->cur.stride, layout);
139         }
140 
141         ALIGN_STK_16(pixel, lr_bak, 2 /* idx */, [3 /* plane */][8 /* y */][2 /* x */]);
142         pixel *iptrs[3] = { ptrs[0], ptrs[1], ptrs[2] };
143         edges &= ~CDEF_HAVE_LEFT;
144         edges |= CDEF_HAVE_RIGHT;
145         enum Backup2x8Flags prev_flag = 0;
146         for (int sbx = 0, last_skip = 1; sbx < sb64w; sbx++, edges |= CDEF_HAVE_LEFT) {
147             const int sb128x = sbx >> 1;
148             const int sb64_idx = ((by & sbsz) >> 3) + (sbx & 1);
149             const int cdef_idx = lflvl[sb128x].cdef_idx[sb64_idx];
150             if (cdef_idx == -1 ||
151                 (!f->frame_hdr->cdef.y_strength[cdef_idx] &&
152                  !f->frame_hdr->cdef.uv_strength[cdef_idx]))
153             {
154                 last_skip = 1;
155                 goto next_sb;
156             }
157 
158             // Create a complete 32-bit mask for the sb row ahead of time.
159             const uint16_t (*noskip_row)[2] = &lflvl[sb128x].noskip_mask[by_idx];
160             const unsigned noskip_mask = (unsigned) noskip_row[0][1] << 16 |
161                                                     noskip_row[0][0];
162 
163             const int y_lvl = f->frame_hdr->cdef.y_strength[cdef_idx];
164             const int uv_lvl = f->frame_hdr->cdef.uv_strength[cdef_idx];
165             const enum Backup2x8Flags flag = !!y_lvl + (!!uv_lvl << 1);
166 
167             const int y_pri_lvl = (y_lvl >> 2) << bitdepth_min_8;
168             int y_sec_lvl = y_lvl & 3;
169             y_sec_lvl += y_sec_lvl == 3;
170             y_sec_lvl <<= bitdepth_min_8;
171 
172             const int uv_pri_lvl = (uv_lvl >> 2) << bitdepth_min_8;
173             int uv_sec_lvl = uv_lvl & 3;
174             uv_sec_lvl += uv_sec_lvl == 3;
175             uv_sec_lvl <<= bitdepth_min_8;
176 
177             pixel *bptrs[3] = { iptrs[0], iptrs[1], iptrs[2] };
178             for (int bx = sbx * sbsz; bx < imin((sbx + 1) * sbsz, f->bw);
179                  bx += 2, edges |= CDEF_HAVE_LEFT)
180             {
181                 if (bx + 2 >= f->bw) edges &= ~CDEF_HAVE_RIGHT;
182 
183                 // check if this 8x8 block had any coded coefficients; if not,
184                 // go to the next block
185                 const uint32_t bx_mask = 3U << (bx & 30);
186                 if (!(noskip_mask & bx_mask)) {
187                     last_skip = 1;
188                     goto next_b;
189                 }
190                 const int do_left = last_skip ? flag : (prev_flag ^ flag) & flag;
191                 prev_flag = flag;
192                 if (do_left && edges & CDEF_HAVE_LEFT) {
193                     // we didn't backup the prefilter data because it wasn't
194                     // there, so do it here instead
195                     backup2x8(lr_bak[bit], bptrs, f->cur.stride, 0, layout, do_left);
196                 }
197                 if (edges & CDEF_HAVE_RIGHT) {
198                     // backup pre-filter data for next iteration
199                     backup2x8(lr_bak[!bit], bptrs, f->cur.stride, 8, layout, flag);
200                 }
201 
202                 int dir;
203                 unsigned variance;
204                 if (y_pri_lvl || uv_pri_lvl)
205                     dir = dsp->cdef.dir(bptrs[0], f->cur.stride[0],
206                                         &variance HIGHBD_CALL_SUFFIX);
207 
208                 const pixel *top, *bot;
209                 ptrdiff_t offset;
210 
211                 if (!have_tt) goto st_y;
212                 if (sbrow_start && by == by_start) {
213                     if (resize) {
214                         offset = (sby - 1) * 4 * y_stride + bx * 4;
215                         top = &f->lf.cdef_lpf_line[0][offset];
216                     } else {
217                         offset = (sby * (4 << sb128) - 4) * y_stride + bx * 4;
218                         top = &f->lf.lr_lpf_line[0][offset];
219                     }
220                     bot = bptrs[0] + 8 * y_stride;
221                 } else if (!sbrow_start && by + 2 >= by_end) {
222                     top = &f->lf.cdef_line[tf][0][sby * 4 * y_stride + bx * 4];
223                     if (resize) {
224                         offset = (sby * 4 + 2) * y_stride + bx * 4;
225                         bot = &f->lf.cdef_lpf_line[0][offset];
226                     } else {
227                         const int line = sby * (4 << sb128) + 4 * sb128 + 2;
228                         offset = line * y_stride + bx * 4;
229                         bot = &f->lf.lr_lpf_line[0][offset];
230                     }
231                 } else {
232             st_y:;
233                     offset = sby * 4 * y_stride;
234                     top = &f->lf.cdef_line[tf][0][have_tt * offset + bx * 4];
235                     bot = bptrs[0] + 8 * y_stride;
236                 }
237                 if (y_pri_lvl) {
238                     const int adj_y_pri_lvl = adjust_strength(y_pri_lvl, variance);
239                     if (adj_y_pri_lvl || y_sec_lvl)
240                         dsp->cdef.fb[0](bptrs[0], f->cur.stride[0], lr_bak[bit][0],
241                                         top, bot, adj_y_pri_lvl, y_sec_lvl,
242                                         dir, damping, edges HIGHBD_CALL_SUFFIX);
243                 } else if (y_sec_lvl)
244                     dsp->cdef.fb[0](bptrs[0], f->cur.stride[0], lr_bak[bit][0],
245                                     top, bot, 0, y_sec_lvl, 0, damping,
246                                     edges HIGHBD_CALL_SUFFIX);
247 
248                 if (!uv_lvl) goto skip_uv;
249                 assert(layout != DAV1D_PIXEL_LAYOUT_I400);
250 
251                 const int uvdir = uv_pri_lvl ? uv_dir[dir] : 0;
252                 for (int pl = 1; pl <= 2; pl++) {
253                     if (!have_tt) goto st_uv;
254                     if (sbrow_start && by == by_start) {
255                         if (resize) {
256                             offset = (sby - 1) * 4 * uv_stride + (bx * 4 >> ss_hor);
257                             top = &f->lf.cdef_lpf_line[pl][offset];
258                         } else {
259                             const int line = sby * (4 << sb128) - 4;
260                             offset = line * uv_stride + (bx * 4 >> ss_hor);
261                             top = &f->lf.lr_lpf_line[pl][offset];
262                         }
263                         bot = bptrs[pl] + (8 >> ss_ver) * uv_stride;
264                     } else if (!sbrow_start && by + 2 >= by_end) {
265                         const ptrdiff_t top_offset = sby * 8 * uv_stride +
266                                                      (bx * 4 >> ss_hor);
267                         top = &f->lf.cdef_line[tf][pl][top_offset];
268                         if (resize) {
269                             offset = (sby * 4 + 2) * uv_stride + (bx * 4 >> ss_hor);
270                             bot = &f->lf.cdef_lpf_line[pl][offset];
271                         } else {
272                             const int line = sby * (4 << sb128) + 4 * sb128 + 2;
273                             offset = line * uv_stride + (bx * 4 >> ss_hor);
274                             bot = &f->lf.lr_lpf_line[pl][offset];
275                         }
276                     } else {
277                 st_uv:;
278                         const ptrdiff_t offset = sby * 8 * uv_stride;
279                         top = &f->lf.cdef_line[tf][pl][have_tt * offset + (bx * 4 >> ss_hor)];
280                         bot = bptrs[pl] + (8 >> ss_ver) * uv_stride;
281                     }
282                     dsp->cdef.fb[uv_idx](bptrs[pl], f->cur.stride[1],
283                                          lr_bak[bit][pl], top, bot,
284                                          uv_pri_lvl, uv_sec_lvl, uvdir,
285                                          damping - 1, edges HIGHBD_CALL_SUFFIX);
286                 }
287 
288             skip_uv:
289                 bit ^= 1;
290                 last_skip = 0;
291 
292             next_b:
293                 bptrs[0] += 8;
294                 bptrs[1] += 8 >> ss_hor;
295                 bptrs[2] += 8 >> ss_hor;
296             }
297 
298         next_sb:
299             iptrs[0] += sbsz * 4;
300             iptrs[1] += sbsz * 4 >> ss_hor;
301             iptrs[2] += sbsz * 4 >> ss_hor;
302         }
303 
304         ptrs[0] += 8 * PXSTRIDE(f->cur.stride[0]);
305         ptrs[1] += 8 * PXSTRIDE(f->cur.stride[1]) >> ss_ver;
306         ptrs[2] += 8 * PXSTRIDE(f->cur.stride[1]) >> ss_ver;
307         tc->top_pre_cdef_toggle ^= 1;
308     }
309 }
310