xref: /aosp_15_r20/external/libdav1d/tests/checkasm/mc.c (revision c09093415860a1c2373dacd84c4fde00c507cdfd)
1 /*
2  * Copyright © 2018, VideoLAN and dav1d authors
3  * Copyright © 2018, Two Orioles, LLC
4  * All rights reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions are met:
8  *
9  * 1. Redistributions of source code must retain the above copyright notice, this
10  *    list of conditions and the following disclaimer.
11  *
12  * 2. Redistributions in binary form must reproduce the above copyright notice,
13  *    this list of conditions and the following disclaimer in the documentation
14  *    and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19  * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
20  * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
21  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
22  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
23  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26  */
27 
28 #include "tests/checkasm/checkasm.h"
29 
30 #include "src/levels.h"
31 #include "src/mc.h"
32 
/* Name fragment for each 2D filter; indexed by the `filter` loop counter of
 * the mc/mct tests when building the reported function name. */
static const char *const filter_names[] = {
    "8tap_regular",
    "8tap_regular_smooth",
    "8tap_regular_sharp",
    "8tap_sharp_regular",
    "8tap_sharp_smooth",
    "8tap_sharp",
    "8tap_smooth_regular",
    "8tap_smooth",
    "8tap_smooth_sharp",
    "bilinear"
};

/* Name fragment for each subpel case; bit 0 of the index means a horizontal
 * offset is used, bit 1 a vertical one. */
static const char *const mxy_names[] = {
    "0", "h", "v", "hv"
};

/* Name suffix for each scaled-mc path; index 0 uses a random vertical step,
 * indices 1 and 2 use the fixed steps 1 << 10 and 2 << 10. */
static const char *const scaled_paths[] = {
    "", "_dy1", "_dy2"
};
42 
/* Returns the block height to test after h.
 *
 * Heights 4, 8 and 16 step to 1.5x (6, 12, 24); heights 6, 12 and 24 step to
 * the next power of two (8, 16, 32); every other height doubles. Starting
 * from 2 this walks 2, 4, 6, 8, 12, 16, 24, 32, 64, 128. */
static int mc_h_next(const int h) {
    if (h == 4 || h == 8 || h == 16)
        return (h * 3) >> 1;

    // h & (h - 1) clears the lowest set bit, e.g. 6 -> 4, 12 -> 8, 24 -> 16
    if (h == 6 || h == 12 || h == 24)
        return (h & (h - 1)) * 2;

    return h * 2;
}
57 
/* Tests the functions in c->mc[]: for every filter, block width, subpel case
 * (none / h / v / hv) and block height, call_ref() and call_new() are run on
 * the same random source and their padded destination rectangles compared. */
static void check_mc(Dav1dMCDSPContext *const c) {
    ALIGN_STK_64(pixel, src_buf, 135 * 135,);
    PIXEL_RECT(c_dst, 128, 128);
    PIXEL_RECT(a_dst, 128, 128);
    const ptrdiff_t src_stride = 135 * sizeof(pixel);
    // the block origin sits 3 rows and 3 columns inside the 135x135 source
    const pixel *src = src_buf + 135 * 3 + 3;

    declare_func(void, pixel *dst, ptrdiff_t dst_stride, const pixel *src,
                 ptrdiff_t src_stride, int w, int h, int mx, int my
                 HIGHBD_DECL_SUFFIX);

    for (int filter = 0; filter < N_2D_FILTERS; filter++) {
        // only these filters are benchmarked
        const int bench_filter = filter == FILTER_2D_8TAP_REGULAR ||
                                 filter == FILTER_2D_8TAP_SHARP ||
                                 filter == FILTER_2D_BILINEAR;

        for (int w = 2; w <= 128; w <<= 1) {
            const int h_min = w <= 32 ? 2 : w / 4;
            const int h_max = imax(imin(w * 4, 128), 32);

            for (int mxy = 0; mxy < 4; mxy++) {
                if (!check_func(c->mc[filter], "mc_%s_w%d_%s_%dbpc",
                                filter_names[filter], w, mxy_names[mxy],
                                BITDEPTH))
                {
                    continue;
                }

                for (int h = h_min; h <= h_max; h = mc_h_next(h)) {
                    // bit 0 of mxy enables a horizontal offset, bit 1 a
                    // vertical one; an enabled offset is random in 1..15
                    int mx = 0, my = 0;
                    if (mxy & 1)
                        mx = rnd() % 15 + 1;
                    if (mxy & 2)
                        my = rnd() % 15 + 1;
#if BITDEPTH == 16
                    const int bitdepth_max = rnd() & 1 ? 0x3ff : 0xfff;
#else
                    const int bitdepth_max = 0xff;
#endif

                    for (int n = 0; n < 135 * 135; n++)
                        src_buf[n] = rnd() & bitdepth_max;

                    CLEAR_PIXEL_RECT(c_dst);
                    CLEAR_PIXEL_RECT(a_dst);

                    call_ref(c_dst, c_dst_stride, src, src_stride, w, h,
                             mx, my HIGHBD_TAIL_SUFFIX);
                    call_new(a_dst, a_dst_stride, src, src_stride, w, h,
                             mx, my HIGHBD_TAIL_SUFFIX);
                    checkasm_check_pixel_padded(c_dst, c_dst_stride,
                                                a_dst, a_dst_stride,
                                                w, h, "dst");

                    if (bench_filter) {
                        bench_new(a_dst, a_dst_stride, src, src_stride, w, h,
                                  mx, my HIGHBD_TAIL_SUFFIX);
                    }
                }
            }
        }
    }
    report("mc");
}
112 
113 /* Generate worst case input in the topleft corner, randomize the rest */
generate_mct_input(pixel * const buf,const int bitdepth_max)114 static void generate_mct_input(pixel *const buf, const int bitdepth_max) {
115     static const int8_t pattern[8] = { -1,  0, -1,  0,  0, -1,  0, -1 };
116     const int sign = -(rnd() & 1);
117 
118     for (int y = 0; y < 135; y++)
119         for (int x = 0; x < 135; x++)
120             buf[135*y+x] = ((x | y) < 8 ? (pattern[x] ^ pattern[y] ^ sign)
121                                         : rnd()) & bitdepth_max;
122 }
123 
/* Tests the functions in c->mct[], which write a w-wide int16_t block: for
 * every filter, width, subpel case and height, call_ref() and call_new()
 * are run on input from generate_mct_input() and their outputs compared. */
static void check_mct(Dav1dMCDSPContext *const c) {
    ALIGN_STK_64(pixel, src_buf, 135 * 135,);
    ALIGN_STK_64(int16_t, c_tmp, 128 * 128,);
    ALIGN_STK_64(int16_t, a_tmp, 128 * 128,);
    const ptrdiff_t src_stride = 135 * sizeof(pixel);
    // the block origin sits 3 rows and 3 columns inside the 135x135 source
    const pixel *src = src_buf + 135 * 3 + 3;

    declare_func(void, int16_t *tmp, const pixel *src, ptrdiff_t src_stride,
                 int w, int h, int mx, int my HIGHBD_DECL_SUFFIX);

    for (int filter = 0; filter < N_2D_FILTERS; filter++) {
        // only these filters are benchmarked
        const int bench_filter = filter == FILTER_2D_8TAP_REGULAR ||
                                 filter == FILTER_2D_8TAP_SHARP ||
                                 filter == FILTER_2D_BILINEAR;

        for (int w = 4; w <= 128; w <<= 1) {
            const int h_min = imax(w / 4, 4);
            const int h_max = imin(w * 4, 128);
            // the outputs are compared as tightly packed rows of w entries
            const ptrdiff_t tmp_stride = w * sizeof(*c_tmp);

            for (int mxy = 0; mxy < 4; mxy++) {
                if (!check_func(c->mct[filter], "mct_%s_w%d_%s_%dbpc",
                                filter_names[filter], w, mxy_names[mxy],
                                BITDEPTH))
                {
                    continue;
                }

                for (int h = h_min; h <= h_max; h <<= 1) {
                    // bit 0 of mxy enables a horizontal offset, bit 1 a
                    // vertical one; an enabled offset is random in 1..15
                    int mx = 0, my = 0;
                    if (mxy & 1)
                        mx = rnd() % 15 + 1;
                    if (mxy & 2)
                        my = rnd() % 15 + 1;
#if BITDEPTH == 16
                    const int bitdepth_max = rnd() & 1 ? 0x3ff : 0xfff;
#else
                    const int bitdepth_max = 0xff;
#endif
                    generate_mct_input(src_buf, bitdepth_max);

                    call_ref(c_tmp, src, src_stride, w, h,
                             mx, my HIGHBD_TAIL_SUFFIX);
                    call_new(a_tmp, src, src_stride, w, h,
                             mx, my HIGHBD_TAIL_SUFFIX);
                    checkasm_check(int16_t, c_tmp, tmp_stride,
                                            a_tmp, tmp_stride,
                                            w, h, "tmp");

                    if (bench_filter) {
                        bench_new(a_tmp, src, src_stride, w, h,
                                  mx, my HIGHBD_TAIL_SUFFIX);
                    }
                }
            }
        }
    }
    report("mct");
}
168 
/* Tests the functions in c->mc_scaled[]: for every filter, width, scaled
 * path and height, call_ref() and call_new() are run with random start
 * positions and steps on a random 263x263 source, and their padded
 * destination rectangles compared. */
static void check_mc_scaled(Dav1dMCDSPContext *const c) {
    ALIGN_STK_64(pixel, src_buf, 263 * 263,);
    PIXEL_RECT(c_dst, 128, 128);
    PIXEL_RECT(a_dst, 128, 128);
    const ptrdiff_t src_stride = 263 * sizeof(pixel);
    // the block origin sits 3 rows and 3 columns inside the 263x263 source
    const pixel *src = src_buf + 263 * 3 + 3;
    // unlike the unscaled tests, the bitdepth is picked once for all sizes
#if BITDEPTH == 16
    const int bitdepth_max = rnd() & 1 ? 0x3ff : 0xfff;
#else
    const int bitdepth_max = 0xff;
#endif

    declare_func(void, pixel *dst, ptrdiff_t dst_stride, const pixel *src,
                 ptrdiff_t src_stride, int w, int h,
                 int mx, int my, int dx, int dy HIGHBD_DECL_SUFFIX);

    for (int filter = 0; filter < N_2D_FILTERS; filter++) {
        // only these filters are benchmarked
        const int bench_filter = filter == FILTER_2D_8TAP_REGULAR ||
                                 filter == FILTER_2D_BILINEAR;

        for (int w = 2; w <= 128; w <<= 1) {
            const int h_min = w <= 32 ? 2 : w / 4;
            const int h_max = imax(imin(w * 4, 128), 32);

            for (int p = 0; p < 3; ++p) {
                if (!check_func(c->mc_scaled[filter],
                                "mc_scaled_%s_w%d%s_%dbpc",
                                filter_names[filter], w, scaled_paths[p],
                                BITDEPTH))
                {
                    continue;
                }

                for (int h = h_min; h <= h_max; h = mc_h_next(h)) {
                    const int mx = rnd() % 1024;
                    const int my = rnd() % 1024;
                    const int dx = rnd() % 2048 + 1;
                    // p == 0: random step; p == 1, 2: ystep=1.0 and
                    // ystep=2.0 paths
                    const int dy = p ? p << 10 : rnd() % 2048 + 1;

                    for (int n = 0; n < 263 * 263; n++)
                        src_buf[n] = rnd() & bitdepth_max;

                    CLEAR_PIXEL_RECT(c_dst);
                    CLEAR_PIXEL_RECT(a_dst);

                    call_ref(c_dst, c_dst_stride, src, src_stride,
                             w, h, mx, my, dx, dy HIGHBD_TAIL_SUFFIX);
                    call_new(a_dst, a_dst_stride, src, src_stride,
                             w, h, mx, my, dx, dy HIGHBD_TAIL_SUFFIX);
                    checkasm_check_pixel_padded(c_dst, c_dst_stride,
                                                a_dst, a_dst_stride,
                                                w, h, "dst");

                    if (bench_filter) {
                        bench_new(a_dst, a_dst_stride, src, src_stride,
                                  w, h, mx, my, dx, dy HIGHBD_TAIL_SUFFIX);
                    }
                }
            }
        }
    }
    report("mc_scaled");
}
225 
/* Tests the functions in c->mct_scaled[], which write a w-wide int16_t
 * block: for every filter, width, scaled path and height, call_ref() and
 * call_new() are run with random start positions and steps on a random
 * 263x263 source, and their outputs compared. */
static void check_mct_scaled(Dav1dMCDSPContext *const c) {
    ALIGN_STK_64(pixel, src_buf, 263 * 263,);
    ALIGN_STK_64(int16_t, c_tmp,   128 * 128,);
    ALIGN_STK_64(int16_t, a_tmp,   128 * 128,);
    const ptrdiff_t src_stride = 263 * sizeof(pixel);
    // the block origin sits 3 rows and 3 columns inside the 263x263 source
    const pixel *src = src_buf + 263 * 3 + 3;
    // unlike the unscaled tests, the bitdepth is picked once for all sizes
#if BITDEPTH == 16
    const int bitdepth_max = rnd() & 1 ? 0x3ff : 0xfff;
#else
    const int bitdepth_max = 0xff;
#endif

    declare_func(void, int16_t *tmp, const pixel *src, ptrdiff_t src_stride,
                 int w, int h, int mx, int my, int dx, int dy HIGHBD_DECL_SUFFIX);

    for (int filter = 0; filter < N_2D_FILTERS; filter++) {
        // only these filters are benchmarked
        const int bench_filter = filter == FILTER_2D_8TAP_REGULAR ||
                                 filter == FILTER_2D_BILINEAR;

        for (int w = 4; w <= 128; w <<= 1) {
            const int h_min = imax(w / 4, 4);
            const int h_max = imin(w * 4, 128);
            // the outputs are compared as tightly packed rows of w entries
            const ptrdiff_t tmp_stride = w * sizeof(*c_tmp);

            for (int p = 0; p < 3; ++p) {
                if (!check_func(c->mct_scaled[filter],
                                "mct_scaled_%s_w%d%s_%dbpc",
                                filter_names[filter], w, scaled_paths[p],
                                BITDEPTH))
                {
                    continue;
                }

                for (int h = h_min; h <= h_max; h = mc_h_next(h)) {
                    const int mx = rnd() % 1024;
                    const int my = rnd() % 1024;
                    const int dx = rnd() % 2048 + 1;
                    // p == 0: random step; p == 1, 2: ystep=1.0 and
                    // ystep=2.0 paths
                    const int dy = p ? p << 10 : rnd() % 2048 + 1;

                    for (int n = 0; n < 263 * 263; n++)
                        src_buf[n] = rnd() & bitdepth_max;

                    call_ref(c_tmp, src, src_stride,
                             w, h, mx, my, dx, dy HIGHBD_TAIL_SUFFIX);
                    call_new(a_tmp, src, src_stride,
                             w, h, mx, my, dx, dy HIGHBD_TAIL_SUFFIX);
                    checkasm_check(int16_t, c_tmp, tmp_stride,
                                            a_tmp, tmp_stride,
                                            w, h, "tmp");

                    if (bench_filter) {
                        bench_new(a_tmp, src, src_stride,
                                  w, h, mx, my, dx, dy HIGHBD_TAIL_SUFFIX);
                    }
                }
            }
        }
    }
    report("mct_scaled");
}
277 
/* Fills tmp[0] and tmp[1] with 128x128 intermediates for the compound tests.
 *
 * For each of the two planes, buf (which must hold 135 * 135 pixels) is
 * regenerated with generate_mct_input() and then run through
 * c->mct[FILTER_2D_8TAP_SHARP] with mx = my = 8. */
static void init_tmp(Dav1dMCDSPContext *const c, pixel *const buf,
                     int16_t (*const tmp)[128 * 128], const int bitdepth_max)
{
    // same 3-row / 3-column origin offset and 135-pixel stride as check_mct()
    const pixel *const src = buf + 135 * 3 + 3;
    const ptrdiff_t src_stride = 135 * sizeof(pixel);

    for (int plane = 0; plane < 2; plane++) {
        generate_mct_input(buf, bitdepth_max);
        c->mct[FILTER_2D_8TAP_SHARP](tmp[plane], src, src_stride, 128, 128,
                                      8, 8 HIGHBD_TAIL_SUFFIX);
    }
}
288 
/* Tests c->avg: for every block width and height, call_ref() and call_new()
 * are run on the same pair of intermediates from init_tmp() and their padded
 * destination rectangles compared. */
static void check_avg(Dav1dMCDSPContext *const c) {
    ALIGN_STK_64(int16_t, tmp, 2, [128 * 128]);
    // c_dst is also handed to init_tmp() as its 135x135 scratch source
    PIXEL_RECT(c_dst, 135, 135);
    PIXEL_RECT(a_dst, 128, 128);

    declare_func(void, pixel *dst, ptrdiff_t dst_stride, const int16_t *tmp1,
                 const int16_t *tmp2, int w, int h HIGHBD_DECL_SUFFIX);

    for (int w = 4; w <= 128; w <<= 1) {
        if (!check_func(c->avg, "avg_w%d_%dbpc", w, BITDEPTH))
            continue;

        const int h_min = imax(w / 4, 4);
        const int h_max = imin(w * 4, 128);
        for (int h = h_min; h <= h_max; h <<= 1) {
#if BITDEPTH == 16
            const int bitdepth_max = rnd() & 1 ? 0x3ff : 0xfff;
#else
            const int bitdepth_max = 0xff;
#endif

            init_tmp(c, c_dst, tmp, bitdepth_max);

            // reset both destinations (c_dst was just used as scratch)
            CLEAR_PIXEL_RECT(c_dst);
            CLEAR_PIXEL_RECT(a_dst);

            call_ref(c_dst, c_dst_stride, tmp[0], tmp[1], w, h HIGHBD_TAIL_SUFFIX);
            call_new(a_dst, a_dst_stride, tmp[0], tmp[1], w, h HIGHBD_TAIL_SUFFIX);
            checkasm_check_pixel_padded(c_dst, c_dst_stride,
                                        a_dst, a_dst_stride,
                                        w, h, "dst");

            bench_new(a_dst, a_dst_stride, tmp[0], tmp[1], w, h HIGHBD_TAIL_SUFFIX);
        }
    }
    report("avg");
}
322 
/* Tests c->w_avg: like check_avg(), with an extra random weight in 1..15
 * passed to both call_ref() and call_new(). */
static void check_w_avg(Dav1dMCDSPContext *const c) {
    ALIGN_STK_64(int16_t, tmp, 2, [128 * 128]);
    // c_dst is also handed to init_tmp() as its 135x135 scratch source
    PIXEL_RECT(c_dst, 135, 135);
    PIXEL_RECT(a_dst, 128, 128);

    declare_func(void, pixel *dst, ptrdiff_t dst_stride, const int16_t *tmp1,
                 const int16_t *tmp2, int w, int h, int weight HIGHBD_DECL_SUFFIX);

    for (int w = 4; w <= 128; w <<= 1) {
        if (!check_func(c->w_avg, "w_avg_w%d_%dbpc", w, BITDEPTH))
            continue;

        const int h_min = imax(w / 4, 4);
        const int h_max = imin(w * 4, 128);
        for (int h = h_min; h <= h_max; h <<= 1) {
            int weight = rnd() % 15 + 1;
#if BITDEPTH == 16
            const int bitdepth_max = rnd() & 1 ? 0x3ff : 0xfff;
#else
            const int bitdepth_max = 0xff;
#endif
            init_tmp(c, c_dst, tmp, bitdepth_max);

            // reset both destinations (c_dst was just used as scratch)
            CLEAR_PIXEL_RECT(c_dst);
            CLEAR_PIXEL_RECT(a_dst);

            call_ref(c_dst, c_dst_stride, tmp[0], tmp[1], w, h,
                     weight HIGHBD_TAIL_SUFFIX);
            call_new(a_dst, a_dst_stride, tmp[0], tmp[1], w, h,
                     weight HIGHBD_TAIL_SUFFIX);
            checkasm_check_pixel_padded(c_dst, c_dst_stride,
                                        a_dst, a_dst_stride,
                                        w, h, "dst");

            bench_new(a_dst, a_dst_stride, tmp[0], tmp[1], w, h,
                      weight HIGHBD_TAIL_SUFFIX);
        }
    }
    report("w_avg");
}
356 
/* Tests c->mask: like check_avg(), with a mask of random values in 0..64
 * passed to both call_ref() and call_new(). The mask is generated once and
 * shared by all block sizes. */
static void check_mask(Dav1dMCDSPContext *const c) {
    ALIGN_STK_64(int16_t, tmp, 2, [128 * 128]);
    // c_dst is also handed to init_tmp() as its 135x135 scratch source
    PIXEL_RECT(c_dst, 135, 135);
    PIXEL_RECT(a_dst, 128, 128);
    ALIGN_STK_64(uint8_t, mask,  128 * 128,);

    for (int n = 0; n < 128 * 128; n++)
        mask[n] = rnd() % 65;

    declare_func(void, pixel *dst, ptrdiff_t dst_stride, const int16_t *tmp1,
                 const int16_t *tmp2, int w, int h, const uint8_t *mask
                 HIGHBD_DECL_SUFFIX);

    for (int w = 4; w <= 128; w <<= 1) {
        if (!check_func(c->mask, "mask_w%d_%dbpc", w, BITDEPTH))
            continue;

        const int h_min = imax(w / 4, 4);
        const int h_max = imin(w * 4, 128);
        for (int h = h_min; h <= h_max; h <<= 1) {
#if BITDEPTH == 16
            const int bitdepth_max = rnd() & 1 ? 0x3ff : 0xfff;
#else
            const int bitdepth_max = 0xff;
#endif
            init_tmp(c, c_dst, tmp, bitdepth_max);

            // reset both destinations (c_dst was just used as scratch)
            CLEAR_PIXEL_RECT(c_dst);
            CLEAR_PIXEL_RECT(a_dst);

            call_ref(c_dst, c_dst_stride, tmp[0], tmp[1], w, h,
                     mask HIGHBD_TAIL_SUFFIX);
            call_new(a_dst, a_dst_stride, tmp[0], tmp[1], w, h,
                     mask HIGHBD_TAIL_SUFFIX);
            checkasm_check_pixel_padded(c_dst, c_dst_stride,
                                        a_dst, a_dst_stride,
                                        w, h, "dst");

            bench_new(a_dst, a_dst_stride, tmp[0], tmp[1], w, h,
                      mask HIGHBD_TAIL_SUFFIX);
        }
    }
    report("mask");
}
394 
/* Tests the three functions in c->w_mask[] (reported as 444, 422 and 420).
 * Each call writes both a destination block and a mask, so the test compares
 * the padded destination rectangles and the two masks. The mask comparison
 * uses the block size shifted right by ss_hor[] / ss_ver[]. */
static void check_w_mask(Dav1dMCDSPContext *const c) {
    ALIGN_STK_64(int16_t, tmp, 2, [128 * 128]);
    // c_dst is also handed to init_tmp() as its 135x135 scratch source
    PIXEL_RECT(c_dst, 135, 135);
    PIXEL_RECT(a_dst, 128, 128);
    ALIGN_STK_64(uint8_t, c_mask, 128 * 128,);
    ALIGN_STK_64(uint8_t, a_mask, 128 * 128,);

    declare_func(void, pixel *dst, ptrdiff_t dst_stride, const int16_t *tmp1,
                 const int16_t *tmp2, int w, int h, uint8_t *mask, int sign
                 HIGHBD_DECL_SUFFIX);

    static const uint16_t ss[] = { 444, 422, 420 };
    static const uint8_t ss_hor[] = { 0, 1, 1 };
    static const uint8_t ss_ver[] = { 0, 0, 1 };

    for (int i = 0; i < 3; i++) {
        for (int w = 4; w <= 128; w <<= 1) {
            if (!check_func(c->w_mask[i], "w_mask_%d_w%d_%dbpc", ss[i], w,
                            BITDEPTH))
            {
                continue;
            }

            const int h_min = imax(w / 4, 4);
            const int h_max = imin(w * 4, 128);
            // width of the mask that is compared; also used as its stride
            const int mask_w = w >> ss_hor[i];

            for (int h = h_min; h <= h_max; h <<= 1) {
                int sign = rnd() & 1;
#if BITDEPTH == 16
                const int bitdepth_max = rnd() & 1 ? 0x3ff : 0xfff;
#else
                const int bitdepth_max = 0xff;
#endif
                init_tmp(c, c_dst, tmp, bitdepth_max);

                // reset both destinations (c_dst was just used as scratch)
                CLEAR_PIXEL_RECT(c_dst);
                CLEAR_PIXEL_RECT(a_dst);

                call_ref(c_dst, c_dst_stride, tmp[0], tmp[1], w, h,
                         c_mask, sign HIGHBD_TAIL_SUFFIX);
                call_new(a_dst, a_dst_stride, tmp[0], tmp[1], w, h,
                         a_mask, sign HIGHBD_TAIL_SUFFIX);
                checkasm_check_pixel_padded(c_dst, c_dst_stride,
                                            a_dst, a_dst_stride,
                                            w, h, "dst");
                checkasm_check(uint8_t, c_mask, mask_w,
                                        a_mask, mask_w,
                                        mask_w, h >> ss_ver[i],
                                        "mask");

                bench_new(a_dst, a_dst_stride, tmp[0], tmp[1], w, h,
                          a_mask, sign HIGHBD_TAIL_SUFFIX);
            }
        }
    }
    report("w_mask");
}
446 
/* Tests c->blend for block sizes up to 32x32. Both destinations start from
 * the same random pixels, then call_ref() and call_new() are run with the
 * same random tmp pixels and mask (values 0..64), and the padded destination
 * rectangles compared. */
static void check_blend(Dav1dMCDSPContext *const c) {
    ALIGN_STK_64(pixel, tmp, 32 * 32,);
    PIXEL_RECT(c_dst, 32, 32);
    PIXEL_RECT(a_dst, 32, 32);
    ALIGN_STK_64(uint8_t, mask, 32 * 32,);

    declare_func(void, pixel *dst, ptrdiff_t dst_stride, const pixel *tmp,
                 int w, int h, const uint8_t *mask);

    for (int w = 4; w <= 32; w <<= 1) {
        if (!check_func(c->blend, "blend_w%d_%dbpc", w, BITDEPTH))
            continue;

        const int h_min = imax(w / 2, 4);
        const int h_max = imin(w * 2, 32);
        for (int h = h_min; h <= h_max; h <<= 1) {
#if BITDEPTH == 16
            const int bitdepth_max = rnd() & 1 ? 0x3ff : 0xfff;
#else
            const int bitdepth_max = 0xff;
#endif
            for (int n = 0; n < 32 * 32; n++) {
                tmp[n] = rnd() & bitdepth_max;
                mask[n] = rnd() % 65;
            }

            CLEAR_PIXEL_RECT(c_dst);
            CLEAR_PIXEL_RECT(a_dst);

            // seed the w x h block of both destinations identically
            for (int y = 0; y < h; y++) {
                pixel *const c_row = c_dst + y * PXSTRIDE(c_dst_stride);
                pixel *const a_row = a_dst + y * PXSTRIDE(a_dst_stride);
                for (int x = 0; x < w; x++)
                    c_row[x] = a_row[x] = rnd() & bitdepth_max;
            }

            call_ref(c_dst, c_dst_stride, tmp, w, h, mask);
            call_new(a_dst, a_dst_stride, tmp, w, h, mask);
            checkasm_check_pixel_padded(c_dst, c_dst_stride,
                                        a_dst, a_dst_stride,
                                        w, h, "dst");

            bench_new(alternate(c_dst, a_dst), a_dst_stride, tmp, w, h, mask);
        }
    }
    report("blend");
}
487 
/* Tests c->blend_v for widths 2..32 and heights 2..128 (2..64 when w == 2).
 * Both destinations start from the same random pixels, then call_ref() and
 * call_new() are run with the same random tmp pixels and the padded
 * destination rectangles compared. */
static void check_blend_v(Dav1dMCDSPContext *const c) {
    ALIGN_STK_64(pixel, tmp,   32 * 128,);
    PIXEL_RECT(c_dst, 32, 128);
    PIXEL_RECT(a_dst, 32, 128);

    declare_func(void, pixel *dst, ptrdiff_t dst_stride, const pixel *tmp,
                 int w, int h);

    for (int w = 2; w <= 32; w <<= 1) {
        if (!check_func(c->blend_v, "blend_v_w%d_%dbpc", w, BITDEPTH))
            continue;

        const int h_max = w == 2 ? 64 : 128;
        for (int h = 2; h <= h_max; h <<= 1) {
#if BITDEPTH == 16
            const int bitdepth_max = rnd() & 1 ? 0x3ff : 0xfff;
#else
            const int bitdepth_max = 0xff;
#endif

            CLEAR_PIXEL_RECT(c_dst);
            CLEAR_PIXEL_RECT(a_dst);

            // seed the w x h block of both destinations identically
            for (int y = 0; y < h; y++) {
                pixel *const c_row = c_dst + y * PXSTRIDE(c_dst_stride);
                pixel *const a_row = a_dst + y * PXSTRIDE(a_dst_stride);
                for (int x = 0; x < w; x++)
                    c_row[x] = a_row[x] = rnd() & bitdepth_max;
            }

            for (int n = 0; n < 32 * 128; n++)
                tmp[n] = rnd() & bitdepth_max;

            call_ref(c_dst, c_dst_stride, tmp, w, h);
            call_new(a_dst, a_dst_stride, tmp, w, h);
            checkasm_check_pixel_padded(c_dst, c_dst_stride,
                                        a_dst, a_dst_stride,
                                        w, h, "dst");

            bench_new(alternate(c_dst, a_dst), a_dst_stride, tmp, w, h);
        }
    }
    report("blend_v");
}
526 
/* Tests c->blend_h for widths 2..128 and heights 2..32 (4..32 when
 * w == 128). Both destinations start from the same random pixels, then
 * call_ref() and call_new() are run with the same random tmp pixels and the
 * padded destination rectangles compared. */
static void check_blend_h(Dav1dMCDSPContext *const c) {
    ALIGN_STK_64(pixel, tmp,   128 * 32,);
    PIXEL_RECT(c_dst, 128, 32);
    PIXEL_RECT(a_dst, 128, 32);

    declare_func(void, pixel *dst, ptrdiff_t dst_stride, const pixel *tmp,
                 int w, int h);

    for (int w = 2; w <= 128; w <<= 1) {
        if (!check_func(c->blend_h, "blend_h_w%d_%dbpc", w, BITDEPTH))
            continue;

        const int h_min = w == 128 ? 4 : 2;
        for (int h = h_min; h <= 32; h <<= 1) {
#if BITDEPTH == 16
            const int bitdepth_max = rnd() & 1 ? 0x3ff : 0xfff;
#else
            const int bitdepth_max = 0xff;
#endif
            CLEAR_PIXEL_RECT(c_dst);
            CLEAR_PIXEL_RECT(a_dst);

            // seed the w x h block of both destinations identically
            for (int y = 0; y < h; y++) {
                pixel *const c_row = c_dst + y * PXSTRIDE(c_dst_stride);
                pixel *const a_row = a_dst + y * PXSTRIDE(a_dst_stride);
                for (int x = 0; x < w; x++)
                    c_row[x] = a_row[x] = rnd() & bitdepth_max;
            }

            for (int n = 0; n < 128 * 32; n++)
                tmp[n] = rnd() & bitdepth_max;

            call_ref(c_dst, c_dst_stride, tmp, w, h);
            call_new(a_dst, a_dst_stride, tmp, w, h);
            checkasm_check_pixel_padded(c_dst, c_dst_stride,
                                        a_dst, a_dst_stride,
                                        w, h, "dst");

            bench_new(alternate(c_dst, a_dst), a_dst_stride, tmp, w, h);
        }
    }
    report("blend_h");
}
564 
/* Tests c->warp8x8: call_ref() and call_new() are run once on the same
 * random 15x15 source with random abcd[], mx and my values, and their padded
 * 8x8 destination rectangles compared. */
static void check_warp8x8(Dav1dMCDSPContext *const c) {
    ALIGN_STK_64(pixel, src_buf, 15 * 15,);
    PIXEL_RECT(c_dst, 8, 8);
    PIXEL_RECT(a_dst, 8, 8);
    int16_t abcd[4];
    const ptrdiff_t src_stride = 15 * sizeof(pixel);
    // the block origin sits 3 rows and 3 columns inside the 15x15 source
    const pixel *src = src_buf + 15 * 3 + 3;

    declare_func(void, pixel *dst, ptrdiff_t dst_stride, const pixel *src,
                 ptrdiff_t src_stride, const int16_t *abcd, int mx, int my
                 HIGHBD_DECL_SUFFIX);

    if (check_func(c->warp8x8, "warp_8x8_%dbpc", BITDEPTH)) {
        // mx, my and every abcd[] entry are random in -0xa00 .. 0x15ff
        const int mx = (rnd() & 0x1fff) - 0xa00;
        const int my = (rnd() & 0x1fff) - 0xa00;
#if BITDEPTH == 16
        const int bitdepth_max = rnd() & 1 ? 0x3ff : 0xfff;
#else
        const int bitdepth_max = 0xff;
#endif

        for (int n = 0; n < 4; n++)
            abcd[n] = (rnd() & 0x1fff) - 0xa00;

        for (int n = 0; n < 15 * 15; n++)
            src_buf[n] = rnd() & bitdepth_max;

        CLEAR_PIXEL_RECT(c_dst);
        CLEAR_PIXEL_RECT(a_dst);

        call_ref(c_dst, c_dst_stride, src, src_stride, abcd,
                 mx, my HIGHBD_TAIL_SUFFIX);
        call_new(a_dst, a_dst_stride, src, src_stride, abcd,
                 mx, my HIGHBD_TAIL_SUFFIX);
        checkasm_check_pixel_padded(c_dst, c_dst_stride,
                                    a_dst, a_dst_stride,
                                    8, 8, "dst");

        bench_new(a_dst, a_dst_stride, src, src_stride, abcd,
                  mx, my HIGHBD_TAIL_SUFFIX);
    }
    report("warp8x8");
}
604 
/* Tests c->warp8x8t, which writes an 8x8 int16_t block: call_ref() and
 * call_new() are run once on the same random 15x15 source with random
 * abcd[], mx and my values, and their outputs compared. */
static void check_warp8x8t(Dav1dMCDSPContext *const c) {
    ALIGN_STK_64(pixel, src_buf, 15 * 15,);
    ALIGN_STK_64(int16_t, c_tmp,  8 *  8,);
    ALIGN_STK_64(int16_t, a_tmp,  8 *  8,);
    int16_t abcd[4];
    const ptrdiff_t src_stride = 15 * sizeof(pixel);
    // the block origin sits 3 rows and 3 columns inside the 15x15 source
    const pixel *src = src_buf + 15 * 3 + 3;

    declare_func(void, int16_t *tmp, ptrdiff_t tmp_stride, const pixel *src,
                 ptrdiff_t src_stride, const int16_t *abcd, int mx, int my
                 HIGHBD_DECL_SUFFIX);

    if (check_func(c->warp8x8t, "warp_8x8t_%dbpc", BITDEPTH)) {
        // mx, my and every abcd[] entry are random in -0xa00 .. 0x15ff
        const int mx = (rnd() & 0x1fff) - 0xa00;
        const int my = (rnd() & 0x1fff) - 0xa00;
#if BITDEPTH == 16
        const int bitdepth_max = rnd() & 1 ? 0x3ff : 0xfff;
#else
        const int bitdepth_max = 0xff;
#endif

        for (int n = 0; n < 4; n++)
            abcd[n] = (rnd() & 0x1fff) - 0xa00;

        for (int n = 0; n < 15 * 15; n++)
            src_buf[n] = rnd() & bitdepth_max;

        // the function takes its tmp stride as 8, while checkasm_check()
        // is given the same row pitch as 8 * sizeof(int16_t)
        call_ref(c_tmp, 8, src, src_stride, abcd, mx, my HIGHBD_TAIL_SUFFIX);
        call_new(a_tmp, 8, src, src_stride, abcd, mx, my HIGHBD_TAIL_SUFFIX);
        checkasm_check(int16_t, c_tmp, 8 * sizeof(*c_tmp),
                                a_tmp, 8 * sizeof(*a_tmp),
                                8, 8, "tmp");

        bench_new(a_tmp, 8, src, src_stride, abcd, mx, my HIGHBD_TAIL_SUFFIX);
    }
    report("warp8x8t");
}
642 
/* Bit flags used by the emu_edge test: a set bit means the block does not
 * cross that side of the picture. */
enum EdgeFlags {
    HAVE_TOP    = 1 << 0,
    HAVE_BOTTOM = 1 << 1,
    HAVE_LEFT   = 1 << 2,
    HAVE_RIGHT  = 1 << 3,
};
649 
random_offset_for_edge(int * const x,int * const y,const int bw,const int bh,int * const iw,int * const ih,const enum EdgeFlags edge)650 static void random_offset_for_edge(int *const x, int *const y,
651                                    const int bw, const int bh,
652                                    int *const iw, int *const ih,
653                                    const enum EdgeFlags edge)
654 {
655 #define set_off(edge1, edge2, pos, dim) \
656     *i##dim = edge & (HAVE_##edge1 | HAVE_##edge2) ? 160 : 1 + (rnd() % (b##dim - 2)); \
657     switch (edge & (HAVE_##edge1 | HAVE_##edge2)) { \
658     case HAVE_##edge1 | HAVE_##edge2: \
659         assert(b##dim <= *i##dim); \
660         *pos = rnd() % (*i##dim - b##dim + 1); \
661         break; \
662     case HAVE_##edge1: \
663         *pos = (*i##dim - b##dim) + 1 + (rnd() % (b##dim - 1)); \
664         break; \
665     case HAVE_##edge2: \
666         *pos = -(1 + (rnd() % (b##dim - 1))); \
667         break; \
668     case 0: \
669         assert(b##dim - 1 > *i##dim); \
670         *pos = -(1 + (rnd() % (b##dim - *i##dim - 1))); \
671         break; \
672     }
673     set_off(LEFT, RIGHT, x, w);
674     set_off(TOP, BOTTOM, y, h);
675 }
676 
/* Tests c->emu_edge: for every block size and every edge-flag combination
 * except 0xf, a random picture size and block position are drawn with
 * random_offset_for_edge(), call_ref() and call_new() are run on the same
 * 160x160 random source, and the bw x bh outputs compared. */
static void check_emuedge(Dav1dMCDSPContext *const c) {
    ALIGN_STK_64(pixel, c_dst, 135 * 192,);
    ALIGN_STK_64(pixel, a_dst, 135 * 192,);
    ALIGN_STK_64(pixel, src,   160 * 160,);
    const ptrdiff_t dst_pitch = 192 * sizeof(pixel);
    const ptrdiff_t src_pitch = 160 * sizeof(pixel);
    const unsigned pixel_mask = (1U << BITDEPTH) - 1;

    for (int n = 0; n < 160 * 160; n++)
        src[n] = rnd() & pixel_mask;

    declare_func(void, intptr_t bw, intptr_t bh, intptr_t iw, intptr_t ih,
                 intptr_t x, intptr_t y,
                 pixel *dst, ptrdiff_t dst_stride,
                 const pixel *src, ptrdiff_t src_stride);

    int x, y, iw, ih;
    for (int w = 4; w <= 128; w <<= 1) {
        if (!check_func(c->emu_edge, "emu_edge_w%d_%dbpc", w, BITDEPTH))
            continue;

        const int h_min = imax(w / 4, 4);
        const int h_max = imin(w * 4, 128);
        for (int h = h_min; h <= h_max; h <<= 1) {
            // we skip 0xf, since it implies that we don't need emu_edge
            for (enum EdgeFlags edge = 0; edge < 0xf; edge++) {
                // the block is up to 7 pixels larger than w x h
                const int bw = w + (rnd() & 7);
                const int bh = h + (rnd() & 7);
                random_offset_for_edge(&x, &y, bw, bh, &iw, &ih, edge);
                call_ref(bw, bh, iw, ih, x, y,
                         c_dst, dst_pitch, src, src_pitch);
                call_new(bw, bh, iw, ih, x, y,
                         a_dst, dst_pitch, src, src_pitch);
                checkasm_check_pixel(c_dst, dst_pitch,
                                     a_dst, dst_pitch,
                                     bw, bh, "dst");
            }
        }

        // benchmark one run per single-flag case (1, 2, 4, 8) on a square
        // block of side w + 7
        const int bench_dim = w + 7;
        for (enum EdgeFlags edge = 1; edge < 0xf; edge <<= 1) {
            random_offset_for_edge(&x, &y, bench_dim, bench_dim,
                                   &iw, &ih, edge);
            bench_new(bench_dim, bench_dim, iw, ih, x, y,
                      a_dst, dst_pitch, src, src_pitch);
        }
    }
    report("emu_edge");
}
716 
/* Computes the initial horizontal position passed to the resize function as
 * mx0, for scaling in_w input pixels to out_w output pixels with the given
 * step. Positions and step use 14 fractional bits; the result is reduced to
 * its low 14 bits. */
static int get_upscale_x0(const int in_w, const int out_w, const int step) {
    // difference between the span covered by out_w steps and the input span
    const int err = out_w * step - (in_w << 14);
    // rounded numerator of the centering term
    const int numer = (out_w >> 1) - ((out_w - in_w) << 13);
    const int x0 = numer / out_w + 128 - (err >> 1);

    return x0 & 0x3fff;
}
722 
/* Tests c->resize: a random source width (16..512) is scaled by a random
 * factor w_den / 8 with w_den in 9..16, call_ref() and call_new() are run on
 * the same random 512x64 source, and their padded destination rectangles
 * compared. */
static void check_resize(Dav1dMCDSPContext *const c) {
    PIXEL_RECT(c_dst, 1024, 64);
    PIXEL_RECT(a_dst, 1024, 64);
    ALIGN_STK_64(pixel, src, 512 * 64,);

    const int height = 64;
    const int max_src_width = 512;
    const ptrdiff_t src_stride = 512 * sizeof(pixel);

    // The parameter names follow the order used by every call below:
    // destination width, then height, then source width.
    declare_func(void, pixel *dst, ptrdiff_t dst_stride,
                 const pixel *src, ptrdiff_t src_stride,
                 int dst_w, int h, int src_w, int dx, int mx0
                 HIGHBD_DECL_SUFFIX);

    if (check_func(c->resize, "resize_%dbpc", BITDEPTH)) {
#if BITDEPTH == 16
        const int bitdepth_max = rnd() & 1 ? 0x3ff : 0xfff;
#else
        const int bitdepth_max = 0xff;
#endif

        for (int i = 0; i < max_src_width * height; i++)
            src[i] = rnd() & bitdepth_max;

        // dst_w = src_w * w_den / 8, so at most 512 * 16 / 8 = 1024
        const int w_den = 9 + (rnd() & 7);
        const int src_w = 16 + (rnd() % (max_src_width - 16 + 1));
        const int dst_w = w_den * src_w >> 3;
        // ref_sz / this_sz with 14 fractional bits, rounded to nearest
#define scale_fac(ref_sz, this_sz) \
    ((((ref_sz) << 14) + ((this_sz) >> 1)) / (this_sz))
        const int dx = scale_fac(src_w, dst_w);
#undef scale_fac
        const int mx0 = get_upscale_x0(src_w, dst_w, dx);

        CLEAR_PIXEL_RECT(c_dst);
        CLEAR_PIXEL_RECT(a_dst);

        call_ref(c_dst, c_dst_stride, src, src_stride,
                 dst_w, height, src_w, dx, mx0 HIGHBD_TAIL_SUFFIX);
        call_new(a_dst, a_dst_stride, src, src_stride,
                 dst_w, height, src_w, dx, mx0 HIGHBD_TAIL_SUFFIX);
        checkasm_check_pixel_padded_align(c_dst, c_dst_stride, a_dst, a_dst_stride,
                                          dst_w, height, "dst", 16, 1);

        // benchmark a fixed 512-wide output with the same w_den ratio
        bench_new(a_dst, a_dst_stride, src, src_stride,
                  512, height, 512 * 8 / w_den, dx, mx0 HIGHBD_TAIL_SUFFIX);
    }

    report("resize");
}
772 
bitfn(checkasm_check_mc)773 void bitfn(checkasm_check_mc)(void) {
774     Dav1dMCDSPContext c;
775     bitfn(dav1d_mc_dsp_init)(&c);
776 
777     check_mc(&c);
778     check_mct(&c);
779     check_mc_scaled(&c);
780     check_mct_scaled(&c);
781     check_avg(&c);
782     check_w_avg(&c);
783     check_mask(&c);
784     check_w_mask(&c);
785     check_blend(&c);
786     check_blend_v(&c);
787     check_blend_h(&c);
788     check_warp8x8(&c);
789     check_warp8x8t(&c);
790     check_emuedge(&c);
791     check_resize(&c);
792 }
793