1 /*
2 * Copyright © 2018, VideoLAN and dav1d authors
3 * Copyright © 2018, Two Orioles, LLC
4 * All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions are met:
8 *
9 * 1. Redistributions of source code must retain the above copyright notice, this
10 * list of conditions and the following disclaimer.
11 *
12 * 2. Redistributions in binary form must reproduce the above copyright notice,
13 * this list of conditions and the following disclaimer in the documentation
14 * and/or other materials provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
20 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
21 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
22 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
23 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 */
27
28 #include "tests/checkasm/checkasm.h"
29
30 #include "src/levels.h"
31 #include "src/mc.h"
32
/* Name fragment for each 2D filter; indexed by the filter loop counter
 * (0 .. N_2D_FILTERS - 1) when composing test names. */
static const char *const filter_names[] = {
    "8tap_regular",
    "8tap_regular_smooth",
    "8tap_regular_sharp",
    "8tap_sharp_regular",
    "8tap_sharp_smooth",
    "8tap_sharp",
    "8tap_smooth_regular",
    "8tap_smooth",
    "8tap_smooth_sharp",
    "bilinear",
};

/* Name fragment per subpel mode: bit 0 of the index means a non-zero
 * horizontal offset is used, bit 1 a non-zero vertical offset. */
static const char *const mxy_names[] = {
    "0",
    "h",
    "v",
    "hv",
};

/* Name suffix per scaled-MC path: index 0 uses a random dy, indices 1 and 2
 * force dy to (index << 10). */
static const char *const scaled_paths[] = {
    "",
    "_dy1",
    "_dy2",
};
42
/*
 * Return the block height to test after h.
 *
 * Starting from 2 this walks 2, 4, 6, 8, 12, 16, 24, 32, 64, 128, ...:
 * the intermediate heights 6, 12 and 24 are inserted between the powers of
 * two; every other value is simply doubled.
 */
static int mc_h_next(const int h) {
    switch (h) {
    /* power of two -> 1.5x */
    case 4:
        return 6;
    case 8:
        return 12;
    case 16:
        return 24;
    /* 1.5x step -> next power of two */
    case 6:
        return 8;
    case 12:
        return 16;
    case 24:
        return 32;
    default:
        return h * 2;
    }
}
57
/*
 * Verify every c->mc[] function against its reference for all filters,
 * widths 2..128, the four subpel modes (none / h / v / hv) and the heights
 * produced by mc_h_next(). Source pixels are re-randomized for each height.
 */
static void check_mc(Dav1dMCDSPContext *const c) {
    ALIGN_STK_64(pixel, src_buf, 135 * 135,);
    PIXEL_RECT(c_dst, 128, 128);
    PIXEL_RECT(a_dst, 128, 128);
    /* src starts 3 rows down and 3 columns in from the buffer origin. */
    const ptrdiff_t src_stride = 135 * sizeof(pixel);
    const pixel *src = src_buf + 135 * 3 + 3;

    declare_func(void, pixel *dst, ptrdiff_t dst_stride, const pixel *src,
                 ptrdiff_t src_stride, int w, int h, int mx, int my
                 HIGHBD_DECL_SUFFIX);

    for (int filter = 0; filter < N_2D_FILTERS; filter++) {
        /* Only these three filters are benchmarked. */
        const int do_bench = filter == FILTER_2D_8TAP_REGULAR ||
                             filter == FILTER_2D_8TAP_SHARP ||
                             filter == FILTER_2D_BILINEAR;

        for (int w = 2; w <= 128; w <<= 1) {
            const int h_min = w <= 32 ? 2 : w / 4;
            const int h_max = imax(imin(w * 4, 128), 32);

            for (int mxy = 0; mxy < 4; mxy++) {
                if (check_func(c->mc[filter], "mc_%s_w%d_%s_%dbpc",
                               filter_names[filter], w, mxy_names[mxy], BITDEPTH))
                {
                    for (int h = h_min; h <= h_max; h = mc_h_next(h)) {
                        /* Non-zero subpel offsets lie in 1..15. */
                        const int mx = (mxy & 1) ? 1 + rnd() % 15 : 0;
                        const int my = (mxy & 2) ? 1 + rnd() % 15 : 0;
#if BITDEPTH == 16
                        const int bitdepth_max = rnd() & 1 ? 0x3ff : 0xfff;
#else
                        const int bitdepth_max = 0xff;
#endif

                        for (int n = 0; n < 135 * 135; n++) {
                            src_buf[n] = rnd() & bitdepth_max;
                        }

                        CLEAR_PIXEL_RECT(c_dst);
                        CLEAR_PIXEL_RECT(a_dst);

                        call_ref(c_dst, c_dst_stride, src, src_stride, w, h,
                                 mx, my HIGHBD_TAIL_SUFFIX);
                        call_new(a_dst, a_dst_stride, src, src_stride, w, h,
                                 mx, my HIGHBD_TAIL_SUFFIX);
                        checkasm_check_pixel_padded(c_dst, c_dst_stride,
                                                    a_dst, a_dst_stride,
                                                    w, h, "dst");

                        if (do_bench) {
                            bench_new(a_dst, a_dst_stride, src, src_stride, w, h,
                                      mx, my HIGHBD_TAIL_SUFFIX);
                        }
                    }
                }
            }
        }
    }
    report("mc");
}
112
113 /* Generate worst case input in the topleft corner, randomize the rest */
generate_mct_input(pixel * const buf,const int bitdepth_max)114 static void generate_mct_input(pixel *const buf, const int bitdepth_max) {
115 static const int8_t pattern[8] = { -1, 0, -1, 0, 0, -1, 0, -1 };
116 const int sign = -(rnd() & 1);
117
118 for (int y = 0; y < 135; y++)
119 for (int x = 0; x < 135; x++)
120 buf[135*y+x] = ((x | y) < 8 ? (pattern[x] ^ pattern[y] ^ sign)
121 : rnd()) & bitdepth_max;
122 }
123
/*
 * Verify every c->mct[] function (output to an int16_t intermediate buffer)
 * against its reference for all filters, widths 4..128, the four subpel
 * modes and power-of-two heights from max(w/4, 4) to min(w*4, 128).
 * Input comes from generate_mct_input().
 */
static void check_mct(Dav1dMCDSPContext *const c) {
    ALIGN_STK_64(pixel, src_buf, 135 * 135,);
    ALIGN_STK_64(int16_t, c_tmp, 128 * 128,);
    ALIGN_STK_64(int16_t, a_tmp, 128 * 128,);
    /* src starts 3 rows down and 3 columns in from the buffer origin. */
    const ptrdiff_t src_stride = 135 * sizeof(pixel);
    const pixel *src = src_buf + 135 * 3 + 3;

    declare_func(void, int16_t *tmp, const pixel *src, ptrdiff_t src_stride,
                 int w, int h, int mx, int my HIGHBD_DECL_SUFFIX);

    for (int filter = 0; filter < N_2D_FILTERS; filter++) {
        /* Only these three filters are benchmarked. */
        const int do_bench = filter == FILTER_2D_8TAP_REGULAR ||
                             filter == FILTER_2D_8TAP_SHARP ||
                             filter == FILTER_2D_BILINEAR;

        for (int w = 4; w <= 128; w <<= 1) {
            const int h_min = imax(w / 4, 4);
            const int h_max = imin(w * 4, 128);

            for (int mxy = 0; mxy < 4; mxy++) {
                if (check_func(c->mct[filter], "mct_%s_w%d_%s_%dbpc",
                               filter_names[filter], w, mxy_names[mxy], BITDEPTH))
                {
                    for (int h = h_min; h <= h_max; h <<= 1) {
                        /* Non-zero subpel offsets lie in 1..15. */
                        const int mx = (mxy & 1) ? 1 + rnd() % 15 : 0;
                        const int my = (mxy & 2) ? 1 + rnd() % 15 : 0;
#if BITDEPTH == 16
                        const int bitdepth_max = rnd() & 1 ? 0x3ff : 0xfff;
#else
                        const int bitdepth_max = 0xff;
#endif
                        generate_mct_input(src_buf, bitdepth_max);

                        call_ref(c_tmp, src, src_stride, w, h,
                                 mx, my HIGHBD_TAIL_SUFFIX);
                        call_new(a_tmp, src, src_stride, w, h,
                                 mx, my HIGHBD_TAIL_SUFFIX);
                        /* Output rows are compared with a stride of w elements. */
                        checkasm_check(int16_t, c_tmp, w * sizeof(*c_tmp),
                                                a_tmp, w * sizeof(*a_tmp),
                                                w, h, "tmp");

                        if (do_bench) {
                            bench_new(a_tmp, src, src_stride, w, h,
                                      mx, my HIGHBD_TAIL_SUFFIX);
                        }
                    }
                }
            }
        }
    }
    report("mct");
}
168
/*
 * Verify every c->mc_scaled[] function against its reference for all
 * filters, widths 2..128, the three scaled paths (random dy, dy = 1 << 10,
 * dy = 2 << 10) and the heights produced by mc_h_next().
 * bitdepth_max is picked once for the whole run.
 */
static void check_mc_scaled(Dav1dMCDSPContext *const c) {
    ALIGN_STK_64(pixel, src_buf, 263 * 263,);
    PIXEL_RECT(c_dst, 128, 128);
    PIXEL_RECT(a_dst, 128, 128);
    /* src starts 3 rows down and 3 columns in from the buffer origin. */
    const ptrdiff_t src_stride = 263 * sizeof(pixel);
    const pixel *src = src_buf + 263 * 3 + 3;
#if BITDEPTH == 16
    const int bitdepth_max = rnd() & 1 ? 0x3ff : 0xfff;
#else
    const int bitdepth_max = 0xff;
#endif

    declare_func(void, pixel *dst, ptrdiff_t dst_stride, const pixel *src,
                 ptrdiff_t src_stride, int w, int h,
                 int mx, int my, int dx, int dy HIGHBD_DECL_SUFFIX);

    for (int filter = 0; filter < N_2D_FILTERS; filter++) {
        /* Only these two filters are benchmarked. */
        const int do_bench = filter == FILTER_2D_8TAP_REGULAR ||
                             filter == FILTER_2D_BILINEAR;

        for (int w = 2; w <= 128; w <<= 1) {
            const int h_min = w <= 32 ? 2 : w / 4;
            const int h_max = imax(imin(w * 4, 128), 32);

            for (int p = 0; p < 3; ++p) {
                if (check_func(c->mc_scaled[filter], "mc_scaled_%s_w%d%s_%dbpc",
                               filter_names[filter], w, scaled_paths[p], BITDEPTH))
                {
                    for (int h = h_min; h <= h_max; h = mc_h_next(h)) {
                        const int mx = rnd() % 1024;
                        const int my = rnd() % 1024;
                        const int dx = 1 + rnd() % 2048;
                        /* p == 1 / p == 2 select the ystep=1.0 and
                         * ystep=2.0 paths; p == 0 uses a random step. */
                        const int dy = p ? p << 10 : 1 + rnd() % 2048;

                        for (int n = 0; n < 263 * 263; n++) {
                            src_buf[n] = rnd() & bitdepth_max;
                        }

                        CLEAR_PIXEL_RECT(c_dst);
                        CLEAR_PIXEL_RECT(a_dst);

                        call_ref(c_dst, c_dst_stride, src, src_stride,
                                 w, h, mx, my, dx, dy HIGHBD_TAIL_SUFFIX);
                        call_new(a_dst, a_dst_stride, src, src_stride,
                                 w, h, mx, my, dx, dy HIGHBD_TAIL_SUFFIX);
                        checkasm_check_pixel_padded(c_dst, c_dst_stride,
                                                    a_dst, a_dst_stride,
                                                    w, h, "dst");

                        if (do_bench) {
                            bench_new(a_dst, a_dst_stride, src, src_stride,
                                      w, h, mx, my, dx, dy HIGHBD_TAIL_SUFFIX);
                        }
                    }
                }
            }
        }
    }
    report("mc_scaled");
}
225
/*
 * Verify every c->mct_scaled[] function (output to an int16_t intermediate
 * buffer) against its reference for all filters, widths 4..128, the three
 * scaled paths and the heights produced by mc_h_next() between
 * max(w/4, 4) and min(w*4, 128). bitdepth_max is picked once for the run.
 */
static void check_mct_scaled(Dav1dMCDSPContext *const c) {
    ALIGN_STK_64(pixel, src_buf, 263 * 263,);
    ALIGN_STK_64(int16_t, c_tmp, 128 * 128,);
    ALIGN_STK_64(int16_t, a_tmp, 128 * 128,);
    /* src starts 3 rows down and 3 columns in from the buffer origin. */
    const ptrdiff_t src_stride = 263 * sizeof(pixel);
    const pixel *src = src_buf + 263 * 3 + 3;
#if BITDEPTH == 16
    const int bitdepth_max = rnd() & 1 ? 0x3ff : 0xfff;
#else
    const int bitdepth_max = 0xff;
#endif

    declare_func(void, int16_t *tmp, const pixel *src, ptrdiff_t src_stride,
                 int w, int h, int mx, int my, int dx, int dy HIGHBD_DECL_SUFFIX);

    for (int filter = 0; filter < N_2D_FILTERS; filter++) {
        /* Only these two filters are benchmarked. */
        const int do_bench = filter == FILTER_2D_8TAP_REGULAR ||
                             filter == FILTER_2D_BILINEAR;

        for (int w = 4; w <= 128; w <<= 1) {
            const int h_min = imax(w / 4, 4);
            const int h_max = imin(w * 4, 128);

            for (int p = 0; p < 3; ++p) {
                if (check_func(c->mct_scaled[filter], "mct_scaled_%s_w%d%s_%dbpc",
                               filter_names[filter], w, scaled_paths[p], BITDEPTH))
                {
                    for (int h = h_min; h <= h_max; h = mc_h_next(h)) {
                        const int mx = rnd() % 1024;
                        const int my = rnd() % 1024;
                        const int dx = 1 + rnd() % 2048;
                        /* p == 1 / p == 2 select the ystep=1.0 and
                         * ystep=2.0 paths; p == 0 uses a random step. */
                        const int dy = p ? p << 10 : 1 + rnd() % 2048;

                        for (int n = 0; n < 263 * 263; n++) {
                            src_buf[n] = rnd() & bitdepth_max;
                        }

                        call_ref(c_tmp, src, src_stride,
                                 w, h, mx, my, dx, dy HIGHBD_TAIL_SUFFIX);
                        call_new(a_tmp, src, src_stride,
                                 w, h, mx, my, dx, dy HIGHBD_TAIL_SUFFIX);
                        /* Output rows are compared with a stride of w elements. */
                        checkasm_check(int16_t, c_tmp, w * sizeof(*c_tmp),
                                                a_tmp, w * sizeof(*a_tmp),
                                                w, h, "tmp");

                        if (do_bench) {
                            bench_new(a_tmp, src, src_stride,
                                      w, h, mx, my, dx, dy HIGHBD_TAIL_SUFFIX);
                        }
                    }
                }
            }
        }
    }
    report("mct_scaled");
}
277
/*
 * Produce two 128x128 int16_t intermediate blocks in tmp[0] and tmp[1].
 *
 * For each block, buf (used as a 135x135 scratch pixel buffer) is refilled
 * by generate_mct_input() and then run through the 8-tap sharp mct filter
 * with mx = my = 8.
 */
static void init_tmp(Dav1dMCDSPContext *const c, pixel *const buf,
                     int16_t (*const tmp)[128 * 128], const int bitdepth_max)
{
    const ptrdiff_t stride = 135 * sizeof(pixel);
    /* Same 3-row / 3-column inset the mc/mct tests use. */
    const pixel *const src = buf + 135 * 3 + 3;

    for (int n = 0; n < 2; n++) {
        generate_mct_input(buf, bitdepth_max);
        c->mct[FILTER_2D_8TAP_SHARP](tmp[n], src, stride, 128, 128,
                                      8, 8 HIGHBD_TAIL_SUFFIX);
    }
}
288
/*
 * Verify c->avg against its reference for widths 4..128 and power-of-two
 * heights from max(w/4, 4) to min(w*4, 128). Every size is benchmarked.
 */
static void check_avg(Dav1dMCDSPContext *const c) {
    ALIGN_STK_64(int16_t, tmp, 2, [128 * 128]);
    /* c_dst also serves as the scratch pixel buffer handed to init_tmp(),
     * which fills 135x135 pixels. */
    PIXEL_RECT(c_dst, 135, 135);
    PIXEL_RECT(a_dst, 128, 128);

    declare_func(void, pixel *dst, ptrdiff_t dst_stride, const int16_t *tmp1,
                 const int16_t *tmp2, int w, int h HIGHBD_DECL_SUFFIX);

    for (int w = 4; w <= 128; w <<= 1) {
        if (check_func(c->avg, "avg_w%d_%dbpc", w, BITDEPTH)) {
            const int h_min = imax(w / 4, 4);
            const int h_max = imin(w * 4, 128);

            for (int h = h_min; h <= h_max; h <<= 1) {
#if BITDEPTH == 16
                const int bitdepth_max = rnd() & 1 ? 0x3ff : 0xfff;
#else
                const int bitdepth_max = 0xff;
#endif

                init_tmp(c, c_dst, tmp, bitdepth_max);

                /* Wipe the scratch contents before the real comparison. */
                CLEAR_PIXEL_RECT(c_dst);
                CLEAR_PIXEL_RECT(a_dst);

                call_ref(c_dst, c_dst_stride, tmp[0], tmp[1], w, h HIGHBD_TAIL_SUFFIX);
                call_new(a_dst, a_dst_stride, tmp[0], tmp[1], w, h HIGHBD_TAIL_SUFFIX);
                checkasm_check_pixel_padded(c_dst, c_dst_stride, a_dst, a_dst_stride,
                                            w, h, "dst");

                bench_new(a_dst, a_dst_stride, tmp[0], tmp[1], w, h HIGHBD_TAIL_SUFFIX);
            }
        }
    }
    report("avg");
}
322
/*
 * Verify c->w_avg against its reference for widths 4..128 and power-of-two
 * heights from max(w/4, 4) to min(w*4, 128), with a random weight in 1..15
 * per size. Every size is benchmarked.
 */
static void check_w_avg(Dav1dMCDSPContext *const c) {
    ALIGN_STK_64(int16_t, tmp, 2, [128 * 128]);
    /* c_dst also serves as the scratch pixel buffer handed to init_tmp(),
     * which fills 135x135 pixels. */
    PIXEL_RECT(c_dst, 135, 135);
    PIXEL_RECT(a_dst, 128, 128);

    declare_func(void, pixel *dst, ptrdiff_t dst_stride, const int16_t *tmp1,
                 const int16_t *tmp2, int w, int h, int weight HIGHBD_DECL_SUFFIX);

    for (int w = 4; w <= 128; w <<= 1) {
        if (check_func(c->w_avg, "w_avg_w%d_%dbpc", w, BITDEPTH)) {
            const int h_min = imax(w / 4, 4);
            const int h_max = imin(w * 4, 128);

            for (int h = h_min; h <= h_max; h <<= 1) {
                int weight = 1 + rnd() % 15;
#if BITDEPTH == 16
                const int bitdepth_max = rnd() & 1 ? 0x3ff : 0xfff;
#else
                const int bitdepth_max = 0xff;
#endif
                init_tmp(c, c_dst, tmp, bitdepth_max);

                /* Wipe the scratch contents before the real comparison. */
                CLEAR_PIXEL_RECT(c_dst);
                CLEAR_PIXEL_RECT(a_dst);

                call_ref(c_dst, c_dst_stride, tmp[0], tmp[1], w, h, weight HIGHBD_TAIL_SUFFIX);
                call_new(a_dst, a_dst_stride, tmp[0], tmp[1], w, h, weight HIGHBD_TAIL_SUFFIX);
                checkasm_check_pixel_padded(c_dst, c_dst_stride, a_dst, a_dst_stride,
                                            w, h, "dst");

                bench_new(a_dst, a_dst_stride, tmp[0], tmp[1], w, h, weight HIGHBD_TAIL_SUFFIX);
            }
        }
    }
    report("w_avg");
}
356
/*
 * Verify c->mask against its reference for widths 4..128 and power-of-two
 * heights from max(w/4, 4) to min(w*4, 128). The mask is filled once, up
 * front, with random values in 0..64. Every size is benchmarked.
 */
static void check_mask(Dav1dMCDSPContext *const c) {
    ALIGN_STK_64(int16_t, tmp, 2, [128 * 128]);
    /* c_dst also serves as the scratch pixel buffer handed to init_tmp(),
     * which fills 135x135 pixels. */
    PIXEL_RECT(c_dst, 135, 135);
    PIXEL_RECT(a_dst, 128, 128);
    ALIGN_STK_64(uint8_t, mask, 128 * 128,);

    for (int n = 0; n < 128 * 128; n++) {
        mask[n] = rnd() % 65;
    }

    declare_func(void, pixel *dst, ptrdiff_t dst_stride, const int16_t *tmp1,
                 const int16_t *tmp2, int w, int h, const uint8_t *mask
                 HIGHBD_DECL_SUFFIX);

    for (int w = 4; w <= 128; w <<= 1) {
        if (check_func(c->mask, "mask_w%d_%dbpc", w, BITDEPTH)) {
            const int h_min = imax(w / 4, 4);
            const int h_max = imin(w * 4, 128);

            for (int h = h_min; h <= h_max; h <<= 1) {
#if BITDEPTH == 16
                const int bitdepth_max = rnd() & 1 ? 0x3ff : 0xfff;
#else
                const int bitdepth_max = 0xff;
#endif
                init_tmp(c, c_dst, tmp, bitdepth_max);

                /* Wipe the scratch contents before the real comparison. */
                CLEAR_PIXEL_RECT(c_dst);
                CLEAR_PIXEL_RECT(a_dst);

                call_ref(c_dst, c_dst_stride, tmp[0], tmp[1], w, h, mask HIGHBD_TAIL_SUFFIX);
                call_new(a_dst, a_dst_stride, tmp[0], tmp[1], w, h, mask HIGHBD_TAIL_SUFFIX);
                checkasm_check_pixel_padded(c_dst, c_dst_stride, a_dst, a_dst_stride,
                                            w, h, "dst");

                bench_new(a_dst, a_dst_stride, tmp[0], tmp[1], w, h, mask HIGHBD_TAIL_SUFFIX);
            }
        }
    }
    report("mask");
}
394
/*
 * Verify the three c->w_mask[] variants (named 444, 422, 420) against their
 * references for widths 4..128 and power-of-two heights from max(w/4, 4) to
 * min(w*4, 128). Both the blended pixels and the generated mask are
 * compared; the mask is checked at w >> ss_hor by h >> ss_ver.
 */
static void check_w_mask(Dav1dMCDSPContext *const c) {
    ALIGN_STK_64(int16_t, tmp, 2, [128 * 128]);
    /* c_dst also serves as the scratch pixel buffer handed to init_tmp(),
     * which fills 135x135 pixels. */
    PIXEL_RECT(c_dst, 135, 135);
    PIXEL_RECT(a_dst, 128, 128);
    ALIGN_STK_64(uint8_t, c_mask, 128 * 128,);
    ALIGN_STK_64(uint8_t, a_mask, 128 * 128,);

    declare_func(void, pixel *dst, ptrdiff_t dst_stride, const int16_t *tmp1,
                 const int16_t *tmp2, int w, int h, uint8_t *mask, int sign
                 HIGHBD_DECL_SUFFIX);

    /* Per-variant name tag and horizontal / vertical mask shift. */
    static const uint16_t ss[] = { 444, 422, 420 };
    static const uint8_t ss_hor[] = { 0, 1, 1 };
    static const uint8_t ss_ver[] = { 0, 0, 1 };

    for (int i = 0; i < 3; i++) {
        for (int w = 4; w <= 128; w <<= 1) {
            if (check_func(c->w_mask[i], "w_mask_%d_w%d_%dbpc", ss[i], w,
                           BITDEPTH))
            {
                const int h_min = imax(w / 4, 4);
                const int h_max = imin(w * 4, 128);
                const int mask_w = w >> ss_hor[i];

                for (int h = h_min; h <= h_max; h <<= 1) {
                    int sign = rnd() & 1;
#if BITDEPTH == 16
                    const int bitdepth_max = rnd() & 1 ? 0x3ff : 0xfff;
#else
                    const int bitdepth_max = 0xff;
#endif
                    init_tmp(c, c_dst, tmp, bitdepth_max);

                    /* Wipe the scratch contents before the real comparison. */
                    CLEAR_PIXEL_RECT(c_dst);
                    CLEAR_PIXEL_RECT(a_dst);

                    call_ref(c_dst, c_dst_stride, tmp[0], tmp[1], w, h,
                             c_mask, sign HIGHBD_TAIL_SUFFIX);
                    call_new(a_dst, a_dst_stride, tmp[0], tmp[1], w, h,
                             a_mask, sign HIGHBD_TAIL_SUFFIX);
                    checkasm_check_pixel_padded(c_dst, c_dst_stride,
                                                a_dst, a_dst_stride,
                                                w, h, "dst");
                    checkasm_check(uint8_t, c_mask, mask_w,
                                            a_mask, mask_w,
                                            mask_w, h >> ss_ver[i],
                                            "mask");

                    bench_new(a_dst, a_dst_stride, tmp[0], tmp[1], w, h,
                              a_mask, sign HIGHBD_TAIL_SUFFIX);
                }
            }
        }
    }
    report("w_mask");
}
446
/*
 * Verify c->blend against its reference for widths 4..32 and power-of-two
 * heights from max(w/2, 4) to min(w*2, 32). The blend source and mask
 * (values 0..64) are re-randomized per size, and both destinations start
 * from identical random pixels in the w x h area.
 */
static void check_blend(Dav1dMCDSPContext *const c) {
    ALIGN_STK_64(pixel, tmp, 32 * 32,);
    PIXEL_RECT(c_dst, 32, 32);
    PIXEL_RECT(a_dst, 32, 32);
    ALIGN_STK_64(uint8_t, mask, 32 * 32,);

    declare_func(void, pixel *dst, ptrdiff_t dst_stride, const pixel *tmp,
                 int w, int h, const uint8_t *mask);

    for (int w = 4; w <= 32; w <<= 1) {
        if (check_func(c->blend, "blend_w%d_%dbpc", w, BITDEPTH)) {
            const int h_min = imax(w / 2, 4);
            const int h_max = imin(w * 2, 32);

            for (int h = h_min; h <= h_max; h <<= 1) {
#if BITDEPTH == 16
                const int bitdepth_max = rnd() & 1 ? 0x3ff : 0xfff;
#else
                const int bitdepth_max = 0xff;
#endif
                for (int n = 0; n < 32 * 32; n++) {
                    tmp[n] = rnd() & bitdepth_max;
                    mask[n] = rnd() % 65;
                }

                CLEAR_PIXEL_RECT(c_dst);
                CLEAR_PIXEL_RECT(a_dst);

                /* Seed both destinations with the same random pixels. */
                for (int y = 0; y < h; y++) {
                    for (int x = 0; x < w; x++) {
                        const pixel px = rnd() & bitdepth_max;
                        a_dst[y*PXSTRIDE(a_dst_stride) + x] = px;
                        c_dst[y*PXSTRIDE(c_dst_stride) + x] = px;
                    }
                }

                call_ref(c_dst, c_dst_stride, tmp, w, h, mask);
                call_new(a_dst, a_dst_stride, tmp, w, h, mask);
                checkasm_check_pixel_padded(c_dst, c_dst_stride, a_dst, a_dst_stride,
                                            w, h, "dst");

                bench_new(alternate(c_dst, a_dst), a_dst_stride, tmp, w, h, mask);
            }
        }
    }
    report("blend");
}
487
/*
 * Verify c->blend_v against its reference for widths 2..32 and power-of-two
 * heights from 2 up to 128 (up to 64 when w == 2). Both destinations start
 * from identical random pixels; the blend source is re-randomized per size.
 */
static void check_blend_v(Dav1dMCDSPContext *const c) {
    ALIGN_STK_64(pixel, tmp, 32 * 128,);
    PIXEL_RECT(c_dst, 32, 128);
    PIXEL_RECT(a_dst, 32, 128);

    declare_func(void, pixel *dst, ptrdiff_t dst_stride, const pixel *tmp,
                 int w, int h);

    for (int w = 2; w <= 32; w <<= 1) {
        if (check_func(c->blend_v, "blend_v_w%d_%dbpc", w, BITDEPTH)) {
            const int h_max = w == 2 ? 64 : 128;

            for (int h = 2; h <= h_max; h <<= 1) {
#if BITDEPTH == 16
                const int bitdepth_max = rnd() & 1 ? 0x3ff : 0xfff;
#else
                const int bitdepth_max = 0xff;
#endif

                CLEAR_PIXEL_RECT(c_dst);
                CLEAR_PIXEL_RECT(a_dst);

                /* Seed both destinations with the same random pixels. */
                for (int y = 0; y < h; y++) {
                    for (int x = 0; x < w; x++) {
                        const pixel px = rnd() & bitdepth_max;
                        a_dst[y*PXSTRIDE(a_dst_stride) + x] = px;
                        c_dst[y*PXSTRIDE(c_dst_stride) + x] = px;
                    }
                }

                for (int n = 0; n < 32 * 128; n++) {
                    tmp[n] = rnd() & bitdepth_max;
                }

                call_ref(c_dst, c_dst_stride, tmp, w, h);
                call_new(a_dst, a_dst_stride, tmp, w, h);
                checkasm_check_pixel_padded(c_dst, c_dst_stride, a_dst, a_dst_stride,
                                            w, h, "dst");

                bench_new(alternate(c_dst, a_dst), a_dst_stride, tmp, w, h);
            }
        }
    }
    report("blend_v");
}
526
/*
 * Verify c->blend_h against its reference for widths 2..128 and
 * power-of-two heights from 2 (4 when w == 128) up to 32. Both destinations
 * start from identical random pixels; the blend source is re-randomized per
 * size.
 */
static void check_blend_h(Dav1dMCDSPContext *const c) {
    ALIGN_STK_64(pixel, tmp, 128 * 32,);
    PIXEL_RECT(c_dst, 128, 32);
    PIXEL_RECT(a_dst, 128, 32);

    declare_func(void, pixel *dst, ptrdiff_t dst_stride, const pixel *tmp,
                 int w, int h);

    for (int w = 2; w <= 128; w <<= 1) {
        if (check_func(c->blend_h, "blend_h_w%d_%dbpc", w, BITDEPTH)) {
            const int h_min = w == 128 ? 4 : 2;

            for (int h = h_min; h <= 32; h <<= 1) {
#if BITDEPTH == 16
                const int bitdepth_max = rnd() & 1 ? 0x3ff : 0xfff;
#else
                const int bitdepth_max = 0xff;
#endif
                CLEAR_PIXEL_RECT(c_dst);
                CLEAR_PIXEL_RECT(a_dst);

                /* Seed both destinations with the same random pixels. */
                for (int y = 0; y < h; y++) {
                    for (int x = 0; x < w; x++) {
                        const pixel px = rnd() & bitdepth_max;
                        a_dst[y*PXSTRIDE(a_dst_stride) + x] = px;
                        c_dst[y*PXSTRIDE(c_dst_stride) + x] = px;
                    }
                }

                for (int n = 0; n < 128 * 32; n++) {
                    tmp[n] = rnd() & bitdepth_max;
                }

                call_ref(c_dst, c_dst_stride, tmp, w, h);
                call_new(a_dst, a_dst_stride, tmp, w, h);
                checkasm_check_pixel_padded(c_dst, c_dst_stride, a_dst, a_dst_stride,
                                            w, h, "dst");

                bench_new(alternate(c_dst, a_dst), a_dst_stride, tmp, w, h);
            }
        }
    }
    report("blend_h");
}
564
/*
 * Verify c->warp8x8 against its reference on one random 15x15 source with
 * random mx, my and abcd[] values, each drawn as (rnd() & 0x1fff) - 0xa00.
 */
static void check_warp8x8(Dav1dMCDSPContext *const c) {
    ALIGN_STK_64(pixel, src_buf, 15 * 15,);
    PIXEL_RECT(c_dst, 8, 8);
    PIXEL_RECT(a_dst, 8, 8);
    int16_t abcd[4];
    /* src starts 3 rows down and 3 columns in from the buffer origin. */
    const ptrdiff_t src_stride = 15 * sizeof(pixel);
    const pixel *src = src_buf + 15 * 3 + 3;

    declare_func(void, pixel *dst, ptrdiff_t dst_stride, const pixel *src,
                 ptrdiff_t src_stride, const int16_t *abcd, int mx, int my
                 HIGHBD_DECL_SUFFIX);

    if (check_func(c->warp8x8, "warp_8x8_%dbpc", BITDEPTH)) {
        const int mx = (rnd() & 0x1fff) - 0xa00;
        const int my = (rnd() & 0x1fff) - 0xa00;
#if BITDEPTH == 16
        const int bitdepth_max = rnd() & 1 ? 0x3ff : 0xfff;
#else
        const int bitdepth_max = 0xff;
#endif

        for (int k = 0; k < 4; k++) {
            abcd[k] = (rnd() & 0x1fff) - 0xa00;
        }

        for (int n = 0; n < 15 * 15; n++) {
            src_buf[n] = rnd() & bitdepth_max;
        }

        CLEAR_PIXEL_RECT(c_dst);
        CLEAR_PIXEL_RECT(a_dst);

        call_ref(c_dst, c_dst_stride, src, src_stride, abcd, mx, my HIGHBD_TAIL_SUFFIX);
        call_new(a_dst, a_dst_stride, src, src_stride, abcd, mx, my HIGHBD_TAIL_SUFFIX);
        checkasm_check_pixel_padded(c_dst, c_dst_stride, a_dst, a_dst_stride,
                                    8, 8, "dst");

        bench_new(a_dst, a_dst_stride, src, src_stride, abcd, mx, my HIGHBD_TAIL_SUFFIX);
    }
    report("warp8x8");
}
604
/*
 * Verify c->warp8x8t (output to an 8x8 int16_t buffer) against its
 * reference on one random 15x15 source with random mx, my and abcd[]
 * values, each drawn as (rnd() & 0x1fff) - 0xa00.
 */
static void check_warp8x8t(Dav1dMCDSPContext *const c) {
    ALIGN_STK_64(pixel, src_buf, 15 * 15,);
    ALIGN_STK_64(int16_t, c_tmp, 8 * 8,);
    ALIGN_STK_64(int16_t, a_tmp, 8 * 8,);
    int16_t abcd[4];
    /* src starts 3 rows down and 3 columns in from the buffer origin. */
    const ptrdiff_t src_stride = 15 * sizeof(pixel);
    const pixel *src = src_buf + 15 * 3 + 3;

    declare_func(void, int16_t *tmp, ptrdiff_t tmp_stride, const pixel *src,
                 ptrdiff_t src_stride, const int16_t *abcd, int mx, int my
                 HIGHBD_DECL_SUFFIX);

    if (check_func(c->warp8x8t, "warp_8x8t_%dbpc", BITDEPTH)) {
        const int mx = (rnd() & 0x1fff) - 0xa00;
        const int my = (rnd() & 0x1fff) - 0xa00;
#if BITDEPTH == 16
        const int bitdepth_max = rnd() & 1 ? 0x3ff : 0xfff;
#else
        const int bitdepth_max = 0xff;
#endif

        for (int k = 0; k < 4; k++) {
            abcd[k] = (rnd() & 0x1fff) - 0xa00;
        }

        for (int n = 0; n < 15 * 15; n++) {
            src_buf[n] = rnd() & bitdepth_max;
        }

        /* The function receives 8 as tmp_stride, while the comparison below
         * uses a stride of 8 * sizeof(int16_t). */
        call_ref(c_tmp, 8, src, src_stride, abcd, mx, my HIGHBD_TAIL_SUFFIX);
        call_new(a_tmp, 8, src, src_stride, abcd, mx, my HIGHBD_TAIL_SUFFIX);
        checkasm_check(int16_t, c_tmp, 8 * sizeof(*c_tmp),
                                a_tmp, 8 * sizeof(*a_tmp),
                                8, 8, "tmp");

        bench_new(a_tmp, 8, src, src_stride, abcd, mx, my HIGHBD_TAIL_SUFFIX);
    }
    report("warp8x8t");
}
642
/* Bit flags naming the picture edges; OR them together to describe which
 * edges random_offset_for_edge() should treat as present. */
enum EdgeFlags {
    HAVE_TOP    = 1 << 0,
    HAVE_BOTTOM = 1 << 1,
    HAVE_LEFT   = 1 << 2,
    HAVE_RIGHT  = 1 << 3,
};
649
random_offset_for_edge(int * const x,int * const y,const int bw,const int bh,int * const iw,int * const ih,const enum EdgeFlags edge)650 static void random_offset_for_edge(int *const x, int *const y,
651 const int bw, const int bh,
652 int *const iw, int *const ih,
653 const enum EdgeFlags edge)
654 {
655 #define set_off(edge1, edge2, pos, dim) \
656 *i##dim = edge & (HAVE_##edge1 | HAVE_##edge2) ? 160 : 1 + (rnd() % (b##dim - 2)); \
657 switch (edge & (HAVE_##edge1 | HAVE_##edge2)) { \
658 case HAVE_##edge1 | HAVE_##edge2: \
659 assert(b##dim <= *i##dim); \
660 *pos = rnd() % (*i##dim - b##dim + 1); \
661 break; \
662 case HAVE_##edge1: \
663 *pos = (*i##dim - b##dim) + 1 + (rnd() % (b##dim - 1)); \
664 break; \
665 case HAVE_##edge2: \
666 *pos = -(1 + (rnd() % (b##dim - 1))); \
667 break; \
668 case 0: \
669 assert(b##dim - 1 > *i##dim); \
670 *pos = -(1 + (rnd() % (b##dim - *i##dim - 1))); \
671 break; \
672 }
673 set_off(LEFT, RIGHT, x, w);
674 set_off(TOP, BOTTOM, y, h);
675 }
676
/*
 * Verify c->emu_edge against its reference for widths 4..128, power-of-two
 * heights from max(w/4, 4) to min(w*4, 128) and every edge combination
 * except 0xf. Block sizes get a random 0..7 added to w and h. Benchmarks
 * run once per single-edge flag on a (w + 7) x (w + 7) block.
 */
static void check_emuedge(Dav1dMCDSPContext *const c) {
    ALIGN_STK_64(pixel, c_dst, 135 * 192,);
    ALIGN_STK_64(pixel, a_dst, 135 * 192,);
    ALIGN_STK_64(pixel, src, 160 * 160,);

    for (int n = 0; n < 160 * 160; n++) {
        src[n] = rnd() & ((1U << BITDEPTH) - 1);
    }

    declare_func(void, intptr_t bw, intptr_t bh, intptr_t iw, intptr_t ih,
                 intptr_t x, intptr_t y,
                 pixel *dst, ptrdiff_t dst_stride,
                 const pixel *src, ptrdiff_t src_stride);

    const ptrdiff_t out_stride = 192 * sizeof(pixel);
    const ptrdiff_t in_stride = 160 * sizeof(pixel);

    int x, y, iw, ih;
    for (int w = 4; w <= 128; w <<= 1) {
        if (check_func(c->emu_edge, "emu_edge_w%d_%dbpc", w, BITDEPTH)) {
            const int h_min = imax(w / 4, 4);
            const int h_max = imin(w * 4, 128);

            for (int h = h_min; h <= h_max; h <<= 1) {
                // we skip 0xf, since it implies that we don't need emu_edge
                for (enum EdgeFlags edge = 0; edge < 0xf; edge++) {
                    const int bw = w + (rnd() & 7);
                    const int bh = h + (rnd() & 7);
                    random_offset_for_edge(&x, &y, bw, bh, &iw, &ih, edge);
                    call_ref(bw, bh, iw, ih, x, y,
                             c_dst, out_stride, src, in_stride);
                    call_new(bw, bh, iw, ih, x, y,
                             a_dst, out_stride, src, in_stride);
                    checkasm_check_pixel(c_dst, out_stride,
                                         a_dst, out_stride,
                                         bw, bh, "dst");
                }
            }

            /* Benchmark with exactly one edge flag set at a time. */
            for (enum EdgeFlags edge = 1; edge < 0xf; edge <<= 1) {
                const int bsz = w + 7;
                random_offset_for_edge(&x, &y, bsz, bsz, &iw, &ih, edge);
                bench_new(bsz, bsz, iw, ih, x, y,
                          a_dst, out_stride, src, in_stride);
            }
        }
    }
    report("emu_edge");
}
716
/*
 * Compute the initial horizontal position passed to the resize function as
 * mx0, for scaling in_w source pixels to out_w output pixels with the given
 * per-pixel step. The result is reduced to its low 14 bits.
 */
static int get_upscale_x0(const int in_w, const int out_w, const int step) {
    /* Difference between out_w steps and in_w << 14. */
    const int err = out_w * step - (in_w << 14);
    const int growth = (out_w - in_w) << 13;
    const int half_out = out_w >> 1; /* rounding term for the division */
    const int x0 = (-growth + half_out) / out_w + 128 - (err >> 1);

    return x0 & 0x3fff;
}
722
/*
 * Verify c->resize against its reference on one random configuration:
 * a 64-row source of random width 16..512, upscaled by a random factor
 * w_den / 8 with w_den in 9..16 (so dst_w is at most 1024).
 *
 * The prototype below names its trailing integer parameters in the order
 * the call sites actually pass them: (dst_w, h, src_w, dx, mx0).
 */
static void check_resize(Dav1dMCDSPContext *const c) {
    PIXEL_RECT(c_dst, 1024, 64);
    PIXEL_RECT(a_dst, 1024, 64);
    ALIGN_STK_64(pixel, src, 512 * 64,);

    const int height = 64;
    const int max_src_width = 512;
    const ptrdiff_t src_stride = 512 * sizeof(pixel);

    declare_func(void, pixel *dst, ptrdiff_t dst_stride,
                 const pixel *src, ptrdiff_t src_stride,
                 int dst_w, int h, int src_w, int dx, int mx0
                 HIGHBD_DECL_SUFFIX);

    if (check_func(c->resize, "resize_%dbpc", BITDEPTH)) {
#if BITDEPTH == 16
        const int bitdepth_max = rnd() & 1 ? 0x3ff : 0xfff;
#else
        const int bitdepth_max = 0xff;
#endif

        for (int i = 0; i < max_src_width * height; i++)
            src[i] = rnd() & bitdepth_max;

        const int w_den = 9 + (rnd() & 7);
        const int src_w = 16 + (rnd() % (max_src_width - 16 + 1));
        const int dst_w = w_den * src_w >> 3;
        /* Source step per output pixel with 14 fractional bits, rounded to
         * nearest: (src_w << 14) / dst_w. */
        const int dx = ((src_w << 14) + (dst_w >> 1)) / dst_w;
        const int mx0 = get_upscale_x0(src_w, dst_w, dx);

        CLEAR_PIXEL_RECT(c_dst);
        CLEAR_PIXEL_RECT(a_dst);

        call_ref(c_dst, c_dst_stride, src, src_stride,
                 dst_w, height, src_w, dx, mx0 HIGHBD_TAIL_SUFFIX);
        call_new(a_dst, a_dst_stride, src, src_stride,
                 dst_w, height, src_w, dx, mx0 HIGHBD_TAIL_SUFFIX);
        checkasm_check_pixel_padded_align(c_dst, c_dst_stride, a_dst, a_dst_stride,
                                          dst_w, height, "dst", 16, 1);

        /* Benchmark at a fixed 512-pixel output width, reusing dx and mx0
         * from the verified configuration. */
        bench_new(a_dst, a_dst_stride, src, src_stride,
                  512, height, 512 * 8 / w_den, dx, mx0 HIGHBD_TAIL_SUFFIX);
    }

    report("resize");
}
772
bitfn(checkasm_check_mc)773 void bitfn(checkasm_check_mc)(void) {
774 Dav1dMCDSPContext c;
775 bitfn(dav1d_mc_dsp_init)(&c);
776
777 check_mc(&c);
778 check_mct(&c);
779 check_mc_scaled(&c);
780 check_mct_scaled(&c);
781 check_avg(&c);
782 check_w_avg(&c);
783 check_mask(&c);
784 check_w_mask(&c);
785 check_blend(&c);
786 check_blend_v(&c);
787 check_blend_h(&c);
788 check_warp8x8(&c);
789 check_warp8x8t(&c);
790 check_emuedge(&c);
791 check_resize(&c);
792 }
793