xref: /aosp_15_r20/external/libdav1d/tests/checkasm/checkasm.c (revision c09093415860a1c2373dacd84c4fde00c507cdfd)
1 /*
2  * Copyright © 2018, VideoLAN and dav1d authors
3  * Copyright © 2018, Two Orioles, LLC
4  * All rights reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions are met:
8  *
9  * 1. Redistributions of source code must retain the above copyright notice, this
10  *    list of conditions and the following disclaimer.
11  *
12  * 2. Redistributions in binary form must reproduce the above copyright notice,
13  *    this list of conditions and the following disclaimer in the documentation
14  *    and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19  * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
20  * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
21  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
22  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
23  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26  */
27 #include "tests/checkasm/checkasm.h"
28 
29 #include <errno.h>
30 #include <math.h>
31 #include <signal.h>
32 #include <stdarg.h>
33 #include <stdio.h>
34 #include <string.h>
35 
36 #include "src/cpu.h"
37 
38 #ifdef _WIN32
39 #ifndef SIGBUS
40 /* non-standard, use the same value as mingw-w64 */
41 #define SIGBUS 10
42 #endif
43 #ifndef ENABLE_VIRTUAL_TERMINAL_PROCESSING
44 #define ENABLE_VIRTUAL_TERMINAL_PROCESSING 0x04
45 #endif
46 #else
47 #include <time.h>
48 #if HAVE_UNISTD_H
49 #include <unistd.h>
50 #endif
51 #if HAVE_PTHREAD_SETAFFINITY_NP
52 #include <pthread.h>
53 #if HAVE_PTHREAD_NP_H
54 #include <pthread_np.h>
55 #endif
56 #endif
57 #ifdef __APPLE__
58 #include <mach/mach_time.h>
59 #endif
60 #endif
61 #if CONFIG_MACOS_KPERF
62 #include <dlfcn.h>
63 #endif
64 
65 #define COLOR_RED    31
66 #define COLOR_GREEN  32
67 #define COLOR_YELLOW 33
68 
/* List of tests to invoke.
 *
 * Each entry pairs a test name with its entry point. The name is what the
 * --test/-t wildcard pattern is matched against and what --list-tests
 * prints. The 8 bpc and 16 bpc groups are only compiled in when the
 * corresponding CONFIG_* option is enabled. The table ends with a zeroed
 * entry; loops over it stop at the first NULL name/func. */
static const struct {
    const char *name;
    void (*func)(void);
} tests[] = {
    { "msac", checkasm_check_msac },
    { "pal", checkasm_check_pal },
    { "refmvs", checkasm_check_refmvs },
#if CONFIG_8BPC
    { "cdef_8bpc", checkasm_check_cdef_8bpc },
    { "filmgrain_8bpc", checkasm_check_filmgrain_8bpc },
    { "ipred_8bpc", checkasm_check_ipred_8bpc },
    { "itx_8bpc", checkasm_check_itx_8bpc },
    { "loopfilter_8bpc", checkasm_check_loopfilter_8bpc },
    { "looprestoration_8bpc", checkasm_check_looprestoration_8bpc },
    { "mc_8bpc", checkasm_check_mc_8bpc },
#endif
#if CONFIG_16BPC
    { "cdef_16bpc", checkasm_check_cdef_16bpc },
    { "filmgrain_16bpc", checkasm_check_filmgrain_16bpc },
    { "ipred_16bpc", checkasm_check_ipred_16bpc },
    { "itx_16bpc", checkasm_check_itx_16bpc },
    { "loopfilter_16bpc", checkasm_check_loopfilter_16bpc },
    { "looprestoration_16bpc", checkasm_check_looprestoration_16bpc },
    { "mc_16bpc", checkasm_check_mc_16bpc },
#endif
    { 0 } /* terminator */
};
97 
/* List of cpu flags to check.
 *
 * name   - human readable label, printed as a heading by print_cpu_name()
 * suffix - short tag returned by cpu_suffix() and printed by --list-cpuflags
 * flag   - the DAV1D_*_CPU_FLAG_* bit that gets OR'ed into the cpu flags mask
 *
 * Only the entries of the architecture being built are present. Entries are
 * checked in table order by main(), while cpu_suffix() scans from the end so
 * that the last matching entry wins. The table ends with a zeroed entry. */
static const struct {
    const char *name;
    const char *suffix;
    unsigned flag;
} cpus[] = {
#if ARCH_X86
    { "SSE2",               "sse2",      DAV1D_X86_CPU_FLAG_SSE2 },
    { "SSSE3",              "ssse3",     DAV1D_X86_CPU_FLAG_SSSE3 },
    { "SSE4.1",             "sse4",      DAV1D_X86_CPU_FLAG_SSE41 },
    { "AVX2",               "avx2",      DAV1D_X86_CPU_FLAG_AVX2 },
    { "AVX-512 (Ice Lake)", "avx512icl", DAV1D_X86_CPU_FLAG_AVX512ICL },
#elif ARCH_AARCH64 || ARCH_ARM
    { "NEON",               "neon",      DAV1D_ARM_CPU_FLAG_NEON },
    { "DOTPROD",            "dotprod",   DAV1D_ARM_CPU_FLAG_DOTPROD },
    { "I8MM",               "i8mm",      DAV1D_ARM_CPU_FLAG_I8MM },
#if ARCH_AARCH64
    { "SVE",                "sve",       DAV1D_ARM_CPU_FLAG_SVE },
    { "SVE2",               "sve2",      DAV1D_ARM_CPU_FLAG_SVE2 },
#endif /* ARCH_AARCH64 */
#elif ARCH_LOONGARCH
    { "LSX",                "lsx",       DAV1D_LOONGARCH_CPU_FLAG_LSX },
    { "LASX",               "lasx",      DAV1D_LOONGARCH_CPU_FLAG_LASX },
#elif ARCH_PPC64LE
    { "VSX",                "vsx",       DAV1D_PPC_CPU_FLAG_VSX },
    { "PWR9",               "pwr9",      DAV1D_PPC_CPU_FLAG_PWR9 },
#elif ARCH_RISCV
    { "RVV",                "rvv",       DAV1D_RISCV_CPU_FLAG_V },
#endif
    { 0 } /* terminator */
};
129 
130 #if ARCH_AARCH64 && HAVE_SVE
131 int checkasm_sve_length(void);
132 #endif
133 
/* One implementation (C or a specific SIMD variant) of a tested function.
 * Versions of the same function form a singly linked list. */
typedef struct CheckasmFuncVersion {
    struct CheckasmFuncVersion *next; /* next version, or NULL at the end */
    void *func;     /* implementation pointer; NULL on a freshly created node */
    int ok;         /* NOTE(review): presumably the pass/fail status; set outside this chunk */
    unsigned cpu;   /* cpu flags of this version, mapped to a tag by cpu_suffix() */
    int iterations; /* number of benchmark iterations accumulated in cycles */
    uint64_t cycles; /* total timer ticks over all iterations */
} CheckasmFuncVersion;
142 
/* Binary search tree node (left-leaning red-black tree keyed by name).
 * The node and its name are allocated as a single block. */
typedef struct CheckasmFunc {
    struct CheckasmFunc *child[2]; /* [0] = names sorting before, [1] = after */
    CheckasmFuncVersion versions;  /* first version, stored inline; head of the list */
    uint8_t color; /* 0 = red, 1 = black */
    char name[];   /* NUL-terminated function name (flexible array member) */
} CheckasmFunc;
150 
/* What the program was asked to do. The order matters: main() tests
 * `run_mode >= RUN_CPUFLAG_LISTING` to tell the listing modes (which print
 * to stdout) apart from the test/benchmark modes (which report on stderr). */
typedef enum {
    RUN_NORMAL = 0,       /* run the tests (default) */
    RUN_BENCHMARK,        /* --bench: run the tests and benchmark them */
    RUN_CPUFLAG_LISTING,  /* --list-cpuflags */
    RUN_FUNCTION_LISTING, /* --list-functions */
} CheckasmRunMode;
157 
/* Internal state (zero-initialized, shared by the whole test harness) */
static struct {
    CheckasmFunc *funcs;            /* root of the tree of known functions */
    CheckasmFunc *current_func;
    CheckasmFuncVersion *current_func_ver;
    const char *current_test_name;  /* name of the tests[] entry being run */
    int num_checked;                /* reported as the total in the final summary */
    int num_failed;                 /* non-zero makes the program exit with 1 */
    double nop_time;                /* timing overhead from measure_nop_time() */
    unsigned cpu_flag;              /* flags currently reported by dav1d_get_cpu_flags() */
    const char *cpu_flag_name;      /* heading still to be printed, NULL once printed */
    const char *test_pattern;       /* --test/-t wildcard, NULL = all tests */
    const char *function_pattern;   /* --function/-f wildcard */
    unsigned seed;                  /* rng seed, re-applied before every test */
    CheckasmRunMode run_mode;
    int verbose;                    /* set by --verbose/-v */
    volatile sig_atomic_t sig; // SIG_ATOMIC_MAX = signal handling enabled
    int suffix_length;              /* strlen of the current cpu suffix, plus one */
    int max_function_name_length;   /* used to align the benchmark output */
#if ARCH_X86_64
    void (*simd_warmup)(void);      /* AVX2/AVX-512 warmup routine chosen in main() */
#endif
} state;
181 
/* float compare support code */
/* Gives access to the raw 32-bit pattern of a float, so that the sign bit
 * and the distance between two values can be inspected as integers. */
typedef union {
    float f;
    uint32_t i;
} intfloat;
187 
/* The four 32-bit words of xorshift128 generator state */
static uint32_t xs_state[4];

/* Seed the generator: all four state words are derived from `seed` by
 * mixing its 16-bit halves with those of its bitwise complement. */
static void xor128_srand(unsigned seed) {
    const unsigned inverted = ~seed;
    const unsigned hi_half = 0xffff0000;
    const unsigned lo_half = 0x0000ffff;

    xs_state[0] = seed;
    xs_state[1] = (seed     & hi_half) | (inverted & lo_half);
    xs_state[2] = (inverted & hi_half) | (seed     & lo_half);
    xs_state[3] = inverted;
}

// xor128 from Marsaglia, George (July 2003). "Xorshift RNGs".
//             Journal of Statistical Software. 8 (14).
//             doi:10.18637/jss.v008.i14.
//
// Returns the next pseudo-random value with the top bit cleared, so the
// result is never negative.
int xor128_rand(void) {
    uint32_t mix = xs_state[0];
    const uint32_t last = xs_state[3];

    /* Slide the state window down by one word */
    xs_state[0] = xs_state[1];
    xs_state[1] = xs_state[2];
    xs_state[2] = last;

    mix ^= mix << 11;
    mix ^= mix >> 8;

    const uint32_t fresh = last ^ (last >> 19) ^ mix;
    xs_state[3] = fresh;

    return fresh >> 1;
}
214 
215 #if CONFIG_MACOS_KPERF
216 
217 static int (*kpc_get_thread_counters)(int, unsigned int, void *);
218 
219 #define CFGWORD_EL0A64EN_MASK (0x20000)
220 
221 #define CPMU_CORE_CYCLE 0x02
222 
223 #define KPC_CLASS_FIXED_MASK        (1 << 0)
224 #define KPC_CLASS_CONFIGURABLE_MASK (1 << 1)
225 
226 #define COUNTERS_COUNT 10
227 #define CONFIG_COUNT 8
228 #define KPC_MASK (KPC_CLASS_CONFIGURABLE_MASK | KPC_CLASS_FIXED_MASK)
229 
kperf_init(void)230 static int kperf_init(void) {
231     uint64_t config[COUNTERS_COUNT] = { 0 };
232 
233     void *kperf = dlopen("/System/Library/PrivateFrameworks/kperf.framework/kperf", RTLD_LAZY);
234     if (!kperf) {
235         fprintf(stderr, "checkasm: Unable to load kperf: %s\n", dlerror());
236         return 1;
237     }
238 
239     int (*kpc_force_all_ctrs_set)(int) = dlsym(kperf, "kpc_force_all_ctrs_set");
240     int (*kpc_set_counting)(uint32_t) = dlsym(kperf, "kpc_set_counting");
241     int (*kpc_set_thread_counting)(uint32_t) = dlsym(kperf, "kpc_set_thread_counting");
242     int (*kpc_set_config)(uint32_t, void *) = dlsym(kperf, "kpc_set_config");
243     uint32_t (*kpc_get_counter_count)(uint32_t) = dlsym(kperf, "kpc_get_counter_count");
244     uint32_t (*kpc_get_config_count)(uint32_t) = dlsym(kperf, "kpc_get_config_count");
245     kpc_get_thread_counters = dlsym(kperf, "kpc_get_thread_counters");
246 
247     if (!kpc_get_thread_counters) {
248         fprintf(stderr, "checkasm: Unable to load kpc_get_thread_counters\n");
249         return 1;
250     }
251 
252     if (!kpc_get_counter_count || kpc_get_counter_count(KPC_MASK) != COUNTERS_COUNT) {
253         fprintf(stderr, "checkasm: Unxpected kpc_get_counter_count\n");
254         return 1;
255     }
256     if (!kpc_get_config_count || kpc_get_config_count(KPC_MASK) != CONFIG_COUNT) {
257         fprintf(stderr, "checkasm: Unxpected kpc_get_config_count\n");
258         return 1;
259     }
260 
261     config[0] = CPMU_CORE_CYCLE | CFGWORD_EL0A64EN_MASK;
262 
263     if (!kpc_set_config || kpc_set_config(KPC_MASK, config)) {
264         fprintf(stderr, "checkasm: The kperf API needs to be run as root\n");
265         return 1;
266     }
267     if (!kpc_force_all_ctrs_set || kpc_force_all_ctrs_set(1)) {
268         fprintf(stderr, "checkasm: kpc_force_all_ctrs_set failed\n");
269         return 1;
270     }
271     if (!kpc_set_counting || kpc_set_counting(KPC_MASK)) {
272         fprintf(stderr, "checkasm: kpc_set_counting failed\n");
273         return 1;
274     }
275     if (!kpc_set_counting || kpc_set_thread_counting(KPC_MASK)) {
276         fprintf(stderr, "checkasm: kpc_set_thread_counting failed\n");
277         return 1;
278     }
279     return 0;
280 }
281 
checkasm_kperf_cycles(void)282 uint64_t checkasm_kperf_cycles(void) {
283     uint64_t counters[COUNTERS_COUNT];
284     if (kpc_get_thread_counters(0, COUNTERS_COUNT, counters))
285         return -1;
286 
287     return counters[0];
288 }
289 #endif
290 
is_negative(const intfloat u)291 static int is_negative(const intfloat u) {
292     return u.i >> 31;
293 }
294 
float_near_ulp(const float a,const float b,const unsigned max_ulp)295 int float_near_ulp(const float a, const float b, const unsigned max_ulp) {
296     intfloat x, y;
297 
298     x.f = a;
299     y.f = b;
300 
301     if (is_negative(x) != is_negative(y)) {
302         // handle -0.0 == +0.0
303         return a == b;
304     }
305 
306     if (llabs((int64_t)x.i - y.i) <= max_ulp)
307         return 1;
308 
309     return 0;
310 }
311 
/* Element-wise float_near_ulp() over two arrays of `len` floats.
 * Returns 1 if every pair is near (or len <= 0), 0 at the first mismatch. */
int float_near_ulp_array(const float *const a, const float *const b,
                         const unsigned max_ulp, const int len)
{
    int idx = 0;

    while (idx < len && float_near_ulp(a[idx], b[idx], max_ulp))
        idx++;

    /* The loop only stops early on a mismatch */
    return idx >= len;
}
321 
/* Return 1 if |a - b| is strictly smaller than `eps`, else 0. */
int float_near_abs_eps(const float a, const float b, const float eps) {
    const float diff = a - b;
    return fabsf(diff) < eps;
}
325 
/* Element-wise float_near_abs_eps() over two arrays of `len` floats.
 * Returns 1 if every pair is near (or len <= 0), 0 at the first mismatch. */
int float_near_abs_eps_array(const float *const a, const float *const b,
                             const float eps, const int len)
{
    int all_near = 1;

    for (int idx = 0; all_near && idx < len; idx++)
        all_near = float_near_abs_eps(a[idx], b[idx], eps) != 0;

    return all_near;
}
335 
/* Return 1 if `a` and `b` are near by either criterion: within `max_ulp`
 * units in the last place, or within an absolute distance of `eps`. */
int float_near_abs_eps_ulp(const float a, const float b, const float eps,
                           const unsigned max_ulp)
{
    if (float_near_ulp(a, b, max_ulp))
        return 1;

    /* The absolute check is only evaluated when the ulp check fails */
    return float_near_abs_eps(a, b, eps) != 0;
}
341 
/* Element-wise float_near_abs_eps_ulp() over two arrays of `len` floats.
 * Returns 1 if every pair is near (or len <= 0), 0 at the first mismatch. */
int float_near_abs_eps_array_ulp(const float *const a, const float *const b,
                                 const float eps, const unsigned max_ulp,
                                 const int len)
{
    for (int idx = 0; idx < len; idx++) {
        const int near = float_near_abs_eps_ulp(a[idx], b[idx], eps, max_ulp);
        if (!near)
            return 0;
    }

    return 1;
}
352 
/* Non-zero if the output terminal understands ANSI color sequences;
 * decided in main() */
static int use_printf_color;

/* printf-style output to `f`, wrapped in the escape sequences for `color`
 * (one of the COLOR_* codes) when color output is enabled. */
static void color_fprintf(FILE *const f, const int color, const char *const fmt, ...) {
    const int colorize = use_printf_color;
    va_list args;

    if (colorize)
        fprintf(f, "\x1b[0;%dm", color);

    va_start(args, fmt);
    vfprintf(f, fmt, args);
    va_end(args);

    /* Reset the attributes again so that later output is unaffected */
    if (colorize)
        fputs("\x1b[0m", f);
}
368 
369 /* Deallocate a tree */
destroy_func_tree(CheckasmFunc * const f)370 static void destroy_func_tree(CheckasmFunc *const f) {
371     if (f) {
372         CheckasmFuncVersion *v = f->versions.next;
373         while (v) {
374             CheckasmFuncVersion *next = v->next;
375             free(v);
376             v = next;
377         }
378 
379         destroy_func_tree(f->child[0]);
380         destroy_func_tree(f->child[1]);
381         free(f);
382     }
383 }
384 
385 /* Allocate a zero-initialized block, clean up and exit on failure */
checkasm_malloc(const size_t size)386 static void *checkasm_malloc(const size_t size) {
387     void *const ptr = calloc(1, size);
388     if (!ptr) {
389         fprintf(stderr, "checkasm: malloc failed\n");
390         destroy_func_tree(state.funcs);
391         exit(1);
392     }
393     return ptr;
394 }
395 
396 /* Get the suffix of the specified cpu flag */
cpu_suffix(const unsigned cpu)397 static const char *cpu_suffix(const unsigned cpu) {
398     for (int i = (int)(sizeof(cpus) / sizeof(*cpus)) - 2; i >= 0; i--)
399         if (cpu & cpus[i].flag)
400             return cpus[i].suffix;
401 
402     return "c";
403 }
404 
405 #ifdef readtime
/* qsort() comparator ordering uint16_t samples ascending */
static int cmp_nop(const void *a, const void *b) {
    const uint16_t lhs = *(const uint16_t *)a;
    const uint16_t rhs = *(const uint16_t *)b;

    /* Both operands fit in an int, so the difference cannot overflow */
    return (int)lhs - (int)rhs;
}
409 
410 /* Measure the overhead of the timing code (in decicycles) */
measure_nop_time(void)411 static double measure_nop_time(void) {
412     uint16_t nops[10000];
413     int nop_sum = 0;
414 
415     for (int i = 0; i < 10000; i++) {
416         uint64_t t = readtime();
417         nops[i] = (uint16_t) (readtime() - t);
418     }
419 
420     qsort(nops, 10000, sizeof(uint16_t), cmp_nop);
421     for (int i = 2500; i < 7500; i++)
422         nop_sum += nops[i];
423 
424     return nop_sum / 5000.0;
425 }
426 
avg_cycles_per_call(const CheckasmFuncVersion * const v)427 static double avg_cycles_per_call(const CheckasmFuncVersion *const v) {
428     if (v->iterations) {
429         const double cycles = (double)v->cycles / v->iterations - state.nop_time;
430         if (cycles > 0.0)
431             return cycles / 4.0; /* 4 calls per iteration */
432     }
433     return 0.0;
434 }
435 
436 /* Print benchmark results */
print_benchs(const CheckasmFunc * const f)437 static void print_benchs(const CheckasmFunc *const f) {
438     if (f) {
439         print_benchs(f->child[0]);
440 
441         /* Only print functions with at least one assembly version */
442         const CheckasmFuncVersion *v = &f->versions;
443         if (v->iterations) {
444             const double baseline = avg_cycles_per_call(v);
445             do {
446                 const int pad_length = 10 + state.max_function_name_length -
447                     printf("%s_%s:", f->name, cpu_suffix(v->cpu));
448                 const double cycles = avg_cycles_per_call(v);
449                 const double ratio = cycles ? baseline / cycles : 0.0;
450                 printf("%*.1f (%5.2fx)\n", imax(pad_length, 0), cycles, ratio);
451             } while ((v = v->next));
452         }
453 
454         print_benchs(f->child[1]);
455     }
456 }
457 #endif
458 
print_functions(const CheckasmFunc * const f)459 static void print_functions(const CheckasmFunc *const f) {
460     if (f) {
461         print_functions(f->child[0]);
462         const CheckasmFuncVersion *v = &f->versions;
463         printf("%s (%s", f->name, cpu_suffix(v->cpu));
464         while ((v = v->next))
465             printf(", %s", cpu_suffix(v->cpu));
466         printf(")\n");
467         print_functions(f->child[1]);
468     }
469 }
470 
#define is_digit(x) ((x) >= '0' && (x) <= '9')

/* ASCIIbetical sort except preserving natural order for numbers.
 *
 * Returns zero for equal names, a negative value if `a` sorts before `b`
 * and a positive value otherwise. When the names first differ inside a
 * number, the number with more digits is the greater one, so "w8" sorts
 * before "w16". */
static int cmp_func_names(const char *a, const char *b) {
    const char *const start = a;
    int ascii_diff;

    /* Advance to the first differing character or the common terminator */
    for (;;) {
        ascii_diff = *(const unsigned char *)a - *(const unsigned char *)b;
        if (ascii_diff || !*a)
            break;
        a++;
        b++;
    }

    /* Step over the digits both names still have in common by position */
    while (is_digit(*a) && is_digit(*b)) {
        a++;
        b++;
    }

    /* If we stopped inside a number, the one that continues is longer */
    if (a > start && is_digit(a[-1])) {
        const int digit_diff = is_digit(*a) - is_digit(*b);
        if (digit_diff)
            return digit_diff;
    }

    return ascii_diff;
}
490 
491 /* Perform a tree rotation in the specified direction and return the new root */
rotate_tree(CheckasmFunc * const f,const int dir)492 static CheckasmFunc *rotate_tree(CheckasmFunc *const f, const int dir) {
493     CheckasmFunc *const r = f->child[dir^1];
494     f->child[dir^1] = r->child[dir];
495     r->child[dir] = f;
496     r->color = f->color;
497     f->color = 0;
498     return r;
499 }
500 
501 #define is_red(f) ((f) && !(f)->color)
502 
503 /* Balance a left-leaning red-black tree at the specified node */
balance_tree(CheckasmFunc ** const root)504 static void balance_tree(CheckasmFunc **const root) {
505     CheckasmFunc *const f = *root;
506 
507     if (is_red(f->child[0]) && is_red(f->child[1])) {
508         f->color ^= 1;
509         f->child[0]->color = f->child[1]->color = 1;
510     }
511     else if (!is_red(f->child[0]) && is_red(f->child[1]))
512         *root = rotate_tree(f, 0); /* Rotate left */
513     else if (is_red(f->child[0]) && is_red(f->child[0]->child[0]))
514         *root = rotate_tree(f, 1); /* Rotate right */
515 }
516 
517 /* Get a node with the specified name, creating it if it doesn't exist */
get_func(CheckasmFunc ** const root,const char * const name)518 static CheckasmFunc *get_func(CheckasmFunc **const root, const char *const name) {
519     CheckasmFunc *f = *root;
520 
521     if (f) {
522         /* Search the tree for a matching node */
523         const int cmp = cmp_func_names(name, f->name);
524         if (cmp) {
525             f = get_func(&f->child[cmp > 0], name);
526 
527             /* Rebalance the tree on the way up if a new node was inserted */
528             if (!f->versions.func)
529                 balance_tree(root);
530         }
531     } else {
532         /* Allocate and insert a new node into the tree */
533         const size_t name_length = strlen(name) + 1;
534         f = *root = checkasm_malloc(offsetof(CheckasmFunc, name) + name_length);
535         memcpy(f->name, name, name_length);
536     }
537 
538     return f;
539 }
540 
541 checkasm_context checkasm_context_buf;
542 
543 /* Crash handling: attempt to catch crashes and handle them
544  * gracefully instead of just aborting abruptly. */
545 #ifdef _WIN32
546 #if WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_DESKTOP)
/* Vectored exception handler (registered in main()). While signal handling
 * is enabled (state.sig == SIG_ATOMIC_MAX) it translates the exception code
 * to the matching signal number, stores it in state.sig and calls
 * checkasm_load_context(). Exceptions that are not recognized, or that
 * arrive while handling is disabled, are passed on to the next handler. */
static LONG NTAPI signal_handler(EXCEPTION_POINTERS *const e) {
    if (state.sig == SIG_ATOMIC_MAX) {
        int s;
        switch (e->ExceptionRecord->ExceptionCode) {
        case EXCEPTION_FLT_DIVIDE_BY_ZERO:
        case EXCEPTION_INT_DIVIDE_BY_ZERO:
            s = SIGFPE;
            break;
        case EXCEPTION_ILLEGAL_INSTRUCTION:
        case EXCEPTION_PRIV_INSTRUCTION:
            s = SIGILL;
            break;
        case EXCEPTION_ACCESS_VIOLATION:
        case EXCEPTION_ARRAY_BOUNDS_EXCEEDED:
        case EXCEPTION_DATATYPE_MISALIGNMENT:
        case EXCEPTION_STACK_OVERFLOW:
            s = SIGSEGV;
            break;
        case EXCEPTION_IN_PAGE_ERROR:
            s = SIGBUS;
            break;
        default:
            /* Not a crash we know how to report */
            return EXCEPTION_CONTINUE_SEARCH;
        }
        /* Overwriting SIG_ATOMIC_MAX also disables further handling */
        state.sig = s;
        /* NOTE(review): presumably resumes at the context stored by
         * checkasm_save_context() and does not return - confirm */
        checkasm_load_context();
    }
    return EXCEPTION_CONTINUE_SEARCH;
}
576 #endif
577 #else
/* Declared ahead of its definition because the sigaction descriptor below
 * needs its address, while the handler in turn refers to the descriptor */
static void signal_handler(int s);

/* SA_RESETHAND restores the default action as soon as the handler is
 * entered, so a crash that occurs while handling is disabled terminates
 * the process normally the second time around */
static const struct sigaction signal_handler_act = {
    .sa_handler = signal_handler,
    .sa_flags = SA_RESETHAND,
};

/* Signal handler installed by main() for SIGBUS, SIGFPE, SIGILL and SIGSEGV.
 * Only acts while signal handling is enabled (state.sig == SIG_ATOMIC_MAX). */
static void signal_handler(const int s) {
    if (state.sig == SIG_ATOMIC_MAX) {
        /* Record the signal; this also disables handling until re-enabled */
        state.sig = s;
        /* Re-arm, since SA_RESETHAND has just uninstalled this handler */
        sigaction(s, &signal_handler_act, NULL);
        /* NOTE(review): presumably resumes at the context stored by
         * checkasm_save_context() and does not return - confirm */
        checkasm_load_context();
    }
}
592 #endif
593 
/* Compares a string with a wildcard pattern.
 * '*' matches any (possibly empty) sequence of characters. Returns zero
 * if `str` matches `pattern` and non-zero otherwise. */
static int wildstrcmp(const char *str, const char *pattern) {
    const char *rest = strchr(pattern, '*');

    /* Without a wildcard this is a plain string comparison */
    if (!rest)
        return strcmp(str, pattern);

    /* Everything in front of the first '*' has to match literally */
    const size_t prefix_len = rest - pattern;
    if (strncmp(str, pattern, prefix_len))
        return 1;

    /* A run of consecutive '*' is equivalent to a single one */
    do {
        rest++;
    } while (*rest == '*');

    /* A trailing wildcard matches whatever is left */
    if (!*rest)
        return 0;

    /* Try to match the remaining pattern at every remaining position */
    for (str += prefix_len; *str; str++) {
        if (!wildstrcmp(str, rest))
            return 0;
    }

    return 1;
}
608 
609 /* Perform tests and benchmarks for the specified
610  * cpu flag if supported by the host */
check_cpu_flag(const char * const name,unsigned flag)611 static void check_cpu_flag(const char *const name, unsigned flag) {
612     const unsigned old_cpu_flag = state.cpu_flag;
613 
614     flag |= old_cpu_flag;
615     dav1d_set_cpu_flags_mask(flag);
616     state.cpu_flag = dav1d_get_cpu_flags();
617 
618     if (!flag || state.cpu_flag != old_cpu_flag) {
619         state.cpu_flag_name = name;
620         state.suffix_length = (int)strlen(cpu_suffix(flag)) + 1;
621         for (int i = 0; tests[i].func; i++) {
622             if (state.test_pattern && wildstrcmp(tests[i].name, state.test_pattern))
623                 continue;
624             xor128_srand(state.seed);
625             state.current_test_name = tests[i].name;
626             tests[i].func();
627         }
628     }
629 }
630 
631 /* Print the name of the current CPU flag, but only do it once */
print_cpu_name(void)632 static void print_cpu_name(void) {
633     if (state.cpu_flag_name) {
634         color_fprintf(stderr, COLOR_YELLOW, "%s:\n", state.cpu_flag_name);
635         state.cpu_flag_name = NULL;
636     }
637 }
638 
get_seed(void)639 static unsigned get_seed(void) {
640 #ifdef _WIN32
641     LARGE_INTEGER i;
642     QueryPerformanceCounter(&i);
643     return i.LowPart;
644 #elif defined(__APPLE__)
645     return (unsigned) mach_absolute_time();
646 #else
647     struct timespec ts;
648     clock_gettime(CLOCK_MONOTONIC, &ts);
649     return (unsigned) (1000000000ULL * ts.tv_sec + ts.tv_nsec);
650 #endif
651 }
652 
/* Parse the whole of `str` as an unsigned number in the given base.
 * The value is stored in *dst. Returns 0 on success and 1 if strtoul()
 * reported an error, no digits were found or trailing characters remain. */
static int checkasm_strtoul(unsigned long *const dst, const char *const str, const int base) {
    char *end;

    errno = 0;
    *dst = strtoul(str, &end, base);

    if (errno)
        return 1; /* reported by strtoul, e.g. value out of range */
    if (end == str)
        return 1; /* nothing was parsed */
    return *end != '\0'; /* leftover characters */
}
659 
main(int argc,char * argv[])660 int main(int argc, char *argv[]) {
661     state.seed = get_seed();
662 
663     while (argc > 1) {
664         if (!strncmp(argv[1], "--help", 6) || !strcmp(argv[1], "-h")) {
665             fprintf(stderr,
666                     "checkasm [options] <random seed>\n"
667                     "    <random seed>              Numeric value to seed the rng\n"
668                     "Options:\n"
669                     "    --affinity=<cpu>           Run the process on CPU <cpu>\n"
670                     "    --test=<pattern> -t        Test only <pattern>\n"
671                     "    --function=<pattern> -f    Test only the functions matching <pattern>\n"
672                     "    --bench -b                 Benchmark the tested functions\n"
673                     "    --list-cpuflags            List available cpu flags\n"
674                     "    --list-functions           List available functions\n"
675                     "    --list-tests               List available tests\n"
676                     "    --verbose -v               Print verbose output\n");
677             return 0;
678         } else if (!strcmp(argv[1], "--bench") || !strcmp(argv[1], "-b")) {
679 #ifndef readtime
680             fprintf(stderr,
681                     "checkasm: --bench is not supported on your system\n");
682             return 1;
683 #endif
684             state.run_mode = RUN_BENCHMARK;
685         } else if (!strncmp(argv[1], "--test=", 7)) {
686             state.test_pattern = argv[1] + 7;
687         } else if (!strcmp(argv[1], "-t")) {
688             state.test_pattern = argc > 1 ? argv[2] : "";
689             argc--;
690             argv++;
691         } else if (!strncmp(argv[1], "--function=", 11)) {
692             state.function_pattern = argv[1] + 11;
693         } else if (!strcmp(argv[1], "-f")) {
694             state.function_pattern = argc > 1 ? argv[2] : "";
695             argc--;
696             argv++;
697         } else if (!strcmp(argv[1], "--list-cpuflags")) {
698             state.run_mode = RUN_CPUFLAG_LISTING;
699             break;
700         } else if (!strcmp(argv[1], "--list-functions")) {
701             state.run_mode = RUN_FUNCTION_LISTING;
702         } else if (!strcmp(argv[1], "--list-tests")) {
703             for (int i = 0; tests[i].name; i++)
704                 printf("%s\n", tests[i].name);
705             return 0;
706         } else if (!strcmp(argv[1], "--verbose") || !strcmp(argv[1], "-v")) {
707             state.verbose = 1;
708         } else if (!strncmp(argv[1], "--affinity=", 11)) {
709             const char *const s = argv[1] + 11;
710             unsigned long affinity;
711             if (checkasm_strtoul(&affinity, s, 16)) {
712                 fprintf(stderr, "checkasm: invalid cpu affinity (%s)\n", s);
713                 return 1;
714             }
715 #ifdef _WIN32
716             int affinity_err;
717             HANDLE process = GetCurrentProcess();
718 #if WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_DESKTOP)
719             BOOL (WINAPI *spdcs)(HANDLE, const ULONG*, ULONG) =
720                 (void*)GetProcAddress(GetModuleHandleW(L"kernel32.dll"), "SetProcessDefaultCpuSets");
721             if (spdcs)
722                 affinity_err = !spdcs(process, (ULONG[]){ affinity + 256 }, 1);
723             else
724 #endif
725             {
726                 if (affinity < sizeof(DWORD_PTR) * 8)
727                     affinity_err = !SetProcessAffinityMask(process, (DWORD_PTR)1 << affinity);
728                 else
729                     affinity_err = 1;
730             }
731             if (affinity_err) {
732                 fprintf(stderr, "checkasm: invalid cpu affinity (%lu)\n", affinity);
733                 return 1;
734             } else {
735                 fprintf(stderr, "checkasm: running on cpu %lu\n", affinity);
736             }
737 #elif HAVE_PTHREAD_SETAFFINITY_NP && defined(CPU_SET)
738             cpu_set_t set;
739             CPU_ZERO(&set);
740             CPU_SET(affinity, &set);
741             if (pthread_setaffinity_np(pthread_self(), sizeof(set), &set)) {
742                 fprintf(stderr, "checkasm: invalid cpu affinity (%lu)\n", affinity);
743                 return 1;
744             } else {
745                 fprintf(stderr, "checkasm: running on cpu %lu\n", affinity);
746             }
747 #else
748             (void)affinity;
749             fprintf(stderr,
750                     "checkasm: --affinity is not supported on your system\n");
751             return 1;
752 #endif
753         } else {
754             unsigned long seed;
755             if (checkasm_strtoul(&seed, argv[1], 10)) {
756                 fprintf(stderr, "checkasm: unknown option (%s)\n", argv[1]);
757                 return 1;
758             }
759             state.seed = (unsigned)seed;
760         }
761 
762         argc--;
763         argv++;
764     }
765 
766 #if TRIM_DSP_FUNCTIONS
767     fprintf(stderr, "checkasm: reference functions unavailable, reconfigure using '-Dtrim_dsp=false'\n");
768     return 0;
769 #endif
770 
771     dav1d_init_cpu();
772 
773 #ifdef _WIN32
774 #if WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_DESKTOP)
775     AddVectoredExceptionHandler(0, signal_handler);
776 
777     HANDLE con = GetStdHandle(state.run_mode >= RUN_CPUFLAG_LISTING ?
778                               STD_OUTPUT_HANDLE : STD_ERROR_HANDLE);
779     DWORD con_mode = 0;
780     use_printf_color = con && con != INVALID_HANDLE_VALUE &&
781                        GetConsoleMode(con, &con_mode) &&
782                        SetConsoleMode(con, con_mode | ENABLE_VIRTUAL_TERMINAL_PROCESSING);
783 #endif
784 #else
785     sigaction(SIGBUS,  &signal_handler_act, NULL);
786     sigaction(SIGFPE,  &signal_handler_act, NULL);
787     sigaction(SIGILL,  &signal_handler_act, NULL);
788     sigaction(SIGSEGV, &signal_handler_act, NULL);
789 
790     if (isatty(state.run_mode >= RUN_CPUFLAG_LISTING ? 1 : 2)) {
791         const char *const term = getenv("TERM");
792         use_printf_color = term && strcmp(term, "dumb");
793     }
794 #endif
795 
796 #ifdef readtime
797     if (state.run_mode == RUN_BENCHMARK) {
798 #if CONFIG_MACOS_KPERF
799         if (kperf_init())
800             return 1;
801 #endif
802         if (!checkasm_save_context()) {
803             checkasm_set_signal_handler_state(1);
804             readtime();
805             checkasm_set_signal_handler_state(0);
806         } else {
807             fprintf(stderr, "checkasm: unable to access cycle counter\n");
808             return 1;
809         }
810     }
811 #endif
812 
813     int ret = 0;
814 
815     if (state.run_mode != RUN_FUNCTION_LISTING) {
816         const unsigned cpu_flags = dav1d_get_cpu_flags();
817         if (state.run_mode == RUN_CPUFLAG_LISTING) {
818             const int last_i = (int)(sizeof(cpus) / sizeof(*cpus)) - 2;
819             for (int i = 0; i <= last_i ; i++) {
820                 if (cpus[i].flag & cpu_flags)
821                     color_fprintf(stdout, COLOR_GREEN, "%s", cpus[i].suffix);
822                 else
823                     color_fprintf(stdout, COLOR_RED, "~%s", cpus[i].suffix);
824                 printf(i == last_i ? "\n" : ", ");
825             }
826             return 0;
827         }
828 #if ARCH_X86_64
829         void checkasm_warmup_avx2(void);
830         void checkasm_warmup_avx512(void);
831         if (cpu_flags & DAV1D_X86_CPU_FLAG_AVX512ICL)
832             state.simd_warmup = checkasm_warmup_avx512;
833         else if (cpu_flags & DAV1D_X86_CPU_FLAG_AVX2)
834             state.simd_warmup = checkasm_warmup_avx2;
835         checkasm_simd_warmup();
836 #endif
837 #if ARCH_ARM
838         void checkasm_checked_call_vfp(void *func, int dummy, ...);
839         void checkasm_checked_call_novfp(void *func, int dummy, ...);
840         if (cpu_flags & DAV1D_ARM_CPU_FLAG_NEON)
841             checkasm_checked_call_ptr = checkasm_checked_call_vfp;
842         else
843             checkasm_checked_call_ptr = checkasm_checked_call_novfp;
844 #endif
845 #if ARCH_X86
846         unsigned checkasm_init_x86(char *name);
847         char name[48];
848         const unsigned cpuid = checkasm_init_x86(name);
849         for (size_t len = strlen(name); len && name[len-1] == ' '; len--)
850             name[len-1] = '\0'; /* trim trailing whitespace */
851         fprintf(stderr, "checkasm: %s (%08X) using random seed %u\n", name, cpuid, state.seed);
852 #elif ARCH_RISCV
853         char buf[32] = "";
854         if (cpu_flags & DAV1D_RISCV_CPU_FLAG_V)
855             snprintf(buf, sizeof(buf), "VLEN=%i bits, ", dav1d_get_vlen());
856         fprintf(stderr, "checkasm: %susing random seed %u\n", buf, state.seed);
857 #elif ARCH_AARCH64 && HAVE_SVE
858         char buf[48] = "";
859         if (cpu_flags & DAV1D_ARM_CPU_FLAG_SVE)
860             snprintf(buf, sizeof(buf), "SVE %d bits, ", checkasm_sve_length());
861         fprintf(stderr, "checkasm: %susing random seed %u\n", buf, state.seed);
862 #else
863         fprintf(stderr, "checkasm: using random seed %u\n", state.seed);
864 #endif
865     }
866 
867     check_cpu_flag(NULL, 0);
868     for (int i = 0; cpus[i].flag; i++)
869         check_cpu_flag(cpus[i].name, cpus[i].flag);
870 
871     if (state.run_mode == RUN_FUNCTION_LISTING) {
872         print_functions(state.funcs);
873     } else if (state.num_failed) {
874         fprintf(stderr, "checkasm: %d of %d tests failed\n",
875                 state.num_failed, state.num_checked);
876         ret = 1;
877     } else {
878         if (state.num_checked)
879             fprintf(stderr, "checkasm: all %d tests passed\n", state.num_checked);
880         else
881             fprintf(stderr, "checkasm: no tests to perform\n");
882 #ifdef readtime
883         if (state.run_mode == RUN_BENCHMARK && state.max_function_name_length) {
884             state.nop_time = measure_nop_time();
885             if (state.verbose)
886                 printf("nop:%*.1f\n", state.max_function_name_length + 6, state.nop_time);
887             print_benchs(state.funcs);
888         }
889 #endif
890     }
891 
892     destroy_func_tree(state.funcs);
893     return ret;
894 }
895 
896 /* Decide whether or not the specified function needs to be tested and
897  * allocate/initialize data structures if needed. Returns a pointer to a
898  * reference function if the function should be tested, otherwise NULL */
checkasm_check_func(void * const func,const char * const name,...)899 void *checkasm_check_func(void *const func, const char *const name, ...) {
900     char name_buf[256];
901     va_list arg;
902 
903     va_start(arg, name);
904     int name_length = vsnprintf(name_buf, sizeof(name_buf), name, arg);
905     va_end(arg);
906 
907     if (!func || name_length <= 0 || (size_t)name_length >= sizeof(name_buf) ||
908         (state.function_pattern && wildstrcmp(name_buf, state.function_pattern)))
909     {
910         return NULL;
911     }
912 
913     state.current_func = get_func(&state.funcs, name_buf);
914 
915     state.funcs->color = 1;
916     CheckasmFuncVersion *v = &state.current_func->versions;
917     void *ref = func;
918 
919     if (v->func) {
920         CheckasmFuncVersion *prev;
921         do {
922             /* Only test functions that haven't already been tested */
923             if (v->func == func)
924                 return NULL;
925 
926             if (v->ok)
927                 ref = v->func;
928 
929             prev = v;
930         } while ((v = v->next));
931 
932         v = prev->next = checkasm_malloc(sizeof(CheckasmFuncVersion));
933     }
934 
935     name_length += state.suffix_length;
936     if (name_length > state.max_function_name_length)
937         state.max_function_name_length = name_length;
938 
939     v->func = func;
940     v->ok = 1;
941     v->cpu = state.cpu_flag;
942     state.current_func_ver = v;
943     if (state.run_mode == RUN_FUNCTION_LISTING) /* Save function names without running tests */
944         return NULL;
945 
946     xor128_srand(state.seed);
947 
948     if (state.cpu_flag)
949         state.num_checked++;
950 
951     return ref;
952 }
953 
954 /* Decide whether or not the current function needs to be benchmarked */
checkasm_bench_func(void)955 int checkasm_bench_func(void) {
956     return !state.num_failed && state.run_mode == RUN_BENCHMARK;
957 }
958 
959 /* Indicate that the current test has failed, return whether verbose printing
960  * is requested. */
checkasm_fail_func(const char * const msg,...)961 int checkasm_fail_func(const char *const msg, ...) {
962     if (state.current_func_ver && state.current_func_ver->cpu &&
963         state.current_func_ver->ok)
964     {
965         va_list arg;
966 
967         print_cpu_name();
968         fprintf(stderr, "   %s_%s (", state.current_func->name,
969                 cpu_suffix(state.current_func_ver->cpu));
970         va_start(arg, msg);
971         vfprintf(stderr, msg, arg);
972         va_end(arg);
973         fprintf(stderr, ")\n");
974 
975         state.current_func_ver->ok = 0;
976         state.num_failed++;
977     }
978     return state.verbose;
979 }
980 
981 /* Update benchmark results of the current function */
checkasm_update_bench(const int iterations,const uint64_t cycles)982 void checkasm_update_bench(const int iterations, const uint64_t cycles) {
983     state.current_func_ver->iterations += iterations;
984     state.current_func_ver->cycles += cycles;
985 }
986 
987 /* Print the outcome of all tests performed since
988  * the last time this function was called */
checkasm_report(const char * const name,...)989 void checkasm_report(const char *const name, ...) {
990     static int prev_checked, prev_failed;
991     static size_t max_length;
992 
993     if (state.num_checked > prev_checked) {
994         int pad_length = (int) max_length + 4;
995         va_list arg;
996 
997         print_cpu_name();
998         pad_length -= fprintf(stderr, " - %s.", state.current_test_name);
999         va_start(arg, name);
1000         pad_length -= vfprintf(stderr, name, arg);
1001         va_end(arg);
1002         fprintf(stderr, "%*c", imax(pad_length, 0) + 2, '[');
1003 
1004         if (state.num_failed == prev_failed)
1005             color_fprintf(stderr, COLOR_GREEN, "OK");
1006         else
1007             color_fprintf(stderr, COLOR_RED, "FAILED");
1008         fprintf(stderr, "]\n");
1009 
1010         prev_checked = state.num_checked;
1011         prev_failed  = state.num_failed;
1012     } else if (!state.cpu_flag) {
1013         /* Calculate the amount of padding required
1014          * to make the output vertically aligned */
1015         size_t length = strlen(state.current_test_name);
1016         va_list arg;
1017 
1018         va_start(arg, name);
1019         length += vsnprintf(NULL, 0, name, arg);
1020         va_end(arg);
1021 
1022         if (length > max_length)
1023             max_length = length;
1024     }
1025 }
1026 
checkasm_set_signal_handler_state(const int enabled)1027 void checkasm_set_signal_handler_state(const int enabled) {
1028     state.sig = enabled ? SIG_ATOMIC_MAX : 0;
1029 }
1030 
checkasm_handle_signal(void)1031 void checkasm_handle_signal(void) {
1032     const int s = state.sig;
1033     checkasm_fail_func(s == SIGFPE ? "fatal arithmetic error" :
1034                        s == SIGILL ? "illegal instruction" :
1035                        s == SIGBUS ? "bus error" :
1036                                      "segmentation fault");
1037 }
1038 
/* Report a buffer mismatch once per comparison.
 *
 * Returns 1 when the caller should stop immediately (the failure was recorded
 * and verbose output is off). Returns 0 when the caller should keep going:
 * either *err was already set by an earlier call, or this call has just set
 * it and printed the "name (WxH):" header that precedes the verbose dump. */
static int check_err(const char *const file, const int line,
                     const char *const name, const int w, const int h,
                     int *const err)
{
    if (*err)
        return 0;

    const int verbose = checkasm_fail_func("%s:%d", file, line);
    if (verbose) {
        *err = 1;
        fprintf(stderr, "%s (%dx%d):\n", name, w, h);
        return 0;
    }

    return 1;
}
1051 
/* Generates checkasm_check_<type>(), which compares two 2D buffers of `type`
 * elements and reports any difference through check_err().
 *
 *   file, line         - call site, forwarded to the failure message
 *   buf1, buf2         - top-left element of each w x h block
 *   stride1, stride2   - row strides in bytes (may be negative)
 *   w, h               - size of the compared block in elements / rows
 *   name               - label printed in front of the verbose dump
 *   align_w, align_h   - w and h are rounded up to these before locating the
 *                        right and bottom guard areas; the mask arithmetic
 *                        presumably expects powers of two (not checked here)
 *   padding            - thickness, in elements / rows, of the guard area
 *                        compared around the block
 *
 * The block itself is compared first; on a mismatch both buffers and a
 * per-element 'x' / '.' difference map are dumped. The guard areas above,
 * below, left and right of the block are then compared to detect writes
 * outside of it. Returns 0 if everything matches and 1 otherwise.
 *
 * `fmt` is the printf conversion used to print one element. */
#define DEF_CHECKASM_CHECK_FUNC(type, fmt) \
int checkasm_check_##type(const char *const file, const int line, \
                          const type *buf1, ptrdiff_t stride1, \
                          const type *buf2, ptrdiff_t stride2, \
                          const int w, int h, const char *const name, \
                          const int align_w, const int align_h, \
                          const int padding) \
{ \
    const int aligned_w = (w + align_w - 1) & ~(align_w - 1); \
    const int aligned_h = (h + align_h - 1) & ~(align_h - 1); \
    int err = 0; \
    int y; \
    /* Convert byte strides to element strides. The divisor is cast to a  */ \
    /* signed type: dividing by a bare size_t would convert a negative    */ \
    /* stride to unsigned first and produce a bogus, huge element stride. */ \
    stride1 /= (ptrdiff_t) sizeof(*buf1); \
    stride2 /= (ptrdiff_t) sizeof(*buf2); \
    for (y = 0; y < h; y++) \
        if (memcmp(&buf1[y*stride1], &buf2[y*stride2], w*sizeof(*buf1))) \
            break; \
    if (y != h) { \
        if (check_err(file, line, name, w, h, &err)) \
            return 1; \
        /* Verbose dump: buf1 row, buf2 row, then the difference map */ \
        for (y = 0; y < h; y++) { \
            for (int x = 0; x < w; x++) \
                fprintf(stderr, " " fmt, buf1[x]); \
            fprintf(stderr, "    "); \
            for (int x = 0; x < w; x++) \
                fprintf(stderr, " " fmt, buf2[x]); \
            fprintf(stderr, "    "); \
            for (int x = 0; x < w; x++) \
                fprintf(stderr, "%c", buf1[x] != buf2[x] ? 'x' : '.'); \
            buf1 += stride1; \
            buf2 += stride2; \
            fprintf(stderr, "\n"); \
        } \
        /* Rewind to the first row for the guard-area checks below */ \
        buf1 -= h*stride1; \
        buf2 -= h*stride2; \
    } \
    for (y = -padding; y < 0; y++) \
        if (memcmp(&buf1[y*stride1 - padding], &buf2[y*stride2 - padding], \
                   (w + 2*padding)*sizeof(*buf1))) { \
            if (check_err(file, line, name, w, h, &err)) \
                return 1; \
            fprintf(stderr, " overwrite above\n"); \
            break; \
        } \
    for (y = aligned_h; y < aligned_h + padding; y++) \
        if (memcmp(&buf1[y*stride1 - padding], &buf2[y*stride2 - padding], \
                   (w + 2*padding)*sizeof(*buf1))) { \
            if (check_err(file, line, name, w, h, &err)) \
                return 1; \
            fprintf(stderr, " overwrite below\n"); \
            break; \
        } \
    for (y = 0; y < h; y++) \
        if (memcmp(&buf1[y*stride1 - padding], &buf2[y*stride2 - padding], \
                   padding*sizeof(*buf1))) { \
            if (check_err(file, line, name, w, h, &err)) \
                return 1; \
            fprintf(stderr, " overwrite left\n"); \
            break; \
        } \
    for (y = 0; y < h; y++) \
        if (memcmp(&buf1[y*stride1 + aligned_w], &buf2[y*stride2 + aligned_w], \
                   padding*sizeof(*buf1))) { \
            if (check_err(file, line, name, w, h, &err)) \
                return 1; \
            fprintf(stderr, " overwrite right\n"); \
            break; \
        } \
    return err; \
}

DEF_CHECKASM_CHECK_FUNC(int8_t,   "%4d")
DEF_CHECKASM_CHECK_FUNC(int16_t,  "%6d")
DEF_CHECKASM_CHECK_FUNC(int32_t,  "%9d")
DEF_CHECKASM_CHECK_FUNC(uint8_t,  "%02x")
DEF_CHECKASM_CHECK_FUNC(uint16_t, "%04x")
DEF_CHECKASM_CHECK_FUNC(uint32_t, "%08x")
1129 
#if ARCH_X86_64
/* Invoke the SIMD warm-up routine stored in state.simd_warmup, if one is set. */
void checkasm_simd_warmup(void)
{
    if (!state.simd_warmup)
        return;

    state.simd_warmup();
}
#endif
1137 
#if ARCH_ARM
/* Checked-call entry point used on ARM. main() points it at
 * checkasm_checked_call_vfp when the NEON cpu flag is set and at
 * checkasm_checked_call_novfp otherwise. */
void (*checkasm_checked_call_ptr)(void *func, int dummy, ...);
#endif
1141