1 /*
2 * Copyright © 2018, VideoLAN and dav1d authors
3 * Copyright © 2018, Two Orioles, LLC
4 * All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions are met:
8 *
9 * 1. Redistributions of source code must retain the above copyright notice, this
10 * list of conditions and the following disclaimer.
11 *
12 * 2. Redistributions in binary form must reproduce the above copyright notice,
13 * this list of conditions and the following disclaimer in the documentation
14 * and/or other materials provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
20 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
21 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
22 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
23 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 */
27 #include "tests/checkasm/checkasm.h"
28
29 #include <errno.h>
30 #include <math.h>
31 #include <signal.h>
32 #include <stdarg.h>
33 #include <stdio.h>
34 #include <string.h>
35
36 #include "src/cpu.h"
37
38 #ifdef _WIN32
39 #ifndef SIGBUS
40 /* non-standard, use the same value as mingw-w64 */
41 #define SIGBUS 10
42 #endif
43 #ifndef ENABLE_VIRTUAL_TERMINAL_PROCESSING
44 #define ENABLE_VIRTUAL_TERMINAL_PROCESSING 0x04
45 #endif
46 #else
47 #include <time.h>
48 #if HAVE_UNISTD_H
49 #include <unistd.h>
50 #endif
51 #if HAVE_PTHREAD_SETAFFINITY_NP
52 #include <pthread.h>
53 #if HAVE_PTHREAD_NP_H
54 #include <pthread_np.h>
55 #endif
56 #endif
57 #ifdef __APPLE__
58 #include <mach/mach_time.h>
59 #endif
60 #endif
61 #if CONFIG_MACOS_KPERF
62 #include <dlfcn.h>
63 #endif
64
/* Color codes passed as the `color` argument of color_fprintf(), which
 * inserts them into an "\x1b[0;%dm" escape sequence. */
#define COLOR_RED    31
#define COLOR_GREEN  32
#define COLOR_YELLOW 33
68
69 /* List of tests to invoke */
70 static const struct {
71 const char *name;
72 void (*func)(void);
73 } tests[] = {
74 { "msac", checkasm_check_msac },
75 { "pal", checkasm_check_pal },
76 { "refmvs", checkasm_check_refmvs },
77 #if CONFIG_8BPC
78 { "cdef_8bpc", checkasm_check_cdef_8bpc },
79 { "filmgrain_8bpc", checkasm_check_filmgrain_8bpc },
80 { "ipred_8bpc", checkasm_check_ipred_8bpc },
81 { "itx_8bpc", checkasm_check_itx_8bpc },
82 { "loopfilter_8bpc", checkasm_check_loopfilter_8bpc },
83 { "looprestoration_8bpc", checkasm_check_looprestoration_8bpc },
84 { "mc_8bpc", checkasm_check_mc_8bpc },
85 #endif
86 #if CONFIG_16BPC
87 { "cdef_16bpc", checkasm_check_cdef_16bpc },
88 { "filmgrain_16bpc", checkasm_check_filmgrain_16bpc },
89 { "ipred_16bpc", checkasm_check_ipred_16bpc },
90 { "itx_16bpc", checkasm_check_itx_16bpc },
91 { "loopfilter_16bpc", checkasm_check_loopfilter_16bpc },
92 { "looprestoration_16bpc", checkasm_check_looprestoration_16bpc },
93 { "mc_16bpc", checkasm_check_mc_16bpc },
94 #endif
95 { 0 }
96 };
97
/* Table of cpu flags to check for the target architecture.
 * `name` is the label printed in the report, `suffix` is appended to function
 * names, `flag` is the cpu flag bit. The table ends with a zeroed sentinel. */
static const struct {
    const char *name;
    const char *suffix;
    unsigned flag;
} cpus[] = {
#if ARCH_X86
    { .name = "SSE2",               .suffix = "sse2",      .flag = DAV1D_X86_CPU_FLAG_SSE2 },
    { .name = "SSSE3",              .suffix = "ssse3",     .flag = DAV1D_X86_CPU_FLAG_SSSE3 },
    { .name = "SSE4.1",             .suffix = "sse4",      .flag = DAV1D_X86_CPU_FLAG_SSE41 },
    { .name = "AVX2",               .suffix = "avx2",      .flag = DAV1D_X86_CPU_FLAG_AVX2 },
    { .name = "AVX-512 (Ice Lake)", .suffix = "avx512icl", .flag = DAV1D_X86_CPU_FLAG_AVX512ICL },
#elif ARCH_AARCH64 || ARCH_ARM
    { .name = "NEON",    .suffix = "neon",    .flag = DAV1D_ARM_CPU_FLAG_NEON },
    { .name = "DOTPROD", .suffix = "dotprod", .flag = DAV1D_ARM_CPU_FLAG_DOTPROD },
    { .name = "I8MM",    .suffix = "i8mm",    .flag = DAV1D_ARM_CPU_FLAG_I8MM },
#if ARCH_AARCH64
    { .name = "SVE",     .suffix = "sve",     .flag = DAV1D_ARM_CPU_FLAG_SVE },
    { .name = "SVE2",    .suffix = "sve2",    .flag = DAV1D_ARM_CPU_FLAG_SVE2 },
#endif /* ARCH_AARCH64 */
#elif ARCH_LOONGARCH
    { .name = "LSX",  .suffix = "lsx",  .flag = DAV1D_LOONGARCH_CPU_FLAG_LSX },
    { .name = "LASX", .suffix = "lasx", .flag = DAV1D_LOONGARCH_CPU_FLAG_LASX },
#elif ARCH_PPC64LE
    { .name = "VSX",  .suffix = "vsx",  .flag = DAV1D_PPC_CPU_FLAG_VSX },
    { .name = "PWR9", .suffix = "pwr9", .flag = DAV1D_PPC_CPU_FLAG_PWR9 },
#elif ARCH_RISCV
    { .name = "RVV", .suffix = "rvv", .flag = DAV1D_RISCV_CPU_FLAG_V },
#endif
    { 0 }
};
129
#if ARCH_AARCH64 && HAVE_SVE
/* Defined outside this file. main() prints the returned value as the
 * SVE length in bits. */
int checkasm_sve_length(void);
#endif
133
/* One registered implementation of a function under test. All versions of
 * the same function are chained through `next`. */
typedef struct CheckasmFuncVersion {
    struct CheckasmFuncVersion *next; /* next version, NULL at the end of the list */
    void *func;      /* pointer passed to checkasm_check_func() */
    int ok;          /* set to 1 on registration, cleared by checkasm_fail_func() */
    unsigned cpu;    /* value of state.cpu_flag at registration time */
    int iterations;  /* accumulated by checkasm_update_bench() */
    uint64_t cycles; /* accumulated by checkasm_update_bench() */
} CheckasmFuncVersion;
142
/* Binary search tree node */
typedef struct CheckasmFunc {
    /* child[0] holds names sorting before this node, child[1] names sorting
     * after it, as ordered by cmp_func_names() */
    struct CheckasmFunc *child[2];
    CheckasmFuncVersion versions; /* head of the version list, embedded in the node */
    uint8_t color; /* 0 = red, 1 = black */
    char name[];   /* NUL-terminated function name, allocated together with the node */
} CheckasmFunc;
150
/* Operating mode selected on the command line.
 * The order matters: main() uses `run_mode >= RUN_CPUFLAG_LISTING` to detect
 * the listing modes, whose output goes to stdout. */
typedef enum {
    RUN_NORMAL = 0,       /* default: run the tests */
    RUN_BENCHMARK,        /* --bench: run the tests and benchmark them */
    RUN_CPUFLAG_LISTING,  /* --list-cpuflags */
    RUN_FUNCTION_LISTING, /* --list-functions */
} CheckasmRunMode;
157
/* Internal state */
static struct {
    CheckasmFunc *funcs;                   /* root of the function tree */
    CheckasmFunc *current_func;            /* node selected by the last checkasm_check_func() */
    CheckasmFuncVersion *current_func_ver; /* version selected by the last checkasm_check_func() */
    const char *current_test_name;         /* name of the tests[] entry being run */
    int num_checked;                       /* functions checked with a non-zero cpu flag */
    int num_failed;                        /* functions reported via checkasm_fail_func() */
    double nop_time;                       /* result of measure_nop_time() */
    unsigned cpu_flag;                     /* cpu flags currently in effect */
    const char *cpu_flag_name;             /* pending header for print_cpu_name(), NULL once printed */
    const char *test_pattern;              /* --test / -t wildcard pattern, NULL = no filter */
    const char *function_pattern;          /* --function / -f wildcard pattern, NULL = no filter */
    unsigned seed;                         /* rng seed */
    CheckasmRunMode run_mode;
    int verbose;                           /* set by --verbose / -v */
    volatile sig_atomic_t sig; // SIG_ATOMIC_MAX = signal handling enabled
    int suffix_length;                     /* strlen of the current cpu suffix, plus 1 */
    int max_function_name_length;          /* longest name plus suffix seen, used for alignment */
#if ARCH_X86_64
    void (*simd_warmup)(void);             /* warmup routine chosen in main() */
#endif
} state;
181
/* float compare support code */
/* Gives access to the 32-bit representation of a float */
typedef union {
    float f;
    uint32_t i;
} intfloat;
187
/* The four state words of the xorshift128 generator */
static uint32_t xs_state[4];

/* Seed the generator: every state word is derived from `seed` and its
 * bitwise complement. */
static void xor128_srand(unsigned seed) {
    const unsigned inverted = ~seed;
    const unsigned upper = 0xffff0000;
    const unsigned lower = 0x0000ffff;

    xs_state[0] = seed;
    xs_state[1] = (seed     & upper) | (inverted & lower);
    xs_state[2] = (inverted & upper) | (seed     & lower);
    xs_state[3] = inverted;
}

// xor128 from Marsaglia, George (July 2003). "Xorshift RNGs".
// Journal of Statistical Software. 8 (14).
// doi:10.18637/jss.v008.i14.
/* Advance the generator by one step and return the new state word shifted
 * right by one bit, so the result is never negative. */
int xor128_rand(void) {
    uint32_t t = xs_state[0];
    uint32_t w = xs_state[3];

    /* Shift the state words down by one slot */
    xs_state[0] = xs_state[1];
    xs_state[1] = xs_state[2];
    xs_state[2] = w;

    t ^= t << 11;
    t ^= t >> 8;
    w ^= w >> 19;
    w ^= t;
    xs_state[3] = w;

    return w >> 1;
}
214
215 #if CONFIG_MACOS_KPERF
216
/* Resolved with dlsym() in kperf_init(), called by checkasm_kperf_cycles() */
static int (*kpc_get_thread_counters)(int, unsigned int, void *);

/* OR-ed with CPMU_CORE_CYCLE into config[0] in kperf_init() */
#define CFGWORD_EL0A64EN_MASK (0x20000)

/* Event selected for the first configurable counter
 * (presumably core cycles, going by the name) */
#define CPMU_CORE_CYCLE 0x02

#define KPC_CLASS_FIXED_MASK (1 << 0)
#define KPC_CLASS_CONFIGURABLE_MASK (1 << 1)

/* Counter and config counts that kperf_init() requires the kperf API to
 * report for KPC_MASK */
#define COUNTERS_COUNT 10
#define CONFIG_COUNT 8
#define KPC_MASK (KPC_CLASS_CONFIGURABLE_MASK | KPC_CLASS_FIXED_MASK)
229
kperf_init(void)230 static int kperf_init(void) {
231 uint64_t config[COUNTERS_COUNT] = { 0 };
232
233 void *kperf = dlopen("/System/Library/PrivateFrameworks/kperf.framework/kperf", RTLD_LAZY);
234 if (!kperf) {
235 fprintf(stderr, "checkasm: Unable to load kperf: %s\n", dlerror());
236 return 1;
237 }
238
239 int (*kpc_force_all_ctrs_set)(int) = dlsym(kperf, "kpc_force_all_ctrs_set");
240 int (*kpc_set_counting)(uint32_t) = dlsym(kperf, "kpc_set_counting");
241 int (*kpc_set_thread_counting)(uint32_t) = dlsym(kperf, "kpc_set_thread_counting");
242 int (*kpc_set_config)(uint32_t, void *) = dlsym(kperf, "kpc_set_config");
243 uint32_t (*kpc_get_counter_count)(uint32_t) = dlsym(kperf, "kpc_get_counter_count");
244 uint32_t (*kpc_get_config_count)(uint32_t) = dlsym(kperf, "kpc_get_config_count");
245 kpc_get_thread_counters = dlsym(kperf, "kpc_get_thread_counters");
246
247 if (!kpc_get_thread_counters) {
248 fprintf(stderr, "checkasm: Unable to load kpc_get_thread_counters\n");
249 return 1;
250 }
251
252 if (!kpc_get_counter_count || kpc_get_counter_count(KPC_MASK) != COUNTERS_COUNT) {
253 fprintf(stderr, "checkasm: Unxpected kpc_get_counter_count\n");
254 return 1;
255 }
256 if (!kpc_get_config_count || kpc_get_config_count(KPC_MASK) != CONFIG_COUNT) {
257 fprintf(stderr, "checkasm: Unxpected kpc_get_config_count\n");
258 return 1;
259 }
260
261 config[0] = CPMU_CORE_CYCLE | CFGWORD_EL0A64EN_MASK;
262
263 if (!kpc_set_config || kpc_set_config(KPC_MASK, config)) {
264 fprintf(stderr, "checkasm: The kperf API needs to be run as root\n");
265 return 1;
266 }
267 if (!kpc_force_all_ctrs_set || kpc_force_all_ctrs_set(1)) {
268 fprintf(stderr, "checkasm: kpc_force_all_ctrs_set failed\n");
269 return 1;
270 }
271 if (!kpc_set_counting || kpc_set_counting(KPC_MASK)) {
272 fprintf(stderr, "checkasm: kpc_set_counting failed\n");
273 return 1;
274 }
275 if (!kpc_set_counting || kpc_set_thread_counting(KPC_MASK)) {
276 fprintf(stderr, "checkasm: kpc_set_thread_counting failed\n");
277 return 1;
278 }
279 return 0;
280 }
281
checkasm_kperf_cycles(void)282 uint64_t checkasm_kperf_cycles(void) {
283 uint64_t counters[COUNTERS_COUNT];
284 if (kpc_get_thread_counters(0, COUNTERS_COUNT, counters))
285 return -1;
286
287 return counters[0];
288 }
289 #endif
290
is_negative(const intfloat u)291 static int is_negative(const intfloat u) {
292 return u.i >> 31;
293 }
294
float_near_ulp(const float a,const float b,const unsigned max_ulp)295 int float_near_ulp(const float a, const float b, const unsigned max_ulp) {
296 intfloat x, y;
297
298 x.f = a;
299 y.f = b;
300
301 if (is_negative(x) != is_negative(y)) {
302 // handle -0.0 == +0.0
303 return a == b;
304 }
305
306 if (llabs((int64_t)x.i - y.i) <= max_ulp)
307 return 1;
308
309 return 0;
310 }
311
/* Return 1 if float_near_ulp() holds for each of the first len element
 * pairs of a and b, else 0. Returns 1 when len <= 0. */
int float_near_ulp_array(const float *const a, const float *const b,
                         const unsigned max_ulp, const int len)
{
    int i = 0;

    /* Stop at the first pair that is too far apart */
    while (i < len && float_near_ulp(a[i], b[i], max_ulp))
        i++;

    return i >= len;
}
321
/* Return 1 if |a - b| is strictly less than eps, else 0 */
int float_near_abs_eps(const float a, const float b, const float eps) {
    if (fabsf(a - b) < eps)
        return 1;

    return 0;
}
325
/* Return 1 if float_near_abs_eps() holds for each of the first len element
 * pairs of a and b, else 0. Returns 1 when len <= 0. */
int float_near_abs_eps_array(const float *const a, const float *const b,
                             const float eps, const int len)
{
    int i = 0;

    /* Stop at the first pair that is too far apart */
    while (i < len && float_near_abs_eps(a[i], b[i], eps))
        i++;

    return i >= len;
}
335
/* Return 1 if a and b are close by either criterion: within max_ulp
 * representation steps, or an absolute difference below eps. */
int float_near_abs_eps_ulp(const float a, const float b, const float eps,
                           const unsigned max_ulp)
{
    if (float_near_ulp(a, b, max_ulp))
        return 1;

    return float_near_abs_eps(a, b, eps);
}
341
/* Return 1 if float_near_abs_eps_ulp() holds for each of the first len
 * element pairs of a and b, else 0. Returns 1 when len <= 0. */
int float_near_abs_eps_array_ulp(const float *const a, const float *const b,
                                 const float eps, const unsigned max_ulp,
                                 const int len)
{
    int i = 0;

    /* Stop at the first pair that is too far apart */
    while (i < len && float_near_abs_eps_ulp(a[i], b[i], eps, max_ulp))
        i++;

    return i >= len;
}
352
/* Non-zero when color escape sequences should be emitted; set in main() */
static int use_printf_color;

/* printf-style output to `f`. When use_printf_color is set, the text is
 * wrapped in escape sequences that select `color` and then reset it. */
static void color_fprintf(FILE *const f, const int color, const char *const fmt, ...) {
    const int colored = use_printf_color;
    va_list ap;

    if (colored)
        fprintf(f, "\x1b[0;%dm", color);

    va_start(ap, fmt);
    vfprintf(f, fmt, ap);
    va_end(ap);

    if (colored)
        fprintf(f, "\x1b[0m");
}
368
369 /* Deallocate a tree */
destroy_func_tree(CheckasmFunc * const f)370 static void destroy_func_tree(CheckasmFunc *const f) {
371 if (f) {
372 CheckasmFuncVersion *v = f->versions.next;
373 while (v) {
374 CheckasmFuncVersion *next = v->next;
375 free(v);
376 v = next;
377 }
378
379 destroy_func_tree(f->child[0]);
380 destroy_func_tree(f->child[1]);
381 free(f);
382 }
383 }
384
385 /* Allocate a zero-initialized block, clean up and exit on failure */
checkasm_malloc(const size_t size)386 static void *checkasm_malloc(const size_t size) {
387 void *const ptr = calloc(1, size);
388 if (!ptr) {
389 fprintf(stderr, "checkasm: malloc failed\n");
390 destroy_func_tree(state.funcs);
391 exit(1);
392 }
393 return ptr;
394 }
395
396 /* Get the suffix of the specified cpu flag */
cpu_suffix(const unsigned cpu)397 static const char *cpu_suffix(const unsigned cpu) {
398 for (int i = (int)(sizeof(cpus) / sizeof(*cpus)) - 2; i >= 0; i--)
399 if (cpu & cpus[i].flag)
400 return cpus[i].suffix;
401
402 return "c";
403 }
404
405 #ifdef readtime
/* qsort() comparator ordering uint16_t values ascending */
static int cmp_nop(const void *a, const void *b) {
    const uint16_t lhs = *(const uint16_t*)a;
    const uint16_t rhs = *(const uint16_t*)b;

    return lhs - rhs;
}
409
410 /* Measure the overhead of the timing code (in decicycles) */
measure_nop_time(void)411 static double measure_nop_time(void) {
412 uint16_t nops[10000];
413 int nop_sum = 0;
414
415 for (int i = 0; i < 10000; i++) {
416 uint64_t t = readtime();
417 nops[i] = (uint16_t) (readtime() - t);
418 }
419
420 qsort(nops, 10000, sizeof(uint16_t), cmp_nop);
421 for (int i = 2500; i < 7500; i++)
422 nop_sum += nops[i];
423
424 return nop_sum / 5000.0;
425 }
426
avg_cycles_per_call(const CheckasmFuncVersion * const v)427 static double avg_cycles_per_call(const CheckasmFuncVersion *const v) {
428 if (v->iterations) {
429 const double cycles = (double)v->cycles / v->iterations - state.nop_time;
430 if (cycles > 0.0)
431 return cycles / 4.0; /* 4 calls per iteration */
432 }
433 return 0.0;
434 }
435
436 /* Print benchmark results */
print_benchs(const CheckasmFunc * const f)437 static void print_benchs(const CheckasmFunc *const f) {
438 if (f) {
439 print_benchs(f->child[0]);
440
441 /* Only print functions with at least one assembly version */
442 const CheckasmFuncVersion *v = &f->versions;
443 if (v->iterations) {
444 const double baseline = avg_cycles_per_call(v);
445 do {
446 const int pad_length = 10 + state.max_function_name_length -
447 printf("%s_%s:", f->name, cpu_suffix(v->cpu));
448 const double cycles = avg_cycles_per_call(v);
449 const double ratio = cycles ? baseline / cycles : 0.0;
450 printf("%*.1f (%5.2fx)\n", imax(pad_length, 0), cycles, ratio);
451 } while ((v = v->next));
452 }
453
454 print_benchs(f->child[1]);
455 }
456 }
457 #endif
458
print_functions(const CheckasmFunc * const f)459 static void print_functions(const CheckasmFunc *const f) {
460 if (f) {
461 print_functions(f->child[0]);
462 const CheckasmFuncVersion *v = &f->versions;
463 printf("%s (%s", f->name, cpu_suffix(v->cpu));
464 while ((v = v->next))
465 printf(", %s", cpu_suffix(v->cpu));
466 printf(")\n");
467 print_functions(f->child[1]);
468 }
469 }
470
#define is_digit(x) ((x) >= '0' && (x) <= '9')

/* ASCIIbetical sort except preserving natural order for numbers.
 * Returns a negative, zero or positive value like strcmp(). When the names
 * first differ inside a run of digits, the name with the shorter run sorts
 * first (e.g. "w8" before "w16"). */
static int cmp_func_names(const char *a, const char *b) {
    const char *const start = a;
    int ascii_diff;

    /* Advance to the first differing character, or to the end of both */
    while (!(ascii_diff = *(const unsigned char*)a -
                          *(const unsigned char*)b) && *a)
    {
        a++;
        b++;
    }

    /* Skip the part of the digit run that both names still share */
    while (is_digit(*a) && is_digit(*b)) {
        a++;
        b++;
    }

    /* Inside a number, the name whose digit run continues is the larger */
    if (a > start && is_digit(a[-1])) {
        const int digit_diff = is_digit(*a) - is_digit(*b);
        if (digit_diff)
            return digit_diff;
    }

    return ascii_diff;
}
490
491 /* Perform a tree rotation in the specified direction and return the new root */
rotate_tree(CheckasmFunc * const f,const int dir)492 static CheckasmFunc *rotate_tree(CheckasmFunc *const f, const int dir) {
493 CheckasmFunc *const r = f->child[dir^1];
494 f->child[dir^1] = r->child[dir];
495 r->child[dir] = f;
496 r->color = f->color;
497 f->color = 0;
498 return r;
499 }
500
501 #define is_red(f) ((f) && !(f)->color)
502
503 /* Balance a left-leaning red-black tree at the specified node */
balance_tree(CheckasmFunc ** const root)504 static void balance_tree(CheckasmFunc **const root) {
505 CheckasmFunc *const f = *root;
506
507 if (is_red(f->child[0]) && is_red(f->child[1])) {
508 f->color ^= 1;
509 f->child[0]->color = f->child[1]->color = 1;
510 }
511 else if (!is_red(f->child[0]) && is_red(f->child[1]))
512 *root = rotate_tree(f, 0); /* Rotate left */
513 else if (is_red(f->child[0]) && is_red(f->child[0]->child[0]))
514 *root = rotate_tree(f, 1); /* Rotate right */
515 }
516
517 /* Get a node with the specified name, creating it if it doesn't exist */
get_func(CheckasmFunc ** const root,const char * const name)518 static CheckasmFunc *get_func(CheckasmFunc **const root, const char *const name) {
519 CheckasmFunc *f = *root;
520
521 if (f) {
522 /* Search the tree for a matching node */
523 const int cmp = cmp_func_names(name, f->name);
524 if (cmp) {
525 f = get_func(&f->child[cmp > 0], name);
526
527 /* Rebalance the tree on the way up if a new node was inserted */
528 if (!f->versions.func)
529 balance_tree(root);
530 }
531 } else {
532 /* Allocate and insert a new node into the tree */
533 const size_t name_length = strlen(name) + 1;
534 f = *root = checkasm_malloc(offsetof(CheckasmFunc, name) + name_length);
535 memcpy(f->name, name, name_length);
536 }
537
538 return f;
539 }
540
/* Global context buffer. NOTE(review): presumably the storage behind
 * checkasm_save_context()/checkasm_load_context(), which are defined
 * outside this file -- confirm. */
checkasm_context checkasm_context_buf;
542
543 /* Crash handling: attempt to catch crashes and handle them
544 * gracefully instead of just aborting abruptly. */
545 #ifdef _WIN32
546 #if WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_DESKTOP)
signal_handler(EXCEPTION_POINTERS * const e)547 static LONG NTAPI signal_handler(EXCEPTION_POINTERS *const e) {
548 if (state.sig == SIG_ATOMIC_MAX) {
549 int s;
550 switch (e->ExceptionRecord->ExceptionCode) {
551 case EXCEPTION_FLT_DIVIDE_BY_ZERO:
552 case EXCEPTION_INT_DIVIDE_BY_ZERO:
553 s = SIGFPE;
554 break;
555 case EXCEPTION_ILLEGAL_INSTRUCTION:
556 case EXCEPTION_PRIV_INSTRUCTION:
557 s = SIGILL;
558 break;
559 case EXCEPTION_ACCESS_VIOLATION:
560 case EXCEPTION_ARRAY_BOUNDS_EXCEEDED:
561 case EXCEPTION_DATATYPE_MISALIGNMENT:
562 case EXCEPTION_STACK_OVERFLOW:
563 s = SIGSEGV;
564 break;
565 case EXCEPTION_IN_PAGE_ERROR:
566 s = SIGBUS;
567 break;
568 default:
569 return EXCEPTION_CONTINUE_SEARCH;
570 }
571 state.sig = s;
572 checkasm_load_context();
573 }
574 return EXCEPTION_CONTINUE_SEARCH;
575 }
576 #endif
577 #else
578 static void signal_handler(int s);
579
580 static const struct sigaction signal_handler_act = {
581 .sa_handler = signal_handler,
582 .sa_flags = SA_RESETHAND,
583 };
584
signal_handler(const int s)585 static void signal_handler(const int s) {
586 if (state.sig == SIG_ATOMIC_MAX) {
587 state.sig = s;
588 sigaction(s, &signal_handler_act, NULL);
589 checkasm_load_context();
590 }
591 }
592 #endif
593
/* Compares a string with a wildcard pattern, where '*' matches any run of
 * characters, including an empty one. Returns 0 on a match and non-zero
 * otherwise. */
static int wildstrcmp(const char *str, const char *pattern) {
    const char *rest = strchr(pattern, '*');

    /* No wildcard: plain string comparison */
    if (!rest)
        return strcmp(str, pattern);

    /* The literal text before the first '*' must match exactly */
    const size_t prefix_len = rest - pattern;
    if (strncmp(str, pattern, prefix_len))
        return 1;

    /* Collapse consecutive wildcards */
    do {
        rest++;
    } while (*rest == '*');

    /* A trailing wildcard matches whatever is left */
    if (!*rest)
        return 0;

    /* Try to match the remaining pattern at every later position */
    for (str += prefix_len; *str; str++)
        if (!wildstrcmp(str, rest))
            break;

    return !*str;
}
608
609 /* Perform tests and benchmarks for the specified
610 * cpu flag if supported by the host */
check_cpu_flag(const char * const name,unsigned flag)611 static void check_cpu_flag(const char *const name, unsigned flag) {
612 const unsigned old_cpu_flag = state.cpu_flag;
613
614 flag |= old_cpu_flag;
615 dav1d_set_cpu_flags_mask(flag);
616 state.cpu_flag = dav1d_get_cpu_flags();
617
618 if (!flag || state.cpu_flag != old_cpu_flag) {
619 state.cpu_flag_name = name;
620 state.suffix_length = (int)strlen(cpu_suffix(flag)) + 1;
621 for (int i = 0; tests[i].func; i++) {
622 if (state.test_pattern && wildstrcmp(tests[i].name, state.test_pattern))
623 continue;
624 xor128_srand(state.seed);
625 state.current_test_name = tests[i].name;
626 tests[i].func();
627 }
628 }
629 }
630
631 /* Print the name of the current CPU flag, but only do it once */
print_cpu_name(void)632 static void print_cpu_name(void) {
633 if (state.cpu_flag_name) {
634 color_fprintf(stderr, COLOR_YELLOW, "%s:\n", state.cpu_flag_name);
635 state.cpu_flag_name = NULL;
636 }
637 }
638
get_seed(void)639 static unsigned get_seed(void) {
640 #ifdef _WIN32
641 LARGE_INTEGER i;
642 QueryPerformanceCounter(&i);
643 return i.LowPart;
644 #elif defined(__APPLE__)
645 return (unsigned) mach_absolute_time();
646 #else
647 struct timespec ts;
648 clock_gettime(CLOCK_MONOTONIC, &ts);
649 return (unsigned) (1000000000ULL * ts.tv_sec + ts.tv_nsec);
650 #endif
651 }
652
/* Parse all of `str` as an unsigned long in the given base and store the
 * result in *dst. Returns 0 on success and 1 if strtoul() set errno, no
 * characters were consumed, or unparsed characters remain. */
static int checkasm_strtoul(unsigned long *const dst, const char *const str, const int base) {
    char *end;

    errno = 0;
    const unsigned long value = strtoul(str, &end, base);
    *dst = value;

    if (errno)
        return 1;

    return end == str || *end != '\0';
}
659
main(int argc,char * argv[])660 int main(int argc, char *argv[]) {
661 state.seed = get_seed();
662
663 while (argc > 1) {
664 if (!strncmp(argv[1], "--help", 6) || !strcmp(argv[1], "-h")) {
665 fprintf(stderr,
666 "checkasm [options] <random seed>\n"
667 " <random seed> Numeric value to seed the rng\n"
668 "Options:\n"
669 " --affinity=<cpu> Run the process on CPU <cpu>\n"
670 " --test=<pattern> -t Test only <pattern>\n"
671 " --function=<pattern> -f Test only the functions matching <pattern>\n"
672 " --bench -b Benchmark the tested functions\n"
673 " --list-cpuflags List available cpu flags\n"
674 " --list-functions List available functions\n"
675 " --list-tests List available tests\n"
676 " --verbose -v Print verbose output\n");
677 return 0;
678 } else if (!strcmp(argv[1], "--bench") || !strcmp(argv[1], "-b")) {
679 #ifndef readtime
680 fprintf(stderr,
681 "checkasm: --bench is not supported on your system\n");
682 return 1;
683 #endif
684 state.run_mode = RUN_BENCHMARK;
685 } else if (!strncmp(argv[1], "--test=", 7)) {
686 state.test_pattern = argv[1] + 7;
687 } else if (!strcmp(argv[1], "-t")) {
688 state.test_pattern = argc > 1 ? argv[2] : "";
689 argc--;
690 argv++;
691 } else if (!strncmp(argv[1], "--function=", 11)) {
692 state.function_pattern = argv[1] + 11;
693 } else if (!strcmp(argv[1], "-f")) {
694 state.function_pattern = argc > 1 ? argv[2] : "";
695 argc--;
696 argv++;
697 } else if (!strcmp(argv[1], "--list-cpuflags")) {
698 state.run_mode = RUN_CPUFLAG_LISTING;
699 break;
700 } else if (!strcmp(argv[1], "--list-functions")) {
701 state.run_mode = RUN_FUNCTION_LISTING;
702 } else if (!strcmp(argv[1], "--list-tests")) {
703 for (int i = 0; tests[i].name; i++)
704 printf("%s\n", tests[i].name);
705 return 0;
706 } else if (!strcmp(argv[1], "--verbose") || !strcmp(argv[1], "-v")) {
707 state.verbose = 1;
708 } else if (!strncmp(argv[1], "--affinity=", 11)) {
709 const char *const s = argv[1] + 11;
710 unsigned long affinity;
711 if (checkasm_strtoul(&affinity, s, 16)) {
712 fprintf(stderr, "checkasm: invalid cpu affinity (%s)\n", s);
713 return 1;
714 }
715 #ifdef _WIN32
716 int affinity_err;
717 HANDLE process = GetCurrentProcess();
718 #if WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_DESKTOP)
719 BOOL (WINAPI *spdcs)(HANDLE, const ULONG*, ULONG) =
720 (void*)GetProcAddress(GetModuleHandleW(L"kernel32.dll"), "SetProcessDefaultCpuSets");
721 if (spdcs)
722 affinity_err = !spdcs(process, (ULONG[]){ affinity + 256 }, 1);
723 else
724 #endif
725 {
726 if (affinity < sizeof(DWORD_PTR) * 8)
727 affinity_err = !SetProcessAffinityMask(process, (DWORD_PTR)1 << affinity);
728 else
729 affinity_err = 1;
730 }
731 if (affinity_err) {
732 fprintf(stderr, "checkasm: invalid cpu affinity (%lu)\n", affinity);
733 return 1;
734 } else {
735 fprintf(stderr, "checkasm: running on cpu %lu\n", affinity);
736 }
737 #elif HAVE_PTHREAD_SETAFFINITY_NP && defined(CPU_SET)
738 cpu_set_t set;
739 CPU_ZERO(&set);
740 CPU_SET(affinity, &set);
741 if (pthread_setaffinity_np(pthread_self(), sizeof(set), &set)) {
742 fprintf(stderr, "checkasm: invalid cpu affinity (%lu)\n", affinity);
743 return 1;
744 } else {
745 fprintf(stderr, "checkasm: running on cpu %lu\n", affinity);
746 }
747 #else
748 (void)affinity;
749 fprintf(stderr,
750 "checkasm: --affinity is not supported on your system\n");
751 return 1;
752 #endif
753 } else {
754 unsigned long seed;
755 if (checkasm_strtoul(&seed, argv[1], 10)) {
756 fprintf(stderr, "checkasm: unknown option (%s)\n", argv[1]);
757 return 1;
758 }
759 state.seed = (unsigned)seed;
760 }
761
762 argc--;
763 argv++;
764 }
765
766 #if TRIM_DSP_FUNCTIONS
767 fprintf(stderr, "checkasm: reference functions unavailable, reconfigure using '-Dtrim_dsp=false'\n");
768 return 0;
769 #endif
770
771 dav1d_init_cpu();
772
773 #ifdef _WIN32
774 #if WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_DESKTOP)
775 AddVectoredExceptionHandler(0, signal_handler);
776
777 HANDLE con = GetStdHandle(state.run_mode >= RUN_CPUFLAG_LISTING ?
778 STD_OUTPUT_HANDLE : STD_ERROR_HANDLE);
779 DWORD con_mode = 0;
780 use_printf_color = con && con != INVALID_HANDLE_VALUE &&
781 GetConsoleMode(con, &con_mode) &&
782 SetConsoleMode(con, con_mode | ENABLE_VIRTUAL_TERMINAL_PROCESSING);
783 #endif
784 #else
785 sigaction(SIGBUS, &signal_handler_act, NULL);
786 sigaction(SIGFPE, &signal_handler_act, NULL);
787 sigaction(SIGILL, &signal_handler_act, NULL);
788 sigaction(SIGSEGV, &signal_handler_act, NULL);
789
790 if (isatty(state.run_mode >= RUN_CPUFLAG_LISTING ? 1 : 2)) {
791 const char *const term = getenv("TERM");
792 use_printf_color = term && strcmp(term, "dumb");
793 }
794 #endif
795
796 #ifdef readtime
797 if (state.run_mode == RUN_BENCHMARK) {
798 #if CONFIG_MACOS_KPERF
799 if (kperf_init())
800 return 1;
801 #endif
802 if (!checkasm_save_context()) {
803 checkasm_set_signal_handler_state(1);
804 readtime();
805 checkasm_set_signal_handler_state(0);
806 } else {
807 fprintf(stderr, "checkasm: unable to access cycle counter\n");
808 return 1;
809 }
810 }
811 #endif
812
813 int ret = 0;
814
815 if (state.run_mode != RUN_FUNCTION_LISTING) {
816 const unsigned cpu_flags = dav1d_get_cpu_flags();
817 if (state.run_mode == RUN_CPUFLAG_LISTING) {
818 const int last_i = (int)(sizeof(cpus) / sizeof(*cpus)) - 2;
819 for (int i = 0; i <= last_i ; i++) {
820 if (cpus[i].flag & cpu_flags)
821 color_fprintf(stdout, COLOR_GREEN, "%s", cpus[i].suffix);
822 else
823 color_fprintf(stdout, COLOR_RED, "~%s", cpus[i].suffix);
824 printf(i == last_i ? "\n" : ", ");
825 }
826 return 0;
827 }
828 #if ARCH_X86_64
829 void checkasm_warmup_avx2(void);
830 void checkasm_warmup_avx512(void);
831 if (cpu_flags & DAV1D_X86_CPU_FLAG_AVX512ICL)
832 state.simd_warmup = checkasm_warmup_avx512;
833 else if (cpu_flags & DAV1D_X86_CPU_FLAG_AVX2)
834 state.simd_warmup = checkasm_warmup_avx2;
835 checkasm_simd_warmup();
836 #endif
837 #if ARCH_ARM
838 void checkasm_checked_call_vfp(void *func, int dummy, ...);
839 void checkasm_checked_call_novfp(void *func, int dummy, ...);
840 if (cpu_flags & DAV1D_ARM_CPU_FLAG_NEON)
841 checkasm_checked_call_ptr = checkasm_checked_call_vfp;
842 else
843 checkasm_checked_call_ptr = checkasm_checked_call_novfp;
844 #endif
845 #if ARCH_X86
846 unsigned checkasm_init_x86(char *name);
847 char name[48];
848 const unsigned cpuid = checkasm_init_x86(name);
849 for (size_t len = strlen(name); len && name[len-1] == ' '; len--)
850 name[len-1] = '\0'; /* trim trailing whitespace */
851 fprintf(stderr, "checkasm: %s (%08X) using random seed %u\n", name, cpuid, state.seed);
852 #elif ARCH_RISCV
853 char buf[32] = "";
854 if (cpu_flags & DAV1D_RISCV_CPU_FLAG_V)
855 snprintf(buf, sizeof(buf), "VLEN=%i bits, ", dav1d_get_vlen());
856 fprintf(stderr, "checkasm: %susing random seed %u\n", buf, state.seed);
857 #elif ARCH_AARCH64 && HAVE_SVE
858 char buf[48] = "";
859 if (cpu_flags & DAV1D_ARM_CPU_FLAG_SVE)
860 snprintf(buf, sizeof(buf), "SVE %d bits, ", checkasm_sve_length());
861 fprintf(stderr, "checkasm: %susing random seed %u\n", buf, state.seed);
862 #else
863 fprintf(stderr, "checkasm: using random seed %u\n", state.seed);
864 #endif
865 }
866
867 check_cpu_flag(NULL, 0);
868 for (int i = 0; cpus[i].flag; i++)
869 check_cpu_flag(cpus[i].name, cpus[i].flag);
870
871 if (state.run_mode == RUN_FUNCTION_LISTING) {
872 print_functions(state.funcs);
873 } else if (state.num_failed) {
874 fprintf(stderr, "checkasm: %d of %d tests failed\n",
875 state.num_failed, state.num_checked);
876 ret = 1;
877 } else {
878 if (state.num_checked)
879 fprintf(stderr, "checkasm: all %d tests passed\n", state.num_checked);
880 else
881 fprintf(stderr, "checkasm: no tests to perform\n");
882 #ifdef readtime
883 if (state.run_mode == RUN_BENCHMARK && state.max_function_name_length) {
884 state.nop_time = measure_nop_time();
885 if (state.verbose)
886 printf("nop:%*.1f\n", state.max_function_name_length + 6, state.nop_time);
887 print_benchs(state.funcs);
888 }
889 #endif
890 }
891
892 destroy_func_tree(state.funcs);
893 return ret;
894 }
895
896 /* Decide whether or not the specified function needs to be tested and
897 * allocate/initialize data structures if needed. Returns a pointer to a
898 * reference function if the function should be tested, otherwise NULL */
checkasm_check_func(void * const func,const char * const name,...)899 void *checkasm_check_func(void *const func, const char *const name, ...) {
900 char name_buf[256];
901 va_list arg;
902
903 va_start(arg, name);
904 int name_length = vsnprintf(name_buf, sizeof(name_buf), name, arg);
905 va_end(arg);
906
907 if (!func || name_length <= 0 || (size_t)name_length >= sizeof(name_buf) ||
908 (state.function_pattern && wildstrcmp(name_buf, state.function_pattern)))
909 {
910 return NULL;
911 }
912
913 state.current_func = get_func(&state.funcs, name_buf);
914
915 state.funcs->color = 1;
916 CheckasmFuncVersion *v = &state.current_func->versions;
917 void *ref = func;
918
919 if (v->func) {
920 CheckasmFuncVersion *prev;
921 do {
922 /* Only test functions that haven't already been tested */
923 if (v->func == func)
924 return NULL;
925
926 if (v->ok)
927 ref = v->func;
928
929 prev = v;
930 } while ((v = v->next));
931
932 v = prev->next = checkasm_malloc(sizeof(CheckasmFuncVersion));
933 }
934
935 name_length += state.suffix_length;
936 if (name_length > state.max_function_name_length)
937 state.max_function_name_length = name_length;
938
939 v->func = func;
940 v->ok = 1;
941 v->cpu = state.cpu_flag;
942 state.current_func_ver = v;
943 if (state.run_mode == RUN_FUNCTION_LISTING) /* Save function names without running tests */
944 return NULL;
945
946 xor128_srand(state.seed);
947
948 if (state.cpu_flag)
949 state.num_checked++;
950
951 return ref;
952 }
953
954 /* Decide whether or not the current function needs to be benchmarked */
checkasm_bench_func(void)955 int checkasm_bench_func(void) {
956 return !state.num_failed && state.run_mode == RUN_BENCHMARK;
957 }
958
959 /* Indicate that the current test has failed, return whether verbose printing
960 * is requested. */
checkasm_fail_func(const char * const msg,...)961 int checkasm_fail_func(const char *const msg, ...) {
962 if (state.current_func_ver && state.current_func_ver->cpu &&
963 state.current_func_ver->ok)
964 {
965 va_list arg;
966
967 print_cpu_name();
968 fprintf(stderr, " %s_%s (", state.current_func->name,
969 cpu_suffix(state.current_func_ver->cpu));
970 va_start(arg, msg);
971 vfprintf(stderr, msg, arg);
972 va_end(arg);
973 fprintf(stderr, ")\n");
974
975 state.current_func_ver->ok = 0;
976 state.num_failed++;
977 }
978 return state.verbose;
979 }
980
981 /* Update benchmark results of the current function */
checkasm_update_bench(const int iterations,const uint64_t cycles)982 void checkasm_update_bench(const int iterations, const uint64_t cycles) {
983 state.current_func_ver->iterations += iterations;
984 state.current_func_ver->cycles += cycles;
985 }
986
987 /* Print the outcome of all tests performed since
988 * the last time this function was called */
checkasm_report(const char * const name,...)989 void checkasm_report(const char *const name, ...) {
990 static int prev_checked, prev_failed;
991 static size_t max_length;
992
993 if (state.num_checked > prev_checked) {
994 int pad_length = (int) max_length + 4;
995 va_list arg;
996
997 print_cpu_name();
998 pad_length -= fprintf(stderr, " - %s.", state.current_test_name);
999 va_start(arg, name);
1000 pad_length -= vfprintf(stderr, name, arg);
1001 va_end(arg);
1002 fprintf(stderr, "%*c", imax(pad_length, 0) + 2, '[');
1003
1004 if (state.num_failed == prev_failed)
1005 color_fprintf(stderr, COLOR_GREEN, "OK");
1006 else
1007 color_fprintf(stderr, COLOR_RED, "FAILED");
1008 fprintf(stderr, "]\n");
1009
1010 prev_checked = state.num_checked;
1011 prev_failed = state.num_failed;
1012 } else if (!state.cpu_flag) {
1013 /* Calculate the amount of padding required
1014 * to make the output vertically aligned */
1015 size_t length = strlen(state.current_test_name);
1016 va_list arg;
1017
1018 va_start(arg, name);
1019 length += vsnprintf(NULL, 0, name, arg);
1020 va_end(arg);
1021
1022 if (length > max_length)
1023 max_length = length;
1024 }
1025 }
1026
checkasm_set_signal_handler_state(const int enabled)1027 void checkasm_set_signal_handler_state(const int enabled) {
1028 state.sig = enabled ? SIG_ATOMIC_MAX : 0;
1029 }
1030
checkasm_handle_signal(void)1031 void checkasm_handle_signal(void) {
1032 const int s = state.sig;
1033 checkasm_fail_func(s == SIGFPE ? "fatal arithmetic error" :
1034 s == SIGILL ? "illegal instruction" :
1035 s == SIGBUS ? "bus error" :
1036 "segmentation fault");
1037 }
1038
/* Shared failure bookkeeping for the buffer comparison helpers.
 *
 * On the first problem found (*err still 0) the failure is registered as
 * "file:line". If checkasm_fail_func() reports that verbose output is not
 * wanted, 1 is returned to tell the caller to stop right away; otherwise
 * *err is set and a "name (WxH):" header is printed.
 *
 * Returns 0 whenever the caller may go on printing details, which includes
 * every call made after *err has been set. */
static int check_err(const char *const file, const int line,
                     const char *const name, const int w, const int h,
                     int *const err)
{
    if (!*err) {
        const int verbose = checkasm_fail_func("%s:%d", file, line);

        if (!verbose)
            return 1;

        *err = 1;
        fprintf(stderr, "%s (%dx%d):\n", name, w, h);
    }

    return 0;
}
1051
/* Generate checkasm_check_<type>(), which compares the w x h region of two
 * buffers of the given element type, plus the padding surrounding it.
 *
 * file, line        - location reported when a difference is found
 * buf1, buf2        - top-left element of each region
 * stride1, stride2  - row strides in bytes (converted to elements here)
 * w, h              - size of the compared region in elements
 * name              - label printed in the failure header
 * align_w, align_h  - w and h are rounded up to these to get the aligned
 *                     size; the rounding uses a bit mask, so it is only
 *                     exact for power-of-two alignments
 * padding           - number of guard elements/rows checked on each side
 *
 * If the region differs, the failure is registered through check_err() and,
 * unless check_err() asks to stop, both buffers are dumped side by side
 * followed by a map marking differing elements with 'x'. Afterwards the
 * padding rows above the region and below the aligned height, and the
 * padding columns left of the region and right of the aligned width, are
 * compared and reported as overwrites. Elements between w and the aligned
 * width (and rows between h and the aligned height) are not compared.
 *
 * Returns 0 when nothing differed and 1 otherwise. */
#define DEF_CHECKASM_CHECK_FUNC(type, fmt) \
int checkasm_check_##type(const char *const file, const int line, \
                          const type *buf1, ptrdiff_t stride1, \
                          const type *buf2, ptrdiff_t stride2, \
                          const int w, int h, const char *const name, \
                          const int align_w, const int align_h, \
                          const int padding) \
{ \
    int aligned_w = (w + align_w - 1) & ~(align_w - 1); \
    int aligned_h = (h + align_h - 1) & ~(align_h - 1); \
    int err = 0; \
    /* Divide as signed values: with a bare size_t divisor a negative */ \
    /* stride would be converted to unsigned before the division. */ \
    stride1 /= (ptrdiff_t) sizeof(*buf1); \
    stride2 /= (ptrdiff_t) sizeof(*buf2); \
    int y; \
    /* Find the first differing row of the region, if any */ \
    for (y = 0; y < h; y++) \
        if (memcmp(&buf1[y*stride1], &buf2[y*stride2], w*sizeof(*buf1))) \
            break; \
    if (y != h) { \
        if (check_err(file, line, name, w, h, &err)) \
            return 1; \
        /* Dump each row: buf1, buf2, then the difference map */ \
        for (y = 0; y < h; y++) { \
            for (int x = 0; x < w; x++) \
                fprintf(stderr, " " fmt, buf1[x]); \
            fprintf(stderr, " "); \
            for (int x = 0; x < w; x++) \
                fprintf(stderr, " " fmt, buf2[x]); \
            fprintf(stderr, " "); \
            for (int x = 0; x < w; x++) \
                fprintf(stderr, "%c", buf1[x] != buf2[x] ? 'x' : '.'); \
            buf1 += stride1; \
            buf2 += stride2; \
            fprintf(stderr, "\n"); \
        } \
        /* Rewind to the first row for the padding checks below */ \
        buf1 -= h*stride1; \
        buf2 -= h*stride2; \
    } \
    /* Guard rows above the region */ \
    for (y = -padding; y < 0; y++) \
        if (memcmp(&buf1[y*stride1 - padding], &buf2[y*stride2 - padding], \
                   (w + 2*padding)*sizeof(*buf1))) { \
            if (check_err(file, line, name, w, h, &err)) \
                return 1; \
            fprintf(stderr, " overwrite above\n"); \
            break; \
        } \
    /* Guard rows below the aligned height */ \
    for (y = aligned_h; y < aligned_h + padding; y++) \
        if (memcmp(&buf1[y*stride1 - padding], &buf2[y*stride2 - padding], \
                   (w + 2*padding)*sizeof(*buf1))) { \
            if (check_err(file, line, name, w, h, &err)) \
                return 1; \
            fprintf(stderr, " overwrite below\n"); \
            break; \
        } \
    /* Guard columns left of the region */ \
    for (y = 0; y < h; y++) \
        if (memcmp(&buf1[y*stride1 - padding], &buf2[y*stride2 - padding], \
                   padding*sizeof(*buf1))) { \
            if (check_err(file, line, name, w, h, &err)) \
                return 1; \
            fprintf(stderr, " overwrite left\n"); \
            break; \
        } \
    /* Guard columns right of the aligned width */ \
    for (y = 0; y < h; y++) \
        if (memcmp(&buf1[y*stride1 + aligned_w], &buf2[y*stride2 + aligned_w], \
                   padding*sizeof(*buf1))) { \
            if (check_err(file, line, name, w, h, &err)) \
                return 1; \
            fprintf(stderr, " overwrite right\n"); \
            break; \
        } \
    return err; \
}

/* Instantiate the comparison helpers: signed element types are dumped in
 * decimal, unsigned ones in zero-padded hexadecimal. */
DEF_CHECKASM_CHECK_FUNC(int8_t,   "%4d")
DEF_CHECKASM_CHECK_FUNC(int16_t,  "%6d")
DEF_CHECKASM_CHECK_FUNC(int32_t,  "%9d")
DEF_CHECKASM_CHECK_FUNC(uint8_t,  "%02x")
DEF_CHECKASM_CHECK_FUNC(uint16_t, "%04x")
DEF_CHECKASM_CHECK_FUNC(uint32_t, "%08x")
1129
1130 #if ARCH_X86_64
checkasm_simd_warmup(void)1131 void checkasm_simd_warmup(void)
1132 {
1133 if (state.simd_warmup)
1134 state.simd_warmup();
1135 }
1136 #endif
1137
1138 #if ARCH_ARM
/* Global function pointer, only defined when building for ARM. Nothing in
 * this part of the file assigns or calls it.
 * NOTE(review): presumably selects the checked-call wrapper to use for the
 * detected CPU - confirm against the code that initializes it. */
void (*checkasm_checked_call_ptr)(void *func, int dummy, ...);
1140 #endif
1141