/*
 * Copyright © 2018, VideoLAN and dav1d authors
 * Copyright © 2018, Two Orioles, LLC
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice, this
 *    list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright notice,
 *    this list of conditions and the following disclaimer in the documentation
 *    and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#ifndef DAV1D_TESTS_CHECKASM_CHECKASM_H
#define DAV1D_TESTS_CHECKASM_CHECKASM_H

#include "config.h"

#include <stdint.h>
#include <stdlib.h>

#ifdef _WIN32
#include <windows.h>
#if ARCH_X86_32
#include <setjmp.h>
typedef jmp_buf checkasm_context;
#define checkasm_save_context() setjmp(checkasm_context_buf)
#define checkasm_load_context() longjmp(checkasm_context_buf, 1)
#elif WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_DESKTOP)
/* setjmp/longjmp on Windows on architectures using SEH (all except x86_32)
 * will try to use SEH to unwind the stack, which doesn't work for assembly
 * functions without unwind information. */
typedef struct { CONTEXT c; int status; } checkasm_context;
#define checkasm_save_context() \
    (checkasm_context_buf.status = 0, \
     RtlCaptureContext(&checkasm_context_buf.c), \
     checkasm_context_buf.status)
#define checkasm_load_context() \
    (checkasm_context_buf.status = 1, \
     RtlRestoreContext(&checkasm_context_buf.c, NULL))
#else
typedef void* checkasm_context;
#define checkasm_save_context() 0
#define checkasm_load_context() do {} while (0)
#endif
#else
#include <setjmp.h>
typedef sigjmp_buf checkasm_context;
#define checkasm_save_context() sigsetjmp(checkasm_context_buf, 1)
#define checkasm_load_context() siglongjmp(checkasm_context_buf, 1)
#endif

#include "include/common/attributes.h"
#include "include/common/bitdepth.h"
#include "include/common/intops.h"

#if ARCH_ARM
#include "src/arm/arm-arch.h"
#endif

int xor128_rand(void);
#define rnd xor128_rand
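/* Note: tests typically fill their input buffers with rnd() to get
 * reproducible pseudo-random data from the seeded xor128 generator, e.g.
 * (illustrative only) `src[i] = rnd() & ((1 << BITDEPTH) - 1);`. */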

#define decl_check_bitfns(name) \
name##_8bpc(void); \
name##_16bpc(void)

void checkasm_check_msac(void);
void checkasm_check_pal(void);
void checkasm_check_refmvs(void);
decl_check_bitfns(void checkasm_check_cdef);
decl_check_bitfns(void checkasm_check_filmgrain);
decl_check_bitfns(void checkasm_check_ipred);
decl_check_bitfns(void checkasm_check_itx);
decl_check_bitfns(void checkasm_check_loopfilter);
decl_check_bitfns(void checkasm_check_looprestoration);
decl_check_bitfns(void checkasm_check_mc);

void *checkasm_check_func(void *func, const char *name, ...);
int checkasm_bench_func(void);
int checkasm_fail_func(const char *msg, ...);
void checkasm_update_bench(int iterations, uint64_t cycles);
void checkasm_report(const char *name, ...);
void checkasm_set_signal_handler_state(int enabled);
void checkasm_handle_signal(void);
extern checkasm_context checkasm_context_buf;

/* float compare utilities */
int float_near_ulp(float a, float b, unsigned max_ulp);
int float_near_abs_eps(float a, float b, float eps);
int float_near_abs_eps_ulp(float a, float b, float eps, unsigned max_ulp);
int float_near_ulp_array(const float *a, const float *b, unsigned max_ulp,
                         int len);
int float_near_abs_eps_array(const float *a, const float *b, float eps,
                             int len);
int float_near_abs_eps_array_ulp(const float *a, const float *b, float eps,
                                 unsigned max_ulp, int len);
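/* Illustrative usage (not part of the API itself): a test comparing float
 * outputs of a reference and an assembly implementation might accept results
 * that differ by at most a couple of ULPs:
 *     if (!float_near_ulp_array(ref_out, asm_out, 2, len)) fail();
 * `ref_out`, `asm_out` and `len` are hypothetical test-local variables. */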

#define BENCH_RUNS (1 << 12) /* Trade-off between accuracy and speed */

/* Decide whether or not the specified function needs to be tested */
#define check_func(func, ...)\
    (func_ref = checkasm_check_func((func_new = func), __VA_ARGS__))

/* Declare the function prototype. The first argument is the return type,
 * the remaining arguments are the function parameters. Naming the parameters
 * is optional. */
#define declare_func(ret, ...)\
    declare_new(ret, __VA_ARGS__)\
    void *func_ref, *func_new;\
    typedef ret func_type(__VA_ARGS__);\
    if (checkasm_save_context()) checkasm_handle_signal()

/* Indicate that the current test has failed */
#define fail() checkasm_fail_func("%s:%d", __FILE__, __LINE__)

/* Print the test outcome */
#define report checkasm_report

/* Call the reference function */
#define call_ref(...)\
    (checkasm_set_signal_handler_state(1),\
     ((func_type *)func_ref)(__VA_ARGS__));\
    checkasm_set_signal_handler_state(0)
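/*
 * Typical test structure (a sketch modeled on existing dav1d checkasm tests;
 * `c->fn`, `c_dst`, `a_dst` and `stride` are hypothetical):
 *
 *     ALIGN_STK_64(pixel, c_dst, 64 * 64,);
 *     ALIGN_STK_64(pixel, a_dst, 64 * 64,);
 *     declare_func(void, pixel *dst, ptrdiff_t stride);
 *
 *     if (check_func(c->fn, "fn_%dbpc", BITDEPTH)) {
 *         // fill inputs with rnd(), then run both implementations
 *         call_ref(c_dst, stride);
 *         call_new(a_dst, stride);
 *         if (memcmp(c_dst, a_dst, sizeof(c_dst)))
 *             fail();
 *         bench_new(a_dst, stride);
 *     }
 *     report("fn");
 */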

#if HAVE_ASM
#if ARCH_X86
#if defined(_MSC_VER) && !defined(__clang__)
#include <intrin.h>
#define readtime() (_mm_lfence(), __rdtsc())
#else
static inline uint64_t readtime(void) {
    uint32_t eax, edx;
    __asm__ __volatile__("lfence\nrdtsc" : "=a"(eax), "=d"(edx));
    return (((uint64_t)edx) << 32) | eax;
}
#define readtime readtime
#endif
#elif CONFIG_MACOS_KPERF
uint64_t checkasm_kperf_cycles(void);
#define readtime() checkasm_kperf_cycles()
#elif (ARCH_AARCH64 || ARCH_ARM) && defined(__APPLE__)
#include <mach/mach_time.h>
#define readtime() mach_absolute_time()
#elif ARCH_AARCH64
#ifdef _MSC_VER
#include <windows.h>
#define readtime() (_InstructionSynchronizationBarrier(), ReadTimeStampCounter())
#else
static inline uint64_t readtime(void) {
    uint64_t cycle_counter;
    /* This requires enabling user mode access to the cycle counter (which
     * can only be done from kernel space).
     * This could also read cntvct_el0 instead of pmccntr_el0; that register
     * might also be readable (depending on kernel version), but it has much
     * worse precision (it's a fixed 50 MHz timer). */
    __asm__ __volatile__("isb\nmrs %0, pmccntr_el0"
                         : "=r"(cycle_counter)
                         :: "memory");
    return cycle_counter;
}
#define readtime readtime
#endif
#elif ARCH_ARM && !defined(_MSC_VER) && __ARM_ARCH >= 7
static inline uint64_t readtime(void) {
    uint32_t cycle_counter;
    /* This requires enabling user mode access to the cycle counter (which
     * can only be done from kernel space). */
    __asm__ __volatile__("isb\nmrc p15, 0, %0, c9, c13, 0"
                         : "=r"(cycle_counter)
                         :: "memory");
    return cycle_counter;
}
#define readtime readtime
#elif ARCH_PPC64LE
static inline uint64_t readtime(void) {
    uint32_t tbu, tbl, temp;

    __asm__ __volatile__(
        "1:\n"
        "mfspr %2,269\n"
        "mfspr %0,268\n"
        "mfspr %1,269\n"
        "cmpw   %2,%1\n"
        "bne    1b\n"
    : "=r"(tbl), "=r"(tbu), "=r"(temp)
    :
    : "cc");

    return (((uint64_t)tbu) << 32) | (uint64_t)tbl;
}
#define readtime readtime
#elif ARCH_RISCV
#include <time.h>
static inline uint64_t clock_gettime_nsec(void) {
    struct timespec ts;
    clock_gettime(CLOCK_MONOTONIC_RAW, &ts);
    return ((uint64_t)ts.tv_sec*1000000000u) + (uint64_t)ts.tv_nsec;
}
#define readtime clock_gettime_nsec
#elif ARCH_LOONGARCH
static inline uint64_t readtime(void) {
#if ARCH_LOONGARCH64
    uint64_t a, id;
    __asm__ __volatile__("rdtime.d  %0, %1"
                         : "=r"(a), "=r"(id)
                         :: );
    return a;
#else
    uint32_t a, id;
    __asm__ __volatile__("rdtimel.w  %0, %1"
                         : "=r"(a), "=r"(id)
                         :: );
    return (uint64_t)a;
#endif
}
#define readtime readtime
#endif

/* Verifies that clobbered callee-saved registers
 * are properly saved and restored */
void checkasm_checked_call(void *func, ...);

#if ARCH_X86_64
/* YMM and ZMM registers on x86 are turned off to save power when they haven't
 * been used for some period of time. When they are used again there is a
 * "warmup" period during which performance is reduced and inconsistent, which
 * is problematic when trying to benchmark individual functions. We can work
 * around this by periodically issuing "dummy" instructions that use those
 * registers to keep them powered on. */
void checkasm_simd_warmup(void);

/* When a 32-bit value is passed as a function parameter, the upper 32 bits of
 * the 64-bit register holding it are undefined. In practice those bits usually
 * end up being zero, which may hide certain bugs, such as using a register
 * containing undefined bits as a pointer offset, so we want to intentionally
 * clobber those bits with junk to expose any issues. The following set of
 * macros automatically calculates a bitmask specifying which parameters should
 * have their upper halves clobbered. */
#ifdef _WIN32
/* Integer and floating-point parameters share "register slots". */
#define IGNORED_FP_ARGS 0
#else
/* Up to 8 floating-point parameters are passed in XMM registers, which are
 * handled orthogonally from integer parameters passed in GPR registers. */
#define IGNORED_FP_ARGS 8
#endif
#if HAVE_C11_GENERIC
#define clobber_type(arg) _Generic((void (*)(void*, arg))NULL,\
     void (*)(void*, int32_t ): clobber_mask |= 1 << mpos++,\
     void (*)(void*, uint32_t): clobber_mask |= 1 << mpos++,\
     void (*)(void*, float   ): mpos += (fp_args++ >= IGNORED_FP_ARGS),\
     void (*)(void*, double  ): mpos += (fp_args++ >= IGNORED_FP_ARGS),\
     default:                   mpos++)
#define init_clobber_mask(a, b, c, d, e, f, g, h, i, j, k, l, m, n, o, p, ...)\
    unsigned clobber_mask = 0;\
    {\
        int mpos = 0, fp_args = 0;\
        clobber_type(a); clobber_type(b); clobber_type(c); clobber_type(d);\
        clobber_type(e); clobber_type(f); clobber_type(g); clobber_type(h);\
        clobber_type(i); clobber_type(j); clobber_type(k); clobber_type(l);\
        clobber_type(m); clobber_type(n); clobber_type(o); clobber_type(p);\
    }
#else
/* Skip parameter clobbering on compilers without support for _Generic() */
#define init_clobber_mask(...) unsigned clobber_mask = 0
#endif
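/* Worked example (illustration only): for a declare_func() signature of
 *     void fn(pixel *dst, ptrdiff_t stride, int w, int h)
 * the pointer and ptrdiff_t parameters hit the `default` branch (mpos 0 and 1,
 * no mask bit set), while the two int parameters set bits 2 and 3, giving
 * clobber_mask == 0xc. checkasm_checked_call() can then fill the upper halves
 * of exactly those parameters with junk before entering the tested function. */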
#define declare_new(ret, ...)\
    ret (*checked_call)(__VA_ARGS__, int, int, int, int, int, int, int,\
                        int, int, int, int, int, int, int, int, int,\
                        void*, unsigned) =\
        (void*)checkasm_checked_call;\
    init_clobber_mask(__VA_ARGS__, void*, void*, void*, void*,\
                      void*, void*, void*, void*, void*, void*,\
                      void*, void*, void*, void*, void*);
#define call_new(...)\
    (checkasm_set_signal_handler_state(1),\
     checkasm_simd_warmup(),\
     checked_call(__VA_ARGS__, 16, 15, 14, 13, 12, 11, 10, 9, 8,\
                  7, 6, 5, 4, 3, 2, 1, func_new, clobber_mask));\
    checkasm_set_signal_handler_state(0)
#elif ARCH_X86_32
#define declare_new(ret, ...)\
    ret (*checked_call)(void *, __VA_ARGS__, int, int, int, int, int, int,\
                        int, int, int, int, int, int, int, int, int) =\
        (void *)checkasm_checked_call;
#define call_new(...)\
    (checkasm_set_signal_handler_state(1),\
     checked_call(func_new, __VA_ARGS__, 15, 14, 13, 12,\
                  11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1));\
    checkasm_set_signal_handler_state(0)
#elif ARCH_ARM
/* Use a dummy argument to offset the real parameters by 2 instead of only 1.
 * This makes sure that any 8-byte alignment of parameters is kept the same
 * even when the extra parameters have been removed. */
extern void (*checkasm_checked_call_ptr)(void *func, int dummy, ...);
#define declare_new(ret, ...)\
    ret (*checked_call)(void *, int dummy, __VA_ARGS__,\
                        int, int, int, int, int, int, int, int,\
                        int, int, int, int, int, int, int) =\
    (void *)checkasm_checked_call_ptr;
#define call_new(...)\
    (checkasm_set_signal_handler_state(1),\
     checked_call(func_new, 0, __VA_ARGS__, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0, 0, 0, 0));\
    checkasm_set_signal_handler_state(0)
#elif ARCH_AARCH64 && !defined(__APPLE__)
void checkasm_stack_clobber(uint64_t clobber, ...);
#define declare_new(ret, ...)\
    ret (*checked_call)(void *, int, int, int, int, int, int, int,\
                        __VA_ARGS__, int, int, int, int, int, int, int, int,\
                        int, int, int, int, int, int, int) =\
    (void *)checkasm_checked_call;
#define CLOB (UINT64_C(0xdeadbeefdeadbeef))
#define call_new(...)\
    (checkasm_set_signal_handler_state(1),\
     checkasm_stack_clobber(CLOB, CLOB, CLOB, CLOB, CLOB, CLOB,\
                            CLOB, CLOB, CLOB, CLOB, CLOB, CLOB,\
                            CLOB, CLOB, CLOB, CLOB, CLOB, CLOB,\
                            CLOB, CLOB, CLOB, CLOB, CLOB),\
     checked_call(func_new, 0, 0, 0, 0, 0, 0, 0, __VA_ARGS__,\
                  7, 6, 5, 4, 3, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0));\
    checkasm_set_signal_handler_state(0)
#elif ARCH_RISCV
#define declare_new(ret, ...)\
    ret (*checked_call)(void *, int, int, int, int, int, int, int,\
                        __VA_ARGS__, int, int, int, int, int, int, int, int,\
                        int, int, int, int, int, int, int) =\
    (void *)checkasm_checked_call;
#define call_new(...)\
    (checkasm_set_signal_handler_state(1),\
     checked_call(func_new, 0, 0, 0, 0, 0, 0, 0, __VA_ARGS__,\
                  7, 6, 5, 4, 3, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0));\
    checkasm_set_signal_handler_state(0)
#elif ARCH_LOONGARCH
#define declare_new(ret, ...)\
    ret (*checked_call)(void *, int, int, int, int, int, int, int,\
                        __VA_ARGS__, int, int, int, int, int, int, int, int,\
                        int, int, int, int, int, int, int) =\
    (void *)checkasm_checked_call;
#define call_new(...)\
    (checkasm_set_signal_handler_state(1),\
     checked_call(func_new, 0, 0, 0, 0, 0, 0, 0, __VA_ARGS__,\
                  7, 6, 5, 4, 3, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0));\
    checkasm_set_signal_handler_state(0)
#else
#define declare_new(ret, ...)
#define call_new(...)\
    (checkasm_set_signal_handler_state(1),\
     ((func_type *)func_new)(__VA_ARGS__));\
    checkasm_set_signal_handler_state(0)
#endif
#else /* HAVE_ASM */
#define declare_new(ret, ...)
/* Call the function */
#define call_new(...)\
    (checkasm_set_signal_handler_state(1),\
     ((func_type *)func_new)(__VA_ARGS__));\
    checkasm_set_signal_handler_state(0)
#endif /* HAVE_ASM */

/* Benchmark the function */
#ifdef readtime
#define bench_new(...)\
    do {\
        if (checkasm_bench_func()) {\
            func_type *const tfunc = func_new;\
            checkasm_set_signal_handler_state(1);\
            uint64_t tsum = 0;\
            int tcount = 0;\
            for (int ti = 0; ti < BENCH_RUNS; ti++) {\
                uint64_t t = readtime();\
                int talt = 0; (void)talt;\
                tfunc(__VA_ARGS__);\
                talt = 1;\
                tfunc(__VA_ARGS__);\
                talt = 0;\
                tfunc(__VA_ARGS__);\
                talt = 1;\
                tfunc(__VA_ARGS__);\
                t = readtime() - t;\
                if (t*tcount <= tsum*4 && ti > 0) {\
                    tsum += t;\
                    tcount++;\
                }\
            }\
            checkasm_set_signal_handler_state(0);\
            checkasm_update_bench(tcount, tsum);\
        } else {\
            const int talt = 0; (void)talt;\
            call_new(__VA_ARGS__);\
        }\
    } while (0)
#else
#define bench_new(...) do {} while (0)
#endif

/* Alternates between two pointers. Intended to be used within bench_new()
 * calls for functions that modify their input buffer(s), to ensure that
 * throughput, and not latency, is measured. */
#define alternate(a, b) (talt ? (b) : (a))
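/* Usage sketch (illustrative, with hypothetical buffers `dst0`/`dst1`): inside
 * bench_new(), `talt` toggles 0/1/0/1 across the four timed calls, so
 *     bench_new(alternate(dst0, dst1), stride);
 * makes consecutive calls operate on different buffers. bench_new() itself
 * times the calls in groups of four and discards samples larger than four
 * times the running average as outliers (the `t*tcount <= tsum*4` check). */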

#define ROUND_UP(x,a) (((x)+((a)-1)) & ~((a)-1))
#define PIXEL_RECT(name, w, h) \
    ALIGN_STK_64(pixel, name##_buf, ((h)+32)*(ROUND_UP(w,64)+64) + 64,); \
    ptrdiff_t name##_stride = sizeof(pixel)*(ROUND_UP(w,64)+64); \
    (void)name##_stride; \
    pixel *name = name##_buf + (ROUND_UP(w,64)+64)*16 + 64

#define CLEAR_PIXEL_RECT(name) \
    memset(name##_buf, 0x99, sizeof(name##_buf))

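/* Usage sketch (illustrative; `c_dst`/`a_dst` are hypothetical test buffers):
 *     PIXEL_RECT(c_dst, 128, 128);
 *     PIXEL_RECT(a_dst, 128, 128);
 *     CLEAR_PIXEL_RECT(c_dst);
 *     CLEAR_PIXEL_RECT(a_dst);
 * declares 64-byte-aligned pixel buffers with padding on all sides, plus a
 * `name##_stride` variable, so the padded checkasm_check() variants below can
 * detect out-of-bounds writes around the tested rectangle. */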
#define DECL_CHECKASM_CHECK_FUNC(type) \
int checkasm_check_##type(const char *const file, const int line, \
                          const type *const buf1, const ptrdiff_t stride1, \
                          const type *const buf2, const ptrdiff_t stride2, \
                          const int w, const int h, const char *const name, \
                          const int align_w, const int align_h, \
                          const int padding)

DECL_CHECKASM_CHECK_FUNC(int8_t);
DECL_CHECKASM_CHECK_FUNC(int16_t);
DECL_CHECKASM_CHECK_FUNC(int32_t);
DECL_CHECKASM_CHECK_FUNC(uint8_t);
DECL_CHECKASM_CHECK_FUNC(uint16_t);
DECL_CHECKASM_CHECK_FUNC(uint32_t);

#define CONCAT(a,b) a ## b

#define checkasm_check2(prefix, ...) CONCAT(checkasm_check_, prefix)(__FILE__, __LINE__, __VA_ARGS__)
#define checkasm_check(prefix, ...) checkasm_check2(prefix, __VA_ARGS__, 0, 0, 0)

#ifdef BITDEPTH
#define checkasm_check_pixel(...) checkasm_check(PIXEL_TYPE, __VA_ARGS__)
#define checkasm_check_pixel_padded(...) checkasm_check2(PIXEL_TYPE, __VA_ARGS__, 1, 1, 8)
#define checkasm_check_pixel_padded_align(...) checkasm_check2(PIXEL_TYPE, __VA_ARGS__, 8)
#define checkasm_check_coef(...)  checkasm_check(COEF_TYPE,  __VA_ARGS__)
#endif
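/* Usage sketch (illustrative; reuses the hypothetical `c_dst`/`a_dst` buffers
 * from the PIXEL_RECT example above):
 *     checkasm_check_pixel_padded(c_dst, c_dst_stride,
 *                                 a_dst, a_dst_stride, w, h, "dst");
 * checkasm_check() passes 0 for align_w/align_h/padding, whereas the
 * "_padded" variants also request that 8 elements of padding around the
 * rectangle be verified (with the given, or default 1x1, alignment). */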

#endif /* DAV1D_TESTS_CHECKASM_CHECKASM_H */