; Copyright © 2018, VideoLAN and dav1d authors
; Copyright © 2018, Two Orioles, LLC
; All rights reserved.
;
; Redistribution and use in source and binary forms, with or without
; modification, are permitted provided that the following conditions are met:
;
; 1. Redistributions of source code must retain the above copyright notice, this
;    list of conditions and the following disclaimer.
;
; 2. Redistributions in binary form must reproduce the above copyright notice,
;    this list of conditions and the following disclaimer in the documentation
;    and/or other materials provided with the distribution.
;
; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
; ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
; WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
; DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
; ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
; (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
; ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

%include "config.asm"
%undef private_prefix
%define private_prefix checkasm
%include "ext/x86/x86inc.asm"

SECTION_RODATA 16

%if ARCH_X86_64
; just random numbers to reduce the chance of an incidental match
%if WIN64
x6:  dq 0x1a1b2550a612b48c,0x79445c159ce79064
x7:  dq 0x2eed899d5a28ddcd,0x86b2536fcd8cf636
x8:  dq 0xb0856806085e7943,0x3f2bf84fc0fcca4e
x9:  dq 0xacbd382dcf5b8de2,0xd229e1f5b281303f
x10: dq 0x71aeaff20b095fd9,0xab63e2e11fa38ed9
x11: dq 0x89b0c0765892729a,0x77d410d5c42c882d
x12: dq 0xc45ea11a955d8dd5,0x24b3c1d2a024048b
x13: dq 0x2e8ec680de14b47c,0xdd7b8919edd42786
x14: dq 0x135ce6888fa02cbf,0x11e53e2b2ac655ef
x15: dq 0x011ff554472a7a10,0x6de8f4c914c334d5
n7:  dq 0x21f86d66c8ca00ce
n8:  dq 0x75b6ba21077c48ad
%endif
n9:  dq 0xed56bb2dcb3c7736
n10: dq 0x8bda43d3fd1a7e06
n11: dq 0xb64a9c9e5d318408
n12: dq 0xdf9a54b303f1d3a3
n13: dq 0x4a75479abd64e097
n14: dq 0x249214109d5d1c88
%endif
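; x6-x15 seed the WIN64 callee-saved xmm registers and n7-n14 seed the
; callee-saved GPRs below (n7/n8 are only needed under WIN64, which has
; two more callee-saved GPRs than the SysV ABI)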

errmsg_stack: db "stack corruption", 0
errmsg_register: db "failed to preserve register:%s", 0
errmsg_vzeroupper: db "missing vzeroupper", 0

SECTION .bss

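; set to 1 by init_x86 when XGETBV with ECX=1 is supported and the YMM
; state isn't permanently flagged as in-use, i.e. when a missing
; vzeroupper is actually detectable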
check_vzeroupper: resd 1

SECTION .text

cextern fail_func

; max number of args used by any asm function.
; (max_args % 4) must equal 3 for stack alignment
%define max_args 15

%if UNIX64
    DECLARE_REG_TMP 0
%else
    DECLARE_REG_TMP 4
%endif

;-----------------------------------------------------------------------------
; unsigned checkasm_init_x86(char *name)
;-----------------------------------------------------------------------------
cglobal init_x86, 0, 5
%if ARCH_X86_64
    push          rbx
%endif
    movifnidn      t0, r0mp
    mov           eax, 0x80000000
    cpuid
    cmp           eax, 0x80000004
    jb .no_brand ; processor brand string not supported
    mov           eax, 0x80000002
    cpuid
    mov     [t0+4* 0], eax
    mov     [t0+4* 1], ebx
    mov     [t0+4* 2], ecx
    mov     [t0+4* 3], edx
    mov           eax, 0x80000003
    cpuid
    mov     [t0+4* 4], eax
    mov     [t0+4* 5], ebx
    mov     [t0+4* 6], ecx
    mov     [t0+4* 7], edx
    mov           eax, 0x80000004
    cpuid
    mov     [t0+4* 8], eax
    mov     [t0+4* 9], ebx
    mov     [t0+4*10], ecx
    mov     [t0+4*11], edx
    xor           eax, eax
    cpuid
    jmp .check_xcr1
.no_brand: ; use manufacturer id as a fallback
    xor           eax, eax
    mov      [t0+4*3], eax
    cpuid
    mov      [t0+4*0], ebx
    mov      [t0+4*1], edx
    mov      [t0+4*2], ecx
.check_xcr1:
    test          eax, eax
    jz .end2 ; cpuid leaf 1 not supported
    mov           t0d, eax ; max leaf
    mov           eax, 1
    cpuid
    and           ecx, 0x18000000
    cmp           ecx, 0x18000000
    jne .end2 ; osxsave/avx not supported
    cmp           t0d, 13 ; cpuid leaf 13 not supported
    jb .end2
    mov           t0d, eax ; cpuid signature
    mov           eax, 13
    mov           ecx, 1
    cpuid
    test           al, 0x04
    jz .end ; xcr1 not supported
    mov           ecx, 1
    xgetbv
    test           al, 0x04
    jnz .end ; always-dirty ymm state
%if ARCH_X86_64 == 0 && PIC
    LEA           eax, check_vzeroupper
    mov         [eax], ecx
%else
    mov [check_vzeroupper], ecx
%endif
.end:
    mov           eax, t0d
.end2:
%if ARCH_X86_64
    pop           rbx
%endif
    RET

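; A minimal sketch of the C-side caller of the function above
; (hypothetical variable names; the real harness lives in
; tests/checkasm/checkasm.c):
;
;     char name[48]; /* brand string: 3 cpuid leaves x 16 bytes */
;     unsigned cpuid_sig = checkasm_init_x86(name);
;
; The return value is the CPUID leaf 1 signature (family/model/stepping),
; or 0 if leaf 1 is unsupported.
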
%if ARCH_X86_64
%if WIN64
    %define stack_param rsp+32 ; shadow space
    %define num_fn_args rsp+stack_offset+17*8
    %assign num_reg_args 4
    %assign free_regs 7
    %assign clobber_mask_stack_bit 16
    DECLARE_REG_TMP 4
%else
    %define stack_param rsp
    %define num_fn_args rsp+stack_offset+11*8
    %assign num_reg_args 6
    %assign free_regs 9
    %assign clobber_mask_stack_bit 64
    DECLARE_REG_TMP 7
%endif

%macro CLOBBER_UPPER 2 ; reg, mask_bit
    mov          r13d, %1d
    or            r13, r8
    test          r9b, %2
    cmovnz         %1, r13
%endmacro
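; If the corresponding bit in clobber_mask is set, the parameter is
; declared as a 32-bit value in the C prototype, so its upper half may
; legally hold garbage. Filling it with 0xdeadbeef (r8 holds
; 0xdeadbeef00000000) catches functions that wrongly assume the upper
; 32 bits are zero- or sign-extended; e.g. an incoming 0x00000001
; becomes 0xdeadbeef00000001.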

cglobal checked_call, 2, 15, 16, max_args*8+64+8
    mov          r10d, [num_fn_args]
    mov            r8, 0xdeadbeef00000000
    mov           r9d, [num_fn_args+r10*8+8] ; clobber_mask
    mov            t0, [num_fn_args+r10*8]   ; func

    ; Clobber the upper halves of 32-bit parameters
    CLOBBER_UPPER  r0, 1
    CLOBBER_UPPER  r1, 2
    CLOBBER_UPPER  r2, 4
    CLOBBER_UPPER  r3, 8
%if UNIX64
    CLOBBER_UPPER  r4, 16
    CLOBBER_UPPER  r5, 32
%else ; WIN64
%assign i 6
%rep 16-6
    mova       m %+ i, [x %+ i]
    %assign i i+1
%endrep
%endif

    xor          r11d, r11d
    sub          r10d, num_reg_args
    cmovs        r10d, r11d ; num stack args

    ; write stack canaries to the area above parameters passed on the stack
    mov           r12, [rsp+stack_offset] ; return address
    not           r12
%assign i 0
%rep 8 ; 64 bytes
    mov [stack_param+(r10+i)*8], r12
    %assign i i+1
%endrep
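    ; The canary value is the bitwise NOT of the return address, so it
    ; differs between calls and is unlikely to match data the callee
    ; writes by accident. Stack layout once setup completes (the n stack
    ; parameters are copied below):
    ;   [stack_param+8*0 .. 8*(n-1)]   copies of the n stack parameters
    ;   [stack_param+8*n .. 8*(n+7)]   8 qword canaries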

    test         r10d, r10d
    jz .stack_setup_done ; no stack parameters
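    ; Copy the stack parameters, clobbering the upper halves of 32-bit
    ; ones. r9d is shifted right once per parameter so the same mask bit
    ; (clobber_mask_stack_bit) always refers to the current parameter.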
.copy_stack_parameter:
    mov           r12, [stack_param+stack_offset+8+r11*8]
    CLOBBER_UPPER r12, clobber_mask_stack_bit
    shr           r9d, 1
    mov [stack_param+r11*8], r12
    inc          r11d
    cmp          r11d, r10d
    jl .copy_stack_parameter
.stack_setup_done:

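    ; load known random values into the callee-saved GPRs so that a
    ; failure to preserve them can be detected after the call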
%assign i 14
%rep 15-free_regs
    mov        r %+ i, [n %+ i]
    %assign i i-1
%endrep
    call           t0

    ; check for stack corruption
    mov           r0d, [num_fn_args]
    xor           r3d, r3d
    sub           r0d, num_reg_args
    cmovs         r0d, r3d ; num stack args

    mov            r3, [rsp+stack_offset]
    mov            r4, [stack_param+r0*8]
    not            r3
    xor            r4, r3
%assign i 1
%rep 6
    mov            r5, [stack_param+(r0+i)*8]
    xor            r5, r3
    or             r4, r5
    %assign i i+1
%endrep
    xor            r3, [stack_param+(r0+7)*8]
    or             r4, r3
    jz .stack_ok
    ; Save the return value located in rdx:rax first to prevent clobbering.
    mov           r10, rax
    mov           r11, rdx
    lea            r0, [errmsg_stack]
    jmp .fail
.stack_ok:

    ; check for failure to preserve registers
%assign i 14
%rep 15-free_regs
    cmp        r %+ i, [n %+ i]
    setne         r4b
    lea           r3d, [r4+r3*2]
    %assign i i-1
%endrep
%if WIN64
    lea            r0, [rsp+32] ; account for shadow space
    mov            r5, r0
    test          r3d, r3d
    jz .gpr_ok
%else
    test          r3d, r3d
    jz .gpr_xmm_ok
    mov            r0, rsp
%endif
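    ; Build a string of the names of the clobbered GPRs in the stack
    ; buffer r0 points at. The dword " r0" + (i << 16) patches the digit
    ; character in place (the second form handles two-digit registers,
    ; e.g. i=12 yields " r12"), and the write pointer only advances past
    ; a name if that register's bit is set in the r3d bitmask
    ; accumulated above.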
%assign i free_regs
%rep 15-free_regs
%if i < 10
    mov    dword [r0], " r0" + (i << 16)
    lea            r4, [r0+3]
%else
    mov    dword [r0], " r10" + ((i - 10) << 24)
    lea            r4, [r0+4]
%endif
    test          r3b, 1 << (i - free_regs)
    cmovnz         r0, r4
    %assign i i+1
%endrep
%if WIN64 ; xmm registers
.gpr_ok:
%assign i 6
%rep 16-6
    pxor       m %+ i, [x %+ i]
    %assign i i+1
%endrep
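    ; xmm6-xmm15 are callee-saved under WIN64. After the pxor above, each
    ; register is all-zero iff it was preserved. The packsswb tree below
    ; funnels all ten results into m6 (saturation maps nonzero words to
    ; nonzero bytes), so pcmpeqb/pmovmskb yields 0xffff iff every
    ; register survived intact.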
    packsswb       m6, m7
    packsswb       m8, m9
    packsswb      m10, m11
    packsswb      m12, m13
    packsswb      m14, m15
    packsswb       m6, m6
    packsswb       m8, m10
    packsswb      m12, m14
    packsswb       m6, m6
    packsswb       m8, m12
    packsswb       m6, m8
    pxor           m7, m7
    pcmpeqb        m6, m7
    pmovmskb      r3d, m6
    cmp           r3d, 0xffff
    je .xmm_ok
    mov           r7d, " xmm"
%assign i 6
%rep 16-6
    mov        [r0+0], r7d
%if i < 10
    mov   byte [r0+4], "0" + i
    lea            r4, [r0+5]
%else
    mov   word [r0+4], "10" + ((i - 10) << 8)
    lea            r4, [r0+6]
%endif
    test          r3d, 1 << i
    cmovz          r0, r4
    %assign i i+1
%endrep
.xmm_ok:
    cmp            r0, r5
    je .gpr_xmm_ok
    mov     byte [r0], 0
    mov           r11, rdx
    mov            r1, r5
%else
    mov     byte [r0], 0
    mov           r11, rdx
    mov            r1, rsp
%endif
    mov           r10, rax
    lea            r0, [errmsg_register]
    jmp .fail
.gpr_xmm_ok:
    ; Check for dirty YMM state, i.e. missing vzeroupper
    mov           ecx, [check_vzeroupper]
    test          ecx, ecx
    jz .ok ; not supported, skip
    mov           r10, rax
    mov           r11, rdx
    xgetbv
    test           al, 0x04
    jz .restore_retval ; clean ymm state
    lea            r0, [errmsg_vzeroupper]
    vzeroupper
.fail:
    ; Call fail_func() with a descriptive message to mark it as a failure.
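    ; (al = 0: number of vector registers used, as required by the SysV
    ; ABI when calling a varargs function; harmless under WIN64)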
    xor           eax, eax
    call fail_func
.restore_retval:
    mov           rax, r10
    mov           rdx, r11
.ok:
    RET

; trigger a warmup of vector units. Executing a wide vector instruction
; ahead of time lets the CPU power up the SIMD units, so the transition
; isn't attributed to the first benchmarked function.
%macro WARMUP 0
cglobal warmup, 0, 0
    xorps          m0, m0
    mulps          m0, m0
    RET
%endmacro

INIT_YMM avx2
WARMUP
INIT_ZMM avx512
WARMUP

%else

; just random numbers to reduce the chance of an incidental match
%assign n3 0x6549315c
%assign n4 0xe02f3e23
%assign n5 0xb78d0d1d
%assign n6 0x33627ba7

;-----------------------------------------------------------------------------
; void checkasm_checked_call(void *func, ...)
;-----------------------------------------------------------------------------
cglobal checked_call, 1, 7
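    ; On x86-32 all parameters are passed on the stack. 27 dwords are
    ; pushed below the incoming arguments: first 27-n canaries (the
    ; bitwise NOT of the return address), then copies of the n stack
    ; parameters, so the callee sees its arguments at the usual offsets
    ; with the canaries sitting directly above them.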
    mov            r3, [esp+stack_offset]      ; return address
    mov            r1, [esp+stack_offset+17*4] ; num_stack_params
    mov            r2, 27
    not            r3
    sub            r2, r1
.push_canary:
    push           r3
    dec            r2
    jg .push_canary
.push_parameter:
    push dword [esp+32*4]
    dec            r1
    jg .push_parameter
    mov            r3, n3
    mov            r4, n4
    mov            r5, n5
    mov            r6, n6
    call           r0

    ; check for failure to preserve registers
    cmp            r3, n3
    setne         r3h
    cmp            r4, n4
    setne         r3b
    shl           r3d, 16
    cmp            r5, n5
    setne         r3h
    cmp            r6, n6
    setne         r3b
    test           r3, r3
    jz .gpr_ok
    lea            r1, [esp+16]
    mov       [esp+4], r1
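    ; The register-name string is built at esp+16 and passed to
    ; fail_func() as its %s argument via esp+4; esp+0 receives the
    ; format string itself at .fail below.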
%assign i 3
%rep 4
    mov    dword [r1], " r0" + (i << 16)
    lea            r4, [r1+3]
    test           r3, 1 << ((6 - i) * 8)
    cmovnz         r1, r4
    %assign i i+1
%endrep
    mov     byte [r1], 0
    mov            r5, eax
    mov            r6, edx
    LEA            r1, errmsg_register
    jmp .fail
.gpr_ok:
    ; check for stack corruption
    mov            r3, [esp+48*4] ; num_stack_params
    mov            r6, [esp+31*4] ; return address
    mov            r4, [esp+r3*4]
    sub            r3, 26
    not            r6
    xor            r4, r6
.check_canary:
    mov            r5, [esp+(r3+27)*4]
    xor            r5, r6
    or             r4, r5
    inc            r3
    jl .check_canary
    mov            r5, eax
    mov            r6, edx
    test           r4, r4
    jz .stack_ok
    LEA            r1, errmsg_stack
    jmp .fail
.stack_ok:
    ; check for dirty YMM state, i.e. missing vzeroupper
    LEA           ecx, check_vzeroupper
    mov           ecx, [ecx]
    test          ecx, ecx
    jz .ok ; not supported, skip
    xgetbv
    test           al, 0x04
    jz .ok ; clean ymm state
    LEA            r1, errmsg_vzeroupper
    vzeroupper
.fail:
    mov         [esp], r1
    call fail_func
.ok:
    add           esp, 27*4
    mov           eax, r5
    mov           edx, r6
    RET

%endif ; ARCH_X86_64