; Copyright © 2018, VideoLAN and dav1d authors
; Copyright © 2018, Two Orioles, LLC
; All rights reserved.
;
; Redistribution and use in source and binary forms, with or without
; modification, are permitted provided that the following conditions are met:
;
; 1. Redistributions of source code must retain the above copyright notice, this
;    list of conditions and the following disclaimer.
;
; 2. Redistributions in binary form must reproduce the above copyright notice,
;    this list of conditions and the following disclaimer in the documentation
;    and/or other materials provided with the distribution.
;
; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
; ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
; WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
; DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
; ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
; (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
; ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

%include "config.asm"
%undef private_prefix
%define private_prefix checkasm
%include "ext/x86/x86inc.asm"

SECTION_RODATA 16

%if ARCH_X86_64
; just random numbers to reduce the chance of incidental match
%if WIN64
x6:  dq 0x1a1b2550a612b48c,0x79445c159ce79064
x7:  dq 0x2eed899d5a28ddcd,0x86b2536fcd8cf636
x8:  dq 0xb0856806085e7943,0x3f2bf84fc0fcca4e
x9:  dq 0xacbd382dcf5b8de2,0xd229e1f5b281303f
x10: dq 0x71aeaff20b095fd9,0xab63e2e11fa38ed9
x11: dq 0x89b0c0765892729a,0x77d410d5c42c882d
x12: dq 0xc45ea11a955d8dd5,0x24b3c1d2a024048b
x13: dq 0x2e8ec680de14b47c,0xdd7b8919edd42786
x14: dq 0x135ce6888fa02cbf,0x11e53e2b2ac655ef
x15: dq 0x011ff554472a7a10,0x6de8f4c914c334d5
n7:  dq 0x21f86d66c8ca00ce
n8:  dq 0x75b6ba21077c48ad
%endif
n9:  dq 0xed56bb2dcb3c7736
n10: dq 0x8bda43d3fd1a7e06
n11: dq 0xb64a9c9e5d318408
n12: dq 0xdf9a54b303f1d3a3
n13: dq 0x4a75479abd64e097
n14: dq 0x249214109d5d1c88
%endif

errmsg_stack:      db "stack corruption", 0
errmsg_register:   db "failed to preserve register:%s", 0
errmsg_vzeroupper: db "missing vzeroupper", 0

SECTION .bss

check_vzeroupper: resd 1

SECTION .text

cextern fail_func
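
; fail_func is implemented on the C side of checkasm; the checked_call
; routines below call it with a printf-style message whenever one of their
; checks fails. A minimal sketch of the assumed prototype (the authoritative
; declaration lives in checkasm.h):
;
;   void checkasm_fail_func(const char *msg, ...);
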
; max number of args used by any asm function.
; (max_args % 4) must equal 3 for stack alignment
%define max_args 15

%if UNIX64
    DECLARE_REG_TMP 0
%else
    DECLARE_REG_TMP 4
%endif

;-----------------------------------------------------------------------------
; unsigned checkasm_init_x86(char *name)
;-----------------------------------------------------------------------------
cglobal init_x86, 0, 5
%if ARCH_X86_64
    push         rbx
%endif
    movifnidn     t0, r0mp
    mov          eax, 0x80000000
    cpuid
    cmp          eax, 0x80000004
    jb .no_brand ; processor brand string not supported
    mov          eax, 0x80000002
    cpuid
    mov   [t0+4* 0], eax
    mov   [t0+4* 1], ebx
    mov   [t0+4* 2], ecx
    mov   [t0+4* 3], edx
    mov          eax, 0x80000003
    cpuid
    mov   [t0+4* 4], eax
    mov   [t0+4* 5], ebx
    mov   [t0+4* 6], ecx
    mov   [t0+4* 7], edx
    mov          eax, 0x80000004
    cpuid
    mov   [t0+4* 8], eax
    mov   [t0+4* 9], ebx
    mov   [t0+4*10], ecx
    mov   [t0+4*11], edx
    xor          eax, eax
    cpuid
    jmp .check_xcr1
.no_brand: ; use manufacturer id as a fallback
    xor          eax, eax
    mov    [t0+4*3], eax
    cpuid
    mov    [t0+4*0], ebx
    mov    [t0+4*1], edx
    mov    [t0+4*2], ecx
.check_xcr1:
    test         eax, eax
    jz .end2 ; cpuid leaf 1 not supported
    mov          t0d, eax ; max leaf
    mov          eax, 1
    cpuid
    and          ecx, 0x18000000
    cmp          ecx, 0x18000000
    jne .end2 ; osxsave/avx not supported
    cmp          t0d, 13 ; cpuid leaf 13 not supported
    jb .end2
    mov          t0d, eax ; cpuid signature
    mov          eax, 13
    mov          ecx, 1
    cpuid
    test          al, 0x04
    jz .end ; xcr1 not supported
    mov          ecx, 1
    xgetbv
    test          al, 0x04
    jnz .end ; always-dirty ymm state
%if ARCH_X86_64 == 0 && PIC
    LEA          eax, check_vzeroupper
    mov        [eax], ecx
%else
    mov [check_vzeroupper], ecx
%endif
.end:
    mov          eax, t0d
.end2:
%if ARCH_X86_64
    pop          rbx
%endif
    RET

%if ARCH_X86_64
%if WIN64
    %define stack_param rsp+32 ; shadow space
    %define num_fn_args rsp+stack_offset+17*8
    %assign num_reg_args 4
    %assign free_regs 7
    %assign clobber_mask_stack_bit 16
    DECLARE_REG_TMP 4
%else
    %define stack_param rsp
    %define num_fn_args rsp+stack_offset+11*8
    %assign num_reg_args 6
    %assign free_regs 9
    %assign clobber_mask_stack_bit 64
    DECLARE_REG_TMP 7
%endif

%macro CLOBBER_UPPER 2 ; reg, mask_bit
    mov         r13d, %1d
    or           r13, r8
    test         r9b, %2
    cmovnz        %1, r13
%endmacro

cglobal checked_call, 2, 15, 16, max_args*8+64+8
    mov         r10d, [num_fn_args]
    mov           r8, 0xdeadbeef00000000
    mov          r9d, [num_fn_args+r10*8+8] ; clobber_mask
    mov           t0, [num_fn_args+r10*8]   ; func

    ; Clobber the upper halves of 32-bit parameters
    CLOBBER_UPPER r0, 1
    CLOBBER_UPPER r1, 2
    CLOBBER_UPPER r2, 4
    CLOBBER_UPPER r3, 8
%if UNIX64
    CLOBBER_UPPER r4, 16
    CLOBBER_UPPER r5, 32
%else ; WIN64
%assign i 6
%rep 16-6
    mova  m %+ i, [x %+ i]
    %assign i i+1
%endrep
%endif

    xor         r11d, r11d
    sub         r10d, num_reg_args
    cmovs       r10d, r11d ; num stack args

    ; write stack canaries to the area above parameters passed on the stack
    mov          r12, [rsp+stack_offset] ; return address
    not          r12
%assign i 0
%rep 8 ; 64 bytes
    mov [stack_param+(r10+i)*8], r12
    %assign i i+1
%endrep

    test        r10d, r10d
    jz .stack_setup_done ; no stack parameters
.copy_stack_parameter:
    mov          r12, [stack_param+stack_offset+8+r11*8]
    CLOBBER_UPPER r12, clobber_mask_stack_bit
    shr          r9d, 1
    mov [stack_param+r11*8], r12
    inc         r11d
    cmp         r11d, r10d
    jl .copy_stack_parameter
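
    ; Stack layout at this point, as set up above (illustrative sketch;
    ; offsets are qword indices relative to stack_param, n = the number of
    ; stack arguments held in r10d):
    ;
    ;   stack_param + 8*0 .. 8*(n-1): copied stack parameters
    ;   stack_param + 8*n .. 8*(n+7): canaries (~return address)
    ;
    ; A callee that writes past its own parameter area clobbers a canary,
    ; which the post-call check below detects.
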
.stack_setup_done:

%assign i 14
%rep 15-free_regs
    mov   r %+ i, [n %+ i]
    %assign i i-1
%endrep
    call          t0

    ; check for stack corruption
    mov          r0d, [num_fn_args]
    xor          r3d, r3d
    sub          r0d, num_reg_args
    cmovs        r0d, r3d ; num stack args

    mov           r3, [rsp+stack_offset]
    mov           r4, [stack_param+r0*8]
    not           r3
    xor           r4, r3
%assign i 1
%rep 6
    mov           r5, [stack_param+(r0+i)*8]
    xor           r5, r3
    or            r4, r5
    %assign i i+1
%endrep
    xor           r3, [stack_param+(r0+7)*8]
    or            r4, r3
    jz .stack_ok
    ; Save the return value located in rdx:rax first to prevent it from
    ; being clobbered.
    mov          r10, rax
    mov          r11, rdx
    lea           r0, [errmsg_stack]
    jmp .fail
.stack_ok:

    ; check for failure to preserve registers
%assign i 14
%rep 15-free_regs
    cmp     r %+ i, [n %+ i]
    setne        r4b
    lea          r3d, [r4+r3*2]
    %assign i i-1
%endrep
%if WIN64
    lea           r0, [rsp+32] ; account for shadow space
    mov           r5, r0
    test         r3d, r3d
    jz .gpr_ok
%else
    test         r3d, r3d
    jz .gpr_xmm_ok
    mov           r0, rsp
%endif
%assign i free_regs
%rep 15-free_regs
%if i < 10
    mov   dword [r0], " r0" + (i << 16)
    lea           r4, [r0+3]
%else
    mov   dword [r0], " r10" + ((i - 10) << 24)
    lea           r4, [r0+4]
%endif
    test         r3b, 1 << (i - free_regs)
    cmovnz        r0, r4
    %assign i i+1
%endrep
%if WIN64 ; xmm registers
.gpr_ok:
%assign i 6
%rep 16-6
    pxor   m %+ i, [x %+ i]
    %assign i i+1
%endrep
    ; Reduce the ten xor results to a single register in which, for
    ; i = 6..15, byte i is zero iff xmm<i> was preserved; bytes 0-5 hold
    ; copies of the xmm6/xmm7 results, so a fully clean state yields a
    ; 0xffff pmovmskb mask.
    packsswb      m6, m7
    packsswb      m8, m9
    packsswb     m10, m11
    packsswb     m12, m13
    packsswb     m14, m15
    packsswb      m6, m6
    packsswb      m8, m10
    packsswb     m12, m14
    packsswb      m6, m6
    packsswb      m8, m12
    packsswb      m6, m8
    pxor          m7, m7
    pcmpeqb       m6, m7
    pmovmskb     r3d, m6
    cmp          r3d, 0xffff
    je .xmm_ok
    mov          r7d, " xmm"
%assign i 6
%rep 16-6
    mov      [r0+0], r7d
%if i < 10
    mov byte [r0+4], "0" + i
    lea           r4, [r0+5]
%else
    mov word [r0+4], "10" + ((i - 10) << 8)
    lea           r4, [r0+6]
%endif
    test         r3d, 1 << i
    cmovz         r0, r4
    %assign i i+1
%endrep
.xmm_ok:
    cmp           r0, r5
    je .gpr_xmm_ok
    mov     byte [r0], 0
    mov          r11, rdx
    mov           r1, r5
%else
    mov     byte [r0], 0
    mov          r11, rdx
    mov           r1, rsp
%endif
    mov          r10, rax
    lea           r0, [errmsg_register]
    jmp .fail
.gpr_xmm_ok:
    ; Check for dirty YMM state, i.e. missing vzeroupper
    mov          ecx, [check_vzeroupper]
    test         ecx, ecx
    jz .ok ; not supported, skip
    mov          r10, rax
    mov          r11, rdx
    xgetbv
    test          al, 0x04
    jz .restore_retval ; clean ymm state
    lea           r0, [errmsg_vzeroupper]
    vzeroupper
.fail:
    ; Call fail_func() with a descriptive message to mark it as a failure.
    xor          eax, eax
    call fail_func
.restore_retval:
    mov          rax, r10
    mov          rdx, r11
.ok:
    RET

; trigger a warmup of vector units
%macro WARMUP 0
cglobal warmup, 0, 0
    xorps         m0, m0
    mulps         m0, m0
    RET
%endmacro

INIT_YMM avx2
WARMUP
INIT_ZMM avx512
WARMUP

%else

; just random numbers to reduce the chance of incidental match
%assign n3 0x6549315c
%assign n4 0xe02f3e23
%assign n5 0xb78d0d1d
%assign n6 0x33627ba7
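
; On x86-32, x86inc numbers the callee-saved GPRs ebx/esi/edi/ebp as r3-r6.
; checked_call seeds them with the constants above before calling the tested
; function and compares afterwards; a mismatch means the function clobbered
; a register it was required to preserve.
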
;-----------------------------------------------------------------------------
; void checkasm_checked_call(void *func, ...)
;-----------------------------------------------------------------------------
cglobal checked_call, 1, 7
    mov           r3, [esp+stack_offset]      ; return address
    mov           r1, [esp+stack_offset+17*4] ; num_stack_params
    mov           r2, 27
    not           r3
    sub           r2, r1
.push_canary:
    push          r3
    dec           r2
    jg .push_canary
.push_parameter:
    push dword [esp+32*4]
    dec           r1
    jg .push_parameter
    mov           r3, n3
    mov           r4, n4
    mov           r5, n5
    mov           r6, n6
    call          r0

    ; check for failure to preserve registers
    cmp           r3, n3
    setne        r3h
    cmp           r4, n4
    setne        r3b
    shl          r3d, 16
    cmp           r5, n5
    setne        r3h
    cmp           r6, n6
    setne        r3b
    test          r3, r3
    jz .gpr_ok
    lea           r1, [esp+16]
    mov      [esp+4], r1
%assign i 3
%rep 4
    mov    dword [r1], " r0" + (i << 16)
    lea           r4, [r1+3]
    test          r3, 1 << ((6 - i) * 8)
    cmovnz        r1, r4
    %assign i i+1
%endrep
    mov     byte [r1], 0
    mov           r5, eax
    mov           r6, edx
    LEA           r1, errmsg_register
    jmp .fail
.gpr_ok:
    ; check for stack corruption
    mov           r3, [esp+48*4] ; num_stack_params
    mov           r6, [esp+31*4] ; return address
    mov           r4, [esp+r3*4]
    sub           r3, 26
    not           r6
    xor           r4, r6
.check_canary:
    mov           r5, [esp+(r3+27)*4]
    xor           r5, r6
    or            r4, r5
    inc           r3
    jl .check_canary
    mov           r5, eax
    mov           r6, edx
    test          r4, r4
    jz .stack_ok
    LEA           r1, errmsg_stack
    jmp .fail
.stack_ok:
    ; check for dirty YMM state, i.e. missing vzeroupper
    LEA          ecx, check_vzeroupper
    mov          ecx, [ecx]
    test         ecx, ecx
    jz .ok ; not supported, skip
    xgetbv
    test          al, 0x04
    jz .ok ; clean ymm state
    LEA           r1, errmsg_vzeroupper
    vzeroupper
.fail:
    mov        [esp], r1
    call fail_func
.ok:
    add          esp, 27*4
    mov          eax, r5
    mov          edx, r6
    RET

%endif ; ARCH_X86_64
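
; Entry points defined above, as seen from C (illustrative sketch; cglobal
; prepends private_prefix and appends the cpuflags suffix, so "warmup" under
; INIT_YMM avx2 becomes checkasm_warmup_avx2; the authoritative declarations
; live in checkasm.h):
;
;   unsigned checkasm_init_x86(char *name);
;   void checkasm_checked_call(void *func, ...);
;   void checkasm_warmup_avx2(void);
;   void checkasm_warmup_avx512(void);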