; This file is generated from a similarly-named Perl script in the BoringSSL
; source tree. Do not edit by hand.
;
; 32-bit x86 AES-NI routines, NASM syntax (win32 / obj output formats).
;
; Conventions visible in this file:
;   * Public entry points read their arguments from the stack ([4+esp], ...).
;   * An expanded key is a run of 16-byte round keys with a 32-bit loop count
;     stored at byte offset 240.
;   * AES-NI, SSSE3 and SSE4.1 instructions are emitted as raw `db` bytes; the
;     trailing comment on each such line gives the decoded instruction.

%include "ring_core_generated/prefix_symbols_nasm.inc"
%ifidn __OUTPUT_FORMAT__, win32
%ifidn __OUTPUT_FORMAT__,obj
section code    use32 class=code align=64
%elifidn __OUTPUT_FORMAT__,win32
$@feat.00 equ 1
section .text   code align=64
%else
section .text   code
%endif
;extern _OPENSSL_ia32cap_P
%ifdef BORINGSSL_DISPATCH_TEST
extern  _BORINGSSL_function_hit
%endif

;-----------------------------------------------------------------------
; _aes_hw_encrypt
; Stack args: [4+esp]  = pointer to the 16-byte input block (read)
;             [8+esp]  = pointer to the 16-byte output block (written)
;             [12+esp] = pointer to the expanded key (count at +240)
; Encrypts one block. xmm0-xmm2 are zeroed before returning.
; Clobbers: eax, ecx, edx, xmm0-xmm2, flags.
;-----------------------------------------------------------------------
global  _aes_hw_encrypt
align   16
_aes_hw_encrypt:
L$_aes_hw_encrypt_begin:
%ifdef BORINGSSL_DISPATCH_TEST
        ; Test hook: set byte 1 of _BORINGSSL_function_hit to 1.
        push    ebx
        push    edx
        call    L$000pic
L$000pic:
        pop     ebx                             ; ebx = address of L$000pic
        lea     ebx,[(_BORINGSSL_function_hit+1-L$000pic)+ebx]
        mov     edx,1
        mov     BYTE [ebx],dl
        pop     edx
        pop     ebx
%endif
        mov     eax,DWORD [4+esp]
        mov     edx,DWORD [12+esp]
        movups  xmm2,[eax]                      ; xmm2 = input block
        mov     ecx,DWORD [240+edx]             ; ecx = loop count
        mov     eax,DWORD [8+esp]               ; eax = output pointer
        movups  xmm0,[edx]                      ; round key 0
        movups  xmm1,[16+edx]                   ; round key 1
        lea     edx,[32+edx]
        xorps   xmm2,xmm0                       ; initial whitening
L$001enc1_loop_1:
        db      102,15,56,220,209               ; aesenc xmm2,xmm1
        dec     ecx
        movups  xmm1,[edx]                      ; next round key (movups/lea keep flags)
        lea     edx,[16+edx]
        jnz     NEAR L$001enc1_loop_1
        db      102,15,56,221,209               ; aesenclast xmm2,xmm1
        pxor    xmm0,xmm0                       ; scrub key material
        pxor    xmm1,xmm1
        movups  [eax],xmm2                      ; store result
        pxor    xmm2,xmm2
        ret

;-----------------------------------------------------------------------
; __aesni_encrypt2 (internal)
; In:  edx = expanded key, ecx = loop count (as stored at key+240),
;      xmm2, xmm3 = blocks.
; Out: xmm2, xmm3 encrypted in place.
; Clobbers: ecx, edx, xmm0, xmm1, flags.
; ecx is turned into a negative byte offset from the end of the schedule
; and stepped by 32 (two rounds per iteration) until it reaches zero.
;-----------------------------------------------------------------------
align   16
__aesni_encrypt2:
        movups  xmm0,[edx]
        shl     ecx,4                           ; count -> bytes
        movups  xmm1,[16+edx]
        xorps   xmm2,xmm0
        pxor    xmm3,xmm0
        movups  xmm0,[32+edx]
        lea     edx,[32+ecx*1+edx]
        neg     ecx
        add     ecx,16
L$002enc2_loop:
        db      102,15,56,220,209               ; aesenc xmm2,xmm1
        db      102,15,56,220,217               ; aesenc xmm3,xmm1
        movups  xmm1,[ecx*1+edx]
        add     ecx,32
        db      102,15,56,220,208               ; aesenc xmm2,xmm0
        db      102,15,56,220,216               ; aesenc xmm3,xmm0
        movups  xmm0,[ecx*1+edx-16]
        jnz     NEAR L$002enc2_loop
        db      102,15,56,220,209               ; aesenc xmm2,xmm1
        db      102,15,56,220,217               ; aesenc xmm3,xmm1
        db      102,15,56,221,208               ; aesenclast xmm2,xmm0
        db      102,15,56,221,216               ; aesenclast xmm3,xmm0
        ret

;-----------------------------------------------------------------------
; __aesni_encrypt3 (internal)
; Same contract as __aesni_encrypt2, operating on xmm2-xmm4.
;-----------------------------------------------------------------------
align   16
__aesni_encrypt3:
        movups  xmm0,[edx]
        shl     ecx,4
        movups  xmm1,[16+edx]
        xorps   xmm2,xmm0
        pxor    xmm3,xmm0
        pxor    xmm4,xmm0
        movups  xmm0,[32+edx]
        lea     edx,[32+ecx*1+edx]
        neg     ecx
        add     ecx,16
L$003enc3_loop:
        db      102,15,56,220,209               ; aesenc xmm2,xmm1
        db      102,15,56,220,217               ; aesenc xmm3,xmm1
        db      102,15,56,220,225               ; aesenc xmm4,xmm1
        movups  xmm1,[ecx*1+edx]
        add     ecx,32
        db      102,15,56,220,208               ; aesenc xmm2,xmm0
        db      102,15,56,220,216               ; aesenc xmm3,xmm0
        db      102,15,56,220,224               ; aesenc xmm4,xmm0
        movups  xmm0,[ecx*1+edx-16]
        jnz     NEAR L$003enc3_loop
        db      102,15,56,220,209               ; aesenc xmm2,xmm1
        db      102,15,56,220,217               ; aesenc xmm3,xmm1
        db      102,15,56,220,225               ; aesenc xmm4,xmm1
        db      102,15,56,221,208               ; aesenclast xmm2,xmm0
        db      102,15,56,221,216               ; aesenclast xmm3,xmm0
        db      102,15,56,221,224               ; aesenclast xmm4,xmm0
        ret

;-----------------------------------------------------------------------
; __aesni_encrypt4 (internal)
; Same contract as __aesni_encrypt2, operating on xmm2-xmm5.
;-----------------------------------------------------------------------
align   16
__aesni_encrypt4:
        movups  xmm0,[edx]
        movups  xmm1,[16+edx]
        shl     ecx,4
        xorps   xmm2,xmm0
        pxor    xmm3,xmm0
        pxor    xmm4,xmm0
        pxor    xmm5,xmm0
        movups  xmm0,[32+edx]
        lea     edx,[32+ecx*1+edx]
        neg     ecx
        db      15,31,64,0                      ; 4-byte nop (nop dword [eax+0])
        add     ecx,16
L$004enc4_loop:
        db      102,15,56,220,209               ; aesenc xmm2,xmm1
        db      102,15,56,220,217               ; aesenc xmm3,xmm1
        db      102,15,56,220,225               ; aesenc xmm4,xmm1
        db      102,15,56,220,233               ; aesenc xmm5,xmm1
        movups  xmm1,[ecx*1+edx]
        add     ecx,32
        db      102,15,56,220,208               ; aesenc xmm2,xmm0
        db      102,15,56,220,216               ; aesenc xmm3,xmm0
        db      102,15,56,220,224               ; aesenc xmm4,xmm0
        db      102,15,56,220,232               ; aesenc xmm5,xmm0
        movups  xmm0,[ecx*1+edx-16]
        jnz     NEAR L$004enc4_loop
        db      102,15,56,220,209               ; aesenc xmm2,xmm1
        db      102,15,56,220,217               ; aesenc xmm3,xmm1
        db      102,15,56,220,225               ; aesenc xmm4,xmm1
        db      102,15,56,220,233               ; aesenc xmm5,xmm1
        db      102,15,56,221,208               ; aesenclast xmm2,xmm0
        db      102,15,56,221,216               ; aesenclast xmm3,xmm0
        db      102,15,56,221,224               ; aesenclast xmm4,xmm0
        db      102,15,56,221,232               ; aesenclast xmm5,xmm0
        ret

;-----------------------------------------------------------------------
; __aesni_encrypt6 (internal)
; In:  edx = expanded key, ecx = loop count, xmm2-xmm7 = blocks.
; Out: xmm2-xmm7 encrypted in place.
; Clobbers: ecx, edx, xmm0, xmm1, flags.
; The first round is interleaved with the whitening XORs.
; L$_aesni_encrypt6_enter is a second entry point, called directly by the
; CTR loop after it has applied whitening and the first round itself; at
; that entry ecx/edx/xmm0 must already be in the state this prologue
; would have produced.
;-----------------------------------------------------------------------
align   16
__aesni_encrypt6:
        movups  xmm0,[edx]
        shl     ecx,4
        movups  xmm1,[16+edx]
        xorps   xmm2,xmm0
        pxor    xmm3,xmm0
        pxor    xmm4,xmm0
        db      102,15,56,220,209               ; aesenc xmm2,xmm1
        pxor    xmm5,xmm0
        pxor    xmm6,xmm0
        db      102,15,56,220,217               ; aesenc xmm3,xmm1
        lea     edx,[32+ecx*1+edx]
        neg     ecx
        db      102,15,56,220,225               ; aesenc xmm4,xmm1
        pxor    xmm7,xmm0
        movups  xmm0,[ecx*1+edx]
        add     ecx,16
        jmp     NEAR L$005_aesni_encrypt6_inner
align   16
L$006enc6_loop:
        db      102,15,56,220,209               ; aesenc xmm2,xmm1
        db      102,15,56,220,217               ; aesenc xmm3,xmm1
        db      102,15,56,220,225               ; aesenc xmm4,xmm1
L$005_aesni_encrypt6_inner:
        db      102,15,56,220,233               ; aesenc xmm5,xmm1
        db      102,15,56,220,241               ; aesenc xmm6,xmm1
        db      102,15,56,220,249               ; aesenc xmm7,xmm1
L$_aesni_encrypt6_enter:
        movups  xmm1,[ecx*1+edx]
        add     ecx,32
        db      102,15,56,220,208               ; aesenc xmm2,xmm0
        db      102,15,56,220,216               ; aesenc xmm3,xmm0
        db      102,15,56,220,224               ; aesenc xmm4,xmm0
        db      102,15,56,220,232               ; aesenc xmm5,xmm0
        db      102,15,56,220,240               ; aesenc xmm6,xmm0
        db      102,15,56,220,248               ; aesenc xmm7,xmm0
        movups  xmm0,[ecx*1+edx-16]
        jnz     NEAR L$006enc6_loop
        db      102,15,56,220,209               ; aesenc xmm2,xmm1
        db      102,15,56,220,217               ; aesenc xmm3,xmm1
        db      102,15,56,220,225               ; aesenc xmm4,xmm1
        db      102,15,56,220,233               ; aesenc xmm5,xmm1
        db      102,15,56,220,241               ; aesenc xmm6,xmm1
        db      102,15,56,220,249               ; aesenc xmm7,xmm1
        db      102,15,56,221,208               ; aesenclast xmm2,xmm0
        db      102,15,56,221,216               ; aesenclast xmm3,xmm0
        db      102,15,56,221,224               ; aesenclast xmm4,xmm0
        db      102,15,56,221,232               ; aesenclast xmm5,xmm0
        db      102,15,56,221,240               ; aesenclast xmm6,xmm0
        db      102,15,56,221,248               ; aesenclast xmm7,xmm0
        ret

;-----------------------------------------------------------------------
; _aes_hw_ctr32_encrypt_blocks
; Stack args (offsets valid after the four register pushes):
;   [20+esp] -> esi = input pointer  (16-byte blocks, read)
;   [24+esp] -> edi = output pointer (16-byte blocks, written)
;   [28+esp] -> eax = number of blocks
;   [32+esp] -> edx = expanded key (count at +240)
;   [36+esp] -> ebx = pointer to the 16-byte counter block
; The last 32-bit word of the counter block is treated as a big-endian
; counter (extracted, bswap'd, incremented per block).
;
; Aligned stack frame (esp rounded down to 16):
;   [esp+0]   byte-reversal mask for pshufb
;   [esp+16]  per-iteration counter increment {6,6,6,0}
;   [esp+32]  counter block (counter word zeroed) XOR round key 0
;   [esp+48]  counters for blocks 0..2 of the next batch
;   [esp+64]  counters for blocks 3..5 of the next batch
;   [esp+80]  caller's esp
; Saves/restores ebp, ebx, esi, edi. Scrubs xmm0-xmm7 and frame slots
; 32..79 on exit.
;-----------------------------------------------------------------------
global  _aes_hw_ctr32_encrypt_blocks
align   16
_aes_hw_ctr32_encrypt_blocks:
L$_aes_hw_ctr32_encrypt_blocks_begin:
        push    ebp
        push    ebx
        push    esi
        push    edi
%ifdef BORINGSSL_DISPATCH_TEST
        ; Test hook: set byte 0 of _BORINGSSL_function_hit to 1.
        push    ebx
        push    edx
        call    L$007pic
L$007pic:
        pop     ebx
        lea     ebx,[(_BORINGSSL_function_hit+0-L$007pic)+ebx]
        mov     edx,1
        mov     BYTE [ebx],dl
        pop     edx
        pop     ebx
%endif
        mov     esi,DWORD [20+esp]
        mov     edi,DWORD [24+esp]
        mov     eax,DWORD [28+esp]
        mov     edx,DWORD [32+esp]
        mov     ebx,DWORD [36+esp]
        mov     ebp,esp                         ; remember caller's esp
        sub     esp,88
        and     esp,-16                         ; movdqa below needs 16-byte alignment
        mov     DWORD [80+esp],ebp
        cmp     eax,1
        je      NEAR L$008ctr32_one_shortcut
        movdqu  xmm7,[ebx]                      ; xmm7 = counter block
        mov     DWORD [esp],202182159           ; 0x0C0D0E0F \
        mov     DWORD [4+esp],134810123         ; 0x08090A0B  | byte-reversal
        mov     DWORD [8+esp],67438087          ; 0x04050607  | shuffle mask
        mov     DWORD [12+esp],66051            ; 0x00010203 /
        mov     ecx,6
        xor     ebp,ebp
        mov     DWORD [16+esp],ecx              ; increment vector {6,6,6,0}
        mov     DWORD [20+esp],ecx
        mov     DWORD [24+esp],ecx
        mov     DWORD [28+esp],ebp
        db      102,15,58,22,251,3              ; pextrd ebx,xmm7,3  (counter word)
        db      102,15,58,34,253,3              ; pinsrd xmm7,ebp,3  (zero it in xmm7)
        mov     ecx,DWORD [240+edx]
        bswap   ebx                             ; counter to host byte order
        pxor    xmm0,xmm0
        pxor    xmm1,xmm1
        movdqa  xmm2,[esp]                      ; xmm2 = shuffle mask
        db      102,15,58,34,195,0              ; pinsrd xmm0,ebx,0  (ctr+0)
        lea     ebp,[3+ebx]
        db      102,15,58,34,205,0              ; pinsrd xmm1,ebp,0  (ctr+3)
        inc     ebx
        db      102,15,58,34,195,1              ; pinsrd xmm0,ebx,1  (ctr+1)
        inc     ebp
        db      102,15,58,34,205,1              ; pinsrd xmm1,ebp,1  (ctr+4)
        inc     ebx
        db      102,15,58,34,195,2              ; pinsrd xmm0,ebx,2  (ctr+2)
        inc     ebp
        db      102,15,58,34,205,2              ; pinsrd xmm1,ebp,2  (ctr+5)
        movdqa  [48+esp],xmm0
        db      102,15,56,0,194                 ; pshufb xmm0,xmm2
        movdqu  xmm6,[edx]                      ; round key 0
        movdqa  [64+esp],xmm1
        db      102,15,56,0,202                 ; pshufb xmm1,xmm2
        pshufd  xmm2,xmm0,192
        pshufd  xmm3,xmm0,128
        cmp     eax,6
        jb      NEAR L$009ctr32_tail
        pxor    xmm7,xmm6                       ; fold round key 0 into counter block
        shl     ecx,4
        mov     ebx,16
        movdqa  [32+esp],xmm7
        mov     ebp,edx                         ; ebp = start of expanded key
        sub     ebx,ecx                         ; ebx = 16 - 16*count (loop offset)
        lea     edx,[32+ecx*1+edx]
        sub     eax,6
        jmp     NEAR L$010ctr32_loop6
align   16
        ; Main loop: six blocks per iteration.
L$010ctr32_loop6:
        pshufd  xmm4,xmm0,64
        movdqa  xmm0,[32+esp]
        pshufd  xmm5,xmm1,192
        pxor    xmm2,xmm0
        pshufd  xmm6,xmm1,128
        pxor    xmm3,xmm0
        pshufd  xmm7,xmm1,64
        movups  xmm1,[16+ebp]                   ; round key 1
        pxor    xmm4,xmm0
        pxor    xmm5,xmm0
        db      102,15,56,220,209               ; aesenc xmm2,xmm1
        pxor    xmm6,xmm0
        pxor    xmm7,xmm0
        db      102,15,56,220,217               ; aesenc xmm3,xmm1
        movups  xmm0,[32+ebp]                   ; round key 2
        mov     ecx,ebx
        db      102,15,56,220,225               ; aesenc xmm4,xmm1
        db      102,15,56,220,233               ; aesenc xmm5,xmm1
        db      102,15,56,220,241               ; aesenc xmm6,xmm1
        db      102,15,56,220,249               ; aesenc xmm7,xmm1
        call    L$_aesni_encrypt6_enter         ; remaining rounds
        ; XOR keystream with input, store, and prepare next counters.
        movups  xmm1,[esi]
        movups  xmm0,[16+esi]
        xorps   xmm2,xmm1
        movups  xmm1,[32+esi]
        xorps   xmm3,xmm0
        movups  [edi],xmm2
        movdqa  xmm0,[16+esp]                   ; increment vector
        xorps   xmm4,xmm1
        movdqa  xmm1,[64+esp]
        movups  [16+edi],xmm3
        movups  [32+edi],xmm4
        paddd   xmm1,xmm0                       ; counters 3..5 += 6
        paddd   xmm0,[48+esp]                   ; counters 0..2 += 6
        movdqa  xmm2,[esp]                      ; shuffle mask
        movups  xmm3,[48+esi]
        movups  xmm4,[64+esi]
        xorps   xmm5,xmm3
        movups  xmm3,[80+esi]
        lea     esi,[96+esi]
        movdqa  [48+esp],xmm0
        db      102,15,56,0,194                 ; pshufb xmm0,xmm2
        xorps   xmm6,xmm4
        movups  [48+edi],xmm5
        xorps   xmm7,xmm3
        movdqa  [64+esp],xmm1
        db      102,15,56,0,202                 ; pshufb xmm1,xmm2
        movups  [64+edi],xmm6
        pshufd  xmm2,xmm0,192
        movups  [80+edi],xmm7
        lea     edi,[96+edi]
        pshufd  xmm3,xmm0,128
        sub     eax,6
        jnc     NEAR L$010ctr32_loop6
        add     eax,6                           ; eax = blocks left (0..5)
        jz      NEAR L$011ctr32_ret
        movdqu  xmm7,[ebp]                      ; round key 0 ...
        mov     edx,ebp
        pxor    xmm7,[32+esp]                   ; ... XORed back out: xmm7 = counter block
        mov     ecx,DWORD [240+ebp]
        ; Tail: 1..5 blocks; counters are merged with por since the
        ; counter word of xmm7 is zero.
L$009ctr32_tail:
        por     xmm2,xmm7
        cmp     eax,2
        jb      NEAR L$012ctr32_one
        pshufd  xmm4,xmm0,64
        por     xmm3,xmm7
        je      NEAR L$013ctr32_two
        pshufd  xmm5,xmm1,192
        por     xmm4,xmm7
        cmp     eax,4
        jb      NEAR L$014ctr32_three
        pshufd  xmm6,xmm1,128
        por     xmm5,xmm7
        je      NEAR L$015ctr32_four
        por     xmm6,xmm7
        call    __aesni_encrypt6                ; five blocks; xmm7 result is unused
        movups  xmm1,[esi]
        movups  xmm0,[16+esi]
        xorps   xmm2,xmm1
        movups  xmm1,[32+esi]
        xorps   xmm3,xmm0
        movups  xmm0,[48+esi]
        xorps   xmm4,xmm1
        movups  xmm1,[64+esi]
        xorps   xmm5,xmm0
        movups  [edi],xmm2
        xorps   xmm6,xmm1
        movups  [16+edi],xmm3
        movups  [32+edi],xmm4
        movups  [48+edi],xmm5
        movups  [64+edi],xmm6
        jmp     NEAR L$011ctr32_ret
align   16
        ; Exactly one block requested: use the counter block as-is.
L$008ctr32_one_shortcut:
        movups  xmm2,[ebx]
        mov     ecx,DWORD [240+edx]
L$012ctr32_one:
        movups  xmm0,[edx]
        movups  xmm1,[16+edx]
        lea     edx,[32+edx]
        xorps   xmm2,xmm0
L$016enc1_loop_2:
        db      102,15,56,220,209               ; aesenc xmm2,xmm1
        dec     ecx
        movups  xmm1,[edx]
        lea     edx,[16+edx]
        jnz     NEAR L$016enc1_loop_2
        db      102,15,56,221,209               ; aesenclast xmm2,xmm1
        movups  xmm6,[esi]
        xorps   xmm6,xmm2
        movups  [edi],xmm6
        jmp     NEAR L$011ctr32_ret
align   16
L$013ctr32_two:
        call    __aesni_encrypt2
        movups  xmm5,[esi]
        movups  xmm6,[16+esi]
        xorps   xmm2,xmm5
        xorps   xmm3,xmm6
        movups  [edi],xmm2
        movups  [16+edi],xmm3
        jmp     NEAR L$011ctr32_ret
align   16
L$014ctr32_three:
        call    __aesni_encrypt3
        movups  xmm5,[esi]
        movups  xmm6,[16+esi]
        xorps   xmm2,xmm5
        movups  xmm7,[32+esi]
        xorps   xmm3,xmm6
        movups  [edi],xmm2
        xorps   xmm4,xmm7
        movups  [16+edi],xmm3
        movups  [32+edi],xmm4
        jmp     NEAR L$011ctr32_ret
align   16
L$015ctr32_four:
        call    __aesni_encrypt4
        movups  xmm6,[esi]
        movups  xmm7,[16+esi]
        movups  xmm1,[32+esi]
        xorps   xmm2,xmm6
        movups  xmm0,[48+esi]
        xorps   xmm3,xmm7
        movups  [edi],xmm2
        xorps   xmm4,xmm1
        movups  [16+edi],xmm3
        xorps   xmm5,xmm0
        movups  [32+edi],xmm4
        movups  [48+edi],xmm5
        ; Common exit: scrub registers and frame, restore esp and saved regs.
L$011ctr32_ret:
        pxor    xmm0,xmm0
        pxor    xmm1,xmm1
        pxor    xmm2,xmm2
        pxor    xmm3,xmm3
        pxor    xmm4,xmm4
        movdqa  [32+esp],xmm0
        pxor    xmm5,xmm5
        movdqa  [48+esp],xmm0
        pxor    xmm6,xmm6
        movdqa  [64+esp],xmm0
        pxor    xmm7,xmm7
        mov     esp,DWORD [80+esp]
        pop     edi
        pop     esi
        pop     ebx
        pop     ebp
        ret

;-----------------------------------------------------------------------
; __aesni_set_encrypt_key (internal)
; In:  eax = pointer to the raw key bytes
;      ecx = key size in bits (128 or 256 accepted)
;      edx = pointer to the expanded-key output
; Out: eax = 0 on success, -1 if eax or edx is null, -2 for any other
;      key size. On success the loop count (9 for 128-bit, 13 for
;      256-bit) is written at output+240.
; Saves/restores ebp, ebx. Clobbers ecx, edx, xmm0-xmm5, flags.
; Two schedule implementations exist per key size; the "_alt" one is
; selected when (second dword of _OPENSSL_ia32cap_P & 0x10000800) ==
; 0x10000000.
; NOTE(review): presumably that is "AVX set, XOP clear" -- confirm
; against the OPENSSL_ia32cap_P bit layout.
;-----------------------------------------------------------------------
align   16
__aesni_set_encrypt_key:
        push    ebp
        push    ebx
        test    eax,eax
        jz      NEAR L$017bad_pointer
        test    edx,edx
        jz      NEAR L$017bad_pointer
        call    L$018pic
L$018pic:
        pop     ebx
        lea     ebx,[(L$key_const-L$018pic)+ebx] ; ebx = &L$key_const (PIC)
        lea     ebp,[_OPENSSL_ia32cap_P]
        movups  xmm0,[eax]                      ; first 16 key bytes
        xorps   xmm4,xmm4
        mov     ebp,DWORD [4+ebp]
        lea     edx,[16+edx]
        and     ebp,268437504                   ; 0x10000800
        cmp     ecx,256
        je      NEAR L$01914rounds
        cmp     ecx,128
        jne     NEAR L$020bad_keybits
align   16
        ; ---- 128-bit key, aeskeygenassist path ----
L$02110rounds:
        cmp     ebp,268435456                   ; 0x10000000
        je      NEAR L$02210rounds_alt
        mov     ecx,9
        movups  [edx-16],xmm0                   ; round key 0
        db      102,15,58,223,200,1             ; aeskeygenassist xmm1,xmm0,1
        call    L$023key_128_cold
        db      102,15,58,223,200,2             ; aeskeygenassist xmm1,xmm0,2
        call    L$024key_128
        db      102,15,58,223,200,4             ; aeskeygenassist xmm1,xmm0,4
        call    L$024key_128
        db      102,15,58,223,200,8             ; aeskeygenassist xmm1,xmm0,8
        call    L$024key_128
        db      102,15,58,223,200,16            ; aeskeygenassist xmm1,xmm0,16
        call    L$024key_128
        db      102,15,58,223,200,32            ; aeskeygenassist xmm1,xmm0,32
        call    L$024key_128
        db      102,15,58,223,200,64            ; aeskeygenassist xmm1,xmm0,64
        call    L$024key_128
        db      102,15,58,223,200,128           ; aeskeygenassist xmm1,xmm0,128
        call    L$024key_128
        db      102,15,58,223,200,27            ; aeskeygenassist xmm1,xmm0,27
        call    L$024key_128
        db      102,15,58,223,200,54            ; aeskeygenassist xmm1,xmm0,54
        call    L$024key_128
        movups  [edx],xmm0                      ; last round key
        mov     DWORD [80+edx],ecx              ; count at output+240
        jmp     NEAR L$025good_key
align   16
        ; Store the previous round key, then derive the next one into xmm0.
        ; The _cold entry skips the store (first step; key 0 already stored).
L$024key_128:
        movups  [edx],xmm0
        lea     edx,[16+edx]
L$023key_128_cold:
        shufps  xmm4,xmm0,16
        xorps   xmm0,xmm4
        shufps  xmm4,xmm0,140
        xorps   xmm0,xmm4
        shufps  xmm1,xmm1,255
        xorps   xmm0,xmm1
        ret
align   16
        ; ---- 128-bit key, pshufb/aesenclast path ----
L$02210rounds_alt:
        movdqa  xmm5,[ebx]                      ; shuffle mask (first row of key_const)
        mov     ecx,8
        movdqa  xmm4,[32+ebx]                   ; {1,1,1,1}
        movdqa  xmm2,xmm0
        movdqu  [edx-16],xmm0                   ; round key 0
L$026loop_key128:
        db      102,15,56,0,197                 ; pshufb xmm0,xmm5
        db      102,15,56,221,196               ; aesenclast xmm0,xmm4
        pslld   xmm4,1                          ; double the constant
        lea     edx,[16+edx]
        movdqa  xmm3,xmm2
        pslldq  xmm2,4
        pxor    xmm3,xmm2
        pslldq  xmm2,4
        pxor    xmm3,xmm2
        pslldq  xmm2,4
        pxor    xmm2,xmm3
        pxor    xmm0,xmm2
        movdqu  [edx-16],xmm0
        movdqa  xmm2,xmm0
        dec     ecx
        jnz     NEAR L$026loop_key128
        movdqa  xmm4,[48+ebx]                   ; {27,27,27,27}
        db      102,15,56,0,197                 ; pshufb xmm0,xmm5
        db      102,15,56,221,196               ; aesenclast xmm0,xmm4
        pslld   xmm4,1
        movdqa  xmm3,xmm2
        pslldq  xmm2,4
        pxor    xmm3,xmm2
        pslldq  xmm2,4
        pxor    xmm3,xmm2
        pslldq  xmm2,4
        pxor    xmm2,xmm3
        pxor    xmm0,xmm2
        movdqu  [edx],xmm0
        movdqa  xmm2,xmm0
        db      102,15,56,0,197                 ; pshufb xmm0,xmm5
        db      102,15,56,221,196               ; aesenclast xmm0,xmm4
        movdqa  xmm3,xmm2
        pslldq  xmm2,4
        pxor    xmm3,xmm2
        pslldq  xmm2,4
        pxor    xmm3,xmm2
        pslldq  xmm2,4
        pxor    xmm2,xmm3
        pxor    xmm0,xmm2
        movdqu  [16+edx],xmm0
        mov     ecx,9
        mov     DWORD [96+edx],ecx              ; count at output+240
        jmp     NEAR L$025good_key
align   16
        ; ---- 256-bit key, aeskeygenassist path ----
L$01914rounds:
        movups  xmm2,[16+eax]                   ; second 16 key bytes
        lea     edx,[16+edx]
        cmp     ebp,268435456                   ; 0x10000000
        je      NEAR L$02714rounds_alt
        mov     ecx,13
        movups  [edx-32],xmm0                   ; round key 0
        movups  [edx-16],xmm2                   ; round key 1
        db      102,15,58,223,202,1             ; aeskeygenassist xmm1,xmm2,1
        call    L$028key_256a_cold
        db      102,15,58,223,200,1             ; aeskeygenassist xmm1,xmm0,1
        call    L$029key_256b
        db      102,15,58,223,202,2             ; aeskeygenassist xmm1,xmm2,2
        call    L$030key_256a
        db      102,15,58,223,200,2             ; aeskeygenassist xmm1,xmm0,2
        call    L$029key_256b
        db      102,15,58,223,202,4             ; aeskeygenassist xmm1,xmm2,4
        call    L$030key_256a
        db      102,15,58,223,200,4             ; aeskeygenassist xmm1,xmm0,4
        call    L$029key_256b
        db      102,15,58,223,202,8             ; aeskeygenassist xmm1,xmm2,8
        call    L$030key_256a
        db      102,15,58,223,200,8             ; aeskeygenassist xmm1,xmm0,8
        call    L$029key_256b
        db      102,15,58,223,202,16            ; aeskeygenassist xmm1,xmm2,16
        call    L$030key_256a
        db      102,15,58,223,200,16            ; aeskeygenassist xmm1,xmm0,16
        call    L$029key_256b
        db      102,15,58,223,202,32            ; aeskeygenassist xmm1,xmm2,32
        call    L$030key_256a
        db      102,15,58,223,200,32            ; aeskeygenassist xmm1,xmm0,32
        call    L$029key_256b
        db      102,15,58,223,202,64            ; aeskeygenassist xmm1,xmm2,64
        call    L$030key_256a
        movups  [edx],xmm0                      ; last round key
        mov     DWORD [16+edx],ecx              ; count at output+240
        xor     eax,eax
        jmp     NEAR L$025good_key
align   16
        ; Store xmm2, then derive the next even-numbered round key into xmm0.
        ; The _cold entry skips the store.
L$030key_256a:
        movups  [edx],xmm2
        lea     edx,[16+edx]
L$028key_256a_cold:
        shufps  xmm4,xmm0,16
        xorps   xmm0,xmm4
        shufps  xmm4,xmm0,140
        xorps   xmm0,xmm4
        shufps  xmm1,xmm1,255
        xorps   xmm0,xmm1
        ret
align   16
        ; Store xmm0, then derive the next odd-numbered round key into xmm2.
L$029key_256b:
        movups  [edx],xmm0
        lea     edx,[16+edx]
        shufps  xmm4,xmm2,16
        xorps   xmm2,xmm4
        shufps  xmm4,xmm2,140
        xorps   xmm2,xmm4
        shufps  xmm1,xmm1,170
        xorps   xmm2,xmm1
        ret
align   16
        ; ---- 256-bit key, pshufb/aesenclast path ----
L$02714rounds_alt:
        movdqa  xmm5,[ebx]                      ; shuffle mask
        movdqa  xmm4,[32+ebx]                   ; {1,1,1,1}
        mov     ecx,7
        movdqu  [edx-32],xmm0                   ; round key 0
        movdqa  xmm1,xmm2
        movdqu  [edx-16],xmm2                   ; round key 1
L$031loop_key256:
        db      102,15,56,0,213                 ; pshufb xmm2,xmm5
        db      102,15,56,221,212               ; aesenclast xmm2,xmm4
        movdqa  xmm3,xmm0
        pslldq  xmm0,4
        pxor    xmm3,xmm0
        pslldq  xmm0,4
        pxor    xmm3,xmm0
        pslldq  xmm0,4
        pxor    xmm0,xmm3
        pslld   xmm4,1                          ; double the constant
        pxor    xmm0,xmm2
        movdqu  [edx],xmm0
        dec     ecx
        jz      NEAR L$032done_key256
        pshufd  xmm2,xmm0,255
        pxor    xmm3,xmm3
        db      102,15,56,221,211               ; aesenclast xmm2,xmm3
        movdqa  xmm3,xmm1
        pslldq  xmm1,4
        pxor    xmm3,xmm1
        pslldq  xmm1,4
        pxor    xmm3,xmm1
        pslldq  xmm1,4
        pxor    xmm1,xmm3
        pxor    xmm2,xmm1
        movdqu  [16+edx],xmm2
        lea     edx,[32+edx]
        movdqa  xmm1,xmm2
        jmp     NEAR L$031loop_key256
L$032done_key256:
        mov     ecx,13
        mov     DWORD [16+edx],ecx              ; count at output+240
        ; Success exit: scrub key material from xmm0-xmm5, return 0.
L$025good_key:
        pxor    xmm0,xmm0
        pxor    xmm1,xmm1
        pxor    xmm2,xmm2
        pxor    xmm3,xmm3
        pxor    xmm4,xmm4
        pxor    xmm5,xmm5
        xor     eax,eax
        pop     ebx
        pop     ebp
        ret
align   4
L$017bad_pointer:
        mov     eax,-1
        pop     ebx
        pop     ebp
        ret
align   4
L$020bad_keybits:
        pxor    xmm0,xmm0                       ; key bytes were already loaded
        mov     eax,-2
        pop     ebx
        pop     ebp
        ret

;-----------------------------------------------------------------------
; _aes_hw_set_encrypt_key
; Stack args: [4+esp]  = pointer to the raw key bytes
;             [8+esp]  = key size in bits
;             [12+esp] = pointer to the expanded-key output
; Loads the arguments into eax/ecx/edx and forwards to
; __aesni_set_encrypt_key; returns its result in eax.
;-----------------------------------------------------------------------
global  _aes_hw_set_encrypt_key
align   16
_aes_hw_set_encrypt_key:
L$_aes_hw_set_encrypt_key_begin:
%ifdef BORINGSSL_DISPATCH_TEST
        ; Test hook: set byte 3 of _BORINGSSL_function_hit to 1.
        push    ebx
        push    edx
        call    L$033pic
L$033pic:
        pop     ebx
        lea     ebx,[(_BORINGSSL_function_hit+3-L$033pic)+ebx]
        mov     edx,1
        mov     BYTE [ebx],dl
        pop     edx
        pop     ebx
%endif
        mov     eax,DWORD [4+esp]
        mov     ecx,DWORD [8+esp]
        mov     edx,DWORD [12+esp]
        call    __aesni_set_encrypt_key
        ret

; Constants for the "_alt" key-schedule paths, addressed relative to ebx:
;   +0   shuffle mask 0x0C0F0E0D in each dword
;   +16  shuffle mask 0x04070605 in each dword (not referenced in this file)
;   +32  {1,1,1,1}
;   +48  {27,27,27,27}
; followed by the NUL-terminated ASCII string
;   "AES for Intel AES-NI, CRYPTOGAMS by <appro@openssl.org>"
align   64
L$key_const:
        dd      202313229,202313229,202313229,202313229
        dd      67569157,67569157,67569157,67569157
        dd      1,1,1,1
        dd      27,27,27,27
        db      65,69,83,32,102,111,114,32,73,110,116,101,108,32,65,69
        db      83,45,78,73,44,32,67,82,89,80,84,79,71,65,77,83
        db      32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115
        db      115,108,46,111,114,103,62,0
segment .bss
common  _OPENSSL_ia32cap_P 16
%else
; Work around https://bugzilla.nasm.us/show_bug.cgi?id=3392738
ret
%endif