// This file is generated from a similarly-named Perl script in the BoringSSL
// source tree. Do not edit by hand.

#include <openssl/asm_base.h>

#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86_64) && defined(__ELF__)
.text
.extern	OPENSSL_ia32cap_P
.hidden OPENSSL_ia32cap_P

// Reading notes for this file
// ---------------------------
// Syntax is AT&T (source operand first, destination last). The exported
// functions read their arguments from %rdi, %rsi, %rdx, %rcx, %r8 and %r9.
//
// AES-NI and SSE4.1 instructions are emitted as raw .byte sequences:
//   102,15,56,220,M      aesenc      (66 0F 38 DC /r)
//   102,15,56,221,M      aesenclast  (66 0F 38 DD /r)
//   102,15,56,222,M      aesdec      (66 0F 38 DE /r)
//   102,15,56,223,M      aesdeclast  (66 0F 38 DF /r)
//   102,15,58,34,M,imm   pinsrd      (66 0F 3A 22 /r ib)
// M is the ModRM byte, M = 192 + 8*dst + src, so 209 means "round key in
// xmm1 applied to the state in xmm2" and 208 means "xmm0 applied to xmm2".
// A byte of 68, 65 or 69 directly after 102 is a REX prefix that adds 8 to
// the dst, the src, or both register numbers (reaching xmm8-xmm15).
//
// Throughout the file the 32-bit value at offset 240 of the key schedule is
// used as the loop count of the round loops, and round keys are 16 bytes
// apart starting at offset 0.
// NOTE(review): presumably offset 240 is the rounds field of AES_KEY;
// confirm against the C declaration.

// aes_hw_encrypt: encrypt a single 16-byte block.
//   %rdi  address the input block is loaded from
//   %rsi  address the output block is stored to
//   %rdx  key schedule
// Clobbers %eax, %rdx and flags. %xmm0-%xmm2 are zeroed before returning.
.globl	aes_hw_encrypt
.hidden aes_hw_encrypt
.type	aes_hw_encrypt,@function
.align	16
aes_hw_encrypt:
.cfi_startproc
_CET_ENDBR
#ifdef BORINGSSL_DISPATCH_TEST
.extern	BORINGSSL_function_hit
.hidden BORINGSSL_function_hit
	movb	$1,BORINGSSL_function_hit+1(%rip)
#endif
	movups	(%rdi),%xmm2
	movl	240(%rdx),%eax
	movups	(%rdx),%xmm0
	movups	16(%rdx),%xmm1
	leaq	32(%rdx),%rdx
// Whitening: state ^= round key 0.
	xorps	%xmm0,%xmm2
// One aesenc per iteration. decl sets ZF for the jnz below; the movups and
// leaq in between leave the flags untouched.
.Loop_enc1_1:
.byte	102,15,56,220,209
	decl	%eax
	movups	(%rdx),%xmm1
	leaq	16(%rdx),%rdx
	jnz	.Loop_enc1_1
// aesenclast with the last round key loaded by the loop.
.byte	102,15,56,221,209
	pxor	%xmm0,%xmm0
	pxor	%xmm1,%xmm1
	movups	%xmm2,(%rsi)
	pxor	%xmm2,%xmm2
	ret
.cfi_endproc
.size	aes_hw_encrypt,.-aes_hw_encrypt

// aes_hw_decrypt: decrypt a single 16-byte block.
// Same register contract and structure as aes_hw_encrypt, using aesdec and
// aesdeclast.
.globl	aes_hw_decrypt
.hidden aes_hw_decrypt
.type	aes_hw_decrypt,@function
.align	16
aes_hw_decrypt:
.cfi_startproc
_CET_ENDBR
	movups	(%rdi),%xmm2
	movl	240(%rdx),%eax
	movups	(%rdx),%xmm0
	movups	16(%rdx),%xmm1
	leaq	32(%rdx),%rdx
	xorps	%xmm0,%xmm2
.Loop_dec1_2:
.byte	102,15,56,222,209
	decl	%eax
	movups	(%rdx),%xmm1
	leaq	16(%rdx),%rdx
	jnz	.Loop_dec1_2
.byte	102,15,56,223,209
	pxor	%xmm0,%xmm0
	pxor	%xmm1,%xmm1
	movups	%xmm2,(%rsi)
	pxor	%xmm2,%xmm2
	ret
.cfi_endproc
.size	aes_hw_decrypt, .-aes_hw_decrypt

// _aesni_encrypt2: internal helper, encrypts two blocks in parallel.
//   in:   %xmm2,%xmm3  blocks
//         %rcx         key schedule
//         %eax         loop count (the callers in this file load it from
//                      240(key)); it is scaled by 16 into a byte offset
//   out:  %xmm2,%xmm3  results
// Clobbers %rax, %rcx, %xmm0, %xmm1 and flags.
// %rcx is advanced to key+32+16*count and %rax becomes a negative byte
// index that climbs by 32 per iteration; the loop ends when it reaches 0.
// Round keys alternate between %xmm1 and %xmm0.
.type	_aesni_encrypt2,@function
.align	16
_aesni_encrypt2:
.cfi_startproc
	movups	(%rcx),%xmm0
	shll	$4,%eax
	movups	16(%rcx),%xmm1
	xorps	%xmm0,%xmm2
	xorps	%xmm0,%xmm3
	movups	32(%rcx),%xmm0
	leaq	32(%rcx,%rax,1),%rcx
	negq	%rax
	addq	$16,%rax

.Lenc_loop2:
.byte	102,15,56,220,209
.byte	102,15,56,220,217
	movups	(%rcx,%rax,1),%xmm1
// ZF from this add is consumed by the jnz; nothing in between writes flags.
	addq	$32,%rax
.byte	102,15,56,220,208
.byte	102,15,56,220,216
	movups	-16(%rcx,%rax,1),%xmm0
	jnz	.Lenc_loop2

.byte	102,15,56,220,209
.byte	102,15,56,220,217
.byte	102,15,56,221,208
.byte	102,15,56,221,216
	ret
.cfi_endproc
.size	_aesni_encrypt2,.-_aesni_encrypt2

// _aesni_decrypt2: internal helper, decrypts the two blocks in
// %xmm2,%xmm3. Same contract and structure as _aesni_encrypt2.
.type	_aesni_decrypt2,@function
.align	16
_aesni_decrypt2:
.cfi_startproc
	movups	(%rcx),%xmm0
	shll	$4,%eax
	movups	16(%rcx),%xmm1
	xorps	%xmm0,%xmm2
	xorps	%xmm0,%xmm3
	movups	32(%rcx),%xmm0
	leaq	32(%rcx,%rax,1),%rcx
	negq	%rax
	addq	$16,%rax

.Ldec_loop2:
.byte	102,15,56,222,209
.byte	102,15,56,222,217
	movups	(%rcx,%rax,1),%xmm1
	addq	$32,%rax
.byte	102,15,56,222,208
.byte	102,15,56,222,216
	movups	-16(%rcx,%rax,1),%xmm0
	jnz	.Ldec_loop2

.byte	102,15,56,222,209
.byte	102,15,56,222,217
.byte	102,15,56,223,208
.byte	102,15,56,223,216
	ret
.cfi_endproc
.size	_aesni_decrypt2,.-_aesni_decrypt2

// _aesni_encrypt3: internal helper, encrypts the three blocks in
// %xmm2-%xmm4. Inputs, clobbers and loop structure as in _aesni_encrypt2.
.type	_aesni_encrypt3,@function
.align	16
_aesni_encrypt3:
.cfi_startproc
	movups	(%rcx),%xmm0
	shll	$4,%eax
	movups	16(%rcx),%xmm1
	xorps	%xmm0,%xmm2
	xorps	%xmm0,%xmm3
	xorps	%xmm0,%xmm4
	movups	32(%rcx),%xmm0
	leaq	32(%rcx,%rax,1),%rcx
	negq	%rax
	addq	$16,%rax

.Lenc_loop3:
.byte	102,15,56,220,209
.byte	102,15,56,220,217
.byte	102,15,56,220,225
	movups	(%rcx,%rax,1),%xmm1
	addq	$32,%rax
.byte	102,15,56,220,208
.byte	102,15,56,220,216
.byte	102,15,56,220,224
	movups	-16(%rcx,%rax,1),%xmm0
	jnz	.Lenc_loop3

.byte	102,15,56,220,209
.byte	102,15,56,220,217
.byte	102,15,56,220,225
.byte	102,15,56,221,208
.byte	102,15,56,221,216
.byte	102,15,56,221,224
	ret
.cfi_endproc
.size	_aesni_encrypt3,.-_aesni_encrypt3

// _aesni_decrypt3: internal helper, decrypts the three blocks in
// %xmm2-%xmm4. Same contract and structure as _aesni_encrypt3.
.type	_aesni_decrypt3,@function
.align	16
_aesni_decrypt3:
.cfi_startproc
	movups	(%rcx),%xmm0
	shll	$4,%eax
	movups	16(%rcx),%xmm1
	xorps	%xmm0,%xmm2
	xorps	%xmm0,%xmm3
	xorps	%xmm0,%xmm4
	movups	32(%rcx),%xmm0
	leaq	32(%rcx,%rax,1),%rcx
	negq	%rax
	addq	$16,%rax

.Ldec_loop3:
.byte	102,15,56,222,209
.byte	102,15,56,222,217
.byte	102,15,56,222,225
	movups	(%rcx,%rax,1),%xmm1
	addq	$32,%rax
.byte	102,15,56,222,208
.byte	102,15,56,222,216
.byte	102,15,56,222,224
	movups	-16(%rcx,%rax,1),%xmm0
	jnz	.Ldec_loop3

.byte	102,15,56,222,209
.byte	102,15,56,222,217
.byte	102,15,56,222,225
.byte	102,15,56,223,208
.byte	102,15,56,223,216
.byte	102,15,56,223,224
	ret
.cfi_endproc
.size	_aesni_decrypt3,.-_aesni_decrypt3

// _aesni_encrypt4: internal helper, encrypts the four blocks in
// %xmm2-%xmm5. Inputs, clobbers and loop structure as in _aesni_encrypt2.
.type	_aesni_encrypt4,@function
.align	16
_aesni_encrypt4:
.cfi_startproc
	movups	(%rcx),%xmm0
	shll	$4,%eax
	movups	16(%rcx),%xmm1
	xorps	%xmm0,%xmm2
	xorps	%xmm0,%xmm3
	xorps	%xmm0,%xmm4
	xorps	%xmm0,%xmm5
	movups	32(%rcx),%xmm0
	leaq	32(%rcx,%rax,1),%rcx
	negq	%rax
// 0F 1F 00 is a three-byte nop (nopl (%rax)); it only pads the code.
.byte	0x0f,0x1f,0x00
	addq	$16,%rax

.Lenc_loop4:
.byte	102,15,56,220,209
.byte	102,15,56,220,217
.byte	102,15,56,220,225
.byte	102,15,56,220,233
	movups	(%rcx,%rax,1),%xmm1
	addq	$32,%rax
.byte	102,15,56,220,208
.byte	102,15,56,220,216
.byte	102,15,56,220,224
.byte	102,15,56,220,232
	movups	-16(%rcx,%rax,1),%xmm0
	jnz	.Lenc_loop4

.byte	102,15,56,220,209
.byte	102,15,56,220,217
.byte	102,15,56,220,225
.byte	102,15,56,220,233
.byte	102,15,56,221,208
.byte	102,15,56,221,216
.byte	102,15,56,221,224
.byte	102,15,56,221,232
	ret
.cfi_endproc
.size	_aesni_encrypt4,.-_aesni_encrypt4

// _aesni_decrypt4: internal helper, decrypts the four blocks in
// %xmm2-%xmm5. Same contract and structure as _aesni_encrypt4.
.type	_aesni_decrypt4,@function
.align	16
_aesni_decrypt4:
.cfi_startproc
	movups	(%rcx),%xmm0
	shll	$4,%eax
	movups	16(%rcx),%xmm1
	xorps	%xmm0,%xmm2
	xorps	%xmm0,%xmm3
	xorps	%xmm0,%xmm4
	xorps	%xmm0,%xmm5
	movups	32(%rcx),%xmm0
	leaq	32(%rcx,%rax,1),%rcx
	negq	%rax
.byte	0x0f,0x1f,0x00
	addq	$16,%rax

.Ldec_loop4:
.byte	102,15,56,222,209
.byte	102,15,56,222,217
.byte	102,15,56,222,225
.byte	102,15,56,222,233
	movups	(%rcx,%rax,1),%xmm1
	addq	$32,%rax
.byte	102,15,56,222,208
.byte	102,15,56,222,216
.byte	102,15,56,222,224
.byte	102,15,56,222,232
	movups	-16(%rcx,%rax,1),%xmm0
	jnz	.Ldec_loop4

.byte	102,15,56,222,209
.byte	102,15,56,222,217
.byte	102,15,56,222,225
.byte	102,15,56,222,233
.byte	102,15,56,223,208
.byte	102,15,56,223,216
.byte	102,15,56,223,224
.byte	102,15,56,223,232
	ret
.cfi_endproc
.size	_aesni_decrypt4,.-_aesni_decrypt4

// _aesni_encrypt6: internal helper, encrypts the six blocks in
// %xmm2-%xmm7. Inputs and clobbers as in _aesni_encrypt2.
// The first round (key in %xmm1) is interleaved with the whitening XORs, so
// the loop is entered part-way through at .Lenc_loop6_enter after the first
// three blocks have already had that round applied.
.type	_aesni_encrypt6,@function
.align	16
_aesni_encrypt6:
.cfi_startproc
	movups	(%rcx),%xmm0
	shll	$4,%eax
	movups	16(%rcx),%xmm1
	xorps	%xmm0,%xmm2
	pxor	%xmm0,%xmm3
	pxor	%xmm0,%xmm4
.byte	102,15,56,220,209
	leaq	32(%rcx,%rax,1),%rcx
	negq	%rax
.byte	102,15,56,220,217
	pxor	%xmm0,%xmm5
	pxor	%xmm0,%xmm6
.byte	102,15,56,220,225
	pxor	%xmm0,%xmm7
	movups	(%rcx,%rax,1),%xmm0
	addq	$16,%rax
	jmp	.Lenc_loop6_enter
.align	16
.Lenc_loop6:
.byte	102,15,56,220,209
.byte	102,15,56,220,217
.byte	102,15,56,220,225
.Lenc_loop6_enter:
.byte	102,15,56,220,233
.byte	102,15,56,220,241
.byte	102,15,56,220,249
	movups	(%rcx,%rax,1),%xmm1
	addq	$32,%rax
.byte	102,15,56,220,208
.byte	102,15,56,220,216
.byte	102,15,56,220,224
.byte	102,15,56,220,232
.byte	102,15,56,220,240
.byte	102,15,56,220,248
	movups	-16(%rcx,%rax,1),%xmm0
	jnz	.Lenc_loop6

.byte	102,15,56,220,209
.byte	102,15,56,220,217
.byte	102,15,56,220,225
.byte	102,15,56,220,233
.byte	102,15,56,220,241
.byte	102,15,56,220,249
.byte	102,15,56,221,208
.byte	102,15,56,221,216
.byte	102,15,56,221,224
.byte	102,15,56,221,232
.byte	102,15,56,221,240
.byte	102,15,56,221,248
	ret
.cfi_endproc
.size	_aesni_encrypt6,.-_aesni_encrypt6

// _aesni_decrypt6: internal helper, decrypts the six blocks in
// %xmm2-%xmm7. Same contract and structure as _aesni_encrypt6.
.type	_aesni_decrypt6,@function
.align	16
_aesni_decrypt6:
.cfi_startproc
	movups	(%rcx),%xmm0
	shll	$4,%eax
	movups	16(%rcx),%xmm1
	xorps	%xmm0,%xmm2
	pxor	%xmm0,%xmm3
	pxor	%xmm0,%xmm4
.byte	102,15,56,222,209
	leaq	32(%rcx,%rax,1),%rcx
	negq	%rax
.byte	102,15,56,222,217
	pxor	%xmm0,%xmm5
	pxor	%xmm0,%xmm6
.byte	102,15,56,222,225
	pxor	%xmm0,%xmm7
	movups	(%rcx,%rax,1),%xmm0
	addq	$16,%rax
	jmp	.Ldec_loop6_enter
.align	16
.Ldec_loop6:
.byte	102,15,56,222,209
.byte	102,15,56,222,217
.byte	102,15,56,222,225
.Ldec_loop6_enter:
.byte	102,15,56,222,233
.byte	102,15,56,222,241
.byte	102,15,56,222,249
	movups	(%rcx,%rax,1),%xmm1
	addq	$32,%rax
.byte	102,15,56,222,208
.byte	102,15,56,222,216
.byte	102,15,56,222,224
.byte	102,15,56,222,232
.byte	102,15,56,222,240
.byte	102,15,56,222,248
	movups	-16(%rcx,%rax,1),%xmm0
	jnz	.Ldec_loop6

.byte	102,15,56,222,209
.byte	102,15,56,222,217
.byte	102,15,56,222,225
.byte	102,15,56,222,233
.byte	102,15,56,222,241
.byte	102,15,56,222,249
.byte	102,15,56,223,208
.byte	102,15,56,223,216
.byte	102,15,56,223,224
.byte	102,15,56,223,232
.byte	102,15,56,223,240
.byte	102,15,56,223,248
	ret
.cfi_endproc
.size	_aesni_decrypt6,.-_aesni_decrypt6

// _aesni_encrypt8: internal helper, encrypts the eight blocks in
// %xmm2-%xmm9. Inputs and clobbers as in _aesni_encrypt2.
// The first round is interleaved with the whitening XORs and the loop is
// entered at .Lenc_loop8_inner. The label .Lenc_loop8_enter is a second
// entry point: aes_hw_ctr32_encrypt_blocks calls it directly after doing
// its own setup of %rcx, %rax, %xmm0 and the first round.
.type	_aesni_encrypt8,@function
.align	16
_aesni_encrypt8:
.cfi_startproc
	movups	(%rcx),%xmm0
	shll	$4,%eax
	movups	16(%rcx),%xmm1
	xorps	%xmm0,%xmm2
	xorps	%xmm0,%xmm3
	pxor	%xmm0,%xmm4
	pxor	%xmm0,%xmm5
	pxor	%xmm0,%xmm6
	leaq	32(%rcx,%rax,1),%rcx
	negq	%rax
.byte	102,15,56,220,209
	pxor	%xmm0,%xmm7
	pxor	%xmm0,%xmm8
.byte	102,15,56,220,217
	pxor	%xmm0,%xmm9
	movups	(%rcx,%rax,1),%xmm0
	addq	$16,%rax
	jmp	.Lenc_loop8_inner
.align	16
.Lenc_loop8:
.byte	102,15,56,220,209
.byte	102,15,56,220,217
.Lenc_loop8_inner:
.byte	102,15,56,220,225
.byte	102,15,56,220,233
.byte	102,15,56,220,241
.byte	102,15,56,220,249
.byte	102,68,15,56,220,193
.byte	102,68,15,56,220,201
.Lenc_loop8_enter:
	movups	(%rcx,%rax,1),%xmm1
	addq	$32,%rax
.byte	102,15,56,220,208
.byte	102,15,56,220,216
.byte	102,15,56,220,224
.byte	102,15,56,220,232
.byte	102,15,56,220,240
.byte	102,15,56,220,248
.byte	102,68,15,56,220,192
.byte	102,68,15,56,220,200
	movups	-16(%rcx,%rax,1),%xmm0
	jnz	.Lenc_loop8

.byte	102,15,56,220,209
.byte	102,15,56,220,217
.byte	102,15,56,220,225
.byte	102,15,56,220,233
.byte	102,15,56,220,241
.byte	102,15,56,220,249
.byte	102,68,15,56,220,193
.byte	102,68,15,56,220,201
.byte	102,15,56,221,208
.byte	102,15,56,221,216
.byte	102,15,56,221,224
.byte	102,15,56,221,232
.byte	102,15,56,221,240
.byte	102,15,56,221,248
.byte	102,68,15,56,221,192
.byte	102,68,15,56,221,200
	ret
.cfi_endproc
.size	_aesni_encrypt8,.-_aesni_encrypt8

// _aesni_decrypt8: internal helper, decrypts the eight blocks in
// %xmm2-%xmm9. Same contract and structure as _aesni_encrypt8.
.type	_aesni_decrypt8,@function
.align	16
_aesni_decrypt8:
.cfi_startproc
	movups	(%rcx),%xmm0
	shll	$4,%eax
	movups	16(%rcx),%xmm1
	xorps	%xmm0,%xmm2
	xorps	%xmm0,%xmm3
	pxor	%xmm0,%xmm4
	pxor	%xmm0,%xmm5
	pxor	%xmm0,%xmm6
	leaq	32(%rcx,%rax,1),%rcx
	negq	%rax
.byte	102,15,56,222,209
	pxor	%xmm0,%xmm7
	pxor	%xmm0,%xmm8
.byte	102,15,56,222,217
	pxor	%xmm0,%xmm9
	movups	(%rcx,%rax,1),%xmm0
	addq	$16,%rax
	jmp	.Ldec_loop8_inner
.align	16
.Ldec_loop8:
.byte	102,15,56,222,209
.byte	102,15,56,222,217
.Ldec_loop8_inner:
.byte	102,15,56,222,225
.byte	102,15,56,222,233
.byte	102,15,56,222,241
.byte	102,15,56,222,249
.byte	102,68,15,56,222,193
.byte	102,68,15,56,222,201
.Ldec_loop8_enter:
	movups	(%rcx,%rax,1),%xmm1
	addq	$32,%rax
.byte	102,15,56,222,208
.byte	102,15,56,222,216
.byte	102,15,56,222,224
.byte	102,15,56,222,232
.byte	102,15,56,222,240
.byte	102,15,56,222,248
.byte	102,68,15,56,222,192
.byte	102,68,15,56,222,200
	movups	-16(%rcx,%rax,1),%xmm0
	jnz	.Ldec_loop8

.byte	102,15,56,222,209
.byte	102,15,56,222,217
.byte	102,15,56,222,225
.byte	102,15,56,222,233
.byte	102,15,56,222,241
.byte	102,15,56,222,249
.byte	102,68,15,56,222,193
.byte	102,68,15,56,222,201
.byte	102,15,56,223,208
.byte	102,15,56,223,216
.byte	102,15,56,223,224
.byte	102,15,56,223,232
.byte	102,15,56,223,240
.byte	102,15,56,223,248
.byte	102,68,15,56,223,192
.byte	102,68,15,56,223,200
	ret
.cfi_endproc
.size	_aesni_decrypt8,.-_aesni_decrypt8

// aes_hw_ecb_encrypt: process a buffer as independent 16-byte blocks.
//   %rdi  input pointer
//   %rsi  output pointer
//   %rdx  length in bytes; it is rounded down to a multiple of 16 and the
//         function returns at once if nothing is left
//   %rcx  key schedule
//   %r8d  nonzero selects the encrypt path, zero the decrypt path
// %r11 and %r10d keep copies of the key pointer and loop count because the
// _aesni_* helpers clobber %rcx and %eax. Full groups of eight blocks go
// through _aesni_encrypt8/_aesni_decrypt8; the remaining one to seven blocks
// are dispatched to the matching helper. The decrypt path zeroes each data
// register after storing it, and .Lecb_ret zeroes %xmm0 and %xmm1.
.globl	aes_hw_ecb_encrypt
.hidden aes_hw_ecb_encrypt
.type	aes_hw_ecb_encrypt,@function
.align	16
aes_hw_ecb_encrypt:
.cfi_startproc
_CET_ENDBR
	andq	$-16,%rdx
	jz	.Lecb_ret

	movl	240(%rcx),%eax
	movups	(%rcx),%xmm0
	movq	%rcx,%r11
	movl	%eax,%r10d
	testl	%r8d,%r8d
	jz	.Lecb_decrypt

	cmpq	$0x80,%rdx
	jb	.Lecb_enc_tail

	movdqu	(%rdi),%xmm2
	movdqu	16(%rdi),%xmm3
	movdqu	32(%rdi),%xmm4
	movdqu	48(%rdi),%xmm5
	movdqu	64(%rdi),%xmm6
	movdqu	80(%rdi),%xmm7
	movdqu	96(%rdi),%xmm8
	movdqu	112(%rdi),%xmm9
	leaq	128(%rdi),%rdi
// %rdx is kept 0x80 ahead: the subq/jnc pair after the call loops only
// while another full group of eight blocks remains.
	subq	$0x80,%rdx
	jmp	.Lecb_enc_loop8_enter
.align	16
// Store the previous eight results while loading the next eight inputs, and
// restore %rcx/%eax for the helper.
.Lecb_enc_loop8:
	movups	%xmm2,(%rsi)
	movq	%r11,%rcx
	movdqu	(%rdi),%xmm2
	movl	%r10d,%eax
	movups	%xmm3,16(%rsi)
	movdqu	16(%rdi),%xmm3
	movups	%xmm4,32(%rsi)
	movdqu	32(%rdi),%xmm4
	movups	%xmm5,48(%rsi)
	movdqu	48(%rdi),%xmm5
	movups	%xmm6,64(%rsi)
	movdqu	64(%rdi),%xmm6
	movups	%xmm7,80(%rsi)
	movdqu	80(%rdi),%xmm7
	movups	%xmm8,96(%rsi)
	movdqu	96(%rdi),%xmm8
	movups	%xmm9,112(%rsi)
	leaq	128(%rsi),%rsi
	movdqu	112(%rdi),%xmm9
	leaq	128(%rdi),%rdi
.Lecb_enc_loop8_enter:

	call	_aesni_encrypt8

	subq	$0x80,%rdx
	jnc	.Lecb_enc_loop8

	movups	%xmm2,(%rsi)
	movq	%r11,%rcx
	movups	%xmm3,16(%rsi)
	movl	%r10d,%eax
	movups	%xmm4,32(%rsi)
	movups	%xmm5,48(%rsi)
	movups	%xmm6,64(%rsi)
	movups	%xmm7,80(%rsi)
	movups	%xmm8,96(%rsi)
	movups	%xmm9,112(%rsi)
	leaq	128(%rsi),%rsi
// Undo the bias; what is left in %rdx is the tail length (0 to 0x70).
	addq	$0x80,%rdx
	jz	.Lecb_ret

// Tail dispatch: each cmpq feeds both the jb and the following je, since
// the movups loads in between do not modify flags.
.Lecb_enc_tail:
	movups	(%rdi),%xmm2
	cmpq	$0x20,%rdx
	jb	.Lecb_enc_one
	movups	16(%rdi),%xmm3
	je	.Lecb_enc_two
	movups	32(%rdi),%xmm4
	cmpq	$0x40,%rdx
	jb	.Lecb_enc_three
	movups	48(%rdi),%xmm5
	je	.Lecb_enc_four
	movups	64(%rdi),%xmm6
	cmpq	$0x60,%rdx
	jb	.Lecb_enc_five
	movups	80(%rdi),%xmm7
	je	.Lecb_enc_six
// Seven blocks: run the eight-block helper with a zeroed dummy in %xmm9 and
// store only seven results.
	movdqu	96(%rdi),%xmm8
	xorps	%xmm9,%xmm9
	call	_aesni_encrypt8
	movups	%xmm2,(%rsi)
	movups	%xmm3,16(%rsi)
	movups	%xmm4,32(%rsi)
	movups	%xmm5,48(%rsi)
	movups	%xmm6,64(%rsi)
	movups	%xmm7,80(%rsi)
	movups	%xmm8,96(%rsi)
	jmp	.Lecb_ret
.align	16
.Lecb_enc_one:
	movups	(%rcx),%xmm0
	movups	16(%rcx),%xmm1
	leaq	32(%rcx),%rcx
	xorps	%xmm0,%xmm2
.Loop_enc1_3:
.byte	102,15,56,220,209
	decl	%eax
	movups	(%rcx),%xmm1
	leaq	16(%rcx),%rcx
	jnz	.Loop_enc1_3
.byte	102,15,56,221,209
	movups	%xmm2,(%rsi)
	jmp	.Lecb_ret
.align	16
.Lecb_enc_two:
	call	_aesni_encrypt2
	movups	%xmm2,(%rsi)
	movups	%xmm3,16(%rsi)
	jmp	.Lecb_ret
.align	16
.Lecb_enc_three:
	call	_aesni_encrypt3
	movups	%xmm2,(%rsi)
	movups	%xmm3,16(%rsi)
	movups	%xmm4,32(%rsi)
	jmp	.Lecb_ret
.align	16
.Lecb_enc_four:
	call	_aesni_encrypt4
	movups	%xmm2,(%rsi)
	movups	%xmm3,16(%rsi)
	movups	%xmm4,32(%rsi)
	movups	%xmm5,48(%rsi)
	jmp	.Lecb_ret
.align	16
// Five blocks: six-block helper with a zeroed dummy in %xmm7.
.Lecb_enc_five:
	xorps	%xmm7,%xmm7
	call	_aesni_encrypt6
	movups	%xmm2,(%rsi)
	movups	%xmm3,16(%rsi)
	movups	%xmm4,32(%rsi)
	movups	%xmm5,48(%rsi)
	movups	%xmm6,64(%rsi)
	jmp	.Lecb_ret
.align	16
.Lecb_enc_six:
	call	_aesni_encrypt6
	movups	%xmm2,(%rsi)
	movups	%xmm3,16(%rsi)
	movups	%xmm4,32(%rsi)
	movups	%xmm5,48(%rsi)
	movups	%xmm6,64(%rsi)
	movups	%xmm7,80(%rsi)
	jmp	.Lecb_ret

.align	16
// Decrypt side: mirrors the encrypt side, with each data register cleared
// (pxor) right after it has been stored.
.Lecb_decrypt:
	cmpq	$0x80,%rdx
	jb	.Lecb_dec_tail

	movdqu	(%rdi),%xmm2
	movdqu	16(%rdi),%xmm3
	movdqu	32(%rdi),%xmm4
	movdqu	48(%rdi),%xmm5
	movdqu	64(%rdi),%xmm6
	movdqu	80(%rdi),%xmm7
	movdqu	96(%rdi),%xmm8
	movdqu	112(%rdi),%xmm9
	leaq	128(%rdi),%rdi
	subq	$0x80,%rdx
	jmp	.Lecb_dec_loop8_enter
.align	16
.Lecb_dec_loop8:
	movups	%xmm2,(%rsi)
	movq	%r11,%rcx
	movdqu	(%rdi),%xmm2
	movl	%r10d,%eax
	movups	%xmm3,16(%rsi)
	movdqu	16(%rdi),%xmm3
	movups	%xmm4,32(%rsi)
	movdqu	32(%rdi),%xmm4
	movups	%xmm5,48(%rsi)
	movdqu	48(%rdi),%xmm5
	movups	%xmm6,64(%rsi)
	movdqu	64(%rdi),%xmm6
	movups	%xmm7,80(%rsi)
	movdqu	80(%rdi),%xmm7
	movups	%xmm8,96(%rsi)
	movdqu	96(%rdi),%xmm8
	movups	%xmm9,112(%rsi)
	leaq	128(%rsi),%rsi
	movdqu	112(%rdi),%xmm9
	leaq	128(%rdi),%rdi
.Lecb_dec_loop8_enter:

	call	_aesni_decrypt8

	movups	(%r11),%xmm0
	subq	$0x80,%rdx
	jnc	.Lecb_dec_loop8

	movups	%xmm2,(%rsi)
	pxor	%xmm2,%xmm2
	movq	%r11,%rcx
	movups	%xmm3,16(%rsi)
	pxor	%xmm3,%xmm3
	movl	%r10d,%eax
	movups	%xmm4,32(%rsi)
	pxor	%xmm4,%xmm4
	movups	%xmm5,48(%rsi)
	pxor	%xmm5,%xmm5
	movups	%xmm6,64(%rsi)
	pxor	%xmm6,%xmm6
	movups	%xmm7,80(%rsi)
	pxor	%xmm7,%xmm7
	movups	%xmm8,96(%rsi)
	pxor	%xmm8,%xmm8
	movups	%xmm9,112(%rsi)
	pxor	%xmm9,%xmm9
	leaq	128(%rsi),%rsi
	addq	$0x80,%rdx
	jz	.Lecb_ret

.Lecb_dec_tail:
	movups	(%rdi),%xmm2
	cmpq	$0x20,%rdx
	jb	.Lecb_dec_one
	movups	16(%rdi),%xmm3
	je	.Lecb_dec_two
	movups	32(%rdi),%xmm4
	cmpq	$0x40,%rdx
	jb	.Lecb_dec_three
	movups	48(%rdi),%xmm5
	je	.Lecb_dec_four
	movups	64(%rdi),%xmm6
	cmpq	$0x60,%rdx
	jb	.Lecb_dec_five
	movups	80(%rdi),%xmm7
	je	.Lecb_dec_six
	movups	96(%rdi),%xmm8
	movups	(%rcx),%xmm0
	xorps	%xmm9,%xmm9
	call	_aesni_decrypt8
	movups	%xmm2,(%rsi)
	pxor	%xmm2,%xmm2
	movups	%xmm3,16(%rsi)
	pxor	%xmm3,%xmm3
	movups	%xmm4,32(%rsi)
	pxor	%xmm4,%xmm4
	movups	%xmm5,48(%rsi)
	pxor	%xmm5,%xmm5
	movups	%xmm6,64(%rsi)
	pxor	%xmm6,%xmm6
	movups	%xmm7,80(%rsi)
	pxor	%xmm7,%xmm7
	movups	%xmm8,96(%rsi)
	pxor	%xmm8,%xmm8
	pxor	%xmm9,%xmm9
	jmp	.Lecb_ret
.align	16
.Lecb_dec_one:
	movups	(%rcx),%xmm0
	movups	16(%rcx),%xmm1
	leaq	32(%rcx),%rcx
	xorps	%xmm0,%xmm2
.Loop_dec1_4:
.byte	102,15,56,222,209
	decl	%eax
	movups	(%rcx),%xmm1
	leaq	16(%rcx),%rcx
	jnz	.Loop_dec1_4
.byte	102,15,56,223,209
	movups	%xmm2,(%rsi)
	pxor	%xmm2,%xmm2
	jmp	.Lecb_ret
.align	16
.Lecb_dec_two:
	call	_aesni_decrypt2
	movups	%xmm2,(%rsi)
	pxor	%xmm2,%xmm2
	movups	%xmm3,16(%rsi)
	pxor	%xmm3,%xmm3
	jmp	.Lecb_ret
.align	16
.Lecb_dec_three:
	call	_aesni_decrypt3
	movups	%xmm2,(%rsi)
	pxor	%xmm2,%xmm2
	movups	%xmm3,16(%rsi)
	pxor	%xmm3,%xmm3
	movups	%xmm4,32(%rsi)
	pxor	%xmm4,%xmm4
	jmp	.Lecb_ret
.align	16
.Lecb_dec_four:
	call	_aesni_decrypt4
	movups	%xmm2,(%rsi)
	pxor	%xmm2,%xmm2
	movups	%xmm3,16(%rsi)
	pxor	%xmm3,%xmm3
	movups	%xmm4,32(%rsi)
	pxor	%xmm4,%xmm4
	movups	%xmm5,48(%rsi)
	pxor	%xmm5,%xmm5
	jmp	.Lecb_ret
.align	16
.Lecb_dec_five:
	xorps	%xmm7,%xmm7
	call	_aesni_decrypt6
	movups	%xmm2,(%rsi)
	pxor	%xmm2,%xmm2
	movups	%xmm3,16(%rsi)
	pxor	%xmm3,%xmm3
	movups	%xmm4,32(%rsi)
	pxor	%xmm4,%xmm4
	movups	%xmm5,48(%rsi)
	pxor	%xmm5,%xmm5
	movups	%xmm6,64(%rsi)
	pxor	%xmm6,%xmm6
	pxor	%xmm7,%xmm7
	jmp	.Lecb_ret
.align	16
// Six blocks: falls through into .Lecb_ret.
.Lecb_dec_six:
	call	_aesni_decrypt6
	movups	%xmm2,(%rsi)
	pxor	%xmm2,%xmm2
	movups	%xmm3,16(%rsi)
	pxor	%xmm3,%xmm3
	movups	%xmm4,32(%rsi)
	pxor	%xmm4,%xmm4
	movups	%xmm5,48(%rsi)
	pxor	%xmm5,%xmm5
	movups	%xmm6,64(%rsi)
	pxor	%xmm6,%xmm6
	movups	%xmm7,80(%rsi)
	pxor	%xmm7,%xmm7

.Lecb_ret:
	xorps	%xmm0,%xmm0
	pxor	%xmm1,%xmm1
	ret
.cfi_endproc
.size	aes_hw_ecb_encrypt,.-aes_hw_ecb_encrypt

// aes_hw_ctr32_encrypt_blocks: counter-mode keystream XOR.
//   %rdi  input pointer
//   %rsi  output pointer
//   %rdx  number of 16-byte blocks
//   %rcx  key schedule
//   %r8   16-byte counter block; the 32-bit word at offset 12 is loaded,
//         byte-swapped and incremented per block (a big-endian counter)
// A count of exactly one takes a short path that encrypts the counter block
// and XORs it with the input. Every other count takes the bulk path, which
// builds a 128-byte, 16-byte aligned stack frame holding eight counter
// blocks that are already XORed with round key 0. %r11 keeps the entry
// %rsp and %rbp (callee-saved) is pushed and later reloaded from -8(%r11).
// On the bulk path all xmm registers and the stack frame are zeroed at
// .Lctr32_done.
// NOTE(review): a block count of zero is not special-cased; it takes the
// bulk path and reaches .Lctr32_loop3, which stores one block before it
// compares %rdx. Presumably callers never pass zero - confirm.
.globl	aes_hw_ctr32_encrypt_blocks
.hidden aes_hw_ctr32_encrypt_blocks
.type	aes_hw_ctr32_encrypt_blocks,@function
.align	16
aes_hw_ctr32_encrypt_blocks:
.cfi_startproc
_CET_ENDBR
#ifdef BORINGSSL_DISPATCH_TEST
	movb	$1,BORINGSSL_function_hit(%rip)
#endif
	cmpq	$1,%rdx
	jne	.Lctr32_bulk



// Single block: %edx is reused as the round loop counter.
	movups	(%r8),%xmm2
	movups	(%rdi),%xmm3
	movl	240(%rcx),%edx
	movups	(%rcx),%xmm0
	movups	16(%rcx),%xmm1
	leaq	32(%rcx),%rcx
	xorps	%xmm0,%xmm2
.Loop_enc1_5:
.byte	102,15,56,220,209
	decl	%edx
	movups	(%rcx),%xmm1
	leaq	16(%rcx),%rcx
	jnz	.Loop_enc1_5
.byte	102,15,56,221,209
	pxor	%xmm0,%xmm0
	pxor	%xmm1,%xmm1
	xorps	%xmm3,%xmm2
	pxor	%xmm3,%xmm3
	movups	%xmm2,(%rsi)
	xorps	%xmm2,%xmm2
	jmp	.Lctr32_epilogue

.align	16
.Lctr32_bulk:
	leaq	(%rsp),%r11
.cfi_def_cfa_register	%r11
	pushq	%rbp
.cfi_offset	%rbp,-16
	subq	$128,%rsp
	andq	$-16,%rsp




// %xmm2 = counter block ^ round key 0, replicated into the stack slots.
// %ebp  = word 3 of round key 0, XORed into every byte-swapped counter
//         value so that patched slots stay pre-whitened.
// %r8d  = counter in host byte order.
	movdqu	(%r8),%xmm2
	movdqu	(%rcx),%xmm0
	movl	12(%r8),%r8d
	pxor	%xmm0,%xmm2
	movl	12(%rcx),%ebp
	movdqa	%xmm2,0(%rsp)
	bswapl	%r8d
	movdqa	%xmm2,%xmm3
	movdqa	%xmm2,%xmm4
	movdqa	%xmm2,%xmm5
	movdqa	%xmm2,64(%rsp)
	movdqa	%xmm2,80(%rsp)
	movdqa	%xmm2,96(%rsp)
// The block count is parked in %r10 while %rdx is used as scratch.
	movq	%rdx,%r10
	movdqa	%xmm2,112(%rsp)

	leaq	1(%r8),%rax
	leaq	2(%r8),%rdx
	bswapl	%eax
	bswapl	%edx
	xorl	%ebp,%eax
	xorl	%ebp,%edx
// pinsrd $3,%eax,%xmm3
.byte	102,15,58,34,216,3
	leaq	3(%r8),%rax
	movdqa	%xmm3,16(%rsp)
// pinsrd $3,%edx,%xmm4
.byte	102,15,58,34,226,3
	bswapl	%eax
	movq	%r10,%rdx
	leaq	4(%r8),%r10
	movdqa	%xmm4,32(%rsp)
	xorl	%ebp,%eax
	bswapl	%r10d
// pinsrd $3,%eax,%xmm5
.byte	102,15,58,34,232,3
	xorl	%ebp,%r10d
	movdqa	%xmm5,48(%rsp)
	leaq	5(%r8),%r9
	movl	%r10d,64+12(%rsp)
	bswapl	%r9d
	leaq	6(%r8),%r10
	movl	240(%rcx),%eax
	xorl	%ebp,%r9d
	bswapl	%r10d
	movl	%r9d,80+12(%rsp)
	xorl	%ebp,%r10d
	leaq	7(%r8),%r9
	movl	%r10d,96+12(%rsp)
	bswapl	%r9d
	xorl	%ebp,%r9d
	movl	%r9d,112+12(%rsp)

	movups	16(%rcx),%xmm1

	movdqa	64(%rsp),%xmm6
	movdqa	80(%rsp),%xmm7

	cmpq	$8,%rdx
	jb	.Lctr32_tail

// In the eight-block loop %rcx is biased by +128, hence the N-128(%rcx)
// round key addresses, and %rdx is kept 8 ahead.
	leaq	128(%rcx),%rcx
	subq	$8,%rdx
	jmp	.Lctr32_loop8

.align	32
// Eight blocks per iteration. The AES rounds for the current blocks are
// interleaved with computing the next eight counter words and patching them
// into the stack slots at N*16+12(%rsp).
.Lctr32_loop8:
	addl	$8,%r8d
	movdqa	96(%rsp),%xmm8
.byte	102,15,56,220,209
	movl	%r8d,%r9d
	movdqa	112(%rsp),%xmm9
.byte	102,15,56,220,217
	bswapl	%r9d
	movups	32-128(%rcx),%xmm0
.byte	102,15,56,220,225
	xorl	%ebp,%r9d
	nop
.byte	102,15,56,220,233
	movl	%r9d,0+12(%rsp)
	leaq	1(%r8),%r9
.byte	102,15,56,220,241
.byte	102,15,56,220,249
.byte	102,68,15,56,220,193
.byte	102,68,15,56,220,201
	movups	48-128(%rcx),%xmm1
	bswapl	%r9d
.byte	102,15,56,220,208
.byte	102,15,56,220,216
	xorl	%ebp,%r9d
// 66 90 is a two-byte nop used as padding.
.byte	0x66,0x90
.byte	102,15,56,220,224
.byte	102,15,56,220,232
	movl	%r9d,16+12(%rsp)
	leaq	2(%r8),%r9
.byte	102,15,56,220,240
.byte	102,15,56,220,248
.byte	102,68,15,56,220,192
.byte	102,68,15,56,220,200
	movups	64-128(%rcx),%xmm0
	bswapl	%r9d
.byte	102,15,56,220,209
.byte	102,15,56,220,217
	xorl	%ebp,%r9d
.byte	0x66,0x90
.byte	102,15,56,220,225
.byte	102,15,56,220,233
	movl	%r9d,32+12(%rsp)
	leaq	3(%r8),%r9
.byte	102,15,56,220,241
.byte	102,15,56,220,249
.byte	102,68,15,56,220,193
.byte	102,68,15,56,220,201
	movups	80-128(%rcx),%xmm1
	bswapl	%r9d
.byte	102,15,56,220,208
.byte	102,15,56,220,216
	xorl	%ebp,%r9d
.byte	0x66,0x90
.byte	102,15,56,220,224
.byte	102,15,56,220,232
	movl	%r9d,48+12(%rsp)
	leaq	4(%r8),%r9
.byte	102,15,56,220,240
.byte	102,15,56,220,248
.byte	102,68,15,56,220,192
.byte	102,68,15,56,220,200
	movups	96-128(%rcx),%xmm0
	bswapl	%r9d
.byte	102,15,56,220,209
.byte	102,15,56,220,217
	xorl	%ebp,%r9d
.byte	0x66,0x90
.byte	102,15,56,220,225
.byte	102,15,56,220,233
	movl	%r9d,64+12(%rsp)
	leaq	5(%r8),%r9
.byte	102,15,56,220,241
.byte	102,15,56,220,249
.byte	102,68,15,56,220,193
.byte	102,68,15,56,220,201
	movups	112-128(%rcx),%xmm1
	bswapl	%r9d
.byte	102,15,56,220,208
.byte	102,15,56,220,216
	xorl	%ebp,%r9d
.byte	0x66,0x90
.byte	102,15,56,220,224
.byte	102,15,56,220,232
	movl	%r9d,80+12(%rsp)
	leaq	6(%r8),%r9
.byte	102,15,56,220,240
.byte	102,15,56,220,248
.byte	102,68,15,56,220,192
.byte	102,68,15,56,220,200
	movups	128-128(%rcx),%xmm0
	bswapl	%r9d
.byte	102,15,56,220,209
.byte	102,15,56,220,217
	xorl	%ebp,%r9d
.byte	0x66,0x90
.byte	102,15,56,220,225
.byte	102,15,56,220,233
	movl	%r9d,96+12(%rsp)
	leaq	7(%r8),%r9
.byte	102,15,56,220,241
.byte	102,15,56,220,249
.byte	102,68,15,56,220,193
.byte	102,68,15,56,220,201
	movups	144-128(%rcx),%xmm1
	bswapl	%r9d
.byte	102,15,56,220,208
.byte	102,15,56,220,216
.byte	102,15,56,220,224
	xorl	%ebp,%r9d
	movdqu	0(%rdi),%xmm10
.byte	102,15,56,220,232
	movl	%r9d,112+12(%rsp)
// The flags of this compare drive both the jb and the later je; the .byte
// AES instructions and movups loads in between do not write flags.
// NOTE(review): presumably the three outcomes correspond to the three AES
// key sizes - confirm against the key setup code.
	cmpl	$11,%eax
.byte	102,15,56,220,240
.byte	102,15,56,220,248
.byte	102,68,15,56,220,192
.byte	102,68,15,56,220,200
	movups	160-128(%rcx),%xmm0

	jb	.Lctr32_enc_done

.byte	102,15,56,220,209
.byte	102,15,56,220,217
.byte	102,15,56,220,225
.byte	102,15,56,220,233
.byte	102,15,56,220,241
.byte	102,15,56,220,249
.byte	102,68,15,56,220,193
.byte	102,68,15,56,220,201
	movups	176-128(%rcx),%xmm1

.byte	102,15,56,220,208
.byte	102,15,56,220,216
.byte	102,15,56,220,224
.byte	102,15,56,220,232
.byte	102,15,56,220,240
.byte	102,15,56,220,248
.byte	102,68,15,56,220,192
.byte	102,68,15,56,220,200
	movups	192-128(%rcx),%xmm0
	je	.Lctr32_enc_done

.byte	102,15,56,220,209
.byte	102,15,56,220,217
.byte	102,15,56,220,225
.byte	102,15,56,220,233
.byte	102,15,56,220,241
.byte	102,15,56,220,249
.byte	102,68,15,56,220,193
.byte	102,68,15,56,220,201
	movups	208-128(%rcx),%xmm1

.byte	102,15,56,220,208
.byte	102,15,56,220,216
.byte	102,15,56,220,224
.byte	102,15,56,220,232
.byte	102,15,56,220,240
.byte	102,15,56,220,248
.byte	102,68,15,56,220,192
.byte	102,68,15,56,220,200
	movups	224-128(%rcx),%xmm0
	jmp	.Lctr32_enc_done

.align	16
// %xmm0 holds the last round key here. Each input block is XORed with it
// first, so the aesenclast below (whose key operand is that input^key
// value) yields keystream ^ input in a single step.
.Lctr32_enc_done:
	movdqu	16(%rdi),%xmm11
	pxor	%xmm0,%xmm10
	movdqu	32(%rdi),%xmm12
	pxor	%xmm0,%xmm11
	movdqu	48(%rdi),%xmm13
	pxor	%xmm0,%xmm12
	movdqu	64(%rdi),%xmm14
	pxor	%xmm0,%xmm13
	movdqu	80(%rdi),%xmm15
	pxor	%xmm0,%xmm14
	prefetcht0	448(%rdi)
	prefetcht0	512(%rdi)
	pxor	%xmm0,%xmm15
.byte	102,15,56,220,209
.byte	102,15,56,220,217
.byte	102,15,56,220,225
.byte	102,15,56,220,233
.byte	102,15,56,220,241
.byte	102,15,56,220,249
.byte	102,68,15,56,220,193
.byte	102,68,15,56,220,201
	movdqu	96(%rdi),%xmm1
	leaq	128(%rdi),%rdi

// aesenclast with %xmm10-%xmm15, %xmm1 and %xmm10 again as key operands,
// while the next eight counter blocks are reloaded from the stack.
.byte	102,65,15,56,221,210
	pxor	%xmm0,%xmm1
	movdqu	112-128(%rdi),%xmm10
.byte	102,65,15,56,221,219
	pxor	%xmm0,%xmm10
	movdqa	0(%rsp),%xmm11
.byte	102,65,15,56,221,228
.byte	102,65,15,56,221,237
	movdqa	16(%rsp),%xmm12
	movdqa	32(%rsp),%xmm13
.byte	102,65,15,56,221,246
.byte	102,65,15,56,221,255
	movdqa	48(%rsp),%xmm14
	movdqa	64(%rsp),%xmm15
.byte	102,68,15,56,221,193
	movdqa	80(%rsp),%xmm0
	movups	16-128(%rcx),%xmm1
.byte	102,69,15,56,221,202

	movups	%xmm2,(%rsi)
	movdqa	%xmm11,%xmm2
	movups	%xmm3,16(%rsi)
	movdqa	%xmm12,%xmm3
	movups	%xmm4,32(%rsi)
	movdqa	%xmm13,%xmm4
	movups	%xmm5,48(%rsi)
	movdqa	%xmm14,%xmm5
	movups	%xmm6,64(%rsi)
	movdqa	%xmm15,%xmm6
	movups	%xmm7,80(%rsi)
	movdqa	%xmm0,%xmm7
	movups	%xmm8,96(%rsi)
	movups	%xmm9,112(%rsi)
	leaq	128(%rsi),%rsi

	subq	$8,%rdx
	jnc	.Lctr32_loop8

// Undo the bias on %rdx and on %rcx before handling the remaining blocks.
	addq	$8,%rdx
	jz	.Lctr32_done
	leaq	-128(%rcx),%rcx

// Tail: fewer than eight blocks remain. Below four goes to .Lctr32_loop3,
// exactly four to .Lctr32_loop4, five to seven use the eight-block round
// loop through its .Lenc_loop8_enter entry point.
.Lctr32_tail:


	leaq	16(%rcx),%rcx
	cmpq	$4,%rdx
	jb	.Lctr32_loop3
	je	.Lctr32_loop4


	shll	$4,%eax
	movdqa	96(%rsp),%xmm8
	pxor	%xmm9,%xmm9

	movups	16(%rcx),%xmm0
.byte	102,15,56,220,209
.byte	102,15,56,220,217
	leaq	32-16(%rcx,%rax,1),%rcx
	negq	%rax
.byte	102,15,56,220,225
	addq	$16,%rax
	movups	(%rdi),%xmm10
.byte	102,15,56,220,233
.byte	102,15,56,220,241
	movups	16(%rdi),%xmm11
	movups	32(%rdi),%xmm12
.byte	102,15,56,220,249
.byte	102,68,15,56,220,193

	call	.Lenc_loop8_enter

	movdqu	48(%rdi),%xmm13
	pxor	%xmm10,%xmm2
	movdqu	64(%rdi),%xmm10
	pxor	%xmm11,%xmm3
	movdqu	%xmm2,(%rsi)
	pxor	%xmm12,%xmm4
	movdqu	%xmm3,16(%rsi)
	pxor	%xmm13,%xmm5
	movdqu	%xmm4,32(%rsi)
	pxor	%xmm10,%xmm6
	movdqu	%xmm5,48(%rsi)
	movdqu	%xmm6,64(%rsi)
// One compare serves both the jb (five blocks) and the je (six blocks);
// the SSE loads, XORs and stores in between leave the flags alone.
	cmpq	$6,%rdx
	jb	.Lctr32_done

	movups	80(%rdi),%xmm11
	xorps	%xmm11,%xmm7
	movups	%xmm7,80(%rsi)
	je	.Lctr32_done

	movups	96(%rdi),%xmm12
	xorps	%xmm12,%xmm8
	movups	%xmm8,96(%rsi)
	jmp	.Lctr32_done

.align	32
.Lctr32_loop4:
.byte	102,15,56,220,209
	leaq	16(%rcx),%rcx
	decl	%eax
.byte	102,15,56,220,217
.byte	102,15,56,220,225
.byte	102,15,56,220,233
	movups	(%rcx),%xmm1
	jnz	.Lctr32_loop4
.byte	102,15,56,221,209
.byte	102,15,56,221,217
	movups	(%rdi),%xmm10
	movups	16(%rdi),%xmm11
.byte	102,15,56,221,225
.byte	102,15,56,221,233
	movups	32(%rdi),%xmm12
	movups	48(%rdi),%xmm13

	xorps	%xmm10,%xmm2
	movups	%xmm2,(%rsi)
	xorps	%xmm11,%xmm3
	movups	%xmm3,16(%rsi)
	pxor	%xmm12,%xmm4
	movdqu	%xmm4,32(%rsi)
	pxor	%xmm13,%xmm5
	movdqu	%xmm5,48(%rsi)
	jmp	.Lctr32_done

.align	32
// Three keystream blocks are always computed here; how many are stored
// depends on %rdx, with the first one stored unconditionally.
.Lctr32_loop3:
.byte	102,15,56,220,209
	leaq	16(%rcx),%rcx
	decl	%eax
.byte	102,15,56,220,217
.byte	102,15,56,220,225
	movups	(%rcx),%xmm1
	jnz	.Lctr32_loop3
.byte	102,15,56,221,209
.byte	102,15,56,221,217
.byte	102,15,56,221,225

	movups	(%rdi),%xmm10
	xorps	%xmm10,%xmm2
	movups	%xmm2,(%rsi)
	cmpq	$2,%rdx
	jb	.Lctr32_done

	movups	16(%rdi),%xmm11
	xorps	%xmm11,%xmm3
	movups	%xmm3,16(%rsi)
	je	.Lctr32_done

	movups	32(%rdi),%xmm12
	xorps	%xmm12,%xmm4
	movups	%xmm4,32(%rsi)

// Wipe all xmm registers and the 128-byte counter area, then restore %rbp
// and %rsp from the values anchored at %r11.
.Lctr32_done:
	xorps	%xmm0,%xmm0
	xorl	%ebp,%ebp
	pxor	%xmm1,%xmm1
	pxor	%xmm2,%xmm2
	pxor	%xmm3,%xmm3
	pxor	%xmm4,%xmm4
	pxor	%xmm5,%xmm5
	pxor	%xmm6,%xmm6
	pxor	%xmm7,%xmm7
	movaps	%xmm0,0(%rsp)
	pxor	%xmm8,%xmm8
	movaps	%xmm0,16(%rsp)
	pxor	%xmm9,%xmm9
	movaps	%xmm0,32(%rsp)
	pxor	%xmm10,%xmm10
	movaps	%xmm0,48(%rsp)
	pxor	%xmm11,%xmm11
	movaps	%xmm0,64(%rsp)
	pxor	%xmm12,%xmm12
	movaps	%xmm0,80(%rsp)
	pxor	%xmm13,%xmm13
	movaps	%xmm0,96(%rsp)
	pxor	%xmm14,%xmm14
	movaps	%xmm0,112(%rsp)
	pxor	%xmm15,%xmm15
	movq	-8(%r11),%rbp
.cfi_restore	%rbp
	leaq	(%r11),%rsp
.cfi_def_cfa_register	%rsp
.Lctr32_epilogue:
	ret
.cfi_endproc
.size	aes_hw_ctr32_encrypt_blocks,.-aes_hw_ctr32_encrypt_blocks

// aes_hw_cbc_encrypt: CBC-chained encryption or decryption.
//   %rdi  input pointer
//   %rsi  output pointer
//   %rdx  length in bytes; the function jumps to .Lcbc_ret when it is zero
//   %rcx  key schedule
//   %r8   16-byte chaining value (IV): read at entry and written back on
//         the encrypt path and on the single-block decrypt path
//   %r9d  nonzero selects the encrypt path, zero the decrypt path
// %r10d and %r11 keep copies of the loop count and the key pointer.
.globl	aes_hw_cbc_encrypt
.hidden aes_hw_cbc_encrypt
.type	aes_hw_cbc_encrypt,@function
.align	16
aes_hw_cbc_encrypt:
.cfi_startproc
_CET_ENDBR
	testq	%rdx,%rdx
	jz	.Lcbc_ret

	movl	240(%rcx),%r10d
	movq	%rcx,%r11
	testl	%r9d,%r9d
	jz	.Lcbc_decrypt

	movups	(%r8),%xmm2
	movl	%r10d,%eax
	cmpq	$16,%rdx
	jb	.Lcbc_enc_tail
	subq	$16,%rdx
	jmp	.Lcbc_enc_loop
.align	16
// One block per iteration; %xmm2 carries the previous ciphertext block
// (initially the IV) and is XORed with the plaintext and round key 0.
.Lcbc_enc_loop:
	movups	(%rdi),%xmm3
	leaq	16(%rdi),%rdi

	movups	(%rcx),%xmm0
	movups	16(%rcx),%xmm1
	xorps	%xmm0,%xmm3
	leaq	32(%rcx),%rcx
	xorps	%xmm3,%xmm2
.Loop_enc1_6:
.byte	102,15,56,220,209
	decl	%eax
	movups	(%rcx),%xmm1
	leaq	16(%rcx),%rcx
	jnz	.Loop_enc1_6
.byte	102,15,56,221,209
	movl	%r10d,%eax
	movq	%r11,%rcx
	movups	%xmm2,0(%rsi)
	leaq	16(%rsi),%rsi
	subq	$16,%rdx
	jnc	.Lcbc_enc_loop
	addq	$16,%rdx
	jnz	.Lcbc_enc_tail
	pxor	%xmm0,%xmm0
	pxor	%xmm1,%xmm1
	movups	%xmm2,(%r8)
	pxor	%xmm2,%xmm2
	pxor	%xmm3,%xmm3
	jmp	.Lcbc_ret

// Partial final block (1 to 15 bytes left in %rdx): copy the bytes to the
// output buffer, zero-fill up to 16 bytes, then run the encrypt loop once
// more with the output buffer as its own input.
.Lcbc_enc_tail:
	movq	%rdx,%rcx
	xchgq	%rdi,%rsi
// Bytes F3 A4 66 90: rep movsb followed by a two-byte nop.
.long	0x9066A4F3
	movl	$16,%ecx
	subq	%rdx,%rcx
	xorl	%eax,%eax
// Bytes F3 AA 66 90: rep stosb (stores %al = 0) followed by a two-byte nop.
.long	0x9066AAF3
	leaq	-16(%rdi),%rdi
	movl	%r10d,%eax
	movq	%rdi,%rsi
	movq	%r11,%rcx
	xorq	%rdx,%rdx
	jmp	.Lcbc_enc_loop

.align	16
.Lcbc_decrypt:
	cmpq	$16,%rdx
	jne	.Lcbc_decrypt_bulk



// Exactly one block: %xmm4 keeps a copy of the ciphertext, which becomes
// the new chaining value stored at (%r8); %xmm3 is the old one.
	movdqu	(%rdi),%xmm2
	movdqu	(%r8),%xmm3
	movdqa	%xmm2,%xmm4
	movups	(%rcx),%xmm0
	movups	16(%rcx),%xmm1
	leaq	32(%rcx),%rcx
	xorps	%xmm0,%xmm2
.Loop_dec1_7:
.byte	102,15,56,222,209
	decl	%r10d
	movups	(%rcx),%xmm1
	leaq	16(%rcx),%rcx
	jnz	.Loop_dec1_7
.byte	102,15,56,223,209
	pxor	%xmm0,%xmm0
	pxor	%xmm1,%xmm1
	movdqu	%xmm4,(%r8)
	xorps	%xmm3,%xmm2
	pxor	%xmm3,%xmm3
	movups	%xmm2,(%rsi)
	pxor	%xmm2,%xmm2
	jmp	.Lcbc_ret
.align	16
// Bulk decrypt: %r11 is overwritten with the entry %rsp, %rbp is pushed and
// then holds the key pointer, and a 16-byte aligned scratch slot is
// reserved on the stack. %xmm10 holds the chaining value.
.Lcbc_decrypt_bulk:
	leaq	(%rsp),%r11
.cfi_def_cfa_register	%r11
	pushq	%rbp
.cfi_offset	%rbp,-16
	subq	$16,%rsp
	andq	$-16,%rsp
	movq	%rcx,%rbp
	movups	(%r8),%xmm10
	movl	%r10d,%eax
	cmpq	$0x50,%rdx
	jbe	.Lcbc_dec_tail

// Load six ciphertext blocks and keep copies of the first five in
// %xmm11-%xmm15.
	movups	(%rcx),%xmm0
	movdqu	0(%rdi),%xmm2
	movdqu	16(%rdi),%xmm3
	movdqa	%xmm2,%xmm11
	movdqu	32(%rdi),%xmm4
	movdqa	%xmm3,%xmm12
	movdqu	48(%rdi),%xmm5
	movdqa	%xmm4,%xmm13
	movdqu	64(%rdi),%xmm6
	movdqa	%xmm5,%xmm14
	movdqu	80(%rdi),%xmm7
	movdqa	%xmm6,%xmm15
	cmpq	$0x70,%rdx
	jbe	.Lcbc_dec_six_or_seven

// Eight-block loop: %rdx is biased by -0x70 and %rcx by +112, hence the
// N-112(%rcx) round key addresses.
	subq	$0x70,%rdx
	leaq	112(%rcx),%rcx
	jmp	.Lcbc_dec_loop8_enter
.align	16
.Lcbc_dec_loop8:
	movups	%xmm9,(%rsi)
	leaq	16(%rsi),%rsi
.Lcbc_dec_loop8_enter:
	movdqu	96(%rdi),%xmm8
	pxor	%xmm0,%xmm2
	movdqu	112(%rdi),%xmm9
	pxor	%xmm0,%xmm3
	movups	16-112(%rcx),%xmm1
	pxor	%xmm0,%xmm4
// Branch-free select: the carry of the cmpq below (set when %rdx < 0x70)
// survives the flag-neutral SSE/AES instructions until the adcq, so %rbp
// ends up as %rdi when the carry is set and %rdi+128 otherwise.
	movq	$-1,%rbp
	cmpq	$0x70,%rdx
	pxor	%xmm0,%xmm5
	pxor	%xmm0,%xmm6
	pxor	%xmm0,%xmm7
	pxor	%xmm0,%xmm8

.byte	102,15,56,222,209
	pxor	%xmm0,%xmm9
	movups	32-112(%rcx),%xmm0
.byte	102,15,56,222,217
.byte	102,15,56,222,225
.byte	102,15,56,222,233
.byte	102,15,56,222,241
.byte	102,15,56,222,249
.byte	102,68,15,56,222,193
	adcq	$0,%rbp
	andq	$128,%rbp
.byte	102,68,15,56,222,201
	addq	%rdi,%rbp
	movups	48-112(%rcx),%xmm1
.byte	102,15,56,222,208
.byte	102,15,56,222,216
.byte	102,15,56,222,224
.byte	102,15,56,222,232
.byte	102,15,56,222,240
.byte	102,15,56,222,248
.byte	102,68,15,56,222,192
.byte	102,68,15,56,222,200
	movups	64-112(%rcx),%xmm0
	nop
.byte	102,15,56,222,209
.byte	102,15,56,222,217
.byte	102,15,56,222,225
.byte	102,15,56,222,233
.byte	102,15,56,222,241
.byte	102,15,56,222,249
.byte	102,68,15,56,222,193
.byte	102,68,15,56,222,201
	movups	80-112(%rcx),%xmm1
	nop
.byte	102,15,56,222,208
.byte	102,15,56,222,216
.byte	102,15,56,222,224
.byte	102,15,56,222,232
.byte	102,15,56,222,240
.byte	102,15,56,222,248
.byte	102,68,15,56,222,192
.byte	102,68,15,56,222,200
	movups	96-112(%rcx),%xmm0
	nop
.byte	102,15,56,222,209
.byte	102,15,56,222,217
.byte	102,15,56,222,225
.byte	102,15,56,222,233
.byte	102,15,56,222,241
.byte	102,15,56,222,249
.byte	102,68,15,56,222,193
.byte	102,68,15,56,222,201
	movups	112-112(%rcx),%xmm1
	nop
.byte	102,15,56,222,208
.byte	102,15,56,222,216
.byte	102,15,56,222,224
.byte	102,15,56,222,232
.byte	102,15,56,222,240
.byte	102,15,56,222,248
.byte	102,68,15,56,222,192
.byte	102,68,15,56,222,200
	movups	128-112(%rcx),%xmm0
	nop
.byte	102,15,56,222,209
.byte	102,15,56,222,217
.byte	102,15,56,222,225
.byte	102,15,56,222,233
.byte	102,15,56,222,241
.byte	102,15,56,222,249
.byte	102,68,15,56,222,193
.byte	102,68,15,56,222,201
	movups	144-112(%rcx),%xmm1
// As in the CTR loop, this one compare feeds both the jb and the later je.
	cmpl	$11,%eax
.byte	102,15,56,222,208
.byte	102,15,56,222,216
.byte	102,15,56,222,224
.byte	102,15,56,222,232
.byte	102,15,56,222,240
.byte	102,15,56,222,248
.byte	102,68,15,56,222,192
.byte	102,68,15,56,222,200
	movups	160-112(%rcx),%xmm0
	jb	.Lcbc_dec_done
.byte	102,15,56,222,209
.byte	102,15,56,222,217
.byte	102,15,56,222,225
.byte	102,15,56,222,233
.byte	102,15,56,222,241
.byte	102,15,56,222,249
.byte	102,68,15,56,222,193
.byte	102,68,15,56,222,201
	movups	176-112(%rcx),%xmm1
	nop
.byte	102,15,56,222,208
.byte	102,15,56,222,216
.byte	102,15,56,222,224
.byte	102,15,56,222,232
.byte	102,15,56,222,240
.byte	102,15,56,222,248
.byte	102,68,15,56,222,192
.byte	102,68,15,56,222,200
	movups	192-112(%rcx),%xmm0
	je	.Lcbc_dec_done
.byte	102,15,56,222,209
.byte	102,15,56,222,217
.byte	102,15,56,222,225
.byte	102,15,56,222,233
.byte	102,15,56,222,241
.byte	102,15,56,222,249
.byte	102,68,15,56,222,193
.byte	102,68,15,56,222,201
	movups	208-112(%rcx),%xmm1
	nop
.byte	102,15,56,222,208
1629.byte 102,15,56,222,216 1630.byte 102,15,56,222,224 1631.byte 102,15,56,222,232 1632.byte 102,15,56,222,240 1633.byte 102,15,56,222,248 1634.byte 102,68,15,56,222,192 1635.byte 102,68,15,56,222,200 1636 movups 224-112(%rcx),%xmm0 1637 jmp .Lcbc_dec_done 1638.align 16 1639.Lcbc_dec_done: 1640.byte 102,15,56,222,209 1641.byte 102,15,56,222,217 1642 pxor %xmm0,%xmm10 1643 pxor %xmm0,%xmm11 1644.byte 102,15,56,222,225 1645.byte 102,15,56,222,233 1646 pxor %xmm0,%xmm12 1647 pxor %xmm0,%xmm13 1648.byte 102,15,56,222,241 1649.byte 102,15,56,222,249 1650 pxor %xmm0,%xmm14 1651 pxor %xmm0,%xmm15 1652.byte 102,68,15,56,222,193 1653.byte 102,68,15,56,222,201 1654 movdqu 80(%rdi),%xmm1 1655 1656.byte 102,65,15,56,223,210 1657 movdqu 96(%rdi),%xmm10 1658 pxor %xmm0,%xmm1 1659.byte 102,65,15,56,223,219 1660 pxor %xmm0,%xmm10 1661 movdqu 112(%rdi),%xmm0 1662.byte 102,65,15,56,223,228 1663 leaq 128(%rdi),%rdi 1664 movdqu 0(%rbp),%xmm11 1665.byte 102,65,15,56,223,237 1666.byte 102,65,15,56,223,246 1667 movdqu 16(%rbp),%xmm12 1668 movdqu 32(%rbp),%xmm13 1669.byte 102,65,15,56,223,255 1670.byte 102,68,15,56,223,193 1671 movdqu 48(%rbp),%xmm14 1672 movdqu 64(%rbp),%xmm15 1673.byte 102,69,15,56,223,202 1674 movdqa %xmm0,%xmm10 1675 movdqu 80(%rbp),%xmm1 1676 movups -112(%rcx),%xmm0 1677 1678 movups %xmm2,(%rsi) 1679 movdqa %xmm11,%xmm2 1680 movups %xmm3,16(%rsi) 1681 movdqa %xmm12,%xmm3 1682 movups %xmm4,32(%rsi) 1683 movdqa %xmm13,%xmm4 1684 movups %xmm5,48(%rsi) 1685 movdqa %xmm14,%xmm5 1686 movups %xmm6,64(%rsi) 1687 movdqa %xmm15,%xmm6 1688 movups %xmm7,80(%rsi) 1689 movdqa %xmm1,%xmm7 1690 movups %xmm8,96(%rsi) 1691 leaq 112(%rsi),%rsi 1692 1693 subq $0x80,%rdx 1694 ja .Lcbc_dec_loop8 1695 1696 movaps %xmm9,%xmm2 1697 leaq -112(%rcx),%rcx 1698 addq $0x70,%rdx 1699 jle .Lcbc_dec_clear_tail_collected 1700 movups %xmm9,(%rsi) 1701 leaq 16(%rsi),%rsi 1702 cmpq $0x50,%rdx 1703 jbe .Lcbc_dec_tail 1704 1705 movaps %xmm11,%xmm2 1706.Lcbc_dec_six_or_seven: 1707 cmpq $0x60,%rdx 1708 ja 
.Lcbc_dec_seven 1709 1710 movaps %xmm7,%xmm8 1711 call _aesni_decrypt6 1712 pxor %xmm10,%xmm2 1713 movaps %xmm8,%xmm10 1714 pxor %xmm11,%xmm3 1715 movdqu %xmm2,(%rsi) 1716 pxor %xmm12,%xmm4 1717 movdqu %xmm3,16(%rsi) 1718 pxor %xmm3,%xmm3 1719 pxor %xmm13,%xmm5 1720 movdqu %xmm4,32(%rsi) 1721 pxor %xmm4,%xmm4 1722 pxor %xmm14,%xmm6 1723 movdqu %xmm5,48(%rsi) 1724 pxor %xmm5,%xmm5 1725 pxor %xmm15,%xmm7 1726 movdqu %xmm6,64(%rsi) 1727 pxor %xmm6,%xmm6 1728 leaq 80(%rsi),%rsi 1729 movdqa %xmm7,%xmm2 1730 pxor %xmm7,%xmm7 1731 jmp .Lcbc_dec_tail_collected 1732 1733.align 16 1734.Lcbc_dec_seven: 1735 movups 96(%rdi),%xmm8 1736 xorps %xmm9,%xmm9 1737 call _aesni_decrypt8 1738 movups 80(%rdi),%xmm9 1739 pxor %xmm10,%xmm2 1740 movups 96(%rdi),%xmm10 1741 pxor %xmm11,%xmm3 1742 movdqu %xmm2,(%rsi) 1743 pxor %xmm12,%xmm4 1744 movdqu %xmm3,16(%rsi) 1745 pxor %xmm3,%xmm3 1746 pxor %xmm13,%xmm5 1747 movdqu %xmm4,32(%rsi) 1748 pxor %xmm4,%xmm4 1749 pxor %xmm14,%xmm6 1750 movdqu %xmm5,48(%rsi) 1751 pxor %xmm5,%xmm5 1752 pxor %xmm15,%xmm7 1753 movdqu %xmm6,64(%rsi) 1754 pxor %xmm6,%xmm6 1755 pxor %xmm9,%xmm8 1756 movdqu %xmm7,80(%rsi) 1757 pxor %xmm7,%xmm7 1758 leaq 96(%rsi),%rsi 1759 movdqa %xmm8,%xmm2 1760 pxor %xmm8,%xmm8 1761 pxor %xmm9,%xmm9 1762 jmp .Lcbc_dec_tail_collected 1763 1764.Lcbc_dec_tail: 1765 movups (%rdi),%xmm2 1766 subq $0x10,%rdx 1767 jbe .Lcbc_dec_one 1768 1769 movups 16(%rdi),%xmm3 1770 movaps %xmm2,%xmm11 1771 subq $0x10,%rdx 1772 jbe .Lcbc_dec_two 1773 1774 movups 32(%rdi),%xmm4 1775 movaps %xmm3,%xmm12 1776 subq $0x10,%rdx 1777 jbe .Lcbc_dec_three 1778 1779 movups 48(%rdi),%xmm5 1780 movaps %xmm4,%xmm13 1781 subq $0x10,%rdx 1782 jbe .Lcbc_dec_four 1783 1784 movups 64(%rdi),%xmm6 1785 movaps %xmm5,%xmm14 1786 movaps %xmm6,%xmm15 1787 xorps %xmm7,%xmm7 1788 call _aesni_decrypt6 1789 pxor %xmm10,%xmm2 1790 movaps %xmm15,%xmm10 1791 pxor %xmm11,%xmm3 1792 movdqu %xmm2,(%rsi) 1793 pxor %xmm12,%xmm4 1794 movdqu %xmm3,16(%rsi) 1795 pxor %xmm3,%xmm3 1796 pxor 
%xmm13,%xmm5 1797 movdqu %xmm4,32(%rsi) 1798 pxor %xmm4,%xmm4 1799 pxor %xmm14,%xmm6 1800 movdqu %xmm5,48(%rsi) 1801 pxor %xmm5,%xmm5 1802 leaq 64(%rsi),%rsi 1803 movdqa %xmm6,%xmm2 1804 pxor %xmm6,%xmm6 1805 pxor %xmm7,%xmm7 1806 subq $0x10,%rdx 1807 jmp .Lcbc_dec_tail_collected 1808 1809.align 16 1810.Lcbc_dec_one: 1811 movaps %xmm2,%xmm11 1812 movups (%rcx),%xmm0 1813 movups 16(%rcx),%xmm1 1814 leaq 32(%rcx),%rcx 1815 xorps %xmm0,%xmm2 1816.Loop_dec1_8: 1817.byte 102,15,56,222,209 1818 decl %eax 1819 movups (%rcx),%xmm1 1820 leaq 16(%rcx),%rcx 1821 jnz .Loop_dec1_8 1822.byte 102,15,56,223,209 1823 xorps %xmm10,%xmm2 1824 movaps %xmm11,%xmm10 1825 jmp .Lcbc_dec_tail_collected 1826.align 16 1827.Lcbc_dec_two: 1828 movaps %xmm3,%xmm12 1829 call _aesni_decrypt2 1830 pxor %xmm10,%xmm2 1831 movaps %xmm12,%xmm10 1832 pxor %xmm11,%xmm3 1833 movdqu %xmm2,(%rsi) 1834 movdqa %xmm3,%xmm2 1835 pxor %xmm3,%xmm3 1836 leaq 16(%rsi),%rsi 1837 jmp .Lcbc_dec_tail_collected 1838.align 16 1839.Lcbc_dec_three: 1840 movaps %xmm4,%xmm13 1841 call _aesni_decrypt3 1842 pxor %xmm10,%xmm2 1843 movaps %xmm13,%xmm10 1844 pxor %xmm11,%xmm3 1845 movdqu %xmm2,(%rsi) 1846 pxor %xmm12,%xmm4 1847 movdqu %xmm3,16(%rsi) 1848 pxor %xmm3,%xmm3 1849 movdqa %xmm4,%xmm2 1850 pxor %xmm4,%xmm4 1851 leaq 32(%rsi),%rsi 1852 jmp .Lcbc_dec_tail_collected 1853.align 16 1854.Lcbc_dec_four: 1855 movaps %xmm5,%xmm14 1856 call _aesni_decrypt4 1857 pxor %xmm10,%xmm2 1858 movaps %xmm14,%xmm10 1859 pxor %xmm11,%xmm3 1860 movdqu %xmm2,(%rsi) 1861 pxor %xmm12,%xmm4 1862 movdqu %xmm3,16(%rsi) 1863 pxor %xmm3,%xmm3 1864 pxor %xmm13,%xmm5 1865 movdqu %xmm4,32(%rsi) 1866 pxor %xmm4,%xmm4 1867 movdqa %xmm5,%xmm2 1868 pxor %xmm5,%xmm5 1869 leaq 48(%rsi),%rsi 1870 jmp .Lcbc_dec_tail_collected 1871 1872.align 16 1873.Lcbc_dec_clear_tail_collected: 1874 pxor %xmm3,%xmm3 1875 pxor %xmm4,%xmm4 1876 pxor %xmm5,%xmm5 1877 pxor %xmm6,%xmm6 1878 pxor %xmm7,%xmm7 1879 pxor %xmm8,%xmm8 1880 pxor %xmm9,%xmm9 1881.Lcbc_dec_tail_collected: 
1882 movups %xmm10,(%r8) 1883 andq $15,%rdx 1884 jnz .Lcbc_dec_tail_partial 1885 movups %xmm2,(%rsi) 1886 pxor %xmm2,%xmm2 1887 jmp .Lcbc_dec_ret 1888.align 16 1889.Lcbc_dec_tail_partial: 1890 movaps %xmm2,(%rsp) 1891 pxor %xmm2,%xmm2 1892 movq $16,%rcx 1893 movq %rsi,%rdi 1894 subq %rdx,%rcx 1895 leaq (%rsp),%rsi 1896.long 0x9066A4F3 1897 movdqa %xmm2,(%rsp) 1898 1899.Lcbc_dec_ret: 1900 xorps %xmm0,%xmm0 1901 pxor %xmm1,%xmm1 1902 movq -8(%r11),%rbp 1903.cfi_restore %rbp 1904 leaq (%r11),%rsp 1905.cfi_def_cfa_register %rsp 1906.Lcbc_ret: 1907 ret 1908.cfi_endproc 1909.size aes_hw_cbc_encrypt,.-aes_hw_cbc_encrypt 1910.globl aes_hw_set_decrypt_key 1911.hidden aes_hw_set_decrypt_key 1912.type aes_hw_set_decrypt_key,@function 1913.align 16 1914aes_hw_set_decrypt_key: 1915.cfi_startproc 1916_CET_ENDBR 1917.byte 0x48,0x83,0xEC,0x08 1918.cfi_adjust_cfa_offset 8 1919 call __aesni_set_encrypt_key 1920 shll $4,%esi 1921 testl %eax,%eax 1922 jnz .Ldec_key_ret 1923 leaq 16(%rdx,%rsi,1),%rdi 1924 1925 movups (%rdx),%xmm0 1926 movups (%rdi),%xmm1 1927 movups %xmm0,(%rdi) 1928 movups %xmm1,(%rdx) 1929 leaq 16(%rdx),%rdx 1930 leaq -16(%rdi),%rdi 1931 1932.Ldec_key_inverse: 1933 movups (%rdx),%xmm0 1934 movups (%rdi),%xmm1 1935.byte 102,15,56,219,192 1936.byte 102,15,56,219,201 1937 leaq 16(%rdx),%rdx 1938 leaq -16(%rdi),%rdi 1939 movups %xmm0,16(%rdi) 1940 movups %xmm1,-16(%rdx) 1941 cmpq %rdx,%rdi 1942 ja .Ldec_key_inverse 1943 1944 movups (%rdx),%xmm0 1945.byte 102,15,56,219,192 1946 pxor %xmm1,%xmm1 1947 movups %xmm0,(%rdi) 1948 pxor %xmm0,%xmm0 1949.Ldec_key_ret: 1950 addq $8,%rsp 1951.cfi_adjust_cfa_offset -8 1952 ret 1953.cfi_endproc 1954.LSEH_end_set_decrypt_key: 1955.size aes_hw_set_decrypt_key,.-aes_hw_set_decrypt_key 1956.globl aes_hw_set_encrypt_key 1957.hidden aes_hw_set_encrypt_key 1958.type aes_hw_set_encrypt_key,@function 1959.align 16 1960aes_hw_set_encrypt_key: 1961__aesni_set_encrypt_key: 1962.cfi_startproc 1963_CET_ENDBR 1964#ifdef BORINGSSL_DISPATCH_TEST 1965 
movb $1,BORINGSSL_function_hit+3(%rip) 1966#endif 1967.byte 0x48,0x83,0xEC,0x08 1968.cfi_adjust_cfa_offset 8 1969 movq $-1,%rax 1970 testq %rdi,%rdi 1971 jz .Lenc_key_ret 1972 testq %rdx,%rdx 1973 jz .Lenc_key_ret 1974 1975 movups (%rdi),%xmm0 1976 xorps %xmm4,%xmm4 1977 leaq OPENSSL_ia32cap_P(%rip),%r10 1978 movl 4(%r10),%r10d 1979 andl $268437504,%r10d 1980 leaq 16(%rdx),%rax 1981 cmpl $256,%esi 1982 je .L14rounds 1983 cmpl $192,%esi 1984 je .L12rounds 1985 cmpl $128,%esi 1986 jne .Lbad_keybits 1987 1988.L10rounds: 1989 movl $9,%esi 1990 cmpl $268435456,%r10d 1991 je .L10rounds_alt 1992 1993 movups %xmm0,(%rdx) 1994.byte 102,15,58,223,200,1 1995 call .Lkey_expansion_128_cold 1996.byte 102,15,58,223,200,2 1997 call .Lkey_expansion_128 1998.byte 102,15,58,223,200,4 1999 call .Lkey_expansion_128 2000.byte 102,15,58,223,200,8 2001 call .Lkey_expansion_128 2002.byte 102,15,58,223,200,16 2003 call .Lkey_expansion_128 2004.byte 102,15,58,223,200,32 2005 call .Lkey_expansion_128 2006.byte 102,15,58,223,200,64 2007 call .Lkey_expansion_128 2008.byte 102,15,58,223,200,128 2009 call .Lkey_expansion_128 2010.byte 102,15,58,223,200,27 2011 call .Lkey_expansion_128 2012.byte 102,15,58,223,200,54 2013 call .Lkey_expansion_128 2014 movups %xmm0,(%rax) 2015 movl %esi,80(%rax) 2016 xorl %eax,%eax 2017 jmp .Lenc_key_ret 2018 2019.align 16 2020.L10rounds_alt: 2021 movdqa .Lkey_rotate(%rip),%xmm5 2022 movl $8,%r10d 2023 movdqa .Lkey_rcon1(%rip),%xmm4 2024 movdqa %xmm0,%xmm2 2025 movdqu %xmm0,(%rdx) 2026 jmp .Loop_key128 2027 2028.align 16 2029.Loop_key128: 2030.byte 102,15,56,0,197 2031.byte 102,15,56,221,196 2032 pslld $1,%xmm4 2033 leaq 16(%rax),%rax 2034 2035 movdqa %xmm2,%xmm3 2036 pslldq $4,%xmm2 2037 pxor %xmm2,%xmm3 2038 pslldq $4,%xmm2 2039 pxor %xmm2,%xmm3 2040 pslldq $4,%xmm2 2041 pxor %xmm3,%xmm2 2042 2043 pxor %xmm2,%xmm0 2044 movdqu %xmm0,-16(%rax) 2045 movdqa %xmm0,%xmm2 2046 2047 decl %r10d 2048 jnz .Loop_key128 2049 2050 movdqa .Lkey_rcon1b(%rip),%xmm4 2051 2052.byte 
102,15,56,0,197 2053.byte 102,15,56,221,196 2054 pslld $1,%xmm4 2055 2056 movdqa %xmm2,%xmm3 2057 pslldq $4,%xmm2 2058 pxor %xmm2,%xmm3 2059 pslldq $4,%xmm2 2060 pxor %xmm2,%xmm3 2061 pslldq $4,%xmm2 2062 pxor %xmm3,%xmm2 2063 2064 pxor %xmm2,%xmm0 2065 movdqu %xmm0,(%rax) 2066 2067 movdqa %xmm0,%xmm2 2068.byte 102,15,56,0,197 2069.byte 102,15,56,221,196 2070 2071 movdqa %xmm2,%xmm3 2072 pslldq $4,%xmm2 2073 pxor %xmm2,%xmm3 2074 pslldq $4,%xmm2 2075 pxor %xmm2,%xmm3 2076 pslldq $4,%xmm2 2077 pxor %xmm3,%xmm2 2078 2079 pxor %xmm2,%xmm0 2080 movdqu %xmm0,16(%rax) 2081 2082 movl %esi,96(%rax) 2083 xorl %eax,%eax 2084 jmp .Lenc_key_ret 2085 2086.align 16 2087.L12rounds: 2088 movq 16(%rdi),%xmm2 2089 movl $11,%esi 2090 cmpl $268435456,%r10d 2091 je .L12rounds_alt 2092 2093 movups %xmm0,(%rdx) 2094.byte 102,15,58,223,202,1 2095 call .Lkey_expansion_192a_cold 2096.byte 102,15,58,223,202,2 2097 call .Lkey_expansion_192b 2098.byte 102,15,58,223,202,4 2099 call .Lkey_expansion_192a 2100.byte 102,15,58,223,202,8 2101 call .Lkey_expansion_192b 2102.byte 102,15,58,223,202,16 2103 call .Lkey_expansion_192a 2104.byte 102,15,58,223,202,32 2105 call .Lkey_expansion_192b 2106.byte 102,15,58,223,202,64 2107 call .Lkey_expansion_192a 2108.byte 102,15,58,223,202,128 2109 call .Lkey_expansion_192b 2110 movups %xmm0,(%rax) 2111 movl %esi,48(%rax) 2112 xorq %rax,%rax 2113 jmp .Lenc_key_ret 2114 2115.align 16 2116.L12rounds_alt: 2117 movdqa .Lkey_rotate192(%rip),%xmm5 2118 movdqa .Lkey_rcon1(%rip),%xmm4 2119 movl $8,%r10d 2120 movdqu %xmm0,(%rdx) 2121 jmp .Loop_key192 2122 2123.align 16 2124.Loop_key192: 2125 movq %xmm2,0(%rax) 2126 movdqa %xmm2,%xmm1 2127.byte 102,15,56,0,213 2128.byte 102,15,56,221,212 2129 pslld $1,%xmm4 2130 leaq 24(%rax),%rax 2131 2132 movdqa %xmm0,%xmm3 2133 pslldq $4,%xmm0 2134 pxor %xmm0,%xmm3 2135 pslldq $4,%xmm0 2136 pxor %xmm0,%xmm3 2137 pslldq $4,%xmm0 2138 pxor %xmm3,%xmm0 2139 2140 pshufd $0xff,%xmm0,%xmm3 2141 pxor %xmm1,%xmm3 2142 pslldq $4,%xmm1 2143 pxor 
%xmm1,%xmm3 2144 2145 pxor %xmm2,%xmm0 2146 pxor %xmm3,%xmm2 2147 movdqu %xmm0,-16(%rax) 2148 2149 decl %r10d 2150 jnz .Loop_key192 2151 2152 movl %esi,32(%rax) 2153 xorl %eax,%eax 2154 jmp .Lenc_key_ret 2155 2156.align 16 2157.L14rounds: 2158 movups 16(%rdi),%xmm2 2159 movl $13,%esi 2160 leaq 16(%rax),%rax 2161 cmpl $268435456,%r10d 2162 je .L14rounds_alt 2163 2164 movups %xmm0,(%rdx) 2165 movups %xmm2,16(%rdx) 2166.byte 102,15,58,223,202,1 2167 call .Lkey_expansion_256a_cold 2168.byte 102,15,58,223,200,1 2169 call .Lkey_expansion_256b 2170.byte 102,15,58,223,202,2 2171 call .Lkey_expansion_256a 2172.byte 102,15,58,223,200,2 2173 call .Lkey_expansion_256b 2174.byte 102,15,58,223,202,4 2175 call .Lkey_expansion_256a 2176.byte 102,15,58,223,200,4 2177 call .Lkey_expansion_256b 2178.byte 102,15,58,223,202,8 2179 call .Lkey_expansion_256a 2180.byte 102,15,58,223,200,8 2181 call .Lkey_expansion_256b 2182.byte 102,15,58,223,202,16 2183 call .Lkey_expansion_256a 2184.byte 102,15,58,223,200,16 2185 call .Lkey_expansion_256b 2186.byte 102,15,58,223,202,32 2187 call .Lkey_expansion_256a 2188.byte 102,15,58,223,200,32 2189 call .Lkey_expansion_256b 2190.byte 102,15,58,223,202,64 2191 call .Lkey_expansion_256a 2192 movups %xmm0,(%rax) 2193 movl %esi,16(%rax) 2194 xorq %rax,%rax 2195 jmp .Lenc_key_ret 2196 2197.align 16 2198.L14rounds_alt: 2199 movdqa .Lkey_rotate(%rip),%xmm5 2200 movdqa .Lkey_rcon1(%rip),%xmm4 2201 movl $7,%r10d 2202 movdqu %xmm0,0(%rdx) 2203 movdqa %xmm2,%xmm1 2204 movdqu %xmm2,16(%rdx) 2205 jmp .Loop_key256 2206 2207.align 16 2208.Loop_key256: 2209.byte 102,15,56,0,213 2210.byte 102,15,56,221,212 2211 2212 movdqa %xmm0,%xmm3 2213 pslldq $4,%xmm0 2214 pxor %xmm0,%xmm3 2215 pslldq $4,%xmm0 2216 pxor %xmm0,%xmm3 2217 pslldq $4,%xmm0 2218 pxor %xmm3,%xmm0 2219 pslld $1,%xmm4 2220 2221 pxor %xmm2,%xmm0 2222 movdqu %xmm0,(%rax) 2223 2224 decl %r10d 2225 jz .Ldone_key256 2226 2227 pshufd $0xff,%xmm0,%xmm2 2228 pxor %xmm3,%xmm3 2229.byte 102,15,56,221,211 2230 2231 
movdqa %xmm1,%xmm3 2232 pslldq $4,%xmm1 2233 pxor %xmm1,%xmm3 2234 pslldq $4,%xmm1 2235 pxor %xmm1,%xmm3 2236 pslldq $4,%xmm1 2237 pxor %xmm3,%xmm1 2238 2239 pxor %xmm1,%xmm2 2240 movdqu %xmm2,16(%rax) 2241 leaq 32(%rax),%rax 2242 movdqa %xmm2,%xmm1 2243 2244 jmp .Loop_key256 2245 2246.Ldone_key256: 2247 movl %esi,16(%rax) 2248 xorl %eax,%eax 2249 jmp .Lenc_key_ret 2250 2251.align 16 2252.Lbad_keybits: 2253 movq $-2,%rax 2254.Lenc_key_ret: 2255 pxor %xmm0,%xmm0 2256 pxor %xmm1,%xmm1 2257 pxor %xmm2,%xmm2 2258 pxor %xmm3,%xmm3 2259 pxor %xmm4,%xmm4 2260 pxor %xmm5,%xmm5 2261 addq $8,%rsp 2262.cfi_adjust_cfa_offset -8 2263 ret 2264.cfi_endproc 2265.LSEH_end_set_encrypt_key: 2266 2267.align 16 2268.Lkey_expansion_128: 2269 movups %xmm0,(%rax) 2270 leaq 16(%rax),%rax 2271.Lkey_expansion_128_cold: 2272 shufps $16,%xmm0,%xmm4 2273 xorps %xmm4,%xmm0 2274 shufps $140,%xmm0,%xmm4 2275 xorps %xmm4,%xmm0 2276 shufps $255,%xmm1,%xmm1 2277 xorps %xmm1,%xmm0 2278 ret 2279 2280.align 16 2281.Lkey_expansion_192a: 2282 movups %xmm0,(%rax) 2283 leaq 16(%rax),%rax 2284.Lkey_expansion_192a_cold: 2285 movaps %xmm2,%xmm5 2286.Lkey_expansion_192b_warm: 2287 shufps $16,%xmm0,%xmm4 2288 movdqa %xmm2,%xmm3 2289 xorps %xmm4,%xmm0 2290 shufps $140,%xmm0,%xmm4 2291 pslldq $4,%xmm3 2292 xorps %xmm4,%xmm0 2293 pshufd $85,%xmm1,%xmm1 2294 pxor %xmm3,%xmm2 2295 pxor %xmm1,%xmm0 2296 pshufd $255,%xmm0,%xmm3 2297 pxor %xmm3,%xmm2 2298 ret 2299 2300.align 16 2301.Lkey_expansion_192b: 2302 movaps %xmm0,%xmm3 2303 shufps $68,%xmm0,%xmm5 2304 movups %xmm5,(%rax) 2305 shufps $78,%xmm2,%xmm3 2306 movups %xmm3,16(%rax) 2307 leaq 32(%rax),%rax 2308 jmp .Lkey_expansion_192b_warm 2309 2310.align 16 2311.Lkey_expansion_256a: 2312 movups %xmm2,(%rax) 2313 leaq 16(%rax),%rax 2314.Lkey_expansion_256a_cold: 2315 shufps $16,%xmm0,%xmm4 2316 xorps %xmm4,%xmm0 2317 shufps $140,%xmm0,%xmm4 2318 xorps %xmm4,%xmm0 2319 shufps $255,%xmm1,%xmm1 2320 xorps %xmm1,%xmm0 2321 ret 2322 2323.align 16 2324.Lkey_expansion_256b: 
2325 movups %xmm0,(%rax) 2326 leaq 16(%rax),%rax 2327 2328 shufps $16,%xmm2,%xmm4 2329 xorps %xmm4,%xmm2 2330 shufps $140,%xmm2,%xmm4 2331 xorps %xmm4,%xmm2 2332 shufps $170,%xmm1,%xmm1 2333 xorps %xmm1,%xmm2 2334 ret 2335.size aes_hw_set_encrypt_key,.-aes_hw_set_encrypt_key 2336.size __aesni_set_encrypt_key,.-__aesni_set_encrypt_key 2337.section .rodata 2338.align 64 2339.Lbswap_mask: 2340.byte 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0 2341.Lincrement32: 2342.long 6,6,6,0 2343.Lincrement64: 2344.long 1,0,0,0 2345.Lxts_magic: 2346.long 0x87,0,1,0 2347.Lincrement1: 2348.byte 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1 2349.Lkey_rotate: 2350.long 0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d 2351.Lkey_rotate192: 2352.long 0x04070605,0x04070605,0x04070605,0x04070605 2353.Lkey_rcon1: 2354.long 1,1,1,1 2355.Lkey_rcon1b: 2356.long 0x1b,0x1b,0x1b,0x1b 2357 2358.byte 65,69,83,32,102,111,114,32,73,110,116,101,108,32,65,69,83,45,78,73,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 2359.align 64 2360.text 2361#endif 2362