// This file is generated from a similarly-named Perl script in the BoringSSL
// source tree. Do not edit by hand.

#include <ring-core/asm_base.h>

#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86_64) && defined(__APPLE__)
// Syntax: AT&T (source operand first), Mach-O symbol naming (leading
// underscore on globals, L$ prefix on local labels).
//
// Several instructions are emitted as raw .byte sequences. Decoded from the
// x86-64 encoding (AT&T operand order; M is the ModRM byte, whose low three
// bits select the source register and bits 3-5 the destination):
//   102,15,56,220,M            aesenc       e.g. 209 -> aesenc %xmm1,%xmm2
//   102,15,56,221,M            aesenclast   e.g. 208 -> aesenclast %xmm0,%xmm2
//   102,68|65|69,15,56,220|221,M   same, with a REX prefix to reach xmm8-xmm15
//   102,15,58,223,M,imm        aeskeygenassist $imm
//   102,15,58,34,M,3           pinsrd $3
//   102,15,56,0,197 / ...,213  pshufb %xmm5,%xmm0 / pshufb %xmm5,%xmm2
//   0x0f,0x38,0xf1,0x44,0x24,D movbe %eax,D(%rsp)
//   0x48,0x83,0xEC,0x08        subq $8,%rsp
//   0x0f,0x1f,0x00 and 0x66,0x90   multi-byte NOP padding
.text

// _aes_hw_encrypt: encrypt a single 16-byte block.
//   In:   %rdi = input block, %rsi = output block, %rdx = key schedule
//         (loop count is read from offset 240 of the schedule).
//   Out:  16 bytes stored to (%rsi).
//   Clobbers: %eax, %rdx, flags; xmm0-xmm2 are zeroed before returning.
.globl	_aes_hw_encrypt
.private_extern	_aes_hw_encrypt

.p2align	4
_aes_hw_encrypt:

_CET_ENDBR
#ifdef BORINGSSL_DISPATCH_TEST

	movb	$1,_BORINGSSL_function_hit+1(%rip)
#endif
	movups	(%rdi),%xmm2
	movl	240(%rdx),%eax
	movups	(%rdx),%xmm0
	movups	16(%rdx),%xmm1
	leaq	32(%rdx),%rdx
	// Whitening: block ^= round key 0.
	xorps	%xmm0,%xmm2
L$oop_enc1_1:
.byte	102,15,56,220,209
	decl	%eax
	movups	(%rdx),%xmm1
	leaq	16(%rdx),%rdx
	jnz	L$oop_enc1_1
.byte	102,15,56,221,209
	// Scrub key material and the result from the vector registers.
	pxor	%xmm0,%xmm0
	pxor	%xmm1,%xmm1
	movups	%xmm2,(%rsi)
	pxor	%xmm2,%xmm2
	ret


// _aesni_encrypt2: encrypt the two blocks held in xmm2 and xmm3.
//   In:   %rcx = key schedule, %eax = loop count (scaled by 16 below to form
//         a byte offset).
//   Out:  results left in xmm2, xmm3.
//   Clobbers: %rax, %rcx, xmm0, xmm1, flags.
// The loop walks a negative offset in %rax up towards zero, so the addq that
// advances it also produces the loop-termination flag.

.p2align	4
_aesni_encrypt2:

	movups	(%rcx),%xmm0
	shll	$4,%eax
	movups	16(%rcx),%xmm1
	xorps	%xmm0,%xmm2
	xorps	%xmm0,%xmm3
	movups	32(%rcx),%xmm0
	leaq	32(%rcx,%rax,1),%rcx
	negq	%rax
	addq	$16,%rax

L$enc_loop2:
.byte	102,15,56,220,209
.byte	102,15,56,220,217
	movups	(%rcx,%rax,1),%xmm1
	addq	$32,%rax
.byte	102,15,56,220,208
.byte	102,15,56,220,216
	movups	-16(%rcx,%rax,1),%xmm0
	jnz	L$enc_loop2

.byte	102,15,56,220,209
.byte	102,15,56,220,217
.byte	102,15,56,221,208
.byte	102,15,56,221,216
	ret


// _aesni_encrypt3: as _aesni_encrypt2, for the three blocks in xmm2-xmm4.

.p2align	4
_aesni_encrypt3:

	movups	(%rcx),%xmm0
	shll	$4,%eax
	movups	16(%rcx),%xmm1
	xorps	%xmm0,%xmm2
	xorps	%xmm0,%xmm3
	xorps	%xmm0,%xmm4
	movups	32(%rcx),%xmm0
	leaq	32(%rcx,%rax,1),%rcx
	negq	%rax
	addq	$16,%rax

L$enc_loop3:
.byte	102,15,56,220,209
.byte	102,15,56,220,217
.byte	102,15,56,220,225
	movups	(%rcx,%rax,1),%xmm1
	addq	$32,%rax
.byte	102,15,56,220,208
.byte	102,15,56,220,216
.byte	102,15,56,220,224
	movups	-16(%rcx,%rax,1),%xmm0
	jnz	L$enc_loop3

.byte	102,15,56,220,209
.byte	102,15,56,220,217
.byte	102,15,56,220,225
.byte	102,15,56,221,208
.byte	102,15,56,221,216
.byte	102,15,56,221,224
	ret


// _aesni_encrypt4: as _aesni_encrypt2, for the four blocks in xmm2-xmm5.

.p2align	4
_aesni_encrypt4:

	movups	(%rcx),%xmm0
	shll	$4,%eax
	movups	16(%rcx),%xmm1
	xorps	%xmm0,%xmm2
	xorps	%xmm0,%xmm3
	xorps	%xmm0,%xmm4
	xorps	%xmm0,%xmm5
	movups	32(%rcx),%xmm0
	leaq	32(%rcx,%rax,1),%rcx
	negq	%rax
.byte	0x0f,0x1f,0x00
	addq	$16,%rax

L$enc_loop4:
.byte	102,15,56,220,209
.byte	102,15,56,220,217
.byte	102,15,56,220,225
.byte	102,15,56,220,233
	movups	(%rcx,%rax,1),%xmm1
	addq	$32,%rax
.byte	102,15,56,220,208
.byte	102,15,56,220,216
.byte	102,15,56,220,224
.byte	102,15,56,220,232
	movups	-16(%rcx,%rax,1),%xmm0
	jnz	L$enc_loop4

.byte	102,15,56,220,209
.byte	102,15,56,220,217
.byte	102,15,56,220,225
.byte	102,15,56,220,233
.byte	102,15,56,221,208
.byte	102,15,56,221,216
.byte	102,15,56,221,224
.byte	102,15,56,221,232
	ret


// _aesni_encrypt6: as _aesni_encrypt2, for the six blocks in xmm2-xmm7.
// The whitening XORs are interleaved with the first round. The label
// L$enc_loop6 is also used as a direct call target by the counter-mode
// routine below, which performs its own whitening and first round.

.p2align	4
_aesni_encrypt6:

	movups	(%rcx),%xmm0
	shll	$4,%eax
	movups	16(%rcx),%xmm1
	xorps	%xmm0,%xmm2
	pxor	%xmm0,%xmm3
	pxor	%xmm0,%xmm4
.byte	102,15,56,220,209
	leaq	32(%rcx,%rax,1),%rcx
	negq	%rax
.byte	102,15,56,220,217
	pxor	%xmm0,%xmm5
	pxor	%xmm0,%xmm6
.byte	102,15,56,220,225
	pxor	%xmm0,%xmm7
	movups	(%rcx,%rax,1),%xmm0
	addq	$16,%rax
	jmp	L$enc_loop6_enter
.p2align	4
L$enc_loop6:
.byte	102,15,56,220,209
.byte	102,15,56,220,217
.byte	102,15,56,220,225
L$enc_loop6_enter:
.byte	102,15,56,220,233
.byte	102,15,56,220,241
.byte	102,15,56,220,249
	movups	(%rcx,%rax,1),%xmm1
	addq	$32,%rax
.byte	102,15,56,220,208
.byte	102,15,56,220,216
.byte	102,15,56,220,224
.byte	102,15,56,220,232
.byte	102,15,56,220,240
.byte	102,15,56,220,248
	movups	-16(%rcx,%rax,1),%xmm0
	jnz	L$enc_loop6

.byte	102,15,56,220,209
.byte	102,15,56,220,217
.byte	102,15,56,220,225
.byte	102,15,56,220,233
.byte	102,15,56,220,241
.byte	102,15,56,220,249
.byte	102,15,56,221,208
.byte	102,15,56,221,216
.byte	102,15,56,221,224
.byte	102,15,56,221,232
.byte	102,15,56,221,240
.byte	102,15,56,221,248
	ret


// _aesni_encrypt8: as _aesni_encrypt2, for the eight blocks in xmm2-xmm9.
// The label L$enc_loop8_enter is also used as a direct call target by the
// tail of the counter-mode routine below.

.p2align	4
_aesni_encrypt8:

	movups	(%rcx),%xmm0
	shll	$4,%eax
	movups	16(%rcx),%xmm1
	xorps	%xmm0,%xmm2
	xorps	%xmm0,%xmm3
	pxor	%xmm0,%xmm4
	pxor	%xmm0,%xmm5
	pxor	%xmm0,%xmm6
	leaq	32(%rcx,%rax,1),%rcx
	negq	%rax
.byte	102,15,56,220,209
	pxor	%xmm0,%xmm7
	pxor	%xmm0,%xmm8
.byte	102,15,56,220,217
	pxor	%xmm0,%xmm9
	movups	(%rcx,%rax,1),%xmm0
	addq	$16,%rax
	jmp	L$enc_loop8_inner
.p2align	4
L$enc_loop8:
.byte	102,15,56,220,209
.byte	102,15,56,220,217
L$enc_loop8_inner:
.byte	102,15,56,220,225
.byte	102,15,56,220,233
.byte	102,15,56,220,241
.byte	102,15,56,220,249
.byte	102,68,15,56,220,193
.byte	102,68,15,56,220,201
L$enc_loop8_enter:
	movups	(%rcx,%rax,1),%xmm1
	addq	$32,%rax
.byte	102,15,56,220,208
.byte	102,15,56,220,216
.byte	102,15,56,220,224
.byte	102,15,56,220,232
.byte	102,15,56,220,240
.byte	102,15,56,220,248
.byte	102,68,15,56,220,192
.byte	102,68,15,56,220,200
	movups	-16(%rcx,%rax,1),%xmm0
	jnz	L$enc_loop8

.byte	102,15,56,220,209
.byte	102,15,56,220,217
.byte	102,15,56,220,225
.byte	102,15,56,220,233
.byte	102,15,56,220,241
.byte	102,15,56,220,249
.byte	102,68,15,56,220,193
.byte	102,68,15,56,220,201
.byte	102,15,56,221,208
.byte	102,15,56,221,216
.byte	102,15,56,221,224
.byte	102,15,56,221,232
.byte	102,15,56,221,240
.byte	102,15,56,221,248
.byte	102,68,15,56,221,192
.byte	102,68,15,56,221,200
	ret


// _aes_hw_ctr32_encrypt_blocks: counter-mode encryption of whole blocks.
//   In:   %rdi = input, %rsi = output, %rdx = number of 16-byte blocks,
//         %rcx = key schedule (loop count at offset 240),
//         %r8  = 16-byte counter block; its last 32-bit word is byte-swapped
//                and incremented per block.
//   Stack: the bulk path saves %rbp, reserves 128 bytes aligned to 16 for
//          eight pre-whitened counter blocks, and restores %rsp from %r11.
//   The scratch area and xmm0-xmm15 are zeroed before the bulk path returns.
// NOTE(review): the single-block path falls into the decl/jnz loop only after
// comparing %rdx with 1, so a count of 0 takes the bulk path; callers are
// presumably expected to pass a non-zero count - confirm against the caller.
.globl	_aes_hw_ctr32_encrypt_blocks
.private_extern	_aes_hw_ctr32_encrypt_blocks

.p2align	4
_aes_hw_ctr32_encrypt_blocks:

_CET_ENDBR
#ifdef BORINGSSL_DISPATCH_TEST
	movb	$1,_BORINGSSL_function_hit(%rip)
#endif
	cmpq	$1,%rdx
	jne	L$ctr32_bulk

	// Exactly one block: encrypt the counter and XOR it into the input.
	movups	(%r8),%xmm2
	movups	(%rdi),%xmm3
	movl	240(%rcx),%edx
	movups	(%rcx),%xmm0
	movups	16(%rcx),%xmm1
	leaq	32(%rcx),%rcx
	xorps	%xmm0,%xmm2
L$oop_enc1_2:
.byte	102,15,56,220,209
	decl	%edx
	movups	(%rcx),%xmm1
	leaq	16(%rcx),%rcx
	jnz	L$oop_enc1_2
.byte	102,15,56,221,209
	pxor	%xmm0,%xmm0
	pxor	%xmm1,%xmm1
	xorps	%xmm3,%xmm2
	pxor	%xmm3,%xmm3
	movups	%xmm2,(%rsi)
	xorps	%xmm2,%xmm2
	jmp	L$ctr32_epilogue

.p2align	4
L$ctr32_bulk:
	// %r11 keeps the entry %rsp so the epilogue can undo the alignment.
	leaq	(%rsp),%r11

	pushq	%rbp

	subq	$128,%rsp
	andq	$-16,%rsp

	// Build eight counter blocks, each already XORed with round key 0.
	// %ebp holds the last word of round key 0 so that only the counter
	// word of each stack slot needs rewriting per iteration.
	movdqu	(%r8),%xmm2
	movdqu	(%rcx),%xmm0
	movl	12(%r8),%r8d
	pxor	%xmm0,%xmm2
	movl	12(%rcx),%ebp
	movdqa	%xmm2,0(%rsp)
	bswapl	%r8d
	movdqa	%xmm2,%xmm3
	movdqa	%xmm2,%xmm4
	movdqa	%xmm2,%xmm5
	movdqa	%xmm2,64(%rsp)
	movdqa	%xmm2,80(%rsp)
	movdqa	%xmm2,96(%rsp)
	movq	%rdx,%r10
	movdqa	%xmm2,112(%rsp)

	leaq	1(%r8),%rax
	leaq	2(%r8),%rdx
	bswapl	%eax
	bswapl	%edx
	xorl	%ebp,%eax
	xorl	%ebp,%edx
.byte	102,15,58,34,216,3
	leaq	3(%r8),%rax
	movdqa	%xmm3,16(%rsp)
.byte	102,15,58,34,226,3
	bswapl	%eax
	movq	%r10,%rdx
	leaq	4(%r8),%r10
	movdqa	%xmm4,32(%rsp)
	xorl	%ebp,%eax
	bswapl	%r10d
.byte	102,15,58,34,232,3
	xorl	%ebp,%r10d
	movdqa	%xmm5,48(%rsp)
	leaq	5(%r8),%r9
	movl	%r10d,64+12(%rsp)
	bswapl	%r9d
	leaq	6(%r8),%r10
	movl	240(%rcx),%eax
	xorl	%ebp,%r9d
	bswapl	%r10d
	movl	%r9d,80+12(%rsp)
	xorl	%ebp,%r10d
	leaq	7(%r8),%r9
	movl	%r10d,96+12(%rsp)
	bswapl	%r9d
	// Capability word 1, masked to bits 22 and 26 (0x4400000). The 6x path
	// below is taken when only bit 22 is set; that path uses movbe.
	// NOTE(review): presumably MOVBE without XSAVE - confirm against the
	// capability vector layout.
	leaq	_OPENSSL_ia32cap_P(%rip),%r10
	movl	4(%r10),%r10d
	xorl	%ebp,%r9d
	andl	$71303168,%r10d
	movl	%r9d,112+12(%rsp)

	movups	16(%rcx),%xmm1

	movdqa	64(%rsp),%xmm6
	movdqa	80(%rsp),%xmm7

	cmpq	$8,%rdx
	jb	L$ctr32_tail

	subq	$6,%rdx
	cmpl	$4194304,%r10d
	je	L$ctr32_6x

	leaq	128(%rcx),%rcx
	subq	$2,%rdx
	jmp	L$ctr32_loop8

.p2align	4
L$ctr32_6x:
	shll	$4,%eax
	movl	$48,%r10d
	// movbe stores byte-swapped, so keep the key word pre-swapped too.
	bswapl	%ebp
	leaq	32(%rcx,%rax,1),%rcx
	subq	%rax,%r10
	jmp	L$ctr32_loop6

	// Six blocks per iteration; the next six counter words are written to
	// the stack while the first rounds of the current six execute.
.p2align	4
L$ctr32_loop6:
	addl	$6,%r8d
	movups	-48(%rcx,%r10,1),%xmm0
.byte	102,15,56,220,209
	movl	%r8d,%eax
	xorl	%ebp,%eax
.byte	102,15,56,220,217
.byte	0x0f,0x38,0xf1,0x44,0x24,12
	leal	1(%r8),%eax
.byte	102,15,56,220,225
	xorl	%ebp,%eax
.byte	0x0f,0x38,0xf1,0x44,0x24,28
.byte	102,15,56,220,233
	leal	2(%r8),%eax
	xorl	%ebp,%eax
.byte	102,15,56,220,241
.byte	0x0f,0x38,0xf1,0x44,0x24,44
	leal	3(%r8),%eax
.byte	102,15,56,220,249
	movups	-32(%rcx,%r10,1),%xmm1
	xorl	%ebp,%eax

.byte	102,15,56,220,208
.byte	0x0f,0x38,0xf1,0x44,0x24,60
	leal	4(%r8),%eax
.byte	102,15,56,220,216
	xorl	%ebp,%eax
.byte	0x0f,0x38,0xf1,0x44,0x24,76
.byte	102,15,56,220,224
	leal	5(%r8),%eax
	xorl	%ebp,%eax
.byte	102,15,56,220,232
.byte	0x0f,0x38,0xf1,0x44,0x24,92
	movq	%r10,%rax
.byte	102,15,56,220,240
.byte	102,15,56,220,248
	movups	-16(%rcx,%r10,1),%xmm0

	// Finish the remaining rounds inside _aesni_encrypt6.
	call	L$enc_loop6

	movdqu	(%rdi),%xmm8
	movdqu	16(%rdi),%xmm9
	movdqu	32(%rdi),%xmm10
	movdqu	48(%rdi),%xmm11
	movdqu	64(%rdi),%xmm12
	movdqu	80(%rdi),%xmm13
	leaq	96(%rdi),%rdi
	movups	-64(%rcx,%r10,1),%xmm1
	pxor	%xmm2,%xmm8
	movaps	0(%rsp),%xmm2
	pxor	%xmm3,%xmm9
	movaps	16(%rsp),%xmm3
	pxor	%xmm4,%xmm10
	movaps	32(%rsp),%xmm4
	pxor	%xmm5,%xmm11
	movaps	48(%rsp),%xmm5
	pxor	%xmm6,%xmm12
	movaps	64(%rsp),%xmm6
	pxor	%xmm7,%xmm13
	movaps	80(%rsp),%xmm7
	movdqu	%xmm8,(%rsi)
	movdqu	%xmm9,16(%rsi)
	movdqu	%xmm10,32(%rsi)
	movdqu	%xmm11,48(%rsi)
	movdqu	%xmm12,64(%rsi)
	movdqu	%xmm13,80(%rsi)
	leaq	96(%rsi),%rsi

	subq	$6,%rdx
	jnc	L$ctr32_loop6

	addq	$6,%rdx
	jz	L$ctr32_done

	// Recover the loop count and key pointer in the form the tail expects.
	leal	-48(%r10),%eax
	leaq	-80(%rcx,%r10,1),%rcx
	negl	%eax
	shrl	$4,%eax
	jmp	L$ctr32_tail

	// Eight blocks per iteration, rounds fully unrolled. %rcx is biased by
	// +128 so every round key is reachable with a short displacement.
.p2align	5
L$ctr32_loop8:
	addl	$8,%r8d
	movdqa	96(%rsp),%xmm8
.byte	102,15,56,220,209
	movl	%r8d,%r9d
	movdqa	112(%rsp),%xmm9
.byte	102,15,56,220,217
	bswapl	%r9d
	movups	32-128(%rcx),%xmm0
.byte	102,15,56,220,225
	xorl	%ebp,%r9d
	nop
.byte	102,15,56,220,233
	movl	%r9d,0+12(%rsp)
	leaq	1(%r8),%r9
.byte	102,15,56,220,241
.byte	102,15,56,220,249
.byte	102,68,15,56,220,193
.byte	102,68,15,56,220,201
	movups	48-128(%rcx),%xmm1
	bswapl	%r9d
.byte	102,15,56,220,208
.byte	102,15,56,220,216
	xorl	%ebp,%r9d
.byte	0x66,0x90
.byte	102,15,56,220,224
.byte	102,15,56,220,232
	movl	%r9d,16+12(%rsp)
	leaq	2(%r8),%r9
.byte	102,15,56,220,240
.byte	102,15,56,220,248
.byte	102,68,15,56,220,192
.byte	102,68,15,56,220,200
	movups	64-128(%rcx),%xmm0
	bswapl	%r9d
.byte	102,15,56,220,209
.byte	102,15,56,220,217
	xorl	%ebp,%r9d
.byte	0x66,0x90
.byte	102,15,56,220,225
.byte	102,15,56,220,233
	movl	%r9d,32+12(%rsp)
	leaq	3(%r8),%r9
.byte	102,15,56,220,241
.byte	102,15,56,220,249
.byte	102,68,15,56,220,193
.byte	102,68,15,56,220,201
	movups	80-128(%rcx),%xmm1
	bswapl	%r9d
.byte	102,15,56,220,208
.byte	102,15,56,220,216
	xorl	%ebp,%r9d
.byte	0x66,0x90
.byte	102,15,56,220,224
.byte	102,15,56,220,232
	movl	%r9d,48+12(%rsp)
	leaq	4(%r8),%r9
.byte	102,15,56,220,240
.byte	102,15,56,220,248
.byte	102,68,15,56,220,192
.byte	102,68,15,56,220,200
	movups	96-128(%rcx),%xmm0
	bswapl	%r9d
.byte	102,15,56,220,209
.byte	102,15,56,220,217
	xorl	%ebp,%r9d
.byte	0x66,0x90
.byte	102,15,56,220,225
.byte	102,15,56,220,233
	movl	%r9d,64+12(%rsp)
	leaq	5(%r8),%r9
.byte	102,15,56,220,241
.byte	102,15,56,220,249
.byte	102,68,15,56,220,193
.byte	102,68,15,56,220,201
	movups	112-128(%rcx),%xmm1
	bswapl	%r9d
.byte	102,15,56,220,208
.byte	102,15,56,220,216
	xorl	%ebp,%r9d
.byte	0x66,0x90
.byte	102,15,56,220,224
.byte	102,15,56,220,232
	movl	%r9d,80+12(%rsp)
	leaq	6(%r8),%r9
.byte	102,15,56,220,240
.byte	102,15,56,220,248
.byte	102,68,15,56,220,192
.byte	102,68,15,56,220,200
	movups	128-128(%rcx),%xmm0
	bswapl	%r9d
.byte	102,15,56,220,209
.byte	102,15,56,220,217
	xorl	%ebp,%r9d
.byte	0x66,0x90
.byte	102,15,56,220,225
.byte	102,15,56,220,233
	movl	%r9d,96+12(%rsp)
	leaq	7(%r8),%r9
.byte	102,15,56,220,241
.byte	102,15,56,220,249
.byte	102,68,15,56,220,193
.byte	102,68,15,56,220,201
	movups	144-128(%rcx),%xmm1
	bswapl	%r9d
.byte	102,15,56,220,208
.byte	102,15,56,220,216
.byte	102,15,56,220,224
	xorl	%ebp,%r9d
	movdqu	0(%rdi),%xmm10
.byte	102,15,56,220,232
	movl	%r9d,112+12(%rsp)
	// Flags from this compare are consumed by the jb further down; none of
	// the instructions in between modify them.
	cmpl	$11,%eax
.byte	102,15,56,220,240
.byte	102,15,56,220,248
.byte	102,68,15,56,220,192
.byte	102,68,15,56,220,200
	movups	160-128(%rcx),%xmm0

	// Loop count below 11: the schedule ends here; otherwise run four more
	// rounds first.
	jb	L$ctr32_enc_done

.byte	102,15,56,220,209
.byte	102,15,56,220,217
.byte	102,15,56,220,225
.byte	102,15,56,220,233
.byte	102,15,56,220,241
.byte	102,15,56,220,249
.byte	102,68,15,56,220,193
.byte	102,68,15,56,220,201
	movups	176-128(%rcx),%xmm1

.byte	102,15,56,220,208
.byte	102,15,56,220,216
.byte	102,15,56,220,224
.byte	102,15,56,220,232
.byte	102,15,56,220,240
.byte	102,15,56,220,248
.byte	102,68,15,56,220,192
.byte	102,68,15,56,220,200
	movups	192-128(%rcx),%xmm0

.byte	102,15,56,220,209
.byte	102,15,56,220,217
.byte	102,15,56,220,225
.byte	102,15,56,220,233
.byte	102,15,56,220,241
.byte	102,15,56,220,249
.byte	102,68,15,56,220,193
.byte	102,68,15,56,220,201
	movups	208-128(%rcx),%xmm1

.byte	102,15,56,220,208
.byte	102,15,56,220,216
.byte	102,15,56,220,224
.byte	102,15,56,220,232
.byte	102,15,56,220,240
.byte	102,15,56,220,248
.byte	102,68,15,56,220,192
.byte	102,68,15,56,220,200
	movups	224-128(%rcx),%xmm0
	jmp	L$ctr32_enc_done

	// The input blocks are XORed with the last round key (xmm0) first, so
	// that aesenclast with that value as its key operand yields the final
	// output directly.
.p2align	4
L$ctr32_enc_done:
	movdqu	16(%rdi),%xmm11
	pxor	%xmm0,%xmm10
	movdqu	32(%rdi),%xmm12
	pxor	%xmm0,%xmm11
	movdqu	48(%rdi),%xmm13
	pxor	%xmm0,%xmm12
	movdqu	64(%rdi),%xmm14
	pxor	%xmm0,%xmm13
	movdqu	80(%rdi),%xmm15
	pxor	%xmm0,%xmm14
	prefetcht0	448(%rdi)
	prefetcht0	512(%rdi)
	pxor	%xmm0,%xmm15
.byte	102,15,56,220,209
.byte	102,15,56,220,217
.byte	102,15,56,220,225
.byte	102,15,56,220,233
.byte	102,15,56,220,241
.byte	102,15,56,220,249
.byte	102,68,15,56,220,193
.byte	102,68,15,56,220,201
	movdqu	96(%rdi),%xmm1
	leaq	128(%rdi),%rdi

.byte	102,65,15,56,221,210
	pxor	%xmm0,%xmm1
	movdqu	112-128(%rdi),%xmm10
.byte	102,65,15,56,221,219
	pxor	%xmm0,%xmm10
	movdqa	0(%rsp),%xmm11
.byte	102,65,15,56,221,228
.byte	102,65,15,56,221,237
	movdqa	16(%rsp),%xmm12
	movdqa	32(%rsp),%xmm13
.byte	102,65,15,56,221,246
.byte	102,65,15,56,221,255
	movdqa	48(%rsp),%xmm14
	movdqa	64(%rsp),%xmm15
.byte	102,68,15,56,221,193
	movdqa	80(%rsp),%xmm0
	movups	16-128(%rcx),%xmm1
.byte	102,69,15,56,221,202

	// Store eight results and reload the next counter blocks.
	movups	%xmm2,(%rsi)
	movdqa	%xmm11,%xmm2
	movups	%xmm3,16(%rsi)
	movdqa	%xmm12,%xmm3
	movups	%xmm4,32(%rsi)
	movdqa	%xmm13,%xmm4
	movups	%xmm5,48(%rsi)
	movdqa	%xmm14,%xmm5
	movups	%xmm6,64(%rsi)
	movdqa	%xmm15,%xmm6
	movups	%xmm7,80(%rsi)
	movdqa	%xmm0,%xmm7
	movups	%xmm8,96(%rsi)
	movups	%xmm9,112(%rsi)
	leaq	128(%rsi),%rsi

	subq	$8,%rdx
	jnc	L$ctr32_loop8

	addq	$8,%rdx
	jz	L$ctr32_done
	// Remove the +128 bias before entering the tail.
	leaq	-128(%rcx),%rcx

	// Tail: 1-7 blocks remain in %rdx. Fewer than 4 -> loop3, exactly 4 ->
	// loop4, otherwise run the 8-wide core and store only what is needed.
L$ctr32_tail:

	leaq	16(%rcx),%rcx
	cmpq	$4,%rdx
	jb	L$ctr32_loop3
	je	L$ctr32_loop4

	shll	$4,%eax
	movdqa	96(%rsp),%xmm8
	pxor	%xmm9,%xmm9

	movups	16(%rcx),%xmm0
.byte	102,15,56,220,209
.byte	102,15,56,220,217
	leaq	32-16(%rcx,%rax,1),%rcx
	negq	%rax
.byte	102,15,56,220,225
	addq	$16,%rax
	movups	(%rdi),%xmm10
.byte	102,15,56,220,233
.byte	102,15,56,220,241
	movups	16(%rdi),%xmm11
	movups	32(%rdi),%xmm12
.byte	102,15,56,220,249
.byte	102,68,15,56,220,193

	call	L$enc_loop8_enter

	movdqu	48(%rdi),%xmm13
	pxor	%xmm10,%xmm2
	movdqu	64(%rdi),%xmm10
	pxor	%xmm11,%xmm3
	movdqu	%xmm2,(%rsi)
	pxor	%xmm12,%xmm4
	movdqu	%xmm3,16(%rsi)
	pxor	%xmm13,%xmm5
	movdqu	%xmm4,32(%rsi)
	pxor	%xmm10,%xmm6
	movdqu	%xmm5,48(%rsi)
	movdqu	%xmm6,64(%rsi)
	// Flags from this compare also drive the je below; the SSE moves and
	// XORs in between leave them untouched.
	cmpq	$6,%rdx
	jb	L$ctr32_done

	movups	80(%rdi),%xmm11
	xorps	%xmm11,%xmm7
	movups	%xmm7,80(%rsi)
	je	L$ctr32_done

	movups	96(%rdi),%xmm12
	xorps	%xmm12,%xmm8
	movups	%xmm8,96(%rsi)
	jmp	L$ctr32_done

.p2align	5
L$ctr32_loop4:
.byte	102,15,56,220,209
	leaq	16(%rcx),%rcx
	decl	%eax
.byte	102,15,56,220,217
.byte	102,15,56,220,225
.byte	102,15,56,220,233
	movups	(%rcx),%xmm1
	jnz	L$ctr32_loop4
.byte	102,15,56,221,209
.byte	102,15,56,221,217
	movups	(%rdi),%xmm10
	movups	16(%rdi),%xmm11
.byte	102,15,56,221,225
.byte	102,15,56,221,233
	movups	32(%rdi),%xmm12
	movups	48(%rdi),%xmm13

	xorps	%xmm10,%xmm2
	movups	%xmm2,(%rsi)
	xorps	%xmm11,%xmm3
	movups	%xmm3,16(%rsi)
	pxor	%xmm12,%xmm4
	movdqu	%xmm4,32(%rsi)
	pxor	%xmm13,%xmm5
	movdqu	%xmm5,48(%rsi)
	jmp	L$ctr32_done

.p2align	5
L$ctr32_loop3:
.byte	102,15,56,220,209
	leaq	16(%rcx),%rcx
	decl	%eax
.byte	102,15,56,220,217
.byte	102,15,56,220,225
	movups	(%rcx),%xmm1
	jnz	L$ctr32_loop3
.byte	102,15,56,221,209
.byte	102,15,56,221,217
.byte	102,15,56,221,225

	movups	(%rdi),%xmm10
	xorps	%xmm10,%xmm2
	movups	%xmm2,(%rsi)
	cmpq	$2,%rdx
	jb	L$ctr32_done

	movups	16(%rdi),%xmm11
	xorps	%xmm11,%xmm3
	movups	%xmm3,16(%rsi)
	je	L$ctr32_done

	movups	32(%rdi),%xmm12
	xorps	%xmm12,%xmm4
	movups	%xmm4,32(%rsi)

	// Scrub registers and the 128-byte stack scratch, then unwind.
L$ctr32_done:
	xorps	%xmm0,%xmm0
	xorl	%ebp,%ebp
	pxor	%xmm1,%xmm1
	pxor	%xmm2,%xmm2
	pxor	%xmm3,%xmm3
	pxor	%xmm4,%xmm4
	pxor	%xmm5,%xmm5
	pxor	%xmm6,%xmm6
	pxor	%xmm7,%xmm7
	movaps	%xmm0,0(%rsp)
	pxor	%xmm8,%xmm8
	movaps	%xmm0,16(%rsp)
	pxor	%xmm9,%xmm9
	movaps	%xmm0,32(%rsp)
	pxor	%xmm10,%xmm10
	movaps	%xmm0,48(%rsp)
	pxor	%xmm11,%xmm11
	movaps	%xmm0,64(%rsp)
	pxor	%xmm12,%xmm12
	movaps	%xmm0,80(%rsp)
	pxor	%xmm13,%xmm13
	movaps	%xmm0,96(%rsp)
	pxor	%xmm14,%xmm14
	movaps	%xmm0,112(%rsp)
	pxor	%xmm15,%xmm15
	// %rbp was pushed just below the entry %rsp saved in %r11.
	movq	-8(%r11),%rbp

	leaq	(%r11),%rsp

L$ctr32_epilogue:
	ret


// _aes_hw_set_encrypt_key: expand a 128- or 256-bit key.
//   In:   %rdi = key bytes, %esi = key size in bits, %rdx = key schedule out.
//   Out:  %eax = 0 on success, -1 if %rdi or %rdx is null, -2 if %esi is
//         neither 128 nor 256. On success the value 9 (128-bit) or 13
//         (256-bit) is stored at offset 240 of the schedule.
//   Clobbers: %rax, %esi, %r10, flags; xmm0-xmm5 are zeroed on every exit.
// Two expansion strategies exist per key size: one built on aeskeygenassist
// and the L$key_expansion_* helpers, and an "alt" one built on pshufb plus
// aesenclast, selected from bits 11 and 28 of capability word 1 (alt is used
// when only bit 28 is set).
// NOTE(review): presumably bit 28 is AVX and bit 11 is XOP - confirm against
// the capability vector layout.
.globl	_aes_hw_set_encrypt_key
.private_extern	_aes_hw_set_encrypt_key

.p2align	4
_aes_hw_set_encrypt_key:
__aesni_set_encrypt_key:

_CET_ENDBR
#ifdef BORINGSSL_DISPATCH_TEST
	movb	$1,_BORINGSSL_function_hit+3(%rip)
#endif
	// subq $8,%rsp: keeps %rsp 16-byte aligned at the helper calls below.
.byte	0x48,0x83,0xEC,0x08

	movq	$-1,%rax
	testq	%rdi,%rdi
	jz	L$enc_key_ret
	testq	%rdx,%rdx
	jz	L$enc_key_ret

	movups	(%rdi),%xmm0
	xorps	%xmm4,%xmm4
	leaq	_OPENSSL_ia32cap_P(%rip),%r10
	movl	4(%r10),%r10d
	andl	$268437504,%r10d
	leaq	16(%rdx),%rax
	cmpl	$256,%esi
	je	L$14rounds

	cmpl	$128,%esi
	jne	L$bad_keybits

L$10rounds:
	movl	$9,%esi
	cmpl	$268435456,%r10d
	je	L$10rounds_alt

	movups	%xmm0,(%rdx)
.byte	102,15,58,223,200,1
	call	L$key_expansion_128_cold
.byte	102,15,58,223,200,2
	call	L$key_expansion_128
.byte	102,15,58,223,200,4
	call	L$key_expansion_128
.byte	102,15,58,223,200,8
	call	L$key_expansion_128
.byte	102,15,58,223,200,16
	call	L$key_expansion_128
.byte	102,15,58,223,200,32
	call	L$key_expansion_128
.byte	102,15,58,223,200,64
	call	L$key_expansion_128
.byte	102,15,58,223,200,128
	call	L$key_expansion_128
.byte	102,15,58,223,200,27
	call	L$key_expansion_128
.byte	102,15,58,223,200,54
	call	L$key_expansion_128
	movups	%xmm0,(%rax)
	// %rax is schedule+160 here, so this lands at schedule+240.
	movl	%esi,80(%rax)
	xorl	%eax,%eax
	jmp	L$enc_key_ret

.p2align	4
L$10rounds_alt:
	movdqa	L$key_rotate(%rip),%xmm5
	movl	$8,%r10d
	movdqa	L$key_rcon1(%rip),%xmm4
	movdqa	%xmm0,%xmm2
	movdqu	%xmm0,(%rdx)
	jmp	L$oop_key128

.p2align	4
L$oop_key128:
.byte	102,15,56,0,197
.byte	102,15,56,221,196
	// Double the round constant in every lane for the next iteration.
	pslld	$1,%xmm4
	leaq	16(%rax),%rax

	movdqa	%xmm2,%xmm3
	pslldq	$4,%xmm2
	pxor	%xmm2,%xmm3
	pslldq	$4,%xmm2
	pxor	%xmm2,%xmm3
	pslldq	$4,%xmm2
	pxor	%xmm3,%xmm2

	pxor	%xmm2,%xmm0
	movdqu	%xmm0,-16(%rax)
	movdqa	%xmm0,%xmm2

	decl	%r10d
	jnz	L$oop_key128

	// Last two round keys use the constants 0x1b and 0x36 (0x1b doubled).
	movdqa	L$key_rcon1b(%rip),%xmm4

.byte	102,15,56,0,197
.byte	102,15,56,221,196
	pslld	$1,%xmm4

	movdqa	%xmm2,%xmm3
	pslldq	$4,%xmm2
	pxor	%xmm2,%xmm3
	pslldq	$4,%xmm2
	pxor	%xmm2,%xmm3
	pslldq	$4,%xmm2
	pxor	%xmm3,%xmm2

	pxor	%xmm2,%xmm0
	movdqu	%xmm0,(%rax)

	movdqa	%xmm0,%xmm2
.byte	102,15,56,0,197
.byte	102,15,56,221,196

	movdqa	%xmm2,%xmm3
	pslldq	$4,%xmm2
	pxor	%xmm2,%xmm3
	pslldq	$4,%xmm2
	pxor	%xmm2,%xmm3
	pslldq	$4,%xmm2
	pxor	%xmm3,%xmm2

	pxor	%xmm2,%xmm0
	movdqu	%xmm0,16(%rax)

	// %rax is schedule+144 here, so this lands at schedule+240.
	movl	%esi,96(%rax)
	xorl	%eax,%eax
	jmp	L$enc_key_ret



.p2align	4
L$14rounds:
	movups	16(%rdi),%xmm2
	movl	$13,%esi
	leaq	16(%rax),%rax
	cmpl	$268435456,%r10d
	je	L$14rounds_alt

	movups	%xmm0,(%rdx)
	movups	%xmm2,16(%rdx)
.byte	102,15,58,223,202,1
	call	L$key_expansion_256a_cold
.byte	102,15,58,223,200,1
	call	L$key_expansion_256b
.byte	102,15,58,223,202,2
	call	L$key_expansion_256a
.byte	102,15,58,223,200,2
	call	L$key_expansion_256b
.byte	102,15,58,223,202,4
	call	L$key_expansion_256a
.byte	102,15,58,223,200,4
	call	L$key_expansion_256b
.byte	102,15,58,223,202,8
	call	L$key_expansion_256a
.byte	102,15,58,223,200,8
	call	L$key_expansion_256b
.byte	102,15,58,223,202,16
	call	L$key_expansion_256a
.byte	102,15,58,223,200,16
	call	L$key_expansion_256b
.byte	102,15,58,223,202,32
	call	L$key_expansion_256a
.byte	102,15,58,223,200,32
	call	L$key_expansion_256b
.byte	102,15,58,223,202,64
	call	L$key_expansion_256a
	movups	%xmm0,(%rax)
	// %rax is schedule+224 here, so this lands at schedule+240.
	movl	%esi,16(%rax)
	xorq	%rax,%rax
	jmp	L$enc_key_ret

.p2align	4
L$14rounds_alt:
	movdqa	L$key_rotate(%rip),%xmm5
	movdqa	L$key_rcon1(%rip),%xmm4
	movl	$7,%r10d
	movdqu	%xmm0,0(%rdx)
	movdqa	%xmm2,%xmm1
	movdqu	%xmm2,16(%rdx)
	jmp	L$oop_key256

	// Each full iteration emits two round keys; the seventh iteration
	// emits only the first and leaves through L$done_key256.
.p2align	4
L$oop_key256:
.byte	102,15,56,0,213
.byte	102,15,56,221,212

	movdqa	%xmm0,%xmm3
	pslldq	$4,%xmm0
	pxor	%xmm0,%xmm3
	pslldq	$4,%xmm0
	pxor	%xmm0,%xmm3
	pslldq	$4,%xmm0
	pxor	%xmm3,%xmm0
	pslld	$1,%xmm4

	pxor	%xmm2,%xmm0
	movdqu	%xmm0,(%rax)

	decl	%r10d
	jz	L$done_key256

	pshufd	$0xff,%xmm0,%xmm2
	pxor	%xmm3,%xmm3
.byte	102,15,56,221,211

	movdqa	%xmm1,%xmm3
	pslldq	$4,%xmm1
	pxor	%xmm1,%xmm3
	pslldq	$4,%xmm1
	pxor	%xmm1,%xmm3
	pslldq	$4,%xmm1
	pxor	%xmm3,%xmm1

	pxor	%xmm1,%xmm2
	movdqu	%xmm2,16(%rax)
	leaq	32(%rax),%rax
	movdqa	%xmm2,%xmm1

	jmp	L$oop_key256

L$done_key256:
	// %rax is schedule+224 here, so this lands at schedule+240.
	movl	%esi,16(%rax)
	xorl	%eax,%eax
	jmp	L$enc_key_ret

.p2align	4
L$bad_keybits:
	movq	$-2,%rax
L$enc_key_ret:
	// Common exit: scrub key-derived vector state, release the 8 bytes
	// reserved at entry.
	pxor	%xmm0,%xmm0
	pxor	%xmm1,%xmm1
	pxor	%xmm2,%xmm2
	pxor	%xmm3,%xmm3
	pxor	%xmm4,%xmm4
	pxor	%xmm5,%xmm5
	addq	$8,%rsp

	ret

L$SEH_end_set_encrypt_key:

// Key-expansion helpers, reached only by call from the key-setup routine
// above. Each expects the aeskeygenassist result in xmm1 and a scratch
// register in xmm4 (zeroed once by the caller), and advances %rax past the
// round key it stores. The *_cold entry points skip the store.

.p2align	4
L$key_expansion_128:
	movups	%xmm0,(%rax)
	leaq	16(%rax),%rax
L$key_expansion_128_cold:
	shufps	$16,%xmm0,%xmm4
	xorps	%xmm4,%xmm0
	shufps	$140,%xmm0,%xmm4
	xorps	%xmm4,%xmm0
	shufps	$255,%xmm1,%xmm1
	xorps	%xmm1,%xmm0
	ret

// NOTE(review): the three 192-bit entry points below are not referenced by
// any code visible in this file.
.p2align	4
L$key_expansion_192a:
	movups	%xmm0,(%rax)
	leaq	16(%rax),%rax
L$key_expansion_192a_cold:
	movaps	%xmm2,%xmm5
L$key_expansion_192b_warm:
	shufps	$16,%xmm0,%xmm4
	movdqa	%xmm2,%xmm3
	xorps	%xmm4,%xmm0
	shufps	$140,%xmm0,%xmm4
	pslldq	$4,%xmm3
	xorps	%xmm4,%xmm0
	pshufd	$85,%xmm1,%xmm1
	pxor	%xmm3,%xmm2
	pxor	%xmm1,%xmm0
	pshufd	$255,%xmm0,%xmm3
	pxor	%xmm3,%xmm2
	ret

.p2align	4
L$key_expansion_192b:
	movaps	%xmm0,%xmm3
	shufps	$68,%xmm0,%xmm5
	movups	%xmm5,(%rax)
	shufps	$78,%xmm2,%xmm3
	movups	%xmm3,16(%rax)
	leaq	32(%rax),%rax
	jmp	L$key_expansion_192b_warm

// 256a stores the previous odd round key (xmm2) and derives the next even
// one into xmm0; 256b stores xmm0 and derives the next odd one into xmm2.
.p2align	4
L$key_expansion_256a:
	movups	%xmm2,(%rax)
	leaq	16(%rax),%rax
L$key_expansion_256a_cold:
	shufps	$16,%xmm0,%xmm4
	xorps	%xmm4,%xmm0
	shufps	$140,%xmm0,%xmm4
	xorps	%xmm4,%xmm0
	shufps	$255,%xmm1,%xmm1
	xorps	%xmm1,%xmm0
	ret

.p2align	4
L$key_expansion_256b:
	movups	%xmm0,(%rax)
	leaq	16(%rax),%rax

	shufps	$16,%xmm2,%xmm4
	xorps	%xmm4,%xmm2
	shufps	$140,%xmm2,%xmm4
	xorps	%xmm4,%xmm2
	shufps	$170,%xmm1,%xmm1
	xorps	%xmm1,%xmm2
	ret


// Read-only constants. Only L$key_rotate, L$key_rcon1 and L$key_rcon1b are
// referenced by the code visible in this file; the 64-byte alignment keeps
// the movdqa loads of those tables legal.
.section	__DATA,__const
.p2align	6
L$bswap_mask:
.byte	15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0
L$increment32:
.long	6,6,6,0
L$increment64:
.long	1,0,0,0
L$increment1:
.byte	0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
L$key_rotate:
.long	0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d
L$key_rotate192:
.long	0x04070605,0x04070605,0x04070605,0x04070605
L$key_rcon1:
.long	1,1,1,1
L$key_rcon1b:
.long	0x1b,0x1b,0x1b,0x1b

// NUL-terminated ASCII: AES for Intel AES-NI, CRYPTOGAMS by <appro@openssl.org>
.byte	65,69,83,32,102,111,114,32,73,110,116,101,108,32,65,69,83,45,78,73,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
.p2align	6
.text
#endif