// This file is generated from a similarly-named Perl script in the BoringSSL
// source tree. Do not edit by hand.

#include <openssl/asm_base.h>

#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86) && defined(__APPLE__)
.text
// ---------------------------------------------------------------------------
// _ChaCha20_ctr32_nohw: scalar (general-purpose register) path.
//
// Five 32-bit stack arguments are read. After the four pushes and the
// 132-byte local frame below they are addressed as:
//   152(%esp)  arg0: pointer that the result bytes are stored through
//   156(%esp)  arg1: pointer that the source bytes are read through
//   160(%esp)  arg2: remaining byte count
//   arg3 (read once into %esi): 8 dwords are copied from it (presumably key)
//   arg4 (read once into %edi): 4 dwords are copied from it (presumably
//         block counter followed by nonce)
//
// Local frame:
//   0..60(%esp)     working 16-dword state (words not currently in registers)
//   80..108(%esp)   the 8 dwords copied from arg3
//   112..124(%esp)  the 4 dwords copied from arg4; dword 0 is advanced by one
//                   at the start of every 64-byte block
//   128(%esp)       round-loop counter
//
// %ebp, %ebx, %esi and %edi are saved on entry and restored before ret.
// NOTE(review): no check for a zero byte count is visible here; the tail loop
// decrements before testing, so callers presumably guarantee len > 0 - confirm.
// ---------------------------------------------------------------------------
.globl _ChaCha20_ctr32_nohw
.private_extern _ChaCha20_ctr32_nohw
.align 4
_ChaCha20_ctr32_nohw:
L_ChaCha20_ctr32_nohw_begin:
	pushl %ebp
	pushl %ebx
	pushl %esi
	pushl %edi
	// 16 bytes of pushes + return address: 32/36(%esp) are arg3/arg4.
	movl 32(%esp),%esi
	movl 36(%esp),%edi
	subl $132,%esp
	// Copy the 8 dwords at arg3 to 80..108(%esp).
	movl (%esi),%eax
	movl 4(%esi),%ebx
	movl 8(%esi),%ecx
	movl 12(%esi),%edx
	movl %eax,80(%esp)
	movl %ebx,84(%esp)
	movl %ecx,88(%esp)
	movl %edx,92(%esp)
	movl 16(%esi),%eax
	movl 20(%esi),%ebx
	movl 24(%esi),%ecx
	movl 28(%esi),%edx
	movl %eax,96(%esp)
	movl %ebx,100(%esp)
	movl %ecx,104(%esp)
	movl %edx,108(%esp)
	// Copy the 4 dwords at arg4 to 112..124(%esp). Dword 0 is stored minus
	// one because L000entry adds one before every block, including the first.
	movl (%edi),%eax
	movl 4(%edi),%ebx
	movl 8(%edi),%ecx
	movl 12(%edi),%edx
	subl $1,%eax
	movl %eax,112(%esp)
	movl %ebx,116(%esp)
	movl %ecx,120(%esp)
	movl %edx,124(%esp)
	jmp L000entry
.align 4,0x90
L001outer_loop:
	// Re-entered after each full block: write back the advanced source
	// pointer, destination pointer and remaining length.
	movl %ebx,156(%esp)
	movl %eax,152(%esp)
	movl %ecx,160(%esp)
L000entry:
	// Build the initial state for this block. The four immediates are
	// 0x61707865, 0x3320646e, 0x79622d32, 0x6b206574 ("expand 32-byte k").
	movl $1634760805,%eax
	movl $857760878,4(%esp)
	movl $2036477234,8(%esp)
	movl $1797285236,12(%esp)
	movl 84(%esp),%ebx
	movl 88(%esp),%ebp
	movl 104(%esp),%ecx
	movl 108(%esp),%esi
	movl 116(%esp),%edx
	movl 120(%esp),%edi
	movl %ebx,20(%esp)
	movl %ebp,24(%esp)
	movl %ecx,40(%esp)
	movl %esi,44(%esp)
	movl %edx,52(%esp)
	movl %edi,56(%esp)
	movl 92(%esp),%ebx
	movl 124(%esp),%edi
	movl 112(%esp),%edx
	movl 80(%esp),%ebp
	movl 96(%esp),%ecx
	movl 100(%esp),%esi
	// Advance the per-block counter dword and store it back.
	addl $1,%edx
	movl %ebx,28(%esp)
	movl %edi,60(%esp)
	movl %edx,112(%esp)
	// 10 passes through L002loop; each pass contains eight
	// add / xor / rotate-left (16, 12, 8, 7) sequences.
	movl $10,%ebx
	jmp L002loop
.align 4,0x90
L002loop:
	addl %ebp,%eax
	// Park the pass counter so %ebx can be used as a state word.
	movl %ebx,128(%esp)
	movl %ebp,%ebx
	xorl %eax,%edx
	roll $16,%edx
	addl %edx,%ecx
	xorl %ecx,%ebx
	movl 52(%esp),%edi
	roll $12,%ebx
	movl 20(%esp),%ebp
	addl %ebx,%eax
	xorl %eax,%edx
	movl %eax,(%esp)
	roll $8,%edx
	movl 4(%esp),%eax
	addl %edx,%ecx
	movl %edx,48(%esp)
	xorl %ecx,%ebx
	addl %ebp,%eax
	roll $7,%ebx
	xorl %eax,%edi
	movl %ecx,32(%esp)
	roll $16,%edi
	movl %ebx,16(%esp)
	addl %edi,%esi
	movl 40(%esp),%ecx
	xorl %esi,%ebp
	movl 56(%esp),%edx
	roll $12,%ebp
	movl 24(%esp),%ebx
	addl %ebp,%eax
	xorl %eax,%edi
	movl %eax,4(%esp)
	roll $8,%edi
	movl 8(%esp),%eax
	addl %edi,%esi
	movl %edi,52(%esp)
	xorl %esi,%ebp
	addl %ebx,%eax
	roll $7,%ebp
	xorl %eax,%edx
	movl %esi,36(%esp)
	roll $16,%edx
	movl %ebp,20(%esp)
	addl %edx,%ecx
	movl 44(%esp),%esi
	xorl %ecx,%ebx
	movl 60(%esp),%edi
	roll $12,%ebx
	movl 28(%esp),%ebp
	addl %ebx,%eax
	xorl %eax,%edx
	movl %eax,8(%esp)
	roll $8,%edx
	movl 12(%esp),%eax
	addl %edx,%ecx
	movl %edx,56(%esp)
	xorl %ecx,%ebx
	addl %ebp,%eax
	roll $7,%ebx
	xorl %eax,%edi
	roll $16,%edi
	movl %ebx,24(%esp)
	addl %edi,%esi
	xorl %esi,%ebp
	roll $12,%ebp
	movl 20(%esp),%ebx
	addl %ebp,%eax
	xorl %eax,%edi
	movl %eax,12(%esp)
	roll $8,%edi
	movl (%esp),%eax
	addl %edi,%esi
	movl %edi,%edx
	xorl %esi,%ebp
	addl %ebx,%eax
	roll $7,%ebp
	xorl %eax,%edx
	roll $16,%edx
	movl %ebp,28(%esp)
	addl %edx,%ecx
	xorl %ecx,%ebx
	movl 48(%esp),%edi
	roll $12,%ebx
	movl 24(%esp),%ebp
	addl %ebx,%eax
	xorl %eax,%edx
	movl %eax,(%esp)
	roll $8,%edx
	movl 4(%esp),%eax
	addl %edx,%ecx
	movl %edx,60(%esp)
	xorl %ecx,%ebx
	addl %ebp,%eax
	roll $7,%ebx
	xorl %eax,%edi
	movl %ecx,40(%esp)
	roll $16,%edi
	movl %ebx,20(%esp)
	addl %edi,%esi
	movl 32(%esp),%ecx
	xorl %esi,%ebp
	movl 52(%esp),%edx
	roll $12,%ebp
	movl 28(%esp),%ebx
	addl %ebp,%eax
	xorl %eax,%edi
	movl %eax,4(%esp)
	roll $8,%edi
	movl 8(%esp),%eax
	addl %edi,%esi
	movl %edi,48(%esp)
	xorl %esi,%ebp
	addl %ebx,%eax
	roll $7,%ebp
	xorl %eax,%edx
	movl %esi,44(%esp)
	roll $16,%edx
	movl %ebp,24(%esp)
	addl %edx,%ecx
	movl 36(%esp),%esi
	xorl %ecx,%ebx
	movl 56(%esp),%edi
	roll $12,%ebx
	movl 16(%esp),%ebp
	addl %ebx,%eax
	xorl %eax,%edx
	movl %eax,8(%esp)
	roll $8,%edx
	movl 12(%esp),%eax
	addl %edx,%ecx
	movl %edx,52(%esp)
	xorl %ecx,%ebx
	addl %ebp,%eax
	roll $7,%ebx
	xorl %eax,%edi
	roll $16,%edi
	movl %ebx,28(%esp)
	addl %edi,%esi
	xorl %esi,%ebp
	movl 48(%esp),%edx
	roll $12,%ebp
	// Reload the pass counter parked at the top of the loop.
	movl 128(%esp),%ebx
	addl %ebp,%eax
	xorl %eax,%edi
	movl %eax,12(%esp)
	roll $8,%edi
	movl (%esp),%eax
	addl %edi,%esi
	movl %edi,56(%esp)
	xorl %esi,%ebp
	roll $7,%ebp
	decl %ebx
	jnz L002loop
	// Rounds done. Add the initial state words back into the words still
	// held in registers, then pick the full-block or the tail path.
	movl 160(%esp),%ebx
	addl $1634760805,%eax
	addl 80(%esp),%ebp
	addl 96(%esp),%ecx
	addl 100(%esp),%esi
	cmpl $64,%ebx
	jb L003tail
	// Full 64-byte block: %ebx = source pointer, %eax = destination pointer.
	// Each state word gets its initial value added, is XORed with the
	// matching source dword and stored to the destination.
	movl 156(%esp),%ebx
	addl 112(%esp),%edx
	addl 120(%esp),%edi
	xorl (%ebx),%eax
	xorl 16(%ebx),%ebp
	// Word 0 is parked at (%esp) because %eax is needed for the pointer.
	movl %eax,(%esp)
	movl 152(%esp),%eax
	xorl 32(%ebx),%ecx
	xorl 36(%ebx),%esi
	xorl 48(%ebx),%edx
	xorl 56(%ebx),%edi
	movl %ebp,16(%eax)
	movl %ecx,32(%eax)
	movl %esi,36(%eax)
	movl %edx,48(%eax)
	movl %edi,56(%eax)
	movl 4(%esp),%ebp
	movl 8(%esp),%ecx
	movl 12(%esp),%esi
	movl 20(%esp),%edx
	movl 24(%esp),%edi
	addl $857760878,%ebp
	addl $2036477234,%ecx
	addl $1797285236,%esi
	addl 84(%esp),%edx
	addl 88(%esp),%edi
	xorl 4(%ebx),%ebp
	xorl 8(%ebx),%ecx
	xorl 12(%ebx),%esi
	xorl 20(%ebx),%edx
	xorl 24(%ebx),%edi
	movl %ebp,4(%eax)
	movl %ecx,8(%eax)
	movl %esi,12(%eax)
	movl %edx,20(%eax)
	movl %edi,24(%eax)
	movl 28(%esp),%ebp
	movl 40(%esp),%ecx
	movl 44(%esp),%esi
	movl 52(%esp),%edx
	movl 60(%esp),%edi
	addl 92(%esp),%ebp
	addl 104(%esp),%ecx
	addl 108(%esp),%esi
	addl 116(%esp),%edx
	addl 124(%esp),%edi
	xorl 28(%ebx),%ebp
	xorl 40(%ebx),%ecx
	xorl 44(%ebx),%esi
	xorl 52(%ebx),%edx
	xorl 60(%ebx),%edi
	// Advance the source pointer by one block.
	leal 64(%ebx),%ebx
	movl %ebp,28(%eax)
	movl (%esp),%ebp
	movl %ecx,40(%eax)
	movl 160(%esp),%ecx
	movl %esi,44(%eax)
	movl %edx,52(%eax)
	movl %edi,60(%eax)
	movl %ebp,(%eax)
	// Advance the destination pointer and count down the length.
	leal 64(%eax),%eax
	subl $64,%ecx
	jnz L001outer_loop
	jmp L004done
L003tail:
	// Fewer than 64 bytes remain (%ebx = remaining count). Finish the
	// block into 0..60(%esp) instead of XORing it straight to the output.
	addl 112(%esp),%edx
	addl 120(%esp),%edi
	movl %eax,(%esp)
	movl %ebp,16(%esp)
	movl %ecx,32(%esp)
	movl %esi,36(%esp)
	movl %edx,48(%esp)
	movl %edi,56(%esp)
	movl 4(%esp),%ebp
	movl 8(%esp),%ecx
	movl 12(%esp),%esi
	movl 20(%esp),%edx
	movl 24(%esp),%edi
	addl $857760878,%ebp
	addl $2036477234,%ecx
	addl $1797285236,%esi
	addl 84(%esp),%edx
	addl 88(%esp),%edi
	movl %ebp,4(%esp)
	movl %ecx,8(%esp)
	movl %esi,12(%esp)
	movl %edx,20(%esp)
	movl %edi,24(%esp)
	movl 28(%esp),%ebp
	movl 40(%esp),%ecx
	movl 44(%esp),%esi
	movl 52(%esp),%edx
	movl 60(%esp),%edi
	addl 92(%esp),%ebp
	addl 104(%esp),%ecx
	addl 108(%esp),%esi
	addl 116(%esp),%edx
	addl 124(%esp),%edi
	movl %ebp,28(%esp)
	// %ebp = source pointer, %ecx = destination pointer, %esi = byte index.
	movl 156(%esp),%ebp
	movl %ecx,40(%esp)
	movl 152(%esp),%ecx
	movl %esi,44(%esp)
	xorl %esi,%esi
	movl %edx,52(%esp)
	movl %edi,60(%esp)
	xorl %eax,%eax
	xorl %edx,%edx
L005tail_loop:
	// One byte per pass: dst[i] = src[i] ^ block[i]. The index is bumped
	// before the store, hence the -1 displacement.
	movb (%esi,%ebp,1),%al
	movb (%esp,%esi,1),%dl
	leal 1(%esi),%esi
	xorb %dl,%al
	movb %al,-1(%ecx,%esi,1)
	decl %ebx
	jnz L005tail_loop
L004done:
	// Drop the local frame and restore the saved registers.
	addl $132,%esp
	popl %edi
	popl %esi
	popl %ebx
	popl %ebp
	ret
// ---------------------------------------------------------------------------
// _ChaCha20_ctr32_ssse3: SSE2/SSSE3 path.
//
// Same five stack arguments as the scalar routine, loaded once into
//   %edi = arg0 (destination), %esi = arg1 (source), %ecx = arg2 (length),
//   %edx = arg3 (32 bytes are read from it), %ebx = arg4 (16 bytes are read).
//
// %eax is made to point at Lssse3_data using a call/pop sequence so that no
// absolute address is needed. The stack is re-aligned to 64 bytes; the
// original %esp is kept at 512(%esp) and restored at L009done.
//
// Lengths of 256 bytes or more run the four-blocks-at-a-time loop
// (L007outer_loop); anything left, or shorter inputs, uses the one-block
// loop (L0061x / L011outer1x) and a byte-wise tail (L013tail_loop).
// NOTE(review): as in the scalar routine, a zero length is not checked for
// before the decrement-then-test tail loop - confirm callers never pass 0.
// ---------------------------------------------------------------------------
.globl _ChaCha20_ctr32_ssse3
.private_extern _ChaCha20_ctr32_ssse3
.align 4
_ChaCha20_ctr32_ssse3:
L_ChaCha20_ctr32_ssse3_begin:
	pushl %ebp
	pushl %ebx
	pushl %esi
	pushl %edi
	// call/pop leaves the address of Lpic_point in %eax.
	call Lpic_point
Lpic_point:
	popl %eax
	movl 20(%esp),%edi
	movl 24(%esp),%esi
	movl 28(%esp),%ecx
	movl 32(%esp),%edx
	movl 36(%esp),%ebx
	// Save the incoming %esp, carve out and 64-byte-align the frame.
	movl %esp,%ebp
	subl $524,%esp
	andl $-64,%esp
	movl %ebp,512(%esp)
	// %eax = address of Lssse3_data.
	leal Lssse3_data-Lpic_point(%eax),%eax
	movdqu (%ebx),%xmm3
	cmpl $256,%ecx
	jb L0061x
	// ---- four-block path setup ----
	// Keep arg3/arg4 pointers for the later hand-off to the one-block path.
	movl %edx,516(%esp)
	movl %ebx,520(%esp)
	subl $256,%ecx
	// %ebp = base of the 16 x 16-byte "initial state" area (-128..+112).
	leal 384(%esp),%ebp
	movdqu (%edx),%xmm7
	// Broadcast each dword of the arg4 vector into its own register.
	pshufd $0,%xmm3,%xmm0
	pshufd $85,%xmm3,%xmm1
	pshufd $170,%xmm3,%xmm2
	pshufd $255,%xmm3,%xmm3
	// Lane counters become c+{0,1,2,3}; the {4,4,4,4} subtracted here is
	// added back at the top of every L007outer_loop iteration.
	paddd 48(%eax),%xmm0
	pshufd $0,%xmm7,%xmm4
	pshufd $85,%xmm7,%xmm5
	psubd 64(%eax),%xmm0
	pshufd $170,%xmm7,%xmm6
	pshufd $255,%xmm7,%xmm7
	movdqa %xmm0,64(%ebp)
	movdqa %xmm1,80(%ebp)
	movdqa %xmm2,96(%ebp)
	movdqa %xmm3,112(%ebp)
	movdqu 16(%edx),%xmm3
	movdqa %xmm4,-64(%ebp)
	movdqa %xmm5,-48(%ebp)
	movdqa %xmm6,-32(%ebp)
	movdqa %xmm7,-16(%ebp)
	// 32(%eax) holds the four "expand 32-byte k" constants.
	movdqa 32(%eax),%xmm7
	// %ebx = base of the 16 x 16-byte working-state area.
	leal 128(%esp),%ebx
	pshufd $0,%xmm3,%xmm0
	pshufd $85,%xmm3,%xmm1
	pshufd $170,%xmm3,%xmm2
	pshufd $255,%xmm3,%xmm3
	pshufd $0,%xmm7,%xmm4
	pshufd $85,%xmm7,%xmm5
	pshufd $170,%xmm7,%xmm6
	pshufd $255,%xmm7,%xmm7
	movdqa %xmm0,(%ebp)
	movdqa %xmm1,16(%ebp)
	movdqa %xmm2,32(%ebp)
	movdqa %xmm3,48(%ebp)
	movdqa %xmm4,-128(%ebp)
	movdqa %xmm5,-112(%ebp)
	movdqa %xmm6,-96(%ebp)
	movdqa %xmm7,-80(%ebp)
	// Bias both data pointers by 128 so the loop can use displacements
	// -128, -64, 0 and 64.
	leal 128(%esi),%esi
	leal 128(%edi),%edi
	jmp L007outer_loop
.align 4,0x90
L007outer_loop:
	// Copy the initial state (%ebp area) into the working area (%ebx area),
	// except the words that are loaded straight into registers below.
	movdqa -112(%ebp),%xmm1
	movdqa -96(%ebp),%xmm2
	movdqa -80(%ebp),%xmm3
	movdqa -48(%ebp),%xmm5
	movdqa -32(%ebp),%xmm6
	movdqa -16(%ebp),%xmm7
	movdqa %xmm1,-112(%ebx)
	movdqa %xmm2,-96(%ebx)
	movdqa %xmm3,-80(%ebx)
	movdqa %xmm5,-48(%ebx)
	movdqa %xmm6,-32(%ebx)
	movdqa %xmm7,-16(%ebx)
	movdqa 32(%ebp),%xmm2
	movdqa 48(%ebp),%xmm3
	movdqa 64(%ebp),%xmm4
	movdqa 80(%ebp),%xmm5
	movdqa 96(%ebp),%xmm6
	movdqa 112(%ebp),%xmm7
	// Advance all four lane counters by 4 and store them back.
	paddd 64(%eax),%xmm4
	movdqa %xmm2,32(%ebx)
	movdqa %xmm3,48(%ebx)
	movdqa %xmm4,64(%ebx)
	movdqa %xmm5,80(%ebx)
	movdqa %xmm6,96(%ebx)
	movdqa %xmm7,112(%ebx)
	movdqa %xmm4,64(%ebp)
	movdqa -128(%ebp),%xmm0
	movdqa %xmm4,%xmm6
	movdqa -64(%ebp),%xmm3
	movdqa (%ebp),%xmm4
	movdqa 16(%ebp),%xmm5
	// 10 passes through L008loop.
	movl $10,%edx
	nop
.align 4,0x90
L008loop:
	// Rotations by 16 and 8 use pshufb with the byte-shuffle masks at
	// (%eax) and 16(%eax); rotations by 12 and 7 use pslld/psrld/por.
	paddd %xmm3,%xmm0
	movdqa %xmm3,%xmm2
	pxor %xmm0,%xmm6
	pshufb (%eax),%xmm6
	paddd %xmm6,%xmm4
	pxor %xmm4,%xmm2
	movdqa -48(%ebx),%xmm3
	movdqa %xmm2,%xmm1
	pslld $12,%xmm2
	psrld $20,%xmm1
	por %xmm1,%xmm2
	movdqa -112(%ebx),%xmm1
	paddd %xmm2,%xmm0
	movdqa 80(%ebx),%xmm7
	pxor %xmm0,%xmm6
	movdqa %xmm0,-128(%ebx)
	pshufb 16(%eax),%xmm6
	paddd %xmm6,%xmm4
	movdqa %xmm6,64(%ebx)
	pxor %xmm4,%xmm2
	paddd %xmm3,%xmm1
	movdqa %xmm2,%xmm0
	pslld $7,%xmm2
	psrld $25,%xmm0
	pxor %xmm1,%xmm7
	por %xmm0,%xmm2
	movdqa %xmm4,(%ebx)
	pshufb (%eax),%xmm7
	movdqa %xmm2,-64(%ebx)
	paddd %xmm7,%xmm5
	movdqa 32(%ebx),%xmm4
	pxor %xmm5,%xmm3
	movdqa -32(%ebx),%xmm2
	movdqa %xmm3,%xmm0
	pslld $12,%xmm3
	psrld $20,%xmm0
	por %xmm0,%xmm3
	movdqa -96(%ebx),%xmm0
	paddd %xmm3,%xmm1
	movdqa 96(%ebx),%xmm6
	pxor %xmm1,%xmm7
	movdqa %xmm1,-112(%ebx)
	pshufb 16(%eax),%xmm7
	paddd %xmm7,%xmm5
	movdqa %xmm7,80(%ebx)
	pxor %xmm5,%xmm3
	paddd %xmm2,%xmm0
	movdqa %xmm3,%xmm1
	pslld $7,%xmm3
	psrld $25,%xmm1
	pxor %xmm0,%xmm6
	por %xmm1,%xmm3
	movdqa %xmm5,16(%ebx)
	pshufb (%eax),%xmm6
	movdqa %xmm3,-48(%ebx)
	paddd %xmm6,%xmm4
	movdqa 48(%ebx),%xmm5
	pxor %xmm4,%xmm2
	movdqa -16(%ebx),%xmm3
	movdqa %xmm2,%xmm1
	pslld $12,%xmm2
	psrld $20,%xmm1
	por %xmm1,%xmm2
	movdqa -80(%ebx),%xmm1
	paddd %xmm2,%xmm0
	movdqa 112(%ebx),%xmm7
	pxor %xmm0,%xmm6
	movdqa %xmm0,-96(%ebx)
	pshufb 16(%eax),%xmm6
	paddd %xmm6,%xmm4
	movdqa %xmm6,96(%ebx)
	pxor %xmm4,%xmm2
	paddd %xmm3,%xmm1
	movdqa %xmm2,%xmm0
	pslld $7,%xmm2
	psrld $25,%xmm0
	pxor %xmm1,%xmm7
	por %xmm0,%xmm2
	pshufb (%eax),%xmm7
	movdqa %xmm2,-32(%ebx)
	paddd %xmm7,%xmm5
	pxor %xmm5,%xmm3
	movdqa -48(%ebx),%xmm2
	movdqa %xmm3,%xmm0
	pslld $12,%xmm3
	psrld $20,%xmm0
	por %xmm0,%xmm3
	movdqa -128(%ebx),%xmm0
	paddd %xmm3,%xmm1
	pxor %xmm1,%xmm7
	movdqa %xmm1,-80(%ebx)
	pshufb 16(%eax),%xmm7
	paddd %xmm7,%xmm5
	movdqa %xmm7,%xmm6
	pxor %xmm5,%xmm3
	paddd %xmm2,%xmm0
	movdqa %xmm3,%xmm1
	pslld $7,%xmm3
	psrld $25,%xmm1
	pxor %xmm0,%xmm6
	por %xmm1,%xmm3
	pshufb (%eax),%xmm6
	movdqa %xmm3,-16(%ebx)
	paddd %xmm6,%xmm4
	pxor %xmm4,%xmm2
	movdqa -32(%ebx),%xmm3
	movdqa %xmm2,%xmm1
	pslld $12,%xmm2
	psrld $20,%xmm1
	por %xmm1,%xmm2
	movdqa -112(%ebx),%xmm1
	paddd %xmm2,%xmm0
	movdqa 64(%ebx),%xmm7
	pxor %xmm0,%xmm6
	movdqa %xmm0,-128(%ebx)
	pshufb 16(%eax),%xmm6
	paddd %xmm6,%xmm4
	movdqa %xmm6,112(%ebx)
	pxor %xmm4,%xmm2
	paddd %xmm3,%xmm1
	movdqa %xmm2,%xmm0
	pslld $7,%xmm2
	psrld $25,%xmm0
	pxor %xmm1,%xmm7
	por %xmm0,%xmm2
	movdqa %xmm4,32(%ebx)
	pshufb (%eax),%xmm7
	movdqa %xmm2,-48(%ebx)
	paddd %xmm7,%xmm5
	movdqa (%ebx),%xmm4
	pxor %xmm5,%xmm3
	movdqa -16(%ebx),%xmm2
	movdqa %xmm3,%xmm0
	pslld $12,%xmm3
	psrld $20,%xmm0
	por %xmm0,%xmm3
	movdqa -96(%ebx),%xmm0
	paddd %xmm3,%xmm1
	movdqa 80(%ebx),%xmm6
	pxor %xmm1,%xmm7
	movdqa %xmm1,-112(%ebx)
	pshufb 16(%eax),%xmm7
	paddd %xmm7,%xmm5
	movdqa %xmm7,64(%ebx)
	pxor %xmm5,%xmm3
	paddd %xmm2,%xmm0
	movdqa %xmm3,%xmm1
	pslld $7,%xmm3
	psrld $25,%xmm1
	pxor %xmm0,%xmm6
	por %xmm1,%xmm3
	movdqa %xmm5,48(%ebx)
	pshufb (%eax),%xmm6
	movdqa %xmm3,-32(%ebx)
	paddd %xmm6,%xmm4
	movdqa 16(%ebx),%xmm5
	pxor %xmm4,%xmm2
	movdqa -64(%ebx),%xmm3
	movdqa %xmm2,%xmm1
	pslld $12,%xmm2
	psrld $20,%xmm1
	por %xmm1,%xmm2
	movdqa -80(%ebx),%xmm1
	paddd %xmm2,%xmm0
	movdqa 96(%ebx),%xmm7
	pxor %xmm0,%xmm6
	movdqa %xmm0,-96(%ebx)
	pshufb 16(%eax),%xmm6
	paddd %xmm6,%xmm4
	movdqa %xmm6,80(%ebx)
	pxor %xmm4,%xmm2
	paddd %xmm3,%xmm1
	movdqa %xmm2,%xmm0
	pslld $7,%xmm2
	psrld $25,%xmm0
	pxor %xmm1,%xmm7
	por %xmm0,%xmm2
	pshufb (%eax),%xmm7
	movdqa %xmm2,-16(%ebx)
	paddd %xmm7,%xmm5
	pxor %xmm5,%xmm3
	movdqa %xmm3,%xmm0
	pslld $12,%xmm3
	psrld $20,%xmm0
	por %xmm0,%xmm3
	movdqa -128(%ebx),%xmm0
	paddd %xmm3,%xmm1
	movdqa 64(%ebx),%xmm6
	pxor %xmm1,%xmm7
	movdqa %xmm1,-80(%ebx)
	pshufb 16(%eax),%xmm7
	paddd %xmm7,%xmm5
	movdqa %xmm7,96(%ebx)
	pxor %xmm5,%xmm3
	movdqa %xmm3,%xmm1
	pslld $7,%xmm3
	psrld $25,%xmm1
	por %xmm1,%xmm3
	decl %edx
	jnz L008loop
	// Spill the state words still held in registers.
	movdqa %xmm3,-64(%ebx)
	movdqa %xmm4,(%ebx)
	movdqa %xmm5,16(%ebx)
	movdqa %xmm6,64(%ebx)
	movdqa %xmm7,96(%ebx)
	// Output stage, state words 0..3: add the initial state, transpose the
	// 4x4 dword matrix with punpck*, XOR with 16 source bytes from each of
	// the four blocks (64 bytes apart) and store.
	movdqa -112(%ebx),%xmm1
	movdqa -96(%ebx),%xmm2
	movdqa -80(%ebx),%xmm3
	paddd -128(%ebp),%xmm0
	paddd -112(%ebp),%xmm1
	paddd -96(%ebp),%xmm2
	paddd -80(%ebp),%xmm3
	movdqa %xmm0,%xmm6
	punpckldq %xmm1,%xmm0
	movdqa %xmm2,%xmm7
	punpckldq %xmm3,%xmm2
	punpckhdq %xmm1,%xmm6
	punpckhdq %xmm3,%xmm7
	movdqa %xmm0,%xmm1
	punpcklqdq %xmm2,%xmm0
	movdqa %xmm6,%xmm3
	punpcklqdq %xmm7,%xmm6
	punpckhqdq %xmm2,%xmm1
	punpckhqdq %xmm7,%xmm3
	movdqu -128(%esi),%xmm4
	movdqu -64(%esi),%xmm5
	movdqu (%esi),%xmm2
	movdqu 64(%esi),%xmm7
	leal 16(%esi),%esi
	pxor %xmm0,%xmm4
	movdqa -64(%ebx),%xmm0
	pxor %xmm1,%xmm5
	movdqa -48(%ebx),%xmm1
	pxor %xmm2,%xmm6
	movdqa -32(%ebx),%xmm2
	pxor %xmm3,%xmm7
	movdqa -16(%ebx),%xmm3
	movdqu %xmm4,-128(%edi)
	movdqu %xmm5,-64(%edi)
	movdqu %xmm6,(%edi)
	movdqu %xmm7,64(%edi)
	leal 16(%edi),%edi
	// Same for state words 4..7.
	paddd -64(%ebp),%xmm0
	paddd -48(%ebp),%xmm1
	paddd -32(%ebp),%xmm2
	paddd -16(%ebp),%xmm3
	movdqa %xmm0,%xmm6
	punpckldq %xmm1,%xmm0
	movdqa %xmm2,%xmm7
	punpckldq %xmm3,%xmm2
	punpckhdq %xmm1,%xmm6
	punpckhdq %xmm3,%xmm7
	movdqa %xmm0,%xmm1
	punpcklqdq %xmm2,%xmm0
	movdqa %xmm6,%xmm3
	punpcklqdq %xmm7,%xmm6
	punpckhqdq %xmm2,%xmm1
	punpckhqdq %xmm7,%xmm3
	movdqu -128(%esi),%xmm4
	movdqu -64(%esi),%xmm5
	movdqu (%esi),%xmm2
	movdqu 64(%esi),%xmm7
	leal 16(%esi),%esi
	pxor %xmm0,%xmm4
	movdqa (%ebx),%xmm0
	pxor %xmm1,%xmm5
	movdqa 16(%ebx),%xmm1
	pxor %xmm2,%xmm6
	movdqa 32(%ebx),%xmm2
	pxor %xmm3,%xmm7
	movdqa 48(%ebx),%xmm3
	movdqu %xmm4,-128(%edi)
	movdqu %xmm5,-64(%edi)
	movdqu %xmm6,(%edi)
	movdqu %xmm7,64(%edi)
	leal 16(%edi),%edi
	// Same for state words 8..11.
	paddd (%ebp),%xmm0
	paddd 16(%ebp),%xmm1
	paddd 32(%ebp),%xmm2
	paddd 48(%ebp),%xmm3
	movdqa %xmm0,%xmm6
	punpckldq %xmm1,%xmm0
	movdqa %xmm2,%xmm7
	punpckldq %xmm3,%xmm2
	punpckhdq %xmm1,%xmm6
	punpckhdq %xmm3,%xmm7
	movdqa %xmm0,%xmm1
	punpcklqdq %xmm2,%xmm0
	movdqa %xmm6,%xmm3
	punpcklqdq %xmm7,%xmm6
	punpckhqdq %xmm2,%xmm1
	punpckhqdq %xmm7,%xmm3
	movdqu -128(%esi),%xmm4
	movdqu -64(%esi),%xmm5
	movdqu (%esi),%xmm2
	movdqu 64(%esi),%xmm7
	leal 16(%esi),%esi
	pxor %xmm0,%xmm4
	movdqa 64(%ebx),%xmm0
	pxor %xmm1,%xmm5
	movdqa 80(%ebx),%xmm1
	pxor %xmm2,%xmm6
	movdqa 96(%ebx),%xmm2
	pxor %xmm3,%xmm7
	movdqa 112(%ebx),%xmm3
	movdqu %xmm4,-128(%edi)
	movdqu %xmm5,-64(%edi)
	movdqu %xmm6,(%edi)
	movdqu %xmm7,64(%edi)
	leal 16(%edi),%edi
	// Same for state words 12..15.
	paddd 64(%ebp),%xmm0
	paddd 80(%ebp),%xmm1
	paddd 96(%ebp),%xmm2
	paddd 112(%ebp),%xmm3
	movdqa %xmm0,%xmm6
	punpckldq %xmm1,%xmm0
	movdqa %xmm2,%xmm7
	punpckldq %xmm3,%xmm2
	punpckhdq %xmm1,%xmm6
	punpckhdq %xmm3,%xmm7
	movdqa %xmm0,%xmm1
	punpcklqdq %xmm2,%xmm0
	movdqa %xmm6,%xmm3
	punpcklqdq %xmm7,%xmm6
	punpckhqdq %xmm2,%xmm1
	punpckhqdq %xmm7,%xmm3
	movdqu -128(%esi),%xmm4
	movdqu -64(%esi),%xmm5
	movdqu (%esi),%xmm2
	movdqu 64(%esi),%xmm7
	// 3 x 16 + 208 = 256: the pointers end up one 256-byte group further on.
	leal 208(%esi),%esi
	pxor %xmm0,%xmm4
	pxor %xmm1,%xmm5
	pxor %xmm2,%xmm6
	pxor %xmm3,%xmm7
	movdqu %xmm4,-128(%edi)
	movdqu %xmm5,-64(%edi)
	movdqu %xmm6,(%edi)
	movdqu %xmm7,64(%edi)
	leal 208(%edi),%edi
	// Loop while at least another 256 bytes remain (no borrow).
	subl $256,%ecx
	jnc L007outer_loop
	// Undo the last subtraction; zero means nothing is left.
	addl $256,%ecx
	jz L009done
	// Hand over to the one-block path: reload the arg4/arg3 pointers, remove
	// the 128-byte pointer bias, and rebuild the fourth state row in %xmm3.
	// Its first dword is lane 0 of the last counter vector plus 4; the other
	// three dwords come from the original arg4 vector (mask 0,-1,-1,-1).
	movl 520(%esp),%ebx
	leal -128(%esi),%esi
	movl 516(%esp),%edx
	leal -128(%edi),%edi
	movd 64(%ebp),%xmm2
	movdqu (%ebx),%xmm3
	paddd 96(%eax),%xmm2
	pand 112(%eax),%xmm3
	por %xmm2,%xmm3
L0061x:
	// ---- one-block path ----
	// %xmm0..%xmm3 = the four 16-byte state rows (constants, two halves of
	// the 32 bytes at arg3, and the counter row); %xmm6/%xmm7 = pshufb masks
	// for rotate-left by 16 and by 8. The initial rows are kept at
	// 0..48(%esp) for the final addition.
	movdqa 32(%eax),%xmm0
	movdqu (%edx),%xmm1
	movdqu 16(%edx),%xmm2
	movdqa (%eax),%xmm6
	movdqa 16(%eax),%xmm7
	// NOTE(review): this 4-byte store is overwritten by the movdqa to
	// 48(%esp) a few lines below, so it appears to have no effect.
	movl %ebp,48(%esp)
	movdqa %xmm0,(%esp)
	movdqa %xmm1,16(%esp)
	movdqa %xmm2,32(%esp)
	movdqa %xmm3,48(%esp)
	movl $10,%edx
	jmp L010loop1x
.align 4,0x90
L011outer1x:
	// Next block: reload the initial rows and add {1,0,0,0} to the counter
	// row, storing the new row back to 48(%esp).
	movdqa 80(%eax),%xmm3
	movdqa (%esp),%xmm0
	movdqa 16(%esp),%xmm1
	movdqa 32(%esp),%xmm2
	paddd 48(%esp),%xmm3
	movl $10,%edx
	movdqa %xmm3,48(%esp)
	jmp L010loop1x
.align 4,0x90
L010loop1x:
	// The .byte sequences encode pshufb %xmm6,%xmm3 (66 0F 38 00 DE) and
	// pshufb %xmm7,%xmm3 (66 0F 38 00 DF).
	paddd %xmm1,%xmm0
	pxor %xmm0,%xmm3
.byte 102,15,56,0,222
	paddd %xmm3,%xmm2
	pxor %xmm2,%xmm1
	movdqa %xmm1,%xmm4
	psrld $20,%xmm1
	pslld $12,%xmm4
	por %xmm4,%xmm1
	paddd %xmm1,%xmm0
	pxor %xmm0,%xmm3
.byte 102,15,56,0,223
	paddd %xmm3,%xmm2
	pxor %xmm2,%xmm1
	movdqa %xmm1,%xmm4
	psrld $25,%xmm1
	pslld $7,%xmm4
	por %xmm4,%xmm1
	// Rotate the dwords within rows 1..3 before the second half of the pass.
	pshufd $78,%xmm2,%xmm2
	pshufd $57,%xmm1,%xmm1
	pshufd $147,%xmm3,%xmm3
	nop
	paddd %xmm1,%xmm0
	pxor %xmm0,%xmm3
.byte 102,15,56,0,222
	paddd %xmm3,%xmm2
	pxor %xmm2,%xmm1
	movdqa %xmm1,%xmm4
	psrld $20,%xmm1
	pslld $12,%xmm4
	por %xmm4,%xmm1
	paddd %xmm1,%xmm0
	pxor %xmm0,%xmm3
.byte 102,15,56,0,223
	paddd %xmm3,%xmm2
	pxor %xmm2,%xmm1
	movdqa %xmm1,%xmm4
	psrld $25,%xmm1
	pslld $7,%xmm4
	por %xmm4,%xmm1
	// Rotate the rows back (inverse of the shuffles above).
	pshufd $78,%xmm2,%xmm2
	pshufd $147,%xmm1,%xmm1
	pshufd $57,%xmm3,%xmm3
	decl %edx
	jnz L010loop1x
	// Add the initial rows to obtain the 64-byte output block.
	paddd (%esp),%xmm0
	paddd 16(%esp),%xmm1
	paddd 32(%esp),%xmm2
	paddd 48(%esp),%xmm3
	cmpl $64,%ecx
	jb L012tail
	// Full block: XOR with 64 source bytes, store, advance both pointers.
	movdqu (%esi),%xmm4
	movdqu 16(%esi),%xmm5
	pxor %xmm4,%xmm0
	movdqu 32(%esi),%xmm4
	pxor %xmm5,%xmm1
	movdqu 48(%esi),%xmm5
	pxor %xmm4,%xmm2
	pxor %xmm5,%xmm3
	leal 64(%esi),%esi
	movdqu %xmm0,(%edi)
	movdqu %xmm1,16(%edi)
	movdqu %xmm2,32(%edi)
	movdqu %xmm3,48(%edi)
	leal 64(%edi),%edi
	subl $64,%ecx
	jnz L011outer1x
	jmp L009done
L012tail:
	// Fewer than 64 bytes remain: spill the block to 0..48(%esp) and XOR it
	// into the output one byte at a time (%ebp = byte index, %ecx = count).
	movdqa %xmm0,(%esp)
	movdqa %xmm1,16(%esp)
	movdqa %xmm2,32(%esp)
	movdqa %xmm3,48(%esp)
	xorl %eax,%eax
	xorl %edx,%edx
	xorl %ebp,%ebp
L013tail_loop:
	movb (%esp,%ebp,1),%al
	movb (%esi,%ebp,1),%dl
	leal 1(%ebp),%ebp
	xorb %dl,%al
	movb %al,-1(%edi,%ebp,1)
	decl %ecx
	jnz L013tail_loop
L009done:
	// Restore the stack pointer saved in the prologue, then the registers.
	movl 512(%esp),%esp
	popl %edi
	popl %esi
	popl %ebx
	popl %ebp
	ret
.align 6,0x90
// Constant table addressed through %eax by _ChaCha20_ctr32_ssse3:
//   +0    pshufb mask: rotate each dword left by 16 bits
//   +16   pshufb mask: rotate each dword left by 8 bits
//   +32   the four "expand 32-byte k" constants
//   +48   {0,1,2,3}    per-lane counter offsets for the four-block path
//   +64   {4,4,4,4}    counter step per four-block iteration
//   +80   {1,0,0,0}    counter step per one-block iteration
//   +96   {4,0,0,0}    counter step used when handing over to the 1x path
//   +112  {0,-1,-1,-1} mask that clears the first dword of the arg4 vector
Lssse3_data:
.byte 2,3,0,1,6,7,4,5,10,11,8,9,14,15,12,13
.byte 3,0,1,2,7,4,5,6,11,8,9,10,15,12,13,14
.long 1634760805,857760878,2036477234,1797285236
.long 0,1,2,3
.long 4,4,4,4
.long 1,0,0,0
.long 4,0,0,0
.long 0,-1,-1,-1
.align 6,0x90
// NUL-terminated ASCII: "ChaCha20 for x86, CRYPTOGAMS by <appro@openssl.org>"
.byte 67,104,97,67,104,97,50,48,32,102,111,114,32,120,56,54
.byte 44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32
.byte 60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111
.byte 114,103,62,0
#endif  // !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86) && defined(__APPLE__)
