; This file is generated from a similarly-named Perl script in the BoringSSL
; source tree. Do not edit by hand.
;
; NOTE(review): this copy had been collapsed onto a handful of physical lines
; with line numbers fused into the text; it is restored here to one statement
; per line. The only instruction-level change is the position-independent
; base setup added to the direct entry of _ChaCha20_ssse3 (see that function).
; That change should also be made in the Perl generator so it is not lost on
; regeneration.
;
; Syntax: NASM (Intel operand order), 32-bit x86. All arguments are read from
; the stack; ebp/ebx/esi/edi are saved and restored by both entry points.

%ifdef BORINGSSL_PREFIX
%include "boringssl_prefix_symbols_nasm.inc"
%endif
%ifidn __OUTPUT_FORMAT__,obj
section	code	use32 class=code align=64
%elifidn __OUTPUT_FORMAT__,win32
$@feat.00 equ 1
section	.text	code align=64
%else
section	.text	code
%endif

;-----------------------------------------------------------------------
; _ChaCha20_ctr32
;
; Stack arguments (offsets are relative to esp after the four pushes):
;   [20+esp]  arg1  destination pointer (written)
;   [24+esp]  arg2  source pointer (read, XORed with the key stream)
;   [28+esp]  arg3  byte count; the function returns at once when it is 0
;   [32+esp]  arg4  pointer to 8 dwords that are copied into state words 4..11
;   [36+esp]  arg5  pointer to 4 dwords that are copied into state words 12..15;
;                   the first dword is incremented once per 64-byte block
; Presumably this matches a C prototype of the form
;   (uint8_t *out, const uint8_t *in, size_t len,
;    const uint32_t key[8], const uint32_t counter[4])
; -- confirm against the C header that declares it.
;
; Dispatch: when bit 24 of _OPENSSL_ia32cap_P[0] and bit 9 of
; _OPENSSL_ia32cap_P[1] are both set, control transfers to L$ssse3_shortcut
; with eax = address of L$pic_point; otherwise the integer-only code below
; runs. (Presumably these are the FXSR and SSSE3 feature bits -- confirm
; against the definition of OPENSSL_ia32cap_P.)
;
; Integer path frame after "sub esp,132":
;   [0..60+esp]     16-dword working state
;   [80..108+esp]   copy of the 8 dwords from arg4
;   [112..124+esp]  copy of the 4 dwords from arg5 (dword 0 is the counter)
;   [128+esp]       remaining iterations of the round loop
;   [152+esp], [156+esp], [160+esp]
;                   the caller's arg1/arg2/arg3 slots, reused to hold the
;                   current destination, source and remaining length
;-----------------------------------------------------------------------
global	_ChaCha20_ctr32
align	16
_ChaCha20_ctr32:
L$_ChaCha20_ctr32_begin:
	push	ebp
	push	ebx
	push	esi
	push	edi
	xor	eax,eax
	cmp	eax,DWORD [28+esp]
	je	NEAR L$000no_data
	; call/pop leaves the run-time address of L$pic_point in eax; the
	; SSSE3 path uses it to locate L$ssse3_data.
	call	L$pic_point
L$pic_point:
	pop	eax
	lea	ebp,[_OPENSSL_ia32cap_P]
	test	DWORD [ebp],16777216
	jz	NEAR L$001x86
	test	DWORD [4+ebp],512
	jz	NEAR L$001x86
	jmp	NEAR L$ssse3_shortcut
L$001x86:
	mov	esi,DWORD [32+esp]
	mov	edi,DWORD [36+esp]
	sub	esp,132
	; Copy the 8 dwords at arg4 to [80..108+esp].
	mov	eax,DWORD [esi]
	mov	ebx,DWORD [4+esi]
	mov	ecx,DWORD [8+esi]
	mov	edx,DWORD [12+esi]
	mov	DWORD [80+esp],eax
	mov	DWORD [84+esp],ebx
	mov	DWORD [88+esp],ecx
	mov	DWORD [92+esp],edx
	mov	eax,DWORD [16+esi]
	mov	ebx,DWORD [20+esi]
	mov	ecx,DWORD [24+esi]
	mov	edx,DWORD [28+esi]
	mov	DWORD [96+esp],eax
	mov	DWORD [100+esp],ebx
	mov	DWORD [104+esp],ecx
	mov	DWORD [108+esp],edx
	; Copy the 4 dwords at arg5 to [112..124+esp]. The counter is stored
	; minus one because L$002entry adds one before every block.
	mov	eax,DWORD [edi]
	mov	ebx,DWORD [4+edi]
	mov	ecx,DWORD [8+edi]
	mov	edx,DWORD [12+edi]
	sub	eax,1
	mov	DWORD [112+esp],eax
	mov	DWORD [116+esp],ebx
	mov	DWORD [120+esp],ecx
	mov	DWORD [124+esp],edx
	jmp	NEAR L$002entry
align	16
L$003outer_loop:
	; Save the advanced source, destination and remaining length.
	mov	DWORD [156+esp],ebx
	mov	DWORD [152+esp],eax
	mov	DWORD [160+esp],ecx
L$002entry:
	; Rebuild the working state: the four constants (the ASCII bytes of
	; "expand 32-byte k"), the saved key words, and the counter words with
	; the counter incremented and written back.
	mov	eax,1634760805
	mov	DWORD [4+esp],857760878
	mov	DWORD [8+esp],2036477234
	mov	DWORD [12+esp],1797285236
	mov	ebx,DWORD [84+esp]
	mov	ebp,DWORD [88+esp]
	mov	ecx,DWORD [104+esp]
	mov	esi,DWORD [108+esp]
	mov	edx,DWORD [116+esp]
	mov	edi,DWORD [120+esp]
	mov	DWORD [20+esp],ebx
	mov	DWORD [24+esp],ebp
	mov	DWORD [40+esp],ecx
	mov	DWORD [44+esp],esi
	mov	DWORD [52+esp],edx
	mov	DWORD [56+esp],edi
	mov	ebx,DWORD [92+esp]
	mov	edi,DWORD [124+esp]
	mov	edx,DWORD [112+esp]
	mov	ebp,DWORD [80+esp]
	mov	ecx,DWORD [96+esp]
	mov	esi,DWORD [100+esp]
	add	edx,1
	mov	DWORD [28+esp],ebx
	mov	DWORD [60+esp],edi
	mov	DWORD [112+esp],edx
	; 10 passes through L$004loop; the counter lives in [128+esp] while
	; ebx is used as a working register.
	mov	ebx,10
	jmp	NEAR L$004loop
align	16
L$004loop:
	; Each pass applies eight add/xor/rotate (16,12,8,7) sequences to the
	; working state, keeping the live words in registers and spilling the
	; rest to [0..60+esp].
	add	eax,ebp
	mov	DWORD [128+esp],ebx
	mov	ebx,ebp
	xor	edx,eax
	rol	edx,16
	add	ecx,edx
	xor	ebx,ecx
	mov	edi,DWORD [52+esp]
	rol	ebx,12
	mov	ebp,DWORD [20+esp]
	add	eax,ebx
	xor	edx,eax
	mov	DWORD [esp],eax
	rol	edx,8
	mov	eax,DWORD [4+esp]
	add	ecx,edx
	mov	DWORD [48+esp],edx
	xor	ebx,ecx
	add	eax,ebp
	rol	ebx,7
	xor	edi,eax
	mov	DWORD [32+esp],ecx
	rol	edi,16
	mov	DWORD [16+esp],ebx
	add	esi,edi
	mov	ecx,DWORD [40+esp]
	xor	ebp,esi
	mov	edx,DWORD [56+esp]
	rol	ebp,12
	mov	ebx,DWORD [24+esp]
	add	eax,ebp
	xor	edi,eax
	mov	DWORD [4+esp],eax
	rol	edi,8
	mov	eax,DWORD [8+esp]
	add	esi,edi
	mov	DWORD [52+esp],edi
	xor	ebp,esi
	add	eax,ebx
	rol	ebp,7
	xor	edx,eax
	mov	DWORD [36+esp],esi
	rol	edx,16
	mov	DWORD [20+esp],ebp
	add	ecx,edx
	mov	esi,DWORD [44+esp]
	xor	ebx,ecx
	mov	edi,DWORD [60+esp]
	rol	ebx,12
	mov	ebp,DWORD [28+esp]
	add	eax,ebx
	xor	edx,eax
	mov	DWORD [8+esp],eax
	rol	edx,8
	mov	eax,DWORD [12+esp]
	add	ecx,edx
	mov	DWORD [56+esp],edx
	xor	ebx,ecx
	add	eax,ebp
	rol	ebx,7
	xor	edi,eax
	rol	edi,16
	mov	DWORD [24+esp],ebx
	add	esi,edi
	xor	ebp,esi
	rol	ebp,12
	mov	ebx,DWORD [20+esp]
	add	eax,ebp
	xor	edi,eax
	mov	DWORD [12+esp],eax
	rol	edi,8
	mov	eax,DWORD [esp]
	add	esi,edi
	mov	edx,edi
	xor	ebp,esi
	add	eax,ebx
	rol	ebp,7
	xor	edx,eax
	rol	edx,16
	mov	DWORD [28+esp],ebp
	add	ecx,edx
	xor	ebx,ecx
	mov	edi,DWORD [48+esp]
	rol	ebx,12
	mov	ebp,DWORD [24+esp]
	add	eax,ebx
	xor	edx,eax
	mov	DWORD [esp],eax
	rol	edx,8
	mov	eax,DWORD [4+esp]
	add	ecx,edx
	mov	DWORD [60+esp],edx
	xor	ebx,ecx
	add	eax,ebp
	rol	ebx,7
	xor	edi,eax
	mov	DWORD [40+esp],ecx
	rol	edi,16
	mov	DWORD [20+esp],ebx
	add	esi,edi
	mov	ecx,DWORD [32+esp]
	xor	ebp,esi
	mov	edx,DWORD [52+esp]
	rol	ebp,12
	mov	ebx,DWORD [28+esp]
	add	eax,ebp
	xor	edi,eax
	mov	DWORD [4+esp],eax
	rol	edi,8
	mov	eax,DWORD [8+esp]
	add	esi,edi
	mov	DWORD [48+esp],edi
	xor	ebp,esi
	add	eax,ebx
	rol	ebp,7
	xor	edx,eax
	mov	DWORD [44+esp],esi
	rol	edx,16
	mov	DWORD [24+esp],ebp
	add	ecx,edx
	mov	esi,DWORD [36+esp]
	xor	ebx,ecx
	mov	edi,DWORD [56+esp]
	rol	ebx,12
	mov	ebp,DWORD [16+esp]
	add	eax,ebx
	xor	edx,eax
	mov	DWORD [8+esp],eax
	rol	edx,8
	mov	eax,DWORD [12+esp]
	add	ecx,edx
	mov	DWORD [52+esp],edx
	xor	ebx,ecx
	add	eax,ebp
	rol	ebx,7
	xor	edi,eax
	rol	edi,16
	mov	DWORD [28+esp],ebx
	add	esi,edi
	xor	ebp,esi
	mov	edx,DWORD [48+esp]
	rol	ebp,12
	mov	ebx,DWORD [128+esp]
	add	eax,ebp
	xor	edi,eax
	mov	DWORD [12+esp],eax
	rol	edi,8
	mov	eax,DWORD [esp]
	add	esi,edi
	mov	DWORD [56+esp],edi
	xor	ebp,esi
	rol	ebp,7
	dec	ebx
	jnz	NEAR L$004loop
	; Rounds finished: add the input state back in. A remaining length
	; below 64 takes the byte-wise tail path.
	mov	ebx,DWORD [160+esp]
	add	eax,1634760805
	add	ebp,DWORD [80+esp]
	add	ecx,DWORD [96+esp]
	add	esi,DWORD [100+esp]
	cmp	ebx,64
	jb	NEAR L$005tail
	; Full block: XOR 64 key-stream bytes with the source (ebx) and store
	; them at the destination (eax), five dwords at a time.
	mov	ebx,DWORD [156+esp]
	add	edx,DWORD [112+esp]
	add	edi,DWORD [120+esp]
	xor	eax,DWORD [ebx]
	xor	ebp,DWORD [16+ebx]
	mov	DWORD [esp],eax
	mov	eax,DWORD [152+esp]
	xor	ecx,DWORD [32+ebx]
	xor	esi,DWORD [36+ebx]
	xor	edx,DWORD [48+ebx]
	xor	edi,DWORD [56+ebx]
	mov	DWORD [16+eax],ebp
	mov	DWORD [32+eax],ecx
	mov	DWORD [36+eax],esi
	mov	DWORD [48+eax],edx
	mov	DWORD [56+eax],edi
	mov	ebp,DWORD [4+esp]
	mov	ecx,DWORD [8+esp]
	mov	esi,DWORD [12+esp]
	mov	edx,DWORD [20+esp]
	mov	edi,DWORD [24+esp]
	add	ebp,857760878
	add	ecx,2036477234
	add	esi,1797285236
	add	edx,DWORD [84+esp]
	add	edi,DWORD [88+esp]
	xor	ebp,DWORD [4+ebx]
	xor	ecx,DWORD [8+ebx]
	xor	esi,DWORD [12+ebx]
	xor	edx,DWORD [20+ebx]
	xor	edi,DWORD [24+ebx]
	mov	DWORD [4+eax],ebp
	mov	DWORD [8+eax],ecx
	mov	DWORD [12+eax],esi
	mov	DWORD [20+eax],edx
	mov	DWORD [24+eax],edi
	mov	ebp,DWORD [28+esp]
	mov	ecx,DWORD [40+esp]
	mov	esi,DWORD [44+esp]
	mov	edx,DWORD [52+esp]
	mov	edi,DWORD [60+esp]
	add	ebp,DWORD [92+esp]
	add	ecx,DWORD [104+esp]
	add	esi,DWORD [108+esp]
	add	edx,DWORD [116+esp]
	add	edi,DWORD [124+esp]
	xor	ebp,DWORD [28+ebx]
	xor	ecx,DWORD [40+ebx]
	xor	esi,DWORD [44+ebx]
	xor	edx,DWORD [52+ebx]
	xor	edi,DWORD [60+ebx]
	lea	ebx,[64+ebx]
	mov	DWORD [28+eax],ebp
	mov	ebp,DWORD [esp]
	mov	DWORD [40+eax],ecx
	mov	ecx,DWORD [160+esp]
	mov	DWORD [44+eax],esi
	mov	DWORD [52+eax],edx
	mov	DWORD [60+eax],edi
	mov	DWORD [eax],ebp
	lea	eax,[64+eax]
	sub	ecx,64
	jnz	NEAR L$003outer_loop
	jmp	NEAR L$006done
L$005tail:
	; Partial block: finish the key stream in [0..60+esp], then XOR it
	; into the output one byte at a time. ebx holds the remaining length
	; (1..63 here).
	add	edx,DWORD [112+esp]
	add	edi,DWORD [120+esp]
	mov	DWORD [esp],eax
	mov	DWORD [16+esp],ebp
	mov	DWORD [32+esp],ecx
	mov	DWORD [36+esp],esi
	mov	DWORD [48+esp],edx
	mov	DWORD [56+esp],edi
	mov	ebp,DWORD [4+esp]
	mov	ecx,DWORD [8+esp]
	mov	esi,DWORD [12+esp]
	mov	edx,DWORD [20+esp]
	mov	edi,DWORD [24+esp]
	add	ebp,857760878
	add	ecx,2036477234
	add	esi,1797285236
	add	edx,DWORD [84+esp]
	add	edi,DWORD [88+esp]
	mov	DWORD [4+esp],ebp
	mov	DWORD [8+esp],ecx
	mov	DWORD [12+esp],esi
	mov	DWORD [20+esp],edx
	mov	DWORD [24+esp],edi
	mov	ebp,DWORD [28+esp]
	mov	ecx,DWORD [40+esp]
	mov	esi,DWORD [44+esp]
	mov	edx,DWORD [52+esp]
	mov	edi,DWORD [60+esp]
	add	ebp,DWORD [92+esp]
	add	ecx,DWORD [104+esp]
	add	esi,DWORD [108+esp]
	add	edx,DWORD [116+esp]
	add	edi,DWORD [124+esp]
	mov	DWORD [28+esp],ebp
	mov	ebp,DWORD [156+esp]
	mov	DWORD [40+esp],ecx
	mov	ecx,DWORD [152+esp]
	mov	DWORD [44+esp],esi
	xor	esi,esi
	mov	DWORD [52+esp],edx
	mov	DWORD [60+esp],edi
	xor	eax,eax
	xor	edx,edx
L$007tail_loop:
	; ebp = source, ecx = destination, esi = byte index.
	mov	al,BYTE [ebp*1+esi]
	mov	dl,BYTE [esi*1+esp]
	lea	esi,[1+esi]
	xor	al,dl
	mov	BYTE [esi*1+ecx-1],al
	dec	ebx
	jnz	NEAR L$007tail_loop
L$006done:
	add	esp,132
L$000no_data:
	pop	edi
	pop	esi
	pop	ebx
	pop	ebp
	ret

;-----------------------------------------------------------------------
; _ChaCha20_ssse3
;
; Takes the same five stack arguments as _ChaCha20_ctr32 and is normally
; reached through L$ssse3_shortcut from that function.
;
; L$ssse3_shortcut expects eax = run-time address of L$pic_point, because it
; locates the constant table with
;   lea eax,[(L$ssse3_data-L$pic_point)+eax]
; The direct entry below establishes that value itself; without it a direct
; call to this exported symbol would index the table from whatever the
; caller left in eax.
;
; NOTE(review): unlike _ChaCha20_ctr32, this entry does not check for a zero
; byte count, and the tail loop decrements before testing. Direct callers
; must pass a non-zero length.
;
; Frame (esp is aligned down to a multiple of 64):
;   [0..63+esp]      scratch for the one-block path
;   ebx = esp+128    per-iteration working state of the four-block path
;   ebp = esp+384    broadcast copy of the input state for four blocks
;   [512+esp]        caller's esp, restored at L$011done
;   [516+esp]        arg4 pointer
;   [520+esp]        arg5 pointer
;-----------------------------------------------------------------------
global	_ChaCha20_ssse3
align	16
_ChaCha20_ssse3:
L$_ChaCha20_ssse3_begin:
	push	ebp
	push	ebx
	push	esi
	push	edi
	; Direct entry only: make eax = address of L$pic_point, the value the
	; shortcut path arrives with.
	call	L$ssse3_pic_point
L$ssse3_pic_point:
	pop	eax
	lea	eax,[(L$pic_point-L$ssse3_pic_point)+eax]
L$ssse3_shortcut:
	mov	edi,DWORD [20+esp]
	mov	esi,DWORD [24+esp]
	mov	ecx,DWORD [28+esp]
	mov	edx,DWORD [32+esp]
	mov	ebx,DWORD [36+esp]
	mov	ebp,esp
	sub	esp,524
	and	esp,-64
	mov	DWORD [512+esp],ebp
	lea	eax,[(L$ssse3_data-L$pic_point)+eax]
	movdqu	xmm3,[ebx]
	; Fewer than 256 bytes: go straight to the one-block-at-a-time code.
	cmp	ecx,256
	jb	NEAR L$0081x
	mov	DWORD [516+esp],edx
	mov	DWORD [520+esp],ebx
	sub	ecx,256
	lea	ebp,[384+esp]
	; Broadcast every state dword across four lanes. The counter lanes get
	; +0,+1,+2,+3 and then -4, because L$009outer_loop adds 4 up front.
	movdqu	xmm7,[edx]
	pshufd	xmm0,xmm3,0
	pshufd	xmm1,xmm3,85
	pshufd	xmm2,xmm3,170
	pshufd	xmm3,xmm3,255
	paddd	xmm0,[48+eax]
	pshufd	xmm4,xmm7,0
	pshufd	xmm5,xmm7,85
	psubd	xmm0,[64+eax]
	pshufd	xmm6,xmm7,170
	pshufd	xmm7,xmm7,255
	movdqa	[64+ebp],xmm0
	movdqa	[80+ebp],xmm1
	movdqa	[96+ebp],xmm2
	movdqa	[112+ebp],xmm3
	movdqu	xmm3,[16+edx]
	movdqa	[ebp-64],xmm4
	movdqa	[ebp-48],xmm5
	movdqa	[ebp-32],xmm6
	movdqa	[ebp-16],xmm7
	movdqa	xmm7,[32+eax]
	lea	ebx,[128+esp]
	pshufd	xmm0,xmm3,0
	pshufd	xmm1,xmm3,85
	pshufd	xmm2,xmm3,170
	pshufd	xmm3,xmm3,255
	pshufd	xmm4,xmm7,0
	pshufd	xmm5,xmm7,85
	pshufd	xmm6,xmm7,170
	pshufd	xmm7,xmm7,255
	movdqa	[ebp],xmm0
	movdqa	[16+ebp],xmm1
	movdqa	[32+ebp],xmm2
	movdqa	[48+ebp],xmm3
	movdqa	[ebp-128],xmm4
	movdqa	[ebp-112],xmm5
	movdqa	[ebp-96],xmm6
	movdqa	[ebp-80],xmm7
	; Bias the pointers by 128 so the loads and stores below can use the
	; displacements -128, -64, 0 and 64.
	lea	esi,[128+esi]
	lea	edi,[128+edi]
	jmp	NEAR L$009outer_loop
align	16
L$009outer_loop:
	; Copy the broadcast input state (ebp) into the working area (ebx) and
	; advance the four counters by 4.
	movdqa	xmm1,[ebp-112]
	movdqa	xmm2,[ebp-96]
	movdqa	xmm3,[ebp-80]
	movdqa	xmm5,[ebp-48]
	movdqa	xmm6,[ebp-32]
	movdqa	xmm7,[ebp-16]
	movdqa	[ebx-112],xmm1
	movdqa	[ebx-96],xmm2
	movdqa	[ebx-80],xmm3
	movdqa	[ebx-48],xmm5
	movdqa	[ebx-32],xmm6
	movdqa	[ebx-16],xmm7
	movdqa	xmm2,[32+ebp]
	movdqa	xmm3,[48+ebp]
	movdqa	xmm4,[64+ebp]
	movdqa	xmm5,[80+ebp]
	movdqa	xmm6,[96+ebp]
	movdqa	xmm7,[112+ebp]
	paddd	xmm4,[64+eax]
	movdqa	[32+ebx],xmm2
	movdqa	[48+ebx],xmm3
	movdqa	[64+ebx],xmm4
	movdqa	[80+ebx],xmm5
	movdqa	[96+ebx],xmm6
	movdqa	[112+ebx],xmm7
	movdqa	[64+ebp],xmm4
	movdqa	xmm0,[ebp-128]
	movdqa	xmm6,xmm4
	movdqa	xmm3,[ebp-64]
	movdqa	xmm4,[ebp]
	movdqa	xmm5,[16+ebp]
	mov	edx,10
	nop
align	16
L$010loop:
	; 10 passes. Rotations by 16 and 8 use pshufb with the masks at [eax]
	; and [16+eax]; rotations by 12 and 7 use a shift pair plus por.
	paddd	xmm0,xmm3
	movdqa	xmm2,xmm3
	pxor	xmm6,xmm0
	pshufb	xmm6,[eax]
	paddd	xmm4,xmm6
	pxor	xmm2,xmm4
	movdqa	xmm3,[ebx-48]
	movdqa	xmm1,xmm2
	pslld	xmm2,12
	psrld	xmm1,20
	por	xmm2,xmm1
	movdqa	xmm1,[ebx-112]
	paddd	xmm0,xmm2
	movdqa	xmm7,[80+ebx]
	pxor	xmm6,xmm0
	movdqa	[ebx-128],xmm0
	pshufb	xmm6,[16+eax]
	paddd	xmm4,xmm6
	movdqa	[64+ebx],xmm6
	pxor	xmm2,xmm4
	paddd	xmm1,xmm3
	movdqa	xmm0,xmm2
	pslld	xmm2,7
	psrld	xmm0,25
	pxor	xmm7,xmm1
	por	xmm2,xmm0
	movdqa	[ebx],xmm4
	pshufb	xmm7,[eax]
	movdqa	[ebx-64],xmm2
	paddd	xmm5,xmm7
	movdqa	xmm4,[32+ebx]
	pxor	xmm3,xmm5
	movdqa	xmm2,[ebx-32]
	movdqa	xmm0,xmm3
	pslld	xmm3,12
	psrld	xmm0,20
	por	xmm3,xmm0
	movdqa	xmm0,[ebx-96]
	paddd	xmm1,xmm3
	movdqa	xmm6,[96+ebx]
	pxor	xmm7,xmm1
	movdqa	[ebx-112],xmm1
	pshufb	xmm7,[16+eax]
	paddd	xmm5,xmm7
	movdqa	[80+ebx],xmm7
	pxor	xmm3,xmm5
	paddd	xmm0,xmm2
	movdqa	xmm1,xmm3
	pslld	xmm3,7
	psrld	xmm1,25
	pxor	xmm6,xmm0
	por	xmm3,xmm1
	movdqa	[16+ebx],xmm5
	pshufb	xmm6,[eax]
	movdqa	[ebx-48],xmm3
	paddd	xmm4,xmm6
	movdqa	xmm5,[48+ebx]
	pxor	xmm2,xmm4
	movdqa	xmm3,[ebx-16]
	movdqa	xmm1,xmm2
	pslld	xmm2,12
	psrld	xmm1,20
	por	xmm2,xmm1
	movdqa	xmm1,[ebx-80]
	paddd	xmm0,xmm2
	movdqa	xmm7,[112+ebx]
	pxor	xmm6,xmm0
	movdqa	[ebx-96],xmm0
	pshufb	xmm6,[16+eax]
	paddd	xmm4,xmm6
	movdqa	[96+ebx],xmm6
	pxor	xmm2,xmm4
	paddd	xmm1,xmm3
	movdqa	xmm0,xmm2
	pslld	xmm2,7
	psrld	xmm0,25
	pxor	xmm7,xmm1
	por	xmm2,xmm0
	pshufb	xmm7,[eax]
	movdqa	[ebx-32],xmm2
	paddd	xmm5,xmm7
	pxor	xmm3,xmm5
	movdqa	xmm2,[ebx-48]
	movdqa	xmm0,xmm3
	pslld	xmm3,12
	psrld	xmm0,20
	por	xmm3,xmm0
	movdqa	xmm0,[ebx-128]
	paddd	xmm1,xmm3
	pxor	xmm7,xmm1
	movdqa	[ebx-80],xmm1
	pshufb	xmm7,[16+eax]
	paddd	xmm5,xmm7
	movdqa	xmm6,xmm7
	pxor	xmm3,xmm5
	paddd	xmm0,xmm2
	movdqa	xmm1,xmm3
	pslld	xmm3,7
	psrld	xmm1,25
	pxor	xmm6,xmm0
	por	xmm3,xmm1
	pshufb	xmm6,[eax]
	movdqa	[ebx-16],xmm3
	paddd	xmm4,xmm6
	pxor	xmm2,xmm4
	movdqa	xmm3,[ebx-32]
	movdqa	xmm1,xmm2
	pslld	xmm2,12
	psrld	xmm1,20
	por	xmm2,xmm1
	movdqa	xmm1,[ebx-112]
	paddd	xmm0,xmm2
	movdqa	xmm7,[64+ebx]
	pxor	xmm6,xmm0
	movdqa	[ebx-128],xmm0
	pshufb	xmm6,[16+eax]
	paddd	xmm4,xmm6
	movdqa	[112+ebx],xmm6
	pxor	xmm2,xmm4
	paddd	xmm1,xmm3
	movdqa	xmm0,xmm2
	pslld	xmm2,7
	psrld	xmm0,25
	pxor	xmm7,xmm1
	por	xmm2,xmm0
	movdqa	[32+ebx],xmm4
	pshufb	xmm7,[eax]
	movdqa	[ebx-48],xmm2
	paddd	xmm5,xmm7
	movdqa	xmm4,[ebx]
	pxor	xmm3,xmm5
	movdqa	xmm2,[ebx-16]
	movdqa	xmm0,xmm3
	pslld	xmm3,12
	psrld	xmm0,20
	por	xmm3,xmm0
	movdqa	xmm0,[ebx-96]
	paddd	xmm1,xmm3
	movdqa	xmm6,[80+ebx]
	pxor	xmm7,xmm1
	movdqa	[ebx-112],xmm1
	pshufb	xmm7,[16+eax]
	paddd	xmm5,xmm7
	movdqa	[64+ebx],xmm7
	pxor	xmm3,xmm5
	paddd	xmm0,xmm2
	movdqa	xmm1,xmm3
	pslld	xmm3,7
	psrld	xmm1,25
	pxor	xmm6,xmm0
	por	xmm3,xmm1
	movdqa	[48+ebx],xmm5
	pshufb	xmm6,[eax]
	movdqa	[ebx-32],xmm3
	paddd	xmm4,xmm6
	movdqa	xmm5,[16+ebx]
	pxor	xmm2,xmm4
	movdqa	xmm3,[ebx-64]
	movdqa	xmm1,xmm2
	pslld	xmm2,12
	psrld	xmm1,20
	por	xmm2,xmm1
	movdqa	xmm1,[ebx-80]
	paddd	xmm0,xmm2
	movdqa	xmm7,[96+ebx]
	pxor	xmm6,xmm0
	movdqa	[ebx-96],xmm0
	pshufb	xmm6,[16+eax]
	paddd	xmm4,xmm6
	movdqa	[80+ebx],xmm6
	pxor	xmm2,xmm4
	paddd	xmm1,xmm3
	movdqa	xmm0,xmm2
	pslld	xmm2,7
	psrld	xmm0,25
	pxor	xmm7,xmm1
	por	xmm2,xmm0
	pshufb	xmm7,[eax]
	movdqa	[ebx-16],xmm2
	paddd	xmm5,xmm7
	pxor	xmm3,xmm5
	movdqa	xmm0,xmm3
	pslld	xmm3,12
	psrld	xmm0,20
	por	xmm3,xmm0
	movdqa	xmm0,[ebx-128]
	paddd	xmm1,xmm3
	movdqa	xmm6,[64+ebx]
	pxor	xmm7,xmm1
	movdqa	[ebx-80],xmm1
	pshufb	xmm7,[16+eax]
	paddd	xmm5,xmm7
	movdqa	[96+ebx],xmm7
	pxor	xmm3,xmm5
	movdqa	xmm1,xmm3
	pslld	xmm3,7
	psrld	xmm1,25
	por	xmm3,xmm1
	dec	edx
	jnz	NEAR L$010loop
	; Spill the registers that are still live, then for each group of four
	; state rows: add the input state, transpose the 4x4 dwords so each
	; register holds 16 consecutive bytes of one block, XOR with the
	; source and store.
	movdqa	[ebx-64],xmm3
	movdqa	[ebx],xmm4
	movdqa	[16+ebx],xmm5
	movdqa	[64+ebx],xmm6
	movdqa	[96+ebx],xmm7
	movdqa	xmm1,[ebx-112]
	movdqa	xmm2,[ebx-96]
	movdqa	xmm3,[ebx-80]
	paddd	xmm0,[ebp-128]
	paddd	xmm1,[ebp-112]
	paddd	xmm2,[ebp-96]
	paddd	xmm3,[ebp-80]
	movdqa	xmm6,xmm0
	punpckldq	xmm0,xmm1
	movdqa	xmm7,xmm2
	punpckldq	xmm2,xmm3
	punpckhdq	xmm6,xmm1
	punpckhdq	xmm7,xmm3
	movdqa	xmm1,xmm0
	punpcklqdq	xmm0,xmm2
	movdqa	xmm3,xmm6
	punpcklqdq	xmm6,xmm7
	punpckhqdq	xmm1,xmm2
	punpckhqdq	xmm3,xmm7
	movdqu	xmm4,[esi-128]
	movdqu	xmm5,[esi-64]
	movdqu	xmm2,[esi]
	movdqu	xmm7,[64+esi]
	lea	esi,[16+esi]
	pxor	xmm4,xmm0
	movdqa	xmm0,[ebx-64]
	pxor	xmm5,xmm1
	movdqa	xmm1,[ebx-48]
	pxor	xmm6,xmm2
	movdqa	xmm2,[ebx-32]
	pxor	xmm7,xmm3
	movdqa	xmm3,[ebx-16]
	movdqu	[edi-128],xmm4
	movdqu	[edi-64],xmm5
	movdqu	[edi],xmm6
	movdqu	[64+edi],xmm7
	lea	edi,[16+edi]
	paddd	xmm0,[ebp-64]
	paddd	xmm1,[ebp-48]
	paddd	xmm2,[ebp-32]
	paddd	xmm3,[ebp-16]
	movdqa	xmm6,xmm0
	punpckldq	xmm0,xmm1
	movdqa	xmm7,xmm2
	punpckldq	xmm2,xmm3
	punpckhdq	xmm6,xmm1
	punpckhdq	xmm7,xmm3
	movdqa	xmm1,xmm0
	punpcklqdq	xmm0,xmm2
	movdqa	xmm3,xmm6
	punpcklqdq	xmm6,xmm7
	punpckhqdq	xmm1,xmm2
	punpckhqdq	xmm3,xmm7
	movdqu	xmm4,[esi-128]
	movdqu	xmm5,[esi-64]
	movdqu	xmm2,[esi]
	movdqu	xmm7,[64+esi]
	lea	esi,[16+esi]
	pxor	xmm4,xmm0
	movdqa	xmm0,[ebx]
	pxor	xmm5,xmm1
	movdqa	xmm1,[16+ebx]
	pxor	xmm6,xmm2
	movdqa	xmm2,[32+ebx]
	pxor	xmm7,xmm3
	movdqa	xmm3,[48+ebx]
	movdqu	[edi-128],xmm4
	movdqu	[edi-64],xmm5
	movdqu	[edi],xmm6
	movdqu	[64+edi],xmm7
	lea	edi,[16+edi]
	paddd	xmm0,[ebp]
	paddd	xmm1,[16+ebp]
	paddd	xmm2,[32+ebp]
	paddd	xmm3,[48+ebp]
	movdqa	xmm6,xmm0
	punpckldq	xmm0,xmm1
	movdqa	xmm7,xmm2
	punpckldq	xmm2,xmm3
	punpckhdq	xmm6,xmm1
	punpckhdq	xmm7,xmm3
	movdqa	xmm1,xmm0
	punpcklqdq	xmm0,xmm2
	movdqa	xmm3,xmm6
	punpcklqdq	xmm6,xmm7
	punpckhqdq	xmm1,xmm2
	punpckhqdq	xmm3,xmm7
	movdqu	xmm4,[esi-128]
	movdqu	xmm5,[esi-64]
	movdqu	xmm2,[esi]
	movdqu	xmm7,[64+esi]
	lea	esi,[16+esi]
	pxor	xmm4,xmm0
	movdqa	xmm0,[64+ebx]
	pxor	xmm5,xmm1
	movdqa	xmm1,[80+ebx]
	pxor	xmm6,xmm2
	movdqa	xmm2,[96+ebx]
	pxor	xmm7,xmm3
	movdqa	xmm3,[112+ebx]
	movdqu	[edi-128],xmm4
	movdqu	[edi-64],xmm5
	movdqu	[edi],xmm6
	movdqu	[64+edi],xmm7
	lea	edi,[16+edi]
	paddd	xmm0,[64+ebp]
	paddd	xmm1,[80+ebp]
	paddd	xmm2,[96+ebp]
	paddd	xmm3,[112+ebp]
	movdqa	xmm6,xmm0
	punpckldq	xmm0,xmm1
	movdqa	xmm7,xmm2
	punpckldq	xmm2,xmm3
	punpckhdq	xmm6,xmm1
	punpckhdq	xmm7,xmm3
	movdqa	xmm1,xmm0
	punpcklqdq	xmm0,xmm2
	movdqa	xmm3,xmm6
	punpcklqdq	xmm6,xmm7
	punpckhqdq	xmm1,xmm2
	punpckhqdq	xmm3,xmm7
	movdqu	xmm4,[esi-128]
	movdqu	xmm5,[esi-64]
	movdqu	xmm2,[esi]
	movdqu	xmm7,[64+esi]
	; 3*16 + 208 = 256: the pointers end up one four-block group further.
	lea	esi,[208+esi]
	pxor	xmm4,xmm0
	pxor	xmm5,xmm1
	pxor	xmm6,xmm2
	pxor	xmm7,xmm3
	movdqu	[edi-128],xmm4
	movdqu	[edi-64],xmm5
	movdqu	[edi],xmm6
	movdqu	[64+edi],xmm7
	lea	edi,[208+edi]
	sub	ecx,256
	jnc	NEAR L$009outer_loop
	add	ecx,256
	jz	NEAR L$011done
	; 1..255 bytes remain. Undo the 128-byte pointer bias and rebuild a
	; single counter block: lane 0 of the last counter vector plus 4,
	; combined with dwords 1..3 of the caller's arg5 block.
	mov	ebx,DWORD [520+esp]
	lea	esi,[esi-128]
	mov	edx,DWORD [516+esp]
	lea	edi,[edi-128]
	movd	xmm2,DWORD [64+ebp]
	movdqu	xmm3,[ebx]
	paddd	xmm2,[96+eax]
	pand	xmm3,[112+eax]
	por	xmm3,xmm2
L$0081x:
	; One block per iteration: xmm0..xmm3 hold the four state rows, xmm6
	; and xmm7 the two pshufb rotate masks, and [0..63+esp] the input
	; state for the final addition.
	movdqa	xmm0,[32+eax]
	movdqu	xmm1,[edx]
	movdqu	xmm2,[16+edx]
	movdqa	xmm6,[eax]
	movdqa	xmm7,[16+eax]
	mov	DWORD [48+esp],ebp
	movdqa	[esp],xmm0
	movdqa	[16+esp],xmm1
	movdqa	[32+esp],xmm2
	movdqa	[48+esp],xmm3
	mov	edx,10
	jmp	NEAR L$012loop1x
align	16
L$013outer1x:
	; Next block: reload the rows and add 1 to the counter dword.
	movdqa	xmm3,[80+eax]
	movdqa	xmm0,[esp]
	movdqa	xmm1,[16+esp]
	movdqa	xmm2,[32+esp]
	paddd	xmm3,[48+esp]
	mov	edx,10
	movdqa	[48+esp],xmm3
	jmp	NEAR L$012loop1x
align	16
L$012loop1x:
	paddd	xmm0,xmm1
	pxor	xmm3,xmm0
db	102,15,56,0,222			; pshufb xmm3,xmm6
	paddd	xmm2,xmm3
	pxor	xmm1,xmm2
	movdqa	xmm4,xmm1
	psrld	xmm1,20
	pslld	xmm4,12
	por	xmm1,xmm4
	paddd	xmm0,xmm1
	pxor	xmm3,xmm0
db	102,15,56,0,223			; pshufb xmm3,xmm7
	paddd	xmm2,xmm3
	pxor	xmm1,xmm2
	movdqa	xmm4,xmm1
	psrld	xmm1,25
	pslld	xmm4,7
	por	xmm1,xmm4
	; Rotate the lanes of rows 1..3 before the second half of the pass...
	pshufd	xmm2,xmm2,78
	pshufd	xmm1,xmm1,57
	pshufd	xmm3,xmm3,147
	nop
	paddd	xmm0,xmm1
	pxor	xmm3,xmm0
db	102,15,56,0,222			; pshufb xmm3,xmm6
	paddd	xmm2,xmm3
	pxor	xmm1,xmm2
	movdqa	xmm4,xmm1
	psrld	xmm1,20
	pslld	xmm4,12
	por	xmm1,xmm4
	paddd	xmm0,xmm1
	pxor	xmm3,xmm0
db	102,15,56,0,223			; pshufb xmm3,xmm7
	paddd	xmm2,xmm3
	pxor	xmm1,xmm2
	movdqa	xmm4,xmm1
	psrld	xmm1,25
	pslld	xmm4,7
	por	xmm1,xmm4
	; ...and rotate them back afterwards.
	pshufd	xmm2,xmm2,78
	pshufd	xmm1,xmm1,147
	pshufd	xmm3,xmm3,57
	dec	edx
	jnz	NEAR L$012loop1x
	paddd	xmm0,[esp]
	paddd	xmm1,[16+esp]
	paddd	xmm2,[32+esp]
	paddd	xmm3,[48+esp]
	cmp	ecx,64
	jb	NEAR L$014tail
	movdqu	xmm4,[esi]
	movdqu	xmm5,[16+esi]
	pxor	xmm0,xmm4
	movdqu	xmm4,[32+esi]
	pxor	xmm1,xmm5
	movdqu	xmm5,[48+esi]
	pxor	xmm2,xmm4
	pxor	xmm3,xmm5
	lea	esi,[64+esi]
	movdqu	[edi],xmm0
	movdqu	[16+edi],xmm1
	movdqu	[32+edi],xmm2
	movdqu	[48+edi],xmm3
	lea	edi,[64+edi]
	sub	ecx,64
	jnz	NEAR L$013outer1x
	jmp	NEAR L$011done
L$014tail:
	; Fewer than 64 bytes left: park the key stream at [0..63+esp] and XOR
	; byte by byte. ecx = remaining bytes, ebp = byte index.
	movdqa	[esp],xmm0
	movdqa	[16+esp],xmm1
	movdqa	[32+esp],xmm2
	movdqa	[48+esp],xmm3
	xor	eax,eax
	xor	edx,edx
	xor	ebp,ebp
L$015tail_loop:
	mov	al,BYTE [ebp*1+esp]
	mov	dl,BYTE [ebp*1+esi]
	lea	ebp,[1+ebp]
	xor	al,dl
	mov	BYTE [ebp*1+edi-1],al
	dec	ecx
	jnz	NEAR L$015tail_loop
L$011done:
	mov	esp,DWORD [512+esp]
	pop	edi
	pop	esi
	pop	ebx
	pop	ebp
	ret

; Constant table for the SSSE3 path, addressed relative to eax:
;   +0    pshufb mask that rotates every dword left by 16 bits
;   +16   pshufb mask that rotates every dword left by 8 bits
;   +32   the four constants (ASCII "expand 32-byte k")
;   +48   0,1,2,3    per-lane counter offsets for the four-block path
;   +64   4,4,4,4    counter step for the four-block path
;   +80   1,0,0,0    counter step for the one-block path
;   +96   4,0,0,0    counter advance when leaving the four-block path
;   +112  0,-1,-1,-1 mask that keeps dwords 1..3 of the arg5 block
align	64
L$ssse3_data:
db	2,3,0,1,6,7,4,5,10,11,8,9,14,15,12,13
db	3,0,1,2,7,4,5,6,11,8,9,10,15,12,13,14
dd	1634760805,857760878,2036477234,1797285236
dd	0,1,2,3
dd	4,4,4,4
dd	1,0,0,0
dd	4,0,0,0
dd	0,-1,-1,-1
align	64
; ASCII, NUL-terminated: "ChaCha20 for x86, CRYPTOGAMS by <appro@openssl.org>"
db	67,104,97,67,104,97,50,48,32,102,111,114,32,120,56,54
db	44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32
db	60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111
db	114,103,62,0
segment	.bss
common	_OPENSSL_ia32cap_P 16