; This file is generated from a similarly-named Perl script in the BoringSSL
; source tree. Do not edit by hand.

; ---------------------------------------------------------------------------
; Review notes (derived only from the code in this file):
;
; _ChaCha20_ctr32 -- 32-bit x86, NASM syntax, assembled only for win32 output.
;   Five dword stack arguments, read at [20+esp]..[36+esp] once the four
;   register pushes are done; the routine ends in a plain `ret`, so the caller
;   removes the arguments:
;     arg1 [20+esp]  destination pointer (stored through)
;     arg2 [24+esp]  source pointer (loaded and XORed with the generated block)
;     arg3 [28+esp]  byte count; zero returns immediately
;     arg4 [32+esp]  pointer to 32 bytes copied into the state (8 dwords)
;     arg5 [36+esp]  pointer to 16 bytes copied into the state (4 dwords);
;                    the first dword is the one that is incremented per block
;   ebp, ebx, esi and edi are saved and restored; eax, ecx, edx, the flags and
;   (on the SSSE3 path) xmm0-xmm7 are modified.
;
;   Two code paths: a scalar path (L$001x86) and an SSSE3 path
;   (L$ssse3_shortcut), chosen by testing two bits of _OPENSSL_ia32cap_P.
; ---------------------------------------------------------------------------

%include "ring_core_generated/prefix_symbols_nasm.inc"
%ifidn __OUTPUT_FORMAT__, win32
%ifidn __OUTPUT_FORMAT__,obj
section	code	use32 class=code align=64
%elifidn __OUTPUT_FORMAT__,win32
$@feat.00 equ 1
section	.text	code align=64
%else
section	.text	code
%endif
global	_ChaCha20_ctr32
align	16
_ChaCha20_ctr32:
L$_ChaCha20_ctr32_begin:
	push	ebp
	push	ebx
	push	esi
	push	edi
	xor	eax,eax
	cmp	eax,DWORD [28+esp]                  ; arg3 (byte count) == 0 ?
	je	NEAR L$000no_data
	; call/pop leaves the run-time address of L$pic_point in eax; the SSSE3
	; path uses it as the base for addressing L$ssse3_data.
	call	L$pic_point
L$pic_point:
	pop	eax
	lea	ebp,[_OPENSSL_ia32cap_P]
	; Both tested bits must be set to take the SSSE3 path.
	; NOTE(review): presumably the FXSR and SSSE3 capability bits -- confirm
	; against the _OPENSSL_ia32cap_P layout.
	test	DWORD [ebp],16777216                ; bit 24 of dword 0
	jz	NEAR L$001x86
	test	DWORD [4+ebp],512                   ; bit 9 of dword 1
	jz	NEAR L$001x86
	jmp	NEAR L$ssse3_shortcut

; ---------------------------------------------------------------------------
; Scalar path.  Frame after `sub esp,132`:
;   [0..60+esp]     16-dword working state
;   [80..108+esp]   copy of the 8 dwords at arg4
;   [112..124+esp]  copy of the 4 dwords at arg5 (dword 0 = running counter)
;   [128+esp]       inner-loop iteration counter
;   [152+esp] / [156+esp] / [160+esp]  arg1 / arg2 / arg3 (original argument
;                   slots, reused to hold the current out / in / remaining len)
; ---------------------------------------------------------------------------
L$001x86:
	mov	esi,DWORD [32+esp]                  ; arg4
	mov	edi,DWORD [36+esp]                  ; arg5
	sub	esp,132
	mov	eax,DWORD [esi]
	mov	ebx,DWORD [4+esi]
	mov	ecx,DWORD [8+esi]
	mov	edx,DWORD [12+esi]
	mov	DWORD [80+esp],eax
	mov	DWORD [84+esp],ebx
	mov	DWORD [88+esp],ecx
	mov	DWORD [92+esp],edx
	mov	eax,DWORD [16+esi]
	mov	ebx,DWORD [20+esi]
	mov	ecx,DWORD [24+esi]
	mov	edx,DWORD [28+esi]
	mov	DWORD [96+esp],eax
	mov	DWORD [100+esp],ebx
	mov	DWORD [104+esp],ecx
	mov	DWORD [108+esp],edx
	mov	eax,DWORD [edi]
	mov	ebx,DWORD [4+edi]
	mov	ecx,DWORD [8+edi]
	mov	edx,DWORD [12+edi]
	sub	eax,1                               ; pre-decrement: L$002entry adds 1 before every block
	mov	DWORD [112+esp],eax
	mov	DWORD [116+esp],ebx
	mov	DWORD [120+esp],ecx
	mov	DWORD [124+esp],edx
	jmp	NEAR L$002entry
align	16
L$003outer_loop:
	; Reached after a full 64-byte block: write the advanced pointers and the
	; remaining length back into the argument slots.
	mov	DWORD [156+esp],ebx                 ; in
	mov	DWORD [152+esp],eax                 ; out
	mov	DWORD [160+esp],ecx                 ; remaining length
L$002entry:
	; Build the working state for one block.  The four immediates are the
	; little-endian dwords of the ASCII string "expand 32-byte k".
	mov	eax,1634760805
	mov	DWORD [4+esp],857760878
	mov	DWORD [8+esp],2036477234
	mov	DWORD [12+esp],1797285236
	mov	ebx,DWORD [84+esp]
	mov	ebp,DWORD [88+esp]
	mov	ecx,DWORD [104+esp]
	mov	esi,DWORD [108+esp]
	mov	edx,DWORD [116+esp]
	mov	edi,DWORD [120+esp]
	mov	DWORD [20+esp],ebx
	mov	DWORD [24+esp],ebp
	mov	DWORD [40+esp],ecx
	mov	DWORD [44+esp],esi
	mov	DWORD [52+esp],edx
	mov	DWORD [56+esp],edi
	mov	ebx,DWORD [92+esp]
	mov	edi,DWORD [124+esp]
	mov	edx,DWORD [112+esp]
	mov	ebp,DWORD [80+esp]
	mov	ecx,DWORD [96+esp]
	mov	esi,DWORD [100+esp]
	add	edx,1                               ; advance the block counter
	mov	DWORD [28+esp],ebx
	mov	DWORD [60+esp],edi
	mov	DWORD [112+esp],edx
	mov	ebx,10                              ; inner loop runs 10 times
	jmp	NEAR L$004loop
align	16
; Inner loop: each pass performs eight add / xor / rotate-left(16,12,8,7)
; groups over the 16-dword state.  State words not currently in registers are
; spilled to [0..60+esp]; the pass counter lives in [128+esp] because ebx is
; needed as a work register.
L$004loop:
	add	eax,ebp
	mov	DWORD [128+esp],ebx
	mov	ebx,ebp
	xor	edx,eax
	rol	edx,16
	add	ecx,edx
	xor	ebx,ecx
	mov	edi,DWORD [52+esp]
	rol	ebx,12
	mov	ebp,DWORD [20+esp]
	add	eax,ebx
	xor	edx,eax
	mov	DWORD [esp],eax
	rol	edx,8
	mov	eax,DWORD [4+esp]
	add	ecx,edx
	mov	DWORD [48+esp],edx
	xor	ebx,ecx
	add	eax,ebp
	rol	ebx,7
	xor	edi,eax
	mov	DWORD [32+esp],ecx
	rol	edi,16
	mov	DWORD [16+esp],ebx
	add	esi,edi
	mov	ecx,DWORD [40+esp]
	xor	ebp,esi
	mov	edx,DWORD [56+esp]
	rol	ebp,12
	mov	ebx,DWORD [24+esp]
	add	eax,ebp
	xor	edi,eax
	mov	DWORD [4+esp],eax
	rol	edi,8
	mov	eax,DWORD [8+esp]
	add	esi,edi
	mov	DWORD [52+esp],edi
	xor	ebp,esi
	add	eax,ebx
	rol	ebp,7
	xor	edx,eax
	mov	DWORD [36+esp],esi
	rol	edx,16
	mov	DWORD [20+esp],ebp
	add	ecx,edx
	mov	esi,DWORD [44+esp]
	xor	ebx,ecx
	mov	edi,DWORD [60+esp]
	rol	ebx,12
	mov	ebp,DWORD [28+esp]
	add	eax,ebx
	xor	edx,eax
	mov	DWORD [8+esp],eax
	rol	edx,8
	mov	eax,DWORD [12+esp]
	add	ecx,edx
	mov	DWORD [56+esp],edx
	xor	ebx,ecx
	add	eax,ebp
	rol	ebx,7
	xor	edi,eax
	rol	edi,16
	mov	DWORD [24+esp],ebx
	add	esi,edi
	xor	ebp,esi
	rol	ebp,12
	mov	ebx,DWORD [20+esp]
	add	eax,ebp
	xor	edi,eax
	mov	DWORD [12+esp],eax
	rol	edi,8
	mov	eax,DWORD [esp]
	add	esi,edi
	mov	edx,edi
	xor	ebp,esi
	add	eax,ebx
	rol	ebp,7
	xor	edx,eax
	rol	edx,16
	mov	DWORD [28+esp],ebp
	add	ecx,edx
	xor	ebx,ecx
	mov	edi,DWORD [48+esp]
	rol	ebx,12
	mov	ebp,DWORD [24+esp]
	add	eax,ebx
	xor	edx,eax
	mov	DWORD [esp],eax
	rol	edx,8
	mov	eax,DWORD [4+esp]
	add	ecx,edx
	mov	DWORD [60+esp],edx
	xor	ebx,ecx
	add	eax,ebp
	rol	ebx,7
	xor	edi,eax
	mov	DWORD [40+esp],ecx
	rol	edi,16
	mov	DWORD [20+esp],ebx
	add	esi,edi
	mov	ecx,DWORD [32+esp]
	xor	ebp,esi
	mov	edx,DWORD [52+esp]
	rol	ebp,12
	mov	ebx,DWORD [28+esp]
	add	eax,ebp
	xor	edi,eax
	mov	DWORD [4+esp],eax
	rol	edi,8
	mov	eax,DWORD [8+esp]
	add	esi,edi
	mov	DWORD [48+esp],edi
	xor	ebp,esi
	add	eax,ebx
	rol	ebp,7
	xor	edx,eax
	mov	DWORD [44+esp],esi
	rol	edx,16
	mov	DWORD [24+esp],ebp
	add	ecx,edx
	mov	esi,DWORD [36+esp]
	xor	ebx,ecx
	mov	edi,DWORD [56+esp]
	rol	ebx,12
	mov	ebp,DWORD [16+esp]
	add	eax,ebx
	xor	edx,eax
	mov	DWORD [8+esp],eax
	rol	edx,8
	mov	eax,DWORD [12+esp]
	add	ecx,edx
	mov	DWORD [52+esp],edx
	xor	ebx,ecx
	add	eax,ebp
	rol	ebx,7
	xor	edi,eax
	rol	edi,16
	mov	DWORD [28+esp],ebx
	add	esi,edi
	xor	ebp,esi
	mov	edx,DWORD [48+esp]
	rol	ebp,12
	mov	ebx,DWORD [128+esp]                 ; reload the pass counter
	add	eax,ebp
	xor	edi,eax
	mov	DWORD [12+esp],eax
	rol	edi,8
	mov	eax,DWORD [esp]
	add	esi,edi
	mov	DWORD [56+esp],edi
	xor	ebp,esi
	rol	ebp,7
	dec	ebx
	jnz	NEAR L$004loop
	; Add the initial state back in.  Six words are live in registers here
	; (eax, ebp, ecx, esi, edx, edi = words 0, 4, 8, 9, 12, 14); the other
	; ten are in the working-state slots on the stack.
	mov	ebx,DWORD [160+esp]                 ; remaining length
	add	eax,1634760805
	add	ebp,DWORD [80+esp]
	add	ecx,DWORD [96+esp]
	add	esi,DWORD [100+esp]
	cmp	ebx,64
	jb	NEAR L$005tail                      ; fewer than 64 bytes left (unsigned compare)
	; Full block: XOR 64 bytes at `in` (ebx) into 64 bytes at `out` (eax).
	mov	ebx,DWORD [156+esp]
	add	edx,DWORD [112+esp]
	add	edi,DWORD [120+esp]
	xor	eax,DWORD [ebx]
	xor	ebp,DWORD [16+ebx]
	mov	DWORD [esp],eax                     ; park word 0; eax is needed for the out pointer
	mov	eax,DWORD [152+esp]
	xor	ecx,DWORD [32+ebx]
	xor	esi,DWORD [36+ebx]
	xor	edx,DWORD [48+ebx]
	xor	edi,DWORD [56+ebx]
	mov	DWORD [16+eax],ebp
	mov	DWORD [32+eax],ecx
	mov	DWORD [36+eax],esi
	mov	DWORD [48+eax],edx
	mov	DWORD [56+eax],edi
	mov	ebp,DWORD [4+esp]
	mov	ecx,DWORD [8+esp]
	mov	esi,DWORD [12+esp]
	mov	edx,DWORD [20+esp]
	mov	edi,DWORD [24+esp]
	add	ebp,857760878
	add	ecx,2036477234
	add	esi,1797285236
	add	edx,DWORD [84+esp]
	add	edi,DWORD [88+esp]
	xor	ebp,DWORD [4+ebx]
	xor	ecx,DWORD [8+ebx]
	xor	esi,DWORD [12+ebx]
	xor	edx,DWORD [20+ebx]
	xor	edi,DWORD [24+ebx]
	mov	DWORD [4+eax],ebp
	mov	DWORD [8+eax],ecx
	mov	DWORD [12+eax],esi
	mov	DWORD [20+eax],edx
	mov	DWORD [24+eax],edi
	mov	ebp,DWORD [28+esp]
	mov	ecx,DWORD [40+esp]
	mov	esi,DWORD [44+esp]
	mov	edx,DWORD [52+esp]
	mov	edi,DWORD [60+esp]
	add	ebp,DWORD [92+esp]
	add	ecx,DWORD [104+esp]
	add	esi,DWORD [108+esp]
	add	edx,DWORD [116+esp]
	add	edi,DWORD [124+esp]
	xor	ebp,DWORD [28+ebx]
	xor	ecx,DWORD [40+ebx]
	xor	esi,DWORD [44+ebx]
	xor	edx,DWORD [52+ebx]
	xor	edi,DWORD [60+ebx]
	lea	ebx,[64+ebx]                        ; in += 64
	mov	DWORD [28+eax],ebp
	mov	ebp,DWORD [esp]
	mov	DWORD [40+eax],ecx
	mov	ecx,DWORD [160+esp]
	mov	DWORD [44+eax],esi
	mov	DWORD [52+eax],edx
	mov	DWORD [60+eax],edi
	mov	DWORD [eax],ebp                     ; word 0, parked earlier
	lea	eax,[64+eax]                        ; out += 64
	sub	ecx,64
	jnz	NEAR L$003outer_loop
	jmp	NEAR L$006done
L$005tail:
	; Partial final block: finish the "add initial state" step for all 16
	; words, store the full 64-byte block at [0..60+esp], then XOR it into
	; the output one byte at a time.  ebx still holds the remaining length.
	add	edx,DWORD [112+esp]
	add	edi,DWORD [120+esp]
	mov	DWORD [esp],eax
	mov	DWORD [16+esp],ebp
	mov	DWORD [32+esp],ecx
	mov	DWORD [36+esp],esi
	mov	DWORD [48+esp],edx
	mov	DWORD [56+esp],edi
	mov	ebp,DWORD [4+esp]
	mov	ecx,DWORD [8+esp]
	mov	esi,DWORD [12+esp]
	mov	edx,DWORD [20+esp]
	mov	edi,DWORD [24+esp]
	add	ebp,857760878
	add	ecx,2036477234
	add	esi,1797285236
	add	edx,DWORD [84+esp]
	add	edi,DWORD [88+esp]
	mov	DWORD [4+esp],ebp
	mov	DWORD [8+esp],ecx
	mov	DWORD [12+esp],esi
	mov	DWORD [20+esp],edx
	mov	DWORD [24+esp],edi
	mov	ebp,DWORD [28+esp]
	mov	ecx,DWORD [40+esp]
	mov	esi,DWORD [44+esp]
	mov	edx,DWORD [52+esp]
	mov	edi,DWORD [60+esp]
	add	ebp,DWORD [92+esp]
	add	ecx,DWORD [104+esp]
	add	esi,DWORD [108+esp]
	add	edx,DWORD [116+esp]
	add	edi,DWORD [124+esp]
	mov	DWORD [28+esp],ebp
	mov	ebp,DWORD [156+esp]                 ; ebp = in
	mov	DWORD [40+esp],ecx
	mov	ecx,DWORD [152+esp]                 ; ecx = out
	mov	DWORD [44+esp],esi
	xor	esi,esi                             ; esi = byte index
	mov	DWORD [52+esp],edx
	mov	DWORD [60+esp],edi
	xor	eax,eax
	xor	edx,edx
L$007tail_loop:
	mov	al,BYTE [ebp*1+esi]                 ; input byte
	mov	dl,BYTE [esi*1+esp]                 ; generated-block byte
	lea	esi,[1+esi]                         ; lea: advance index without touching flags
	xor	al,dl
	mov	BYTE [esi*1+ecx-1],al               ; -1 compensates for the early increment
	dec	ebx
	jnz	NEAR L$007tail_loop
L$006done:
	add	esp,132
L$000no_data:
	pop	edi
	pop	esi
	pop	ebx
	pop	ebp
	ret
align	16
; ---------------------------------------------------------------------------
; SSSE3 path.  Normally entered at L$ssse3_shortcut from _ChaCha20_ctr32 with
; the four registers already pushed and eax = address of L$pic_point.
; NOTE(review): the __ChaCha20_ssse3 entry below pushes the registers itself
; but does not set eax, which L$ssse3_shortcut uses to locate L$ssse3_data;
; the label is not declared global here and no caller is visible in this file.
;
; Frame (esp aligned down to 64 bytes):
;   [0..255+esp]    working state for the 4-block loop (addressed via
;                   ebx = esp+128); the 1-block loop uses only [0..63+esp]
;   [256..511+esp]  input state with each dword broadcast across four lanes
;                   (addressed via ebp = esp+384)
;   [512+esp]       caller's esp
;   [516+esp] / [520+esp]  saved arg4 / arg5 pointers
; ---------------------------------------------------------------------------
__ChaCha20_ssse3:
	push	ebp
	push	ebx
	push	esi
	push	edi
L$ssse3_shortcut:
	mov	edi,DWORD [20+esp]                  ; arg1: out
	mov	esi,DWORD [24+esp]                  ; arg2: in
	mov	ecx,DWORD [28+esp]                  ; arg3: length
	mov	edx,DWORD [32+esp]                  ; arg4: 32-byte block
	mov	ebx,DWORD [36+esp]                  ; arg5: 16-byte block
	mov	ebp,esp
	sub	esp,524
	and	esp,-64
	mov	DWORD [512+esp],ebp                 ; remember the caller's esp
	lea	eax,[(L$ssse3_data-L$pic_point)+eax] ; eax = run-time address of L$ssse3_data
	movdqu	xmm3,[ebx]
	cmp	ecx,256
	jb	NEAR L$0081x                        ; under 256 bytes: one block at a time
	mov	DWORD [516+esp],edx
	mov	DWORD [520+esp],ebx
	sub	ecx,256                             ; bias the length so `jnc` below means "256 more remain"
	lea	ebp,[384+esp]
	movdqu	xmm7,[edx]
	; Broadcast every state dword to all four lanes (pshufd 0/85/170/255).
	; The counter lanes get +0,1,2,3 and are pre-decremented by 4 because the
	; outer loop adds 4 before every pass, including the first.
	pshufd	xmm0,xmm3,0
	pshufd	xmm1,xmm3,85
	pshufd	xmm2,xmm3,170
	pshufd	xmm3,xmm3,255
	paddd	xmm0,[48+eax]
	pshufd	xmm4,xmm7,0
	pshufd	xmm5,xmm7,85
	psubd	xmm0,[64+eax]
	pshufd	xmm6,xmm7,170
	pshufd	xmm7,xmm7,255
	movdqa	[64+ebp],xmm0
	movdqa	[80+ebp],xmm1
	movdqa	[96+ebp],xmm2
	movdqa	[112+ebp],xmm3
	movdqu	xmm3,[16+edx]
	movdqa	[ebp-64],xmm4
	movdqa	[ebp-48],xmm5
	movdqa	[ebp-32],xmm6
	movdqa	[ebp-16],xmm7
	movdqa	xmm7,[32+eax]
	lea	ebx,[128+esp]
	pshufd	xmm0,xmm3,0
	pshufd	xmm1,xmm3,85
	pshufd	xmm2,xmm3,170
	pshufd	xmm3,xmm3,255
	pshufd	xmm4,xmm7,0
	pshufd	xmm5,xmm7,85
	pshufd	xmm6,xmm7,170
	pshufd	xmm7,xmm7,255
	movdqa	[ebp],xmm0
	movdqa	[16+ebp],xmm1
	movdqa	[32+ebp],xmm2
	movdqa	[48+ebp],xmm3
	movdqa	[ebp-128],xmm4
	movdqa	[ebp-112],xmm5
	movdqa	[ebp-96],xmm6
	movdqa	[ebp-80],xmm7
	lea	esi,[128+esi]                       ; bias in/out by 128 so the loop can use -128..+64 displacements
	lea	edi,[128+edi]
	jmp	NEAR L$009outer_loop
align	16
; One pass produces 256 bytes: copy the broadcast state at ebp into the
; working area at ebx, advancing the four counter lanes by 4.
L$009outer_loop:
	movdqa	xmm1,[ebp-112]
	movdqa	xmm2,[ebp-96]
	movdqa	xmm3,[ebp-80]
	movdqa	xmm5,[ebp-48]
	movdqa	xmm6,[ebp-32]
	movdqa	xmm7,[ebp-16]
	movdqa	[ebx-112],xmm1
	movdqa	[ebx-96],xmm2
	movdqa	[ebx-80],xmm3
	movdqa	[ebx-48],xmm5
	movdqa	[ebx-32],xmm6
	movdqa	[ebx-16],xmm7
	movdqa	xmm2,[32+ebp]
	movdqa	xmm3,[48+ebp]
	movdqa	xmm4,[64+ebp]
	movdqa	xmm5,[80+ebp]
	movdqa	xmm6,[96+ebp]
	movdqa	xmm7,[112+ebp]
	paddd	xmm4,[64+eax]                       ; counter lanes += 4
	movdqa	[32+ebx],xmm2
	movdqa	[48+ebx],xmm3
	movdqa	[64+ebx],xmm4
	movdqa	[80+ebx],xmm5
	movdqa	[96+ebx],xmm6
	movdqa	[112+ebx],xmm7
	movdqa	[64+ebp],xmm4                       ; keep the advanced counters for the next pass
	movdqa	xmm0,[ebp-128]
	movdqa	xmm6,xmm4
	movdqa	xmm3,[ebp-64]
	movdqa	xmm4,[ebp]
	movdqa	xmm5,[16+ebp]
	mov	edx,10                              ; inner loop runs 10 times
	nop
align	16
; Inner loop, four lanes at once.  Rotations by 16 and 8 are done with pshufb
; using the byte-shuffle masks at [eax] and [16+eax]; rotations by 12 and 7
; are done with a pslld / psrld / por triple.
L$010loop:
	paddd	xmm0,xmm3
	movdqa	xmm2,xmm3
	pxor	xmm6,xmm0
	pshufb	xmm6,[eax]
	paddd	xmm4,xmm6
	pxor	xmm2,xmm4
	movdqa	xmm3,[ebx-48]
	movdqa	xmm1,xmm2
	pslld	xmm2,12
	psrld	xmm1,20
	por	xmm2,xmm1
	movdqa	xmm1,[ebx-112]
	paddd	xmm0,xmm2
	movdqa	xmm7,[80+ebx]
	pxor	xmm6,xmm0
	movdqa	[ebx-128],xmm0
	pshufb	xmm6,[16+eax]
	paddd	xmm4,xmm6
	movdqa	[64+ebx],xmm6
	pxor	xmm2,xmm4
	paddd	xmm1,xmm3
	movdqa	xmm0,xmm2
	pslld	xmm2,7
	psrld	xmm0,25
	pxor	xmm7,xmm1
	por	xmm2,xmm0
	movdqa	[ebx],xmm4
	pshufb	xmm7,[eax]
	movdqa	[ebx-64],xmm2
	paddd	xmm5,xmm7
	movdqa	xmm4,[32+ebx]
	pxor	xmm3,xmm5
	movdqa	xmm2,[ebx-32]
	movdqa	xmm0,xmm3
	pslld	xmm3,12
	psrld	xmm0,20
	por	xmm3,xmm0
	movdqa	xmm0,[ebx-96]
	paddd	xmm1,xmm3
	movdqa	xmm6,[96+ebx]
	pxor	xmm7,xmm1
	movdqa	[ebx-112],xmm1
	pshufb	xmm7,[16+eax]
	paddd	xmm5,xmm7
	movdqa	[80+ebx],xmm7
	pxor	xmm3,xmm5
	paddd	xmm0,xmm2
	movdqa	xmm1,xmm3
	pslld	xmm3,7
	psrld	xmm1,25
	pxor	xmm6,xmm0
	por	xmm3,xmm1
	movdqa	[16+ebx],xmm5
	pshufb	xmm6,[eax]
	movdqa	[ebx-48],xmm3
	paddd	xmm4,xmm6
	movdqa	xmm5,[48+ebx]
	pxor	xmm2,xmm4
	movdqa	xmm3,[ebx-16]
	movdqa	xmm1,xmm2
	pslld	xmm2,12
	psrld	xmm1,20
	por	xmm2,xmm1
	movdqa	xmm1,[ebx-80]
	paddd	xmm0,xmm2
	movdqa	xmm7,[112+ebx]
	pxor	xmm6,xmm0
	movdqa	[ebx-96],xmm0
	pshufb	xmm6,[16+eax]
	paddd	xmm4,xmm6
	movdqa	[96+ebx],xmm6
	pxor	xmm2,xmm4
	paddd	xmm1,xmm3
	movdqa	xmm0,xmm2
	pslld	xmm2,7
	psrld	xmm0,25
	pxor	xmm7,xmm1
	por	xmm2,xmm0
	pshufb	xmm7,[eax]
	movdqa	[ebx-32],xmm2
	paddd	xmm5,xmm7
	pxor	xmm3,xmm5
	movdqa	xmm2,[ebx-48]
	movdqa	xmm0,xmm3
	pslld	xmm3,12
	psrld	xmm0,20
	por	xmm3,xmm0
	movdqa	xmm0,[ebx-128]
	paddd	xmm1,xmm3
	pxor	xmm7,xmm1
	movdqa	[ebx-80],xmm1
	pshufb	xmm7,[16+eax]
	paddd	xmm5,xmm7
	movdqa	xmm6,xmm7
	pxor	xmm3,xmm5
	paddd	xmm0,xmm2
	movdqa	xmm1,xmm3
	pslld	xmm3,7
	psrld	xmm1,25
	pxor	xmm6,xmm0
	por	xmm3,xmm1
	pshufb	xmm6,[eax]
	movdqa	[ebx-16],xmm3
	paddd	xmm4,xmm6
	pxor	xmm2,xmm4
	movdqa	xmm3,[ebx-32]
	movdqa	xmm1,xmm2
	pslld	xmm2,12
	psrld	xmm1,20
	por	xmm2,xmm1
	movdqa	xmm1,[ebx-112]
	paddd	xmm0,xmm2
	movdqa	xmm7,[64+ebx]
	pxor	xmm6,xmm0
	movdqa	[ebx-128],xmm0
	pshufb	xmm6,[16+eax]
	paddd	xmm4,xmm6
	movdqa	[112+ebx],xmm6
	pxor	xmm2,xmm4
	paddd	xmm1,xmm3
	movdqa	xmm0,xmm2
	pslld	xmm2,7
	psrld	xmm0,25
	pxor	xmm7,xmm1
	por	xmm2,xmm0
	movdqa	[32+ebx],xmm4
	pshufb	xmm7,[eax]
	movdqa	[ebx-48],xmm2
	paddd	xmm5,xmm7
	movdqa	xmm4,[ebx]
	pxor	xmm3,xmm5
	movdqa	xmm2,[ebx-16]
	movdqa	xmm0,xmm3
	pslld	xmm3,12
	psrld	xmm0,20
	por	xmm3,xmm0
	movdqa	xmm0,[ebx-96]
	paddd	xmm1,xmm3
	movdqa	xmm6,[80+ebx]
	pxor	xmm7,xmm1
	movdqa	[ebx-112],xmm1
	pshufb	xmm7,[16+eax]
	paddd	xmm5,xmm7
	movdqa	[64+ebx],xmm7
	pxor	xmm3,xmm5
	paddd	xmm0,xmm2
	movdqa	xmm1,xmm3
	pslld	xmm3,7
	psrld	xmm1,25
	pxor	xmm6,xmm0
	por	xmm3,xmm1
	movdqa	[48+ebx],xmm5
	pshufb	xmm6,[eax]
	movdqa	[ebx-32],xmm3
	paddd	xmm4,xmm6
	movdqa	xmm5,[16+ebx]
	pxor	xmm2,xmm4
	movdqa	xmm3,[ebx-64]
	movdqa	xmm1,xmm2
	pslld	xmm2,12
	psrld	xmm1,20
	por	xmm2,xmm1
	movdqa	xmm1,[ebx-80]
	paddd	xmm0,xmm2
	movdqa	xmm7,[96+ebx]
	pxor	xmm6,xmm0
	movdqa	[ebx-96],xmm0
	pshufb	xmm6,[16+eax]
	paddd	xmm4,xmm6
	movdqa	[80+ebx],xmm6
	pxor	xmm2,xmm4
	paddd	xmm1,xmm3
	movdqa	xmm0,xmm2
	pslld	xmm2,7
	psrld	xmm0,25
	pxor	xmm7,xmm1
	por	xmm2,xmm0
	pshufb	xmm7,[eax]
	movdqa	[ebx-16],xmm2
	paddd	xmm5,xmm7
	pxor	xmm3,xmm5
	movdqa	xmm0,xmm3
	pslld	xmm3,12
	psrld	xmm0,20
	por	xmm3,xmm0
	movdqa	xmm0,[ebx-128]
	paddd	xmm1,xmm3
	movdqa	xmm6,[64+ebx]
	pxor	xmm7,xmm1
	movdqa	[ebx-80],xmm1
	pshufb	xmm7,[16+eax]
	paddd	xmm5,xmm7
	movdqa	[96+ebx],xmm7
	pxor	xmm3,xmm5
	movdqa	xmm1,xmm3
	pslld	xmm3,7
	psrld	xmm1,25
	por	xmm3,xmm1
	dec	edx
	jnz	NEAR L$010loop
	; Flush the words still held in registers to the working area.
	movdqa	[ebx-64],xmm3
	movdqa	[ebx],xmm4
	movdqa	[16+ebx],xmm5
	movdqa	[64+ebx],xmm6
	movdqa	[96+ebx],xmm7
	; Output stage, repeated four times (state words 0-3, 4-7, 8-11, 12-15):
	; add the input state, do a 4x4 dword transpose with punpck{l,h}dq /
	; punpck{l,h}qdq so each register holds 16 consecutive bytes of a single
	; block, then XOR with the input at 64-byte strides and store.
	movdqa	xmm1,[ebx-112]
	movdqa	xmm2,[ebx-96]
	movdqa	xmm3,[ebx-80]
	paddd	xmm0,[ebp-128]
	paddd	xmm1,[ebp-112]
	paddd	xmm2,[ebp-96]
	paddd	xmm3,[ebp-80]
	movdqa	xmm6,xmm0
	punpckldq	xmm0,xmm1
	movdqa	xmm7,xmm2
	punpckldq	xmm2,xmm3
	punpckhdq	xmm6,xmm1
	punpckhdq	xmm7,xmm3
	movdqa	xmm1,xmm0
	punpcklqdq	xmm0,xmm2
	movdqa	xmm3,xmm6
	punpcklqdq	xmm6,xmm7
	punpckhqdq	xmm1,xmm2
	punpckhqdq	xmm3,xmm7
	movdqu	xmm4,[esi-128]
	movdqu	xmm5,[esi-64]
	movdqu	xmm2,[esi]
	movdqu	xmm7,[64+esi]
	lea	esi,[16+esi]
	pxor	xmm4,xmm0
	movdqa	xmm0,[ebx-64]
	pxor	xmm5,xmm1
	movdqa	xmm1,[ebx-48]
	pxor	xmm6,xmm2
	movdqa	xmm2,[ebx-32]
	pxor	xmm7,xmm3
	movdqa	xmm3,[ebx-16]
	movdqu	[edi-128],xmm4
	movdqu	[edi-64],xmm5
	movdqu	[edi],xmm6
	movdqu	[64+edi],xmm7
	lea	edi,[16+edi]
	paddd	xmm0,[ebp-64]
	paddd	xmm1,[ebp-48]
	paddd	xmm2,[ebp-32]
	paddd	xmm3,[ebp-16]
	movdqa	xmm6,xmm0
	punpckldq	xmm0,xmm1
	movdqa	xmm7,xmm2
	punpckldq	xmm2,xmm3
	punpckhdq	xmm6,xmm1
	punpckhdq	xmm7,xmm3
	movdqa	xmm1,xmm0
	punpcklqdq	xmm0,xmm2
	movdqa	xmm3,xmm6
	punpcklqdq	xmm6,xmm7
	punpckhqdq	xmm1,xmm2
	punpckhqdq	xmm3,xmm7
	movdqu	xmm4,[esi-128]
	movdqu	xmm5,[esi-64]
	movdqu	xmm2,[esi]
	movdqu	xmm7,[64+esi]
	lea	esi,[16+esi]
	pxor	xmm4,xmm0
	movdqa	xmm0,[ebx]
	pxor	xmm5,xmm1
	movdqa	xmm1,[16+ebx]
	pxor	xmm6,xmm2
	movdqa	xmm2,[32+ebx]
	pxor	xmm7,xmm3
	movdqa	xmm3,[48+ebx]
	movdqu	[edi-128],xmm4
	movdqu	[edi-64],xmm5
	movdqu	[edi],xmm6
	movdqu	[64+edi],xmm7
	lea	edi,[16+edi]
	paddd	xmm0,[ebp]
	paddd	xmm1,[16+ebp]
	paddd	xmm2,[32+ebp]
	paddd	xmm3,[48+ebp]
	movdqa	xmm6,xmm0
	punpckldq	xmm0,xmm1
	movdqa	xmm7,xmm2
	punpckldq	xmm2,xmm3
	punpckhdq	xmm6,xmm1
	punpckhdq	xmm7,xmm3
	movdqa	xmm1,xmm0
	punpcklqdq	xmm0,xmm2
	movdqa	xmm3,xmm6
	punpcklqdq	xmm6,xmm7
	punpckhqdq	xmm1,xmm2
	punpckhqdq	xmm3,xmm7
	movdqu	xmm4,[esi-128]
	movdqu	xmm5,[esi-64]
	movdqu	xmm2,[esi]
	movdqu	xmm7,[64+esi]
	lea	esi,[16+esi]
	pxor	xmm4,xmm0
	movdqa	xmm0,[64+ebx]
	pxor	xmm5,xmm1
	movdqa	xmm1,[80+ebx]
	pxor	xmm6,xmm2
	movdqa	xmm2,[96+ebx]
	pxor	xmm7,xmm3
	movdqa	xmm3,[112+ebx]
	movdqu	[edi-128],xmm4
	movdqu	[edi-64],xmm5
	movdqu	[edi],xmm6
	movdqu	[64+edi],xmm7
	lea	edi,[16+edi]
	paddd	xmm0,[64+ebp]
	paddd	xmm1,[80+ebp]
	paddd	xmm2,[96+ebp]
	paddd	xmm3,[112+ebp]
	movdqa	xmm6,xmm0
	punpckldq	xmm0,xmm1
	movdqa	xmm7,xmm2
	punpckldq	xmm2,xmm3
	punpckhdq	xmm6,xmm1
	punpckhdq	xmm7,xmm3
	movdqa	xmm1,xmm0
	punpcklqdq	xmm0,xmm2
	movdqa	xmm3,xmm6
	punpcklqdq	xmm6,xmm7
	punpckhqdq	xmm1,xmm2
	punpckhqdq	xmm3,xmm7
	movdqu	xmm4,[esi-128]
	movdqu	xmm5,[esi-64]
	movdqu	xmm2,[esi]
	movdqu	xmm7,[64+esi]
	lea	esi,[208+esi]                       ; 16+16+16+208 = 256 bytes advanced per pass
	pxor	xmm4,xmm0
	pxor	xmm5,xmm1
	pxor	xmm6,xmm2
	pxor	xmm7,xmm3
	movdqu	[edi-128],xmm4
	movdqu	[edi-64],xmm5
	movdqu	[edi],xmm6
	movdqu	[64+edi],xmm7
	lea	edi,[208+edi]
	sub	ecx,256
	jnc	NEAR L$009outer_loop                ; no borrow: at least 256 more bytes remain
	add	ecx,256                             ; undo the bias; ecx = bytes left (0..255)
	jz	NEAR L$011done
	; Hand the remainder to the one-block code: undo the 128-byte pointer
	; bias and rebuild the 16-byte counter block as
	; (lane-0 counter of the last pass + 4) merged with dwords 1-3 of arg5.
	mov	ebx,DWORD [520+esp]
	lea	esi,[esi-128]
	mov	edx,DWORD [516+esp]
	lea	edi,[edi-128]
	movd	xmm2,DWORD [64+ebp]
	movdqu	xmm3,[ebx]
	paddd	xmm2,[96+eax]
	pand	xmm3,[112+eax]                      ; mask 0,-1,-1,-1 clears dword 0
	por	xmm3,xmm2
; One-block-at-a-time code.  State rows: xmm0 = constants, xmm1 / xmm2 = the
; 32 bytes at edx, xmm3 = counter block; a copy is kept at [0..63+esp].
; xmm6 / xmm7 hold the two pshufb masks.
L$0081x:
	movdqa	xmm0,[32+eax]
	movdqu	xmm1,[edx]
	movdqu	xmm2,[16+edx]
	movdqa	xmm6,[eax]
	movdqa	xmm7,[16+eax]
	mov	DWORD [48+esp],ebp                  ; NOTE(review): overwritten by the movdqa to [48+esp] below
	movdqa	[esp],xmm0
	movdqa	[16+esp],xmm1
	movdqa	[32+esp],xmm2
	movdqa	[48+esp],xmm3
	mov	edx,10
	jmp	NEAR L$012loop1x
align	16
L$013outer1x:
	; Next block: reload the saved state and add 1,0,0,0 to the counter row.
	movdqa	xmm3,[80+eax]
	movdqa	xmm0,[esp]
	movdqa	xmm1,[16+esp]
	movdqa	xmm2,[32+esp]
	paddd	xmm3,[48+esp]
	mov	edx,10
	movdqa	[48+esp],xmm3
	jmp	NEAR L$012loop1x
align	16
; Inner loop (10 passes).  Each pass runs the add/xor/rotate sequence twice,
; with pshufd lane rotations of rows 1-3 between and after the two halves.
; The `db 102,15,56,0,NN` lines are hand-encoded pshufb instructions.
L$012loop1x:
	paddd	xmm0,xmm1
	pxor	xmm3,xmm0
db	102,15,56,0,222                         ; pshufb xmm3,xmm6 (rotate each dword left 16)
	paddd	xmm2,xmm3
	pxor	xmm1,xmm2
	movdqa	xmm4,xmm1
	psrld	xmm1,20
	pslld	xmm4,12
	por	xmm1,xmm4
	paddd	xmm0,xmm1
	pxor	xmm3,xmm0
db	102,15,56,0,223                         ; pshufb xmm3,xmm7 (rotate each dword left 8)
	paddd	xmm2,xmm3
	pxor	xmm1,xmm2
	movdqa	xmm4,xmm1
	psrld	xmm1,25
	pslld	xmm4,7
	por	xmm1,xmm4
	pshufd	xmm2,xmm2,78
	pshufd	xmm1,xmm1,57
	pshufd	xmm3,xmm3,147
	nop
	paddd	xmm0,xmm1
	pxor	xmm3,xmm0
db	102,15,56,0,222                         ; pshufb xmm3,xmm6
	paddd	xmm2,xmm3
	pxor	xmm1,xmm2
	movdqa	xmm4,xmm1
	psrld	xmm1,20
	pslld	xmm4,12
	por	xmm1,xmm4
	paddd	xmm0,xmm1
	pxor	xmm3,xmm0
db	102,15,56,0,223                         ; pshufb xmm3,xmm7
	paddd	xmm2,xmm3
	pxor	xmm1,xmm2
	movdqa	xmm4,xmm1
	psrld	xmm1,25
	pslld	xmm4,7
	por	xmm1,xmm4
	pshufd	xmm2,xmm2,78
	pshufd	xmm1,xmm1,147
	pshufd	xmm3,xmm3,57
	dec	edx
	jnz	NEAR L$012loop1x
	; Add the saved input state.
	paddd	xmm0,[esp]
	paddd	xmm1,[16+esp]
	paddd	xmm2,[32+esp]
	paddd	xmm3,[48+esp]
	cmp	ecx,64
	jb	NEAR L$014tail
	; Full block: XOR 64 input bytes with the generated block and store.
	movdqu	xmm4,[esi]
	movdqu	xmm5,[16+esi]
	pxor	xmm0,xmm4
	movdqu	xmm4,[32+esi]
	pxor	xmm1,xmm5
	movdqu	xmm5,[48+esi]
	pxor	xmm2,xmm4
	pxor	xmm3,xmm5
	lea	esi,[64+esi]
	movdqu	[edi],xmm0
	movdqu	[16+edi],xmm1
	movdqu	[32+edi],xmm2
	movdqu	[48+edi],xmm3
	lea	edi,[64+edi]
	sub	ecx,64
	jnz	NEAR L$013outer1x
	jmp	NEAR L$011done
L$014tail:
	; Partial final block: spill the generated block to [0..63+esp] and XOR
	; the remaining ecx bytes one at a time.
	movdqa	[esp],xmm0
	movdqa	[16+esp],xmm1
	movdqa	[32+esp],xmm2
	movdqa	[48+esp],xmm3
	xor	eax,eax
	xor	edx,edx
	xor	ebp,ebp                             ; ebp = byte index
L$015tail_loop:
	mov	al,BYTE [ebp*1+esp]                 ; generated-block byte
	mov	dl,BYTE [ebp*1+esi]                 ; input byte
	lea	ebp,[1+ebp]
	xor	al,dl
	mov	BYTE [ebp*1+edi-1],al               ; -1 compensates for the early increment
	dec	ecx
	jnz	NEAR L$015tail_loop
L$011done:
	mov	esp,DWORD [512+esp]                 ; restore the caller's esp
	pop	edi
	pop	esi
	pop	ebx
	pop	ebp
	ret
align	64
; Constant table, addressed relative to eax on the SSSE3 path:
;   +0    pshufb mask: rotate each dword left by 16
;   +16   pshufb mask: rotate each dword left by 8
;   +32   the four constant state words ("expand 32-byte k")
;   +48   0,1,2,3   per-lane counter offsets for the 4-block setup
;   +64   4,4,4,4   per-pass counter step for the 4-block loop
;   +80   1,0,0,0   counter step for the 1-block loop
;   +96   4,0,0,0   counter step when falling from the 4-block to the 1-block code
;   +112  0,-1,-1,-1 mask that clears dword 0 of the counter block
L$ssse3_data:
db	2,3,0,1,6,7,4,5,10,11,8,9,14,15,12,13
db	3,0,1,2,7,4,5,6,11,8,9,10,15,12,13,14
dd	1634760805,857760878,2036477234,1797285236
dd	0,1,2,3
dd	4,4,4,4
dd	1,0,0,0
dd	4,0,0,0
dd	0,-1,-1,-1
align	64
; NUL-terminated ASCII: "ChaCha20 for x86, CRYPTOGAMS by <appro@openssl.org>"
db	67,104,97,67,104,97,50,48,32,102,111,114,32,120,56,54
db	44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32
db	60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111
db	114,103,62,0
segment	.bss
common	_OPENSSL_ia32cap_P 16
%else
; Work around https://bugzilla.nasm.us/show_bug.cgi?id=3392738
ret
%endif
