; This file is generated from a similarly-named Perl script in the BoringSSL
; source tree. Do not edit by hand.
;
; Syntax : NASM, Intel operand order (op dst,src).
; Target : x86-64, assembled only when __OUTPUT_FORMAT__ is win64; every other
;          output format gets the lone `ret` in the %else branch at the bottom.
; Content: bn_mul_mont (the only symbol declared `global` here) plus three
;          internal variants it jumps into (bn_mul4x_mont, bn_sqr8x_mont,
;          bn_mulx4x_mont), two SEH language handlers, and the .pdata/.xdata
;          records that bind the handlers to the four code ranges.
;          The DB string after bn_mulx4x_mont decodes to
;          "Montgomery Multiplication for x86_64, CRYPTOGAMS by <appro@openssl.org>".
;
; Register roles after the WIN64 prologue of each entry (established by the
; moves in the prologue and by how the bodies use the registers):
;   rdi <- rcx        result pointer (written by the final subtract/copy)
;   rsi <- rdx        first multiplicand pointer
;   rdx <- r8         second multiplicand pointer
;   rcx <- r9         pointer whose words are subtracted at the end
;                     (presumably the modulus -- confirm against the C caller)
;   r8  <- [rsp+40]   pointer to one qword that is loaded and used as the
;                     per-word multiplier (presumably n0 -- confirm)
;   r9  <- [rsp+48]   word count (only the low 32 bits are used)
; rdi/rsi are callee-saved on Win64, so the prologue parks the caller's values
; at [rsp+8]/[rsp+16] and each epilogue reloads them from there.
; Every path returns rax = 1.

%ifidn __OUTPUT_FORMAT__, win64
default	rel
; The size keywords are defined empty, so e.g. XMMWORD[rbx] assembles as [rbx].
%define XMMWORD
%define YMMWORD
%define ZMMWORD
; Defined empty: the _CET_ENDBR marker at the entry point emits nothing here.
%define _CET_ENDBR

%include "ring_core_generated/prefix_symbols_nasm.inc"
section	.text code align=64


EXTERN	OPENSSL_ia32cap_P

global	bn_mul_mont

;-----------------------------------------------------------------------
; bn_mul_mont -- public entry and dispatcher.
;   count not a multiple of 4, or count < 8      -> $L$mul_enter (word loop)
;   otherwise r11d = DWORD[OPENSSL_ia32cap_P+8] and
;     rdx != rsi                                  -> $L$mul4x_enter
;     rdx == rsi and count a multiple of 8        -> $L$sqr8x_enter
;     rdx == rsi otherwise                        -> $L$mul4x_enter
; rax holds the entry value of rsp on every one of those jumps.
;-----------------------------------------------------------------------
ALIGN	16
bn_mul_mont:
	mov	QWORD[8+rsp],rdi	;WIN64 prologue
	mov	QWORD[16+rsp],rsi
	mov	rax,rsp
$L$SEH_begin_bn_mul_mont:
	mov	rdi,rcx
	mov	rsi,rdx
	mov	rdx,r8
	mov	rcx,r9
	mov	r8,QWORD[40+rsp]
	mov	r9,QWORD[48+rsp]



_CET_ENDBR
	; 32-bit self-move zero-extends: the upper half of r9 is discarded.
	mov	r9d,r9d
	mov	rax,rsp

	test	r9d,3
	jnz	NEAR $L$mul_enter
	cmp	r9d,8
	jb	NEAR $L$mul_enter
	lea	r11,[OPENSSL_ia32cap_P]
	mov	r11d,DWORD[8+r11]
	cmp	rdx,rsi
	jne	NEAR $L$mul4x_enter
	test	r9d,7
	jz	NEAR $L$sqr8x_enter
	jmp	NEAR $L$mul4x_enter

; ---- generic one-word-at-a-time path -----------------------------------
ALIGN	16
$L$mul_enter:
	; Six pushes: the saved registers end up at [rax-8] .. [rax-48].
	push	rbx

	push	rbp

	push	r12

	push	r13

	push	r14

	push	r15


	; r10 = (rsp - 8*r9 - 16) rounded down to a 1024-byte boundary:
	; the new frame bottom, leaving room for r9+2 qwords above it.
	neg	r9
	mov	r11,rsp
	lea	r10,[((-16))+r9*8+rsp]
	neg	r9
	and	r10,-1024

	; Move rsp down to r10 while reading one qword from every 4096-byte
	; step on the way, so the stack is touched page by page in order
	; (presumably to hit the guard page sequentially -- confirm).
	sub	r11,r10
	and	r11,-4096
	lea	rsp,[r11*1+r10]
	mov	r11,QWORD[rsp]
	cmp	rsp,r10
	ja	NEAR $L$mul_page_walk
	jmp	NEAR $L$mul_page_walk_done

ALIGN	16
$L$mul_page_walk:
	lea	rsp,[((-4096))+rsp]
	mov	r11,QWORD[rsp]
	cmp	rsp,r10
	ja	NEAR $L$mul_page_walk
$L$mul_page_walk_done:

	; Entry rsp (still in rax) is kept just above the scratch vector;
	; the epilogue and mul_handler both fetch it from [rsp+8+r9*8].
	mov	QWORD[8+r9*8+rsp],rax

$L$mul_body:
	; Scratch vector lives at [rsp+0] .. [rsp+r9*8]; the last slot holds
	; the top carry word.
	;   r12 = second multiplicand pointer   r8  = qword loaded from [r8]
	;   rbx = current word of [r12]         rbp = r8 * low result word
	;   r14 = outer index                   r15 = inner index
	mov	r12,rdx
	mov	r8,QWORD[r8]
	mov	rbx,QWORD[r12]
	mov	rax,QWORD[rsi]

	xor	r14,r14
	xor	r15,r15

	mov	rbp,r8
	mul	rbx
	mov	r10,rax
	mov	rax,QWORD[rcx]

	imul	rbp,r10
	mov	r11,rdx

	mul	rbp
	add	r10,rax
	mov	rax,QWORD[8+rsi]
	adc	rdx,0
	mov	r13,rdx

	lea	r15,[1+r15]
	jmp	NEAR $L$1st_enter

; First pass: the scratch vector is written without being read.
ALIGN	16
$L$1st:
	add	r13,rax
	mov	rax,QWORD[r15*8+rsi]
	adc	rdx,0
	add	r13,r11
	mov	r11,r10
	adc	rdx,0
	mov	QWORD[((-16))+r15*8+rsp],r13
	mov	r13,rdx

$L$1st_enter:
	mul	rbx
	add	r11,rax
	mov	rax,QWORD[r15*8+rcx]
	adc	rdx,0
	lea	r15,[1+r15]
	mov	r10,rdx

	mul	rbp
	cmp	r15,r9
	jne	NEAR $L$1st

	add	r13,rax
	mov	rax,QWORD[rsi]
	adc	rdx,0
	add	r13,r11
	adc	rdx,0
	mov	QWORD[((-16))+r15*8+rsp],r13
	mov	r13,rdx
	mov	r11,r10

	; Fold the two running high words; the carry out becomes the top slot.
	xor	rdx,rdx
	add	r13,r11
	adc	rdx,0
	mov	QWORD[((-8))+r9*8+rsp],r13
	mov	QWORD[r9*8+rsp],rdx

	lea	r14,[1+r14]
	jmp	NEAR $L$outer
; Remaining passes: same shape, but each step also adds the word already in
; the scratch vector (carried in r10).
ALIGN	16
$L$outer:
	mov	rbx,QWORD[r14*8+r12]
	xor	r15,r15
	mov	rbp,r8
	mov	r10,QWORD[rsp]
	mul	rbx
	add	r10,rax
	mov	rax,QWORD[rcx]
	adc	rdx,0

	imul	rbp,r10
	mov	r11,rdx

	mul	rbp
	add	r10,rax
	mov	rax,QWORD[8+rsi]
	adc	rdx,0
	mov	r10,QWORD[8+rsp]
	mov	r13,rdx

	lea	r15,[1+r15]
	jmp	NEAR $L$inner_enter

ALIGN	16
$L$inner:
	add	r13,rax
	mov	rax,QWORD[r15*8+rsi]
	adc	rdx,0
	add	r13,r10
	mov	r10,QWORD[r15*8+rsp]
	adc	rdx,0
	mov	QWORD[((-16))+r15*8+rsp],r13
	mov	r13,rdx

$L$inner_enter:
	mul	rbx
	add	r11,rax
	mov	rax,QWORD[r15*8+rcx]
	adc	rdx,0
	add	r10,r11
	mov	r11,rdx
	adc	r11,0
	lea	r15,[1+r15]

	mul	rbp
	cmp	r15,r9
	jne	NEAR $L$inner

	add	r13,rax
	mov	rax,QWORD[rsi]
	adc	rdx,0
	add	r13,r10
	mov	r10,QWORD[r15*8+rsp]
	adc	rdx,0
	mov	QWORD[((-16))+r15*8+rsp],r13
	mov	r13,rdx

	xor	rdx,rdx
	add	r13,r11
	adc	rdx,0
	add	r13,r10
	adc	rdx,0
	mov	QWORD[((-8))+r9*8+rsp],r13
	mov	QWORD[r9*8+rsp],rdx

	lea	r14,[1+r14]
	cmp	r14,r9
	jb	NEAR $L$outer

	; `xor` clears CF, and nothing between here and the first sbb changes
	; it (mov/lea leave flags alone, dec leaves CF alone), so the borrow
	; chain below starts from 0 and survives the loop back-edge.
	xor	r14,r14
	mov	rax,QWORD[rsp]
	mov	r15,r9

; [rdi] = scratch - [rcx], word by word with borrow.
ALIGN	16
$L$sub:	sbb	rax,QWORD[r14*8+rcx]
	mov	QWORD[r14*8+rdi],rax
	mov	rax,QWORD[8+r14*8+rsp]
	lea	r14,[1+r14]
	dec	r15
	jnz	NEAR $L$sub

	; rax now holds the top carry word; subtracting the final borrow
	; turns it into the select mask (assumes the top word is 0 or 1, so
	; the result is 0 or all-ones -- confirm). rbx = ~rax.
	sbb	rax,0
	mov	rbx,-1
	xor	rbx,rax
	xor	r14,r14
	mov	r15,r9

; Branch-free select: [rdi] = ([rdi] & rbx) | (scratch & rax).
; Each scratch word is overwritten (with the value of r9) once it is read.
$L$copy:
	mov	rcx,QWORD[r14*8+rdi]
	mov	rdx,QWORD[r14*8+rsp]
	and	rcx,rbx
	and	rdx,rax
	mov	QWORD[r14*8+rsp],r9
	or	rdx,rcx
	mov	QWORD[r14*8+rdi],rdx
	lea	r14,[1+r14]
	sub	r15,1
	jnz	NEAR $L$copy

	; rsi = saved entry rsp; reload the six pushed registers relative to it.
	mov	rsi,QWORD[8+r9*8+rsp]

	mov	rax,1
	mov	r15,QWORD[((-48))+rsi]

	mov	r14,QWORD[((-40))+rsi]

	mov	r13,QWORD[((-32))+rsi]

	mov	r12,QWORD[((-24))+rsi]

	mov	rbp,QWORD[((-16))+rsi]

	mov	rbx,QWORD[((-8))+rsi]

	lea	rsp,[rsi]

$L$mul_epilogue:
	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
	mov	rsi,QWORD[16+rsp]
	ret

$L$SEH_end_bn_mul_mont:

;-----------------------------------------------------------------------
; bn_mul4x_mont -- four words per loop iteration.
; Not declared global in this file. The entry visible here is the jump to
; $L$mul4x_enter from bn_mul_mont, which has already loaded r11d from
; OPENSSL_ia32cap_P+8; the labelled prologue below does not set r11d.
; rdi is reused as an accumulator, so the result pointer is parked in the
; frame and reloaded before the final subtraction.
;-----------------------------------------------------------------------
ALIGN	16
bn_mul4x_mont:
	mov	QWORD[8+rsp],rdi	;WIN64 prologue
	mov	QWORD[16+rsp],rsi
	mov	rax,rsp
$L$SEH_begin_bn_mul4x_mont:
	mov	rdi,rcx
	mov	rsi,rdx
	mov	rdx,r8
	mov	rcx,r9
	mov	r8,QWORD[40+rsp]
	mov	r9,QWORD[48+rsp]



	mov	r9d,r9d
	mov	rax,rsp

$L$mul4x_enter:
	; Both bits of mask 0x80100 (bits 8 and 19) set -> take the path that
	; uses mulx/adcx/adox. (Presumably the BMI2 and ADX feature bits --
	; confirm against the OPENSSL_ia32cap_P layout.)
	and	r11d,0x80100
	cmp	r11d,0x80100
	je	NEAR $L$mulx4x_enter
	push	rbx

	push	rbp

	push	r12

	push	r13

	push	r14

	push	r15


	; r10 = (rsp - 8*r9 - 32) rounded down to a 1024-byte boundary.
	neg	r9
	mov	r11,rsp
	lea	r10,[((-32))+r9*8+rsp]
	neg	r9
	and	r10,-1024

	; Same page-by-page descent as in $L$mul_enter.
	sub	r11,r10
	and	r11,-4096
	lea	rsp,[r11*1+r10]
	mov	r11,QWORD[rsp]
	cmp	rsp,r10
	ja	NEAR $L$mul4x_page_walk
	jmp	NEAR $L$mul4x_page_walk_done

$L$mul4x_page_walk:
	lea	rsp,[((-4096))+rsp]
	mov	r11,QWORD[rsp]
	cmp	rsp,r10
	ja	NEAR $L$mul4x_page_walk
$L$mul4x_page_walk_done:

	; Saved entry rsp at [rsp+8+r9*8] (same slot mul_handler reads).
	mov	QWORD[8+r9*8+rsp],rax

$L$mul4x_body:
	; Result pointer parked at [rsp+16+r9*8]; rdi is free from here on.
	mov	QWORD[16+r9*8+rsp],rdi
	mov	r12,rdx
	mov	r8,QWORD[r8]
	mov	rbx,QWORD[r12]
	mov	rax,QWORD[rsi]

	xor	r14,r14
	xor	r15,r15

	mov	rbp,r8
	mul	rbx
	mov	r10,rax
	mov	rax,QWORD[rcx]

	imul	rbp,r10
	mov	r11,rdx

	mul	rbp
	add	r10,rax
	mov	rax,QWORD[8+rsi]
	adc	rdx,0
	mov	rdi,rdx

	mul	rbx
	add	r11,rax
	mov	rax,QWORD[8+rcx]
	adc	rdx,0
	mov	r10,rdx

	mul	rbp
	add	rdi,rax
	mov	rax,QWORD[16+rsi]
	adc	rdx,0
	add	rdi,r11
	lea	r15,[4+r15]
	adc	rdx,0
	mov	QWORD[rsp],rdi
	mov	r13,rdx
	jmp	NEAR $L$1st4x
; First pass, unrolled four words per iteration; r15 advances by 4 and the
; addressing displacements are relative to the already-advanced index.
ALIGN	16
$L$1st4x:
	mul	rbx
	add	r10,rax
	mov	rax,QWORD[((-16))+r15*8+rcx]
	adc	rdx,0
	mov	r11,rdx

	mul	rbp
	add	r13,rax
	mov	rax,QWORD[((-8))+r15*8+rsi]
	adc	rdx,0
	add	r13,r10
	adc	rdx,0
	mov	QWORD[((-24))+r15*8+rsp],r13
	mov	rdi,rdx

	mul	rbx
	add	r11,rax
	mov	rax,QWORD[((-8))+r15*8+rcx]
	adc	rdx,0
	mov	r10,rdx

	mul	rbp
	add	rdi,rax
	mov	rax,QWORD[r15*8+rsi]
	adc	rdx,0
	add	rdi,r11
	adc	rdx,0
	mov	QWORD[((-16))+r15*8+rsp],rdi
	mov	r13,rdx

	mul	rbx
	add	r10,rax
	mov	rax,QWORD[r15*8+rcx]
	adc	rdx,0
	mov	r11,rdx

	mul	rbp
	add	r13,rax
	mov	rax,QWORD[8+r15*8+rsi]
	adc	rdx,0
	add	r13,r10
	adc	rdx,0
	mov	QWORD[((-8))+r15*8+rsp],r13
	mov	rdi,rdx

	mul	rbx
	add	r11,rax
	mov	rax,QWORD[8+r15*8+rcx]
	adc	rdx,0
	lea	r15,[4+r15]
	mov	r10,rdx

	mul	rbp
	add	rdi,rax
	mov	rax,QWORD[((-16))+r15*8+rsi]
	adc	rdx,0
	add	rdi,r11
	adc	rdx,0
	mov	QWORD[((-32))+r15*8+rsp],rdi
	mov	r13,rdx
	cmp	r15,r9
	jb	NEAR $L$1st4x

	; Tail of the first pass: the last two words plus the carry word.
	mul	rbx
	add	r10,rax
	mov	rax,QWORD[((-16))+r15*8+rcx]
	adc	rdx,0
	mov	r11,rdx

	mul	rbp
	add	r13,rax
	mov	rax,QWORD[((-8))+r15*8+rsi]
	adc	rdx,0
	add	r13,r10
	adc	rdx,0
	mov	QWORD[((-24))+r15*8+rsp],r13
	mov	rdi,rdx

	mul	rbx
	add	r11,rax
	mov	rax,QWORD[((-8))+r15*8+rcx]
	adc	rdx,0
	mov	r10,rdx

	mul	rbp
	add	rdi,rax
	mov	rax,QWORD[rsi]
	adc	rdx,0
	add	rdi,r11
	adc	rdx,0
	mov	QWORD[((-16))+r15*8+rsp],rdi
	mov	r13,rdx

	xor	rdi,rdi
	add	r13,r10
	adc	rdi,0
	mov	QWORD[((-8))+r15*8+rsp],r13
	mov	QWORD[r15*8+rsp],rdi

	lea	r14,[1+r14]
; Remaining passes: as above, with the previous scratch word added at each
; step (the extra `add ...,QWORD[...+rsp]` / `adc rdx,0` pairs).
ALIGN	4
$L$outer4x:
	mov	rbx,QWORD[r14*8+r12]
	xor	r15,r15
	mov	r10,QWORD[rsp]
	mov	rbp,r8
	mul	rbx
	add	r10,rax
	mov	rax,QWORD[rcx]
	adc	rdx,0

	imul	rbp,r10
	mov	r11,rdx

	mul	rbp
	add	r10,rax
	mov	rax,QWORD[8+rsi]
	adc	rdx,0
	mov	rdi,rdx

	mul	rbx
	add	r11,rax
	mov	rax,QWORD[8+rcx]
	adc	rdx,0
	add	r11,QWORD[8+rsp]
	adc	rdx,0
	mov	r10,rdx

	mul	rbp
	add	rdi,rax
	mov	rax,QWORD[16+rsi]
	adc	rdx,0
	add	rdi,r11
	lea	r15,[4+r15]
	adc	rdx,0
	mov	QWORD[rsp],rdi
	mov	r13,rdx
	jmp	NEAR $L$inner4x
ALIGN	16
$L$inner4x:
	mul	rbx
	add	r10,rax
	mov	rax,QWORD[((-16))+r15*8+rcx]
	adc	rdx,0
	add	r10,QWORD[((-16))+r15*8+rsp]
	adc	rdx,0
	mov	r11,rdx

	mul	rbp
	add	r13,rax
	mov	rax,QWORD[((-8))+r15*8+rsi]
	adc	rdx,0
	add	r13,r10
	adc	rdx,0
	mov	QWORD[((-24))+r15*8+rsp],r13
	mov	rdi,rdx

	mul	rbx
	add	r11,rax
	mov	rax,QWORD[((-8))+r15*8+rcx]
	adc	rdx,0
	add	r11,QWORD[((-8))+r15*8+rsp]
	adc	rdx,0
	mov	r10,rdx

	mul	rbp
	add	rdi,rax
	mov	rax,QWORD[r15*8+rsi]
	adc	rdx,0
	add	rdi,r11
	adc	rdx,0
	mov	QWORD[((-16))+r15*8+rsp],rdi
	mov	r13,rdx

	mul	rbx
	add	r10,rax
	mov	rax,QWORD[r15*8+rcx]
	adc	rdx,0
	add	r10,QWORD[r15*8+rsp]
	adc	rdx,0
	mov	r11,rdx

	mul	rbp
	add	r13,rax
	mov	rax,QWORD[8+r15*8+rsi]
	adc	rdx,0
	add	r13,r10
	adc	rdx,0
	mov	QWORD[((-8))+r15*8+rsp],r13
	mov	rdi,rdx

	mul	rbx
	add	r11,rax
	mov	rax,QWORD[8+r15*8+rcx]
	adc	rdx,0
	add	r11,QWORD[8+r15*8+rsp]
	adc	rdx,0
	lea	r15,[4+r15]
	mov	r10,rdx

	mul	rbp
	add	rdi,rax
	mov	rax,QWORD[((-16))+r15*8+rsi]
	adc	rdx,0
	add	rdi,r11
	adc	rdx,0
	mov	QWORD[((-32))+r15*8+rsp],rdi
	mov	r13,rdx
	cmp	r15,r9
	jb	NEAR $L$inner4x

	; Tail of the pass; the outer index r14 is bumped in the middle of it.
	mul	rbx
	add	r10,rax
	mov	rax,QWORD[((-16))+r15*8+rcx]
	adc	rdx,0
	add	r10,QWORD[((-16))+r15*8+rsp]
	adc	rdx,0
	mov	r11,rdx

	mul	rbp
	add	r13,rax
	mov	rax,QWORD[((-8))+r15*8+rsi]
	adc	rdx,0
	add	r13,r10
	adc	rdx,0
	mov	QWORD[((-24))+r15*8+rsp],r13
	mov	rdi,rdx

	mul	rbx
	add	r11,rax
	mov	rax,QWORD[((-8))+r15*8+rcx]
	adc	rdx,0
	add	r11,QWORD[((-8))+r15*8+rsp]
	adc	rdx,0
	lea	r14,[1+r14]
	mov	r10,rdx

	mul	rbp
	add	rdi,rax
	mov	rax,QWORD[rsi]
	adc	rdx,0
	add	rdi,r11
	adc	rdx,0
	mov	QWORD[((-16))+r15*8+rsp],rdi
	mov	r13,rdx

	; Add the previous pass's top carry word (at [rsp+r9*8]) as well.
	xor	rdi,rdi
	add	r13,r10
	adc	rdi,0
	add	r13,QWORD[r9*8+rsp]
	adc	rdi,0
	mov	QWORD[((-8))+r15*8+rsp],r13
	mov	QWORD[r15*8+rsp],rdi

	cmp	r14,r9
	jb	NEAR $L$outer4x
	; Reload the result pointer; r15 = (count-4)/4 iterations for $L$sub4x.
	mov	rdi,QWORD[16+r9*8+rsp]
	lea	r15,[((-4))+r9]
	mov	rax,QWORD[rsp]
	mov	rdx,QWORD[8+rsp]
	shr	r15,2
	lea	rsi,[rsp]
	xor	r14,r14

	; `sub` starts the borrow chain; only mov/lea/dec sit between the
	; sbb instructions that follow, so CF is carried through the loop.
	sub	rax,QWORD[rcx]
	mov	rbx,QWORD[16+rsi]
	mov	rbp,QWORD[24+rsi]
	sbb	rdx,QWORD[8+rcx]

; [rdi] = scratch - [rcx], four words per iteration, software-pipelined:
; the stores at the loop head write results computed in the previous round.
$L$sub4x:
	mov	QWORD[r14*8+rdi],rax
	mov	QWORD[8+r14*8+rdi],rdx
	sbb	rbx,QWORD[16+r14*8+rcx]
	mov	rax,QWORD[32+r14*8+rsi]
	mov	rdx,QWORD[40+r14*8+rsi]
	sbb	rbp,QWORD[24+r14*8+rcx]
	mov	QWORD[16+r14*8+rdi],rbx
	mov	QWORD[24+r14*8+rdi],rbp
	sbb	rax,QWORD[32+r14*8+rcx]
	mov	rbx,QWORD[48+r14*8+rsi]
	mov	rbp,QWORD[56+r14*8+rsi]
	sbb	rdx,QWORD[40+r14*8+rcx]
	lea	r14,[4+r14]
	dec	r15
	jnz	NEAR $L$sub4x

	mov	QWORD[r14*8+rdi],rax
	mov	rax,QWORD[32+r14*8+rsi]
	sbb	rbx,QWORD[16+r14*8+rcx]
	mov	QWORD[8+r14*8+rdi],rdx
	sbb	rbp,QWORD[24+r14*8+rcx]
	mov	QWORD[16+r14*8+rdi],rbx

	; rax = top carry word minus the final borrow -> select mask.
	sbb	rax,0
	mov	QWORD[24+r14*8+rdi],rbp
	pxor	xmm0,xmm0
	; Encoded bytes 66 48 0F 6E E0 = movq xmm4,rax
DB	102,72,15,110,224
	; xmm4 = low dword of the mask broadcast to all lanes, xmm5 = ~xmm4.
	pcmpeqd	xmm5,xmm5
	pshufd	xmm4,xmm4,0
	mov	r15,r9
	pxor	xmm5,xmm4
	shr	r15,2
	xor	eax,eax

	jmp	NEAR $L$copy4x
; Branch-free select, 32 bytes per iteration:
;   [rdi] = (scratch & xmm4) | ([rdi] & xmm5); scratch is zeroed from xmm0.
; movdqa is used on the rsp-based operands, movdqu on the rdi-based ones.
ALIGN	16
$L$copy4x:
	movdqa	xmm1,XMMWORD[rax*1+rsp]
	movdqu	xmm2,XMMWORD[rax*1+rdi]
	pand	xmm1,xmm4
	pand	xmm2,xmm5
	movdqa	xmm3,XMMWORD[16+rax*1+rsp]
	movdqa	XMMWORD[rax*1+rsp],xmm0
	por	xmm1,xmm2
	movdqu	xmm2,XMMWORD[16+rax*1+rdi]
	movdqu	XMMWORD[rax*1+rdi],xmm1
	pand	xmm3,xmm4
	pand	xmm2,xmm5
	movdqa	XMMWORD[16+rax*1+rsp],xmm0
	por	xmm3,xmm2
	movdqu	XMMWORD[16+rax*1+rdi],xmm3
	lea	rax,[32+rax]
	dec	r15
	jnz	NEAR $L$copy4x
	mov	rsi,QWORD[8+r9*8+rsp]

	mov	rax,1
	mov	r15,QWORD[((-48))+rsi]

	mov	r14,QWORD[((-40))+rsi]

	mov	r13,QWORD[((-32))+rsi]

	mov	r12,QWORD[((-24))+rsi]

	mov	rbp,QWORD[((-16))+rsi]

	mov	rbx,QWORD[((-8))+rsi]

	lea	rsp,[rsi]

$L$mul4x_epilogue:
	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
	mov	rsi,QWORD[16+rsp]
	ret

$L$SEH_end_bn_mul4x_mont:
EXTERN	bn_sqrx8x_internal
EXTERN	bn_sqr8x_internal


;-----------------------------------------------------------------------
; bn_sqr8x_mont -- path taken when both multiplicand pointers are equal and
; the count is a multiple of 8. The arithmetic itself is done by one of two
; external helpers; this routine builds the frame, picks the helper, then
; performs the final subtraction and masked copy.
; Frame slots written here: [rsp+32] = qword loaded from [r8],
;                           [rsp+40] = entry rsp.
; NOTE(review): the helpers' register contracts are not visible in this
; file; comments on values "after the call" are assumptions to confirm.
;-----------------------------------------------------------------------
ALIGN	32
bn_sqr8x_mont:
	mov	QWORD[8+rsp],rdi	;WIN64 prologue
	mov	QWORD[16+rsp],rsi
	mov	rax,rsp
$L$SEH_begin_bn_sqr8x_mont:
	mov	rdi,rcx
	mov	rsi,rdx
	mov	rdx,r8
	mov	rcx,r9
	mov	r8,QWORD[40+rsp]
	mov	r9,QWORD[48+rsp]



	mov	rax,rsp

$L$sqr8x_enter:
	push	rbx

	push	rbp

	push	r12

	push	r13

	push	r14

	push	r15

$L$sqr8x_prologue:

	; r10 = count*32, r9 = -(count*8).
	mov	r10d,r9d
	shl	r9d,3
	shl	r10,3+2
	neg	r9

	; The frame base (rbp) is chosen from the low 12 bits of the distance
	; between the candidate frame and the input pointer rsi (presumably
	; to avoid the two aliasing modulo 4096 -- confirm).
	lea	r11,[((-64))+r9*2+rsp]
	mov	rbp,rsp
	mov	r8,QWORD[r8]
	sub	r11,rsi
	and	r11,4095
	cmp	r10,r11
	jb	NEAR $L$sqr8x_sp_alt
	sub	rbp,r11
	lea	rbp,[((-64))+r9*2+rbp]
	jmp	NEAR $L$sqr8x_sp_done

ALIGN	32
$L$sqr8x_sp_alt:
	lea	r10,[((4096-64))+r9*2]
	lea	rbp,[((-64))+r9*2+rbp]
	sub	r11,r10
	; Must be `mov`, not `xor`: cmovc consumes CF from the `sub` above.
	mov	r10,0
	cmovc	r11,r10
	sub	rbp,r11
$L$sqr8x_sp_done:
	and	rbp,-64
	; Page-by-page descent from the current rsp to rbp.
	mov	r11,rsp
	sub	r11,rbp
	and	r11,-4096
	lea	rsp,[rbp*1+r11]
	mov	r10,QWORD[rsp]
	cmp	rsp,rbp
	ja	NEAR $L$sqr8x_page_walk
	jmp	NEAR $L$sqr8x_page_walk_done

ALIGN	16
$L$sqr8x_page_walk:
	lea	rsp,[((-4096))+rsp]
	mov	r10,QWORD[rsp]
	cmp	rsp,rbp
	ja	NEAR $L$sqr8x_page_walk
$L$sqr8x_page_walk_done:

	; r10 = -(count*8), r9 = count*8.
	mov	r10,r9
	neg	r9

	mov	QWORD[32+rsp],r8
	mov	QWORD[40+rsp],rax

$L$sqr8x_body:

	; Encoded bytes 66 48 0F 6E D1 = movq xmm2,rcx
DB	102,72,15,110,209
	pxor	xmm0,xmm0
	; Encoded bytes 66 48 0F 6E CF = movq xmm1,rdi  (result pointer kept)
DB	102,72,15,110,207
	; Encoded bytes 66 49 0F 6E DA = movq xmm3,r10
DB	102,73,15,110,218
	; Same 0x80100 capability test as $L$mul4x_enter, re-read from memory.
	lea	rax,[OPENSSL_ia32cap_P]
	mov	eax,DWORD[8+rax]
	and	eax,0x80100
	cmp	eax,0x80100
	jne	NEAR $L$sqr8x_nox

	call	bn_sqrx8x_internal

	; After the helper: rbx = r8+rcx, r9 = rdx = rcx, rcx >>= 5
	; (arithmetic). Presumably rcx is a negative byte count on return.
	lea	rbx,[rcx*1+r8]
	mov	r9,rcx
	mov	rdx,rcx
	; Encoded bytes 66 48 0F 7E CF = movq rdi,xmm1  (result pointer back)
DB	102,72,15,126,207
	; `sar` sets CF to the last bit shifted out; the sbb chain at
	; $L$sqr8x_sub starts from that CF (presumably 0 -- confirm).
	sar	rcx,3+2
	jmp	NEAR $L$sqr8x_sub

ALIGN	32
$L$sqr8x_nox:
	call	bn_sqr8x_internal

	; After the helper: rbx = rdi+r9, rcx = rdx = r9, rcx >>= 5.
	lea	rbx,[r9*1+rdi]
	mov	rcx,r9
	mov	rdx,r9
	; Encoded bytes 66 48 0F 7E CF = movq rdi,xmm1
DB	102,72,15,126,207
	sar	rcx,3+2
	jmp	NEAR $L$sqr8x_sub

; [rdi] = [rbx] - [rbp], four words per iteration. rcx counts up to zero;
; `inc` and `lea` leave CF untouched, so the borrow crosses iterations.
ALIGN	32
$L$sqr8x_sub:
	mov	r12,QWORD[rbx]
	mov	r13,QWORD[8+rbx]
	mov	r14,QWORD[16+rbx]
	mov	r15,QWORD[24+rbx]
	lea	rbx,[32+rbx]
	sbb	r12,QWORD[rbp]
	sbb	r13,QWORD[8+rbp]
	sbb	r14,QWORD[16+rbp]
	sbb	r15,QWORD[24+rbp]
	lea	rbp,[32+rbp]
	mov	QWORD[rdi],r12
	mov	QWORD[8+rdi],r13
	mov	QWORD[16+rdi],r14
	mov	QWORD[24+rdi],r15
	lea	rdi,[32+rdi]
	inc	rcx
	jnz	NEAR $L$sqr8x_sub

	; rax (as left by the helper) minus the final borrow -> select mask.
	; Adding r9 rewinds rbx and rdi to the start of their vectors.
	sbb	rax,0
	lea	rbx,[r9*1+rbx]
	lea	rdi,[r9*1+rdi]

	; Encoded bytes 66 48 0F 6E C8 = movq xmm1,rax
DB	102,72,15,110,200
	pxor	xmm0,xmm0
	pshufd	xmm1,xmm1,0
	mov	rsi,QWORD[40+rsp]

	jmp	NEAR $L$sqr8x_cond_copy

; Branch-free select, 32 bytes per iteration:
;   [rdi] = ([rbx] & xmm1) | ([rdi] & (xmm1 == 0 ? ~0 : 0))
; Two scratch regions are zeroed as it goes: the one being read, and the one
; at offset rdx from it. r9 counts up to zero in steps of 32.
ALIGN	32
$L$sqr8x_cond_copy:
	movdqa	xmm2,XMMWORD[rbx]
	movdqa	xmm3,XMMWORD[16+rbx]
	lea	rbx,[32+rbx]
	movdqu	xmm4,XMMWORD[rdi]
	movdqu	xmm5,XMMWORD[16+rdi]
	lea	rdi,[32+rdi]
	movdqa	XMMWORD[(-32)+rbx],xmm0
	movdqa	XMMWORD[(-16)+rbx],xmm0
	movdqa	XMMWORD[(-32)+rdx*1+rbx],xmm0
	movdqa	XMMWORD[(-16)+rdx*1+rbx],xmm0
	; xmm0 (zero) compared with xmm1 gives the complementary mask;
	; it is reset to zero a few lines below for the next iteration.
	pcmpeqd	xmm0,xmm1
	pand	xmm2,xmm1
	pand	xmm3,xmm1
	pand	xmm4,xmm0
	pand	xmm5,xmm0
	pxor	xmm0,xmm0
	por	xmm4,xmm2
	por	xmm5,xmm3
	movdqu	XMMWORD[(-32)+rdi],xmm4
	movdqu	XMMWORD[(-16)+rdi],xmm5
	add	r9,32
	jnz	NEAR $L$sqr8x_cond_copy

	mov	rax,1
	mov	r15,QWORD[((-48))+rsi]

	mov	r14,QWORD[((-40))+rsi]

	mov	r13,QWORD[((-32))+rsi]

	mov	r12,QWORD[((-24))+rsi]

	mov	rbp,QWORD[((-16))+rsi]

	mov	rbx,QWORD[((-8))+rsi]

	lea	rsp,[rsi]

$L$sqr8x_epilogue:
	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
	mov	rsi,QWORD[16+rsp]
	ret

$L$SEH_end_bn_sqr8x_mont:

;-----------------------------------------------------------------------
; bn_mulx4x_mont -- four words per iteration using mulx with two
; independent carry chains (adcx uses CF, adox uses OF).
; Reached from $L$mul4x_enter when both bits of 0x80100 are set.
; Frame slots (written below, after the page walk):
;   [rsp+0]  count*8                     [rsp+8]  next second-operand pointer
;   [rsp+16] second-operand end pointer  [rsp+24] qword loaded from [r8]
;   [rsp+32] result pointer              [rsp+40] entry rsp
;   [rsp+48] inner-loop iteration count  [rsp+64...] scratch vector
;-----------------------------------------------------------------------
ALIGN	32
bn_mulx4x_mont:
	mov	QWORD[8+rsp],rdi	;WIN64 prologue
	mov	QWORD[16+rsp],rsi
	mov	rax,rsp
$L$SEH_begin_bn_mulx4x_mont:
	mov	rdi,rcx
	mov	rsi,rdx
	mov	rdx,r8
	mov	rcx,r9
	mov	r8,QWORD[40+rsp]
	mov	r9,QWORD[48+rsp]



	mov	rax,rsp

$L$mulx4x_enter:
	push	rbx

	push	rbp

	push	r12

	push	r13

	push	r14

	push	r15

$L$mulx4x_prologue:

	; r9 = count*8, r10 = -r9; rbp = (rsp - 72 - count*8) rounded down to
	; a 128-byte boundary.
	shl	r9d,3
	xor	r10,r10
	sub	r10,r9
	mov	r8,QWORD[r8]
	lea	rbp,[((-72))+r10*1+rsp]
	and	rbp,-128
	; Page-by-page descent from the current rsp to rbp.
	mov	r11,rsp
	sub	r11,rbp
	and	r11,-4096
	lea	rsp,[rbp*1+r11]
	mov	r10,QWORD[rsp]
	cmp	rsp,rbp
	ja	NEAR $L$mulx4x_page_walk
	jmp	NEAR $L$mulx4x_page_walk_done

ALIGN	16
$L$mulx4x_page_walk:
	lea	rsp,[((-4096))+rsp]
	mov	r10,QWORD[rsp]
	cmp	rsp,rbp
	ja	NEAR $L$mulx4x_page_walk
$L$mulx4x_page_walk_done:

	; r10 = rdx + count*8: one past the last word of the second operand.
	lea	r10,[r9*1+rdx]

	; Fill the frame slots listed in the routine header.
	; r9 becomes count*8/32 - 1, the trip count of the 4-word loops.
	mov	QWORD[rsp],r9
	shr	r9,5
	mov	QWORD[16+rsp],r10
	sub	r9,1
	mov	QWORD[24+rsp],r8
	mov	QWORD[32+rsp],rdi
	mov	QWORD[40+rsp],rax

	mov	QWORD[48+rsp],r9
	jmp	NEAR $L$mulx4x_body

ALIGN	32
$L$mulx4x_body:
	; rdx is mulx's implicit source: first the current second-operand word
	; (copy kept in r9), later the per-pass multiplier held in r8.
	lea	rdi,[8+rdx]
	mov	rdx,QWORD[rdx]
	lea	rbx,[((64+32))+rsp]
	mov	r9,rdx

	mulx	rax,r8,QWORD[rsi]
	mulx	r14,r11,QWORD[8+rsi]
	add	r11,rax
	mov	QWORD[8+rsp],rdi
	mulx	r13,r12,QWORD[16+rsi]
	adc	r12,r14
	adc	r13,0

	mov	rdi,r8
	imul	r8,QWORD[24+rsp]
	; Zeroes rbp and clears both CF and OF before the adcx/adox chains.
	xor	rbp,rbp

	mulx	r14,rax,QWORD[24+rsi]
	mov	rdx,r8
	lea	rsi,[32+rsi]
	adcx	r13,rax
	adcx	r14,rbp

	mulx	r10,rax,QWORD[rcx]
	adcx	rdi,rax
	adox	r10,r11
	mulx	r11,rax,QWORD[8+rcx]
	adcx	r10,rax
	adox	r11,r12
	; Hand-encoded VEX instruction: mulx r12,rax,QWORD[16+rcx] with an
	; explicit 32-bit displacement.
	DB	0xc4,0x62,0xfb,0xf6,0xa1,0x10,0x00,0x00,0x00
	mov	rdi,QWORD[48+rsp]
	mov	QWORD[((-32))+rbx],r10
	adcx	r11,rax
	adox	r12,r13
	mulx	r15,rax,QWORD[24+rcx]
	mov	rdx,r9
	mov	QWORD[((-24))+rbx],r11
	adcx	r12,rax
	adox	r15,rbp
	lea	rcx,[32+rcx]
	mov	QWORD[((-16))+rbx],r12

	jmp	NEAR $L$mulx4x_1st

; First pass, four words per iteration; rdi is the trip count.
; `dec` at the bottom preserves CF, so the adcx chain crosses iterations.
ALIGN	32
$L$mulx4x_1st:
	adcx	r15,rbp
	mulx	rax,r10,QWORD[rsi]
	adcx	r10,r14
	mulx	r14,r11,QWORD[8+rsi]
	adcx	r11,rax
	mulx	rax,r12,QWORD[16+rsi]
	adcx	r12,r14
	mulx	r14,r13,QWORD[24+rsi]
	; Two 0x67 (address-size) prefix bytes that attach to the next
	; instruction (presumably padding for code alignment -- confirm).
	DB	0x67,0x67
	mov	rdx,r8
	adcx	r13,rax
	adcx	r14,rbp
	lea	rsi,[32+rsi]
	lea	rbx,[32+rbx]

	adox	r10,r15
	mulx	r15,rax,QWORD[rcx]
	adcx	r10,rax
	adox	r11,r15
	mulx	r15,rax,QWORD[8+rcx]
	adcx	r11,rax
	adox	r12,r15
	mulx	r15,rax,QWORD[16+rcx]
	mov	QWORD[((-40))+rbx],r10
	adcx	r12,rax
	mov	QWORD[((-32))+rbx],r11
	adox	r13,r15
	mulx	r15,rax,QWORD[24+rcx]
	mov	rdx,r9
	mov	QWORD[((-24))+rbx],r12
	adcx	r13,rax
	adox	r15,rbp
	lea	rcx,[32+rcx]
	mov	QWORD[((-16))+rbx],r13

	dec	rdi
	jnz	NEAR $L$mulx4x_1st

	; rax = count*8, rdi = next second-operand pointer.
	; r15 ends up 0 or -1: the pass's top carry, stored by the outer head.
	mov	rax,QWORD[rsp]
	mov	rdi,QWORD[8+rsp]
	adc	r15,rbp
	add	r14,r15
	sbb	r15,r15
	mov	QWORD[((-8))+rbx],r14
	jmp	NEAR $L$mulx4x_outer

; One pass per remaining second-operand word. rsi and rcx are rewound by
; count*8 (rax); the carry from the previous pass is stored at [rbx] before
; rbx is reset to the start of the scratch vector.
ALIGN	32
$L$mulx4x_outer:
	mov	rdx,QWORD[rdi]
	lea	rdi,[8+rdi]
	sub	rsi,rax
	mov	QWORD[rbx],r15
	lea	rbx,[((64+32))+rsp]
	sub	rcx,rax

	mulx	r11,r8,QWORD[rsi]
	; Zeroes rbp and clears CF and OF for the two carry chains.
	xor	ebp,ebp
	mov	r9,rdx
	mulx	r12,r14,QWORD[8+rsi]
	adox	r8,QWORD[((-32))+rbx]
	adcx	r11,r14
	mulx	r13,r15,QWORD[16+rsi]
	adox	r11,QWORD[((-24))+rbx]
	adcx	r12,r15
	adox	r12,QWORD[((-16))+rbx]
	adcx	r13,rbp
	adox	r13,rbp

	mov	QWORD[8+rsp],rdi
	mov	r15,r8
	imul	r8,QWORD[24+rsp]
	xor	ebp,ebp

	mulx	r14,rax,QWORD[24+rsi]
	mov	rdx,r8
	adcx	r13,rax
	adox	r13,QWORD[((-8))+rbx]
	adcx	r14,rbp
	lea	rsi,[32+rsi]
	adox	r14,rbp

	mulx	r10,rax,QWORD[rcx]
	adcx	r15,rax
	adox	r10,r11
	mulx	r11,rax,QWORD[8+rcx]
	adcx	r10,rax
	adox	r11,r12
	mulx	r12,rax,QWORD[16+rcx]
	mov	QWORD[((-32))+rbx],r10
	adcx	r11,rax
	adox	r12,r13
	mulx	r15,rax,QWORD[24+rcx]
	mov	rdx,r9
	mov	QWORD[((-24))+rbx],r11
	lea	rcx,[32+rcx]
	adcx	r12,rax
	adox	r15,rbp
	mov	rdi,QWORD[48+rsp]
	mov	QWORD[((-16))+rbx],r12

	jmp	NEAR $L$mulx4x_inner

; As $L$mulx4x_1st, but the adcx chain also adds the existing scratch words
; at [rbx]..[rbx+24].
ALIGN	32
$L$mulx4x_inner:
	mulx	rax,r10,QWORD[rsi]
	adcx	r15,rbp
	adox	r10,r14
	mulx	r14,r11,QWORD[8+rsi]
	adcx	r10,QWORD[rbx]
	adox	r11,rax
	mulx	rax,r12,QWORD[16+rsi]
	adcx	r11,QWORD[8+rbx]
	adox	r12,r14
	mulx	r14,r13,QWORD[24+rsi]
	mov	rdx,r8
	adcx	r12,QWORD[16+rbx]
	adox	r13,rax
	adcx	r13,QWORD[24+rbx]
	adox	r14,rbp
	lea	rsi,[32+rsi]
	lea	rbx,[32+rbx]
	adcx	r14,rbp

	adox	r10,r15
	mulx	r15,rax,QWORD[rcx]
	adcx	r10,rax
	adox	r11,r15
	mulx	r15,rax,QWORD[8+rcx]
	adcx	r11,rax
	adox	r12,r15
	mulx	r15,rax,QWORD[16+rcx]
	mov	QWORD[((-40))+rbx],r10
	adcx	r12,rax
	adox	r13,r15
	mulx	r15,rax,QWORD[24+rcx]
	mov	rdx,r9
	mov	QWORD[((-32))+rbx],r11
	mov	QWORD[((-24))+rbx],r12
	adcx	r13,rax
	adox	r15,rbp
	lea	rcx,[32+rcx]
	mov	QWORD[((-16))+rbx],r13

	dec	rdi
	jnz	NEAR $L$mulx4x_inner

	mov	rax,QWORD[rsp]
	mov	rdi,QWORD[8+rsp]
	adc	r15,rbp
	; rbp is 0 here, so this computes 0 - [rbx]: CF is set exactly when
	; the carry word stored by the outer-loop head is non-zero.
	sub	rbp,QWORD[rbx]
	adc	r14,r15
	sbb	r15,r15
	mov	QWORD[((-8))+rbx],r14

	; Done when the operand pointer reaches the end pointer at [rsp+16].
	cmp	rdi,QWORD[16+rsp]
	jne	NEAR $L$mulx4x_outer

	; rbx = scratch start, rcx rewound, r15 = 0 or 1 (top carry),
	; rdx = count*8, rax = count*8/32 iterations, rdi = result pointer.
	lea	rbx,[64+rsp]
	sub	rcx,rax
	neg	r15
	mov	rdx,rax
	; `shr` sets CF to the last bit shifted out; the sbb chain below
	; starts from that CF (presumably 0 -- confirm).
	shr	rax,3+2
	mov	rdi,QWORD[32+rsp]
	jmp	NEAR $L$mulx4x_sub

; [rdi] = scratch - [rcx], four words per iteration; `dec`/`lea`/`mov` keep
; CF intact between the sbb groups.
ALIGN	32
$L$mulx4x_sub:
	mov	r11,QWORD[rbx]
	mov	r12,QWORD[8+rbx]
	mov	r13,QWORD[16+rbx]
	mov	r14,QWORD[24+rbx]
	lea	rbx,[32+rbx]
	sbb	r11,QWORD[rcx]
	sbb	r12,QWORD[8+rcx]
	sbb	r13,QWORD[16+rcx]
	sbb	r14,QWORD[24+rcx]
	lea	rcx,[32+rcx]
	mov	QWORD[rdi],r11
	mov	QWORD[8+rdi],r12
	mov	QWORD[16+rdi],r13
	mov	QWORD[24+rdi],r14
	lea	rdi,[32+rdi]
	dec	rax
	jnz	NEAR $L$mulx4x_sub

	; r15 = top carry minus final borrow -> select mask; rdi rewound.
	sbb	r15,0
	lea	rbx,[64+rsp]
	sub	rdi,rdx

	; Encoded bytes 66 49 0F 6E CF = movq xmm1,r15
DB	102,73,15,110,207
	pxor	xmm0,xmm0
	pshufd	xmm1,xmm1,0
	mov	rsi,QWORD[40+rsp]

	jmp	NEAR $L$mulx4x_cond_copy

; Branch-free select, 32 bytes per iteration:
;   [rdi] = (scratch & xmm1) | ([rdi] & (xmm1 == 0 ? ~0 : 0))
; The scratch words are zeroed as they are read. rdx counts down to zero.
ALIGN	32
$L$mulx4x_cond_copy:
	movdqa	xmm2,XMMWORD[rbx]
	movdqa	xmm3,XMMWORD[16+rbx]
	lea	rbx,[32+rbx]
	movdqu	xmm4,XMMWORD[rdi]
	movdqu	xmm5,XMMWORD[16+rdi]
	lea	rdi,[32+rdi]
	movdqa	XMMWORD[(-32)+rbx],xmm0
	movdqa	XMMWORD[(-16)+rbx],xmm0
	pcmpeqd	xmm0,xmm1
	pand	xmm2,xmm1
	pand	xmm3,xmm1
	pand	xmm4,xmm0
	pand	xmm5,xmm0
	pxor	xmm0,xmm0
	por	xmm4,xmm2
	por	xmm5,xmm3
	movdqu	XMMWORD[(-32)+rdi],xmm4
	movdqu	XMMWORD[(-16)+rdi],xmm5
	sub	rdx,32
	jnz	NEAR $L$mulx4x_cond_copy

	; rdx is zero on loop exit: this clears the word just past the
	; scratch vector (the slot the outer loop used for the carry).
	mov	QWORD[rbx],rdx

	mov	rax,1
	mov	r15,QWORD[((-48))+rsi]

	mov	r14,QWORD[((-40))+rsi]

	mov	r13,QWORD[((-32))+rsi]

	mov	r12,QWORD[((-24))+rsi]

	mov	rbp,QWORD[((-16))+rsi]

	mov	rbx,QWORD[((-8))+rsi]

	lea	rsp,[rsi]

$L$mulx4x_epilogue:
	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
	mov	rsi,QWORD[16+rsp]
	ret

$L$SEH_end_bn_mulx4x_mont:
	; ASCII, NUL-terminated:
	; "Montgomery Multiplication for x86_64, CRYPTOGAMS by <appro@openssl.org>"
	DB	77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105
	DB	112,108,105,99,97,116,105,111,110,32,102,111,114,32,120,56
	DB	54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83
	DB	32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115
	DB	115,108,46,111,114,103,62,0
ALIGN	16
EXTERN	__imp_RtlVirtualUnwind

;-----------------------------------------------------------------------
; SEH language-specific handlers. Both read two of their arguments:
;   r8 = pointer to a register-context record, r9 = pointer to a dispatcher
;   record. NOTE(review): the numeric offsets below presumably map to the
;   Windows CONTEXT / DISPATCHER_CONTEXT layouts (e.g. r8+120 Rax, +152 Rsp,
;   +248 Rip, +192 R9; r9+8 ImageBase, +40 ContextRecord, +56 HandlerData)
;   -- confirm against winnt.h.
; Each handler works out where the routine's saved registers are for the
; faulting address, patches them into the context, then falls into the
; common tail that calls RtlVirtualUnwind and returns 1.
;
; mul_handler: HandlerData = { body RVA, epilogue RVA }.
;   address <  body      -> frame not built yet: use context [r8+120]
;   address >= epilogue  -> frame already torn down: use context [r8+152]
;   otherwise            -> saved entry rsp = [ [r8+152] + 8 + [r8+192]*8 ],
;                           matching the slot written before $L$mul_body /
;                           $L$mul4x_body.
;-----------------------------------------------------------------------
ALIGN	16
mul_handler:
	push	rsi
	push	rdi
	push	rbx
	push	rbp
	push	r12
	push	r13
	push	r14
	push	r15
	pushfq
	sub	rsp,64

	mov	rax,QWORD[120+r8]
	mov	rbx,QWORD[248+r8]

	mov	rsi,QWORD[8+r9]
	mov	r11,QWORD[56+r9]

	; r10 = base + HandlerData[0]
	mov	r10d,DWORD[r11]
	lea	r10,[r10*1+rsi]
	cmp	rbx,r10
	jb	NEAR $L$common_seh_tail

	mov	rax,QWORD[152+r8]

	; r10 = base + HandlerData[1]
	mov	r10d,DWORD[4+r11]
	lea	r10,[r10*1+rsi]
	cmp	rbx,r10
	jae	NEAR $L$common_seh_tail

	mov	r10,QWORD[192+r8]
	mov	rax,QWORD[8+r10*8+rax]

	jmp	NEAR $L$common_pop_regs



;-----------------------------------------------------------------------
; sqr_handler: HandlerData = { prologue RVA, body RVA, epilogue RVA }.
;   address <  prologue  -> nothing pushed yet: use context [r8+120] as is
;   address <  body      -> registers pushed, frame not finished: pop them
;                           relative to context [r8+120]
;   address >= epilogue  -> use context [r8+152] as is
;   otherwise            -> saved entry rsp = [ [r8+152] + 40 ], matching the
;                           [rsp+40] slot written by bn_sqr8x_mont and
;                           bn_mulx4x_mont.
;-----------------------------------------------------------------------
ALIGN	16
sqr_handler:
	push	rsi
	push	rdi
	push	rbx
	push	rbp
	push	r12
	push	r13
	push	r14
	push	r15
	pushfq
	sub	rsp,64

	mov	rax,QWORD[120+r8]
	mov	rbx,QWORD[248+r8]

	mov	rsi,QWORD[8+r9]
	mov	r11,QWORD[56+r9]

	; r10 = base + HandlerData[0]
	mov	r10d,DWORD[r11]
	lea	r10,[r10*1+rsi]
	cmp	rbx,r10
	jb	NEAR $L$common_seh_tail

	; r10 = base + HandlerData[1]
	mov	r10d,DWORD[4+r11]
	lea	r10,[r10*1+rsi]
	cmp	rbx,r10
	jb	NEAR $L$common_pop_regs

	mov	rax,QWORD[152+r8]

	; r10 = base + HandlerData[2]
	mov	r10d,DWORD[8+r11]
	lea	r10,[r10*1+rsi]
	cmp	rbx,r10
	jae	NEAR $L$common_seh_tail

	mov	rax,QWORD[40+rax]

; rax = the routine's entry rsp: the six pushed registers sit just below it.
$L$common_pop_regs:
	mov	rbx,QWORD[((-8))+rax]
	mov	rbp,QWORD[((-16))+rax]
	mov	r12,QWORD[((-24))+rax]
	mov	r13,QWORD[((-32))+rax]
	mov	r14,QWORD[((-40))+rax]
	mov	r15,QWORD[((-48))+rax]
	mov	QWORD[144+r8],rbx
	mov	QWORD[160+r8],rbp
	mov	QWORD[216+r8],r12
	mov	QWORD[224+r8],r13
	mov	QWORD[232+r8],r14
	mov	QWORD[240+r8],r15

$L$common_seh_tail:
	; The routine's WIN64 prologue parked rdi/rsi at [rax+8]/[rax+16].
	mov	rdi,QWORD[8+rax]
	mov	rsi,QWORD[16+rax]
	mov	QWORD[152+r8],rax
	mov	QWORD[168+r8],rsi
	mov	QWORD[176+r8],rdi

	; Copy 154 qwords from the patched context (r8) to the record at
	; [r9+40]. The DD below encodes bytes FC F3 48 A5 = cld ; rep movsq.
	mov	rdi,QWORD[40+r9]
	mov	rsi,r8
	mov	ecx,154
	DD	0xa548f3fc

	; Argument setup for RtlVirtualUnwind: four in registers
	; (rcx, rdx, r8, r9) and four on the stack at [rsp+32]..[rsp+56].
	mov	rsi,r9
	xor	rcx,rcx
	mov	rdx,QWORD[8+rsi]
	mov	r8,QWORD[rsi]
	mov	r9,QWORD[16+rsi]
	mov	r10,QWORD[40+rsi]
	lea	r11,[56+rsi]
	lea	r12,[24+rsi]
	mov	QWORD[32+rsp],r10
	mov	QWORD[40+rsp],r11
	mov	QWORD[48+rsp],r12
	mov	QWORD[56+rsp],rcx
	call	QWORD[__imp_RtlVirtualUnwind]

	; Handler result 1 (presumably ExceptionContinueSearch -- confirm).
	mov	eax,1
	add	rsp,64
	popfq
	pop	r15
	pop	r14
	pop	r13
	pop	r12
	pop	rbp
	pop	rbx
	pop	rdi
	pop	rsi
	ret


; Function table: one { begin, end, unwind-info } triple of image-relative
; addresses per routine.
section	.pdata rdata align=4
ALIGN	4
	DD	$L$SEH_begin_bn_mul_mont wrt ..imagebase
	DD	$L$SEH_end_bn_mul_mont wrt ..imagebase
	DD	$L$SEH_info_bn_mul_mont wrt ..imagebase

	DD	$L$SEH_begin_bn_mul4x_mont wrt ..imagebase
	DD	$L$SEH_end_bn_mul4x_mont wrt ..imagebase
	DD	$L$SEH_info_bn_mul4x_mont wrt ..imagebase

	DD	$L$SEH_begin_bn_sqr8x_mont wrt ..imagebase
	DD	$L$SEH_end_bn_sqr8x_mont wrt ..imagebase
	DD	$L$SEH_info_bn_sqr8x_mont wrt ..imagebase
	DD	$L$SEH_begin_bn_mulx4x_mont wrt ..imagebase
	DD	$L$SEH_end_bn_mulx4x_mont wrt ..imagebase
	DD	$L$SEH_info_bn_mulx4x_mont wrt ..imagebase
; Unwind info: a 4-byte header (9,0,0,0), the handler's image-relative
; address, then the label addresses the handler reads as HandlerData
; (two for mul_handler, three for sqr_handler).
section	.xdata rdata align=8
ALIGN	8
$L$SEH_info_bn_mul_mont:
	DB	9,0,0,0
	DD	mul_handler wrt ..imagebase
	DD	$L$mul_body wrt ..imagebase,$L$mul_epilogue wrt ..imagebase
$L$SEH_info_bn_mul4x_mont:
	DB	9,0,0,0
	DD	mul_handler wrt ..imagebase
	DD	$L$mul4x_body wrt ..imagebase,$L$mul4x_epilogue wrt ..imagebase
$L$SEH_info_bn_sqr8x_mont:
	DB	9,0,0,0
	DD	sqr_handler wrt ..imagebase
	DD	$L$sqr8x_prologue wrt ..imagebase,$L$sqr8x_body wrt ..imagebase,$L$sqr8x_epilogue wrt ..imagebase
ALIGN	8
$L$SEH_info_bn_mulx4x_mont:
	DB	9,0,0,0
	DD	sqr_handler wrt ..imagebase
	DD	$L$mulx4x_prologue wrt ..imagebase,$L$mulx4x_body wrt ..imagebase,$L$mulx4x_epilogue wrt ..imagebase
ALIGN	8
%else
; Work around https://bugzilla.nasm.us/show_bug.cgi?id=3392738
ret
%endif