// This file is generated from a similarly-named Perl script in the BoringSSL
// source tree. Do not edit by hand.

#include <openssl/asm_base.h>

#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86_64) && defined(__ELF__)
.text
.extern OPENSSL_ia32cap_P
.hidden OPENSSL_ia32cap_P

chacha20_poly1305_constants:

.section .rodata
.align 64
.Lchacha20_consts:
.byte 'e','x','p','a','n','d',' ','3','2','-','b','y','t','e',' ','k'
.byte 'e','x','p','a','n','d',' ','3','2','-','b','y','t','e',' ','k'
.Lrol8:
.byte 3,0,1,2, 7,4,5,6, 11,8,9,10, 15,12,13,14
.byte 3,0,1,2, 7,4,5,6, 11,8,9,10, 15,12,13,14
.Lrol16:
.byte 2,3,0,1, 6,7,4,5, 10,11,8,9, 14,15,12,13
.byte 2,3,0,1, 6,7,4,5, 10,11,8,9, 14,15,12,13
.Lavx2_init:
.long 0,0,0,0
.Lsse_inc:
.long 1,0,0,0
.Lavx2_inc:
.long 2,0,0,0,2,0,0,0
.Lclamp:
.quad 0x0FFFFFFC0FFFFFFF, 0x0FFFFFFC0FFFFFFC
.quad 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF
.align 16
.Land_masks:
.byte 0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
.byte 0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
.byte 0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
.byte 0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
.byte 0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
.byte 0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00
.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00
.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00
.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00
.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00
.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00
.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00
.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff
.text

.type poly_hash_ad_internal,@function
.align 64
poly_hash_ad_internal:
.cfi_startproc
.cfi_def_cfa rsp, 8
 xorq %r10,%r10
 xorq %r11,%r11
 xorq %r12,%r12
 cmpq $13,%r8
 jne .Lhash_ad_loop
.Lpoly_fast_tls_ad:

 movq (%rcx),%r10
 movq 5(%rcx),%r11
 shrq $24,%r11
 movq $1,%r12
 movq 0+0+0(%rbp),%rax
 movq %rax,%r15
 mulq %r10
 movq %rax,%r13
 movq %rdx,%r14
 movq 0+0+0(%rbp),%rax
 mulq %r11
 imulq %r12,%r15
 addq %rax,%r14
 adcq %rdx,%r15
 movq 8+0+0(%rbp),%rax
 movq %rax,%r9
 mulq %r10
 addq %rax,%r14
 adcq $0,%rdx
 movq %rdx,%r10
 movq 8+0+0(%rbp),%rax
 mulq %r11
 addq %rax,%r15
 adcq $0,%rdx
 imulq %r12,%r9
 addq %r10,%r15
 adcq %rdx,%r9
 movq %r13,%r10
 movq %r14,%r11
 movq %r15,%r12
 andq $3,%r12
 movq %r15,%r13
 andq $-4,%r13
 movq %r9,%r14
 shrdq $2,%r9,%r15
 shrq $2,%r9
 addq %r13,%r15
 adcq %r14,%r9
 addq %r15,%r10
 adcq %r9,%r11
 adcq $0,%r12

 ret
.Lhash_ad_loop:

 cmpq $16,%r8
 jb .Lhash_ad_tail
 addq 0+0(%rcx),%r10
 adcq 8+0(%rcx),%r11
 adcq $1,%r12
 movq 0+0+0(%rbp),%rax
 movq %rax,%r15
 mulq %r10
 movq %rax,%r13
 movq %rdx,%r14
 movq 0+0+0(%rbp),%rax
 mulq %r11
 imulq %r12,%r15
 addq %rax,%r14
 adcq %rdx,%r15
 movq 8+0+0(%rbp),%rax
 movq %rax,%r9
 mulq %r10
 addq %rax,%r14
 adcq $0,%rdx
 movq %rdx,%r10
 movq 8+0+0(%rbp),%rax
 mulq %r11
 addq %rax,%r15
 adcq $0,%rdx
 imulq %r12,%r9
 addq %r10,%r15
 adcq %rdx,%r9
 movq %r13,%r10
 movq %r14,%r11
 movq %r15,%r12
 andq $3,%r12
 movq %r15,%r13
 andq $-4,%r13
 movq %r9,%r14
 shrdq $2,%r9,%r15
 shrq $2,%r9
 addq %r13,%r15
 adcq %r14,%r9
 addq %r15,%r10
 adcq %r9,%r11
 adcq $0,%r12

 leaq 16(%rcx),%rcx
 subq $16,%r8
 jmp .Lhash_ad_loop
.Lhash_ad_tail:
 cmpq $0,%r8
 je .Lhash_ad_done

 xorq %r13,%r13
 xorq %r14,%r14
 xorq %r15,%r15
 addq %r8,%rcx
.Lhash_ad_tail_loop:
 shldq $8,%r13,%r14
 shlq $8,%r13
 movzbq -1(%rcx),%r15
 xorq %r15,%r13
 decq %rcx
 decq %r8
 jne .Lhash_ad_tail_loop

 addq %r13,%r10
 adcq %r14,%r11
 adcq $1,%r12
 movq 0+0+0(%rbp),%rax
 movq %rax,%r15
 mulq %r10
 movq %rax,%r13
 movq %rdx,%r14
 movq 0+0+0(%rbp),%rax
 mulq %r11
 imulq %r12,%r15
 addq %rax,%r14
 adcq %rdx,%r15
 movq 8+0+0(%rbp),%rax
 movq %rax,%r9
 mulq %r10
 addq %rax,%r14
 adcq $0,%rdx
 movq %rdx,%r10
 movq 8+0+0(%rbp),%rax
 mulq %r11
 addq %rax,%r15
 adcq $0,%rdx
 imulq %r12,%r9
 addq %r10,%r15
 adcq %rdx,%r9
 movq %r13,%r10
 movq %r14,%r11
 movq %r15,%r12
 andq $3,%r12
 movq %r15,%r13
 andq $-4,%r13
 movq %r9,%r14
 shrdq $2,%r9,%r15
 shrq $2,%r9
 addq %r13,%r15
 adcq %r14,%r9
 addq %r15,%r10
 adcq %r9,%r11
 adcq $0,%r12


.Lhash_ad_done:
 ret
.cfi_endproc
.size poly_hash_ad_internal, .-poly_hash_ad_internal

.globl chacha20_poly1305_open
.hidden chacha20_poly1305_open
.type chacha20_poly1305_open,@function
.align 64
chacha20_poly1305_open:
.cfi_startproc
_CET_ENDBR
 pushq %rbp
.cfi_adjust_cfa_offset 8
.cfi_offset %rbp,-16
 pushq %rbx
.cfi_adjust_cfa_offset 8
.cfi_offset %rbx,-24
 pushq %r12
.cfi_adjust_cfa_offset 8
.cfi_offset %r12,-32
 pushq %r13
.cfi_adjust_cfa_offset 8
.cfi_offset %r13,-40
 pushq %r14
.cfi_adjust_cfa_offset 8
.cfi_offset %r14,-48
 pushq %r15
.cfi_adjust_cfa_offset 8
.cfi_offset %r15,-56


 pushq %r9
.cfi_adjust_cfa_offset 8
.cfi_offset %r9,-64
 subq $288 + 0 + 32,%rsp
.cfi_adjust_cfa_offset 288 + 32

 leaq 32(%rsp),%rbp
 andq $-32,%rbp

 movq %rdx,%rbx
 movq %r8,0+0+32(%rbp)
 movq %rbx,8+0+32(%rbp)

 movl OPENSSL_ia32cap_P+8(%rip),%eax
 andl $288,%eax
 xorl $288,%eax
 jz chacha20_poly1305_open_avx2

 cmpq $128,%rbx
 jbe .Lopen_sse_128

 movdqa .Lchacha20_consts(%rip),%xmm0
 movdqu 0(%r9),%xmm4
 movdqu 16(%r9),%xmm8
 movdqu 32(%r9),%xmm12

 movdqa %xmm12,%xmm7

 movdqa %xmm4,0+48(%rbp)
 movdqa %xmm8,0+64(%rbp)
 movdqa %xmm12,0+96(%rbp)
 movq $10,%r10
.Lopen_sse_init_rounds:
 paddd %xmm4,%xmm0
 pxor %xmm0,%xmm12
 pshufb .Lrol16(%rip),%xmm12
 paddd %xmm12,%xmm8
 pxor %xmm8,%xmm4
 movdqa %xmm4,%xmm3
 pslld $12,%xmm3
 psrld $20,%xmm4
 pxor %xmm3,%xmm4
 paddd %xmm4,%xmm0
 pxor %xmm0,%xmm12
 pshufb .Lrol8(%rip),%xmm12
 paddd %xmm12,%xmm8
 pxor %xmm8,%xmm4
 movdqa %xmm4,%xmm3
 pslld $7,%xmm3
 psrld $25,%xmm4
297 pxor %xmm3,%xmm4 298.byte 102,15,58,15,228,4 299.byte 102,69,15,58,15,192,8 300.byte 102,69,15,58,15,228,12 301 paddd %xmm4,%xmm0 302 pxor %xmm0,%xmm12 303 pshufb .Lrol16(%rip),%xmm12 304 paddd %xmm12,%xmm8 305 pxor %xmm8,%xmm4 306 movdqa %xmm4,%xmm3 307 pslld $12,%xmm3 308 psrld $20,%xmm4 309 pxor %xmm3,%xmm4 310 paddd %xmm4,%xmm0 311 pxor %xmm0,%xmm12 312 pshufb .Lrol8(%rip),%xmm12 313 paddd %xmm12,%xmm8 314 pxor %xmm8,%xmm4 315 movdqa %xmm4,%xmm3 316 pslld $7,%xmm3 317 psrld $25,%xmm4 318 pxor %xmm3,%xmm4 319.byte 102,15,58,15,228,12 320.byte 102,69,15,58,15,192,8 321.byte 102,69,15,58,15,228,4 322 323 decq %r10 324 jne .Lopen_sse_init_rounds 325 326 paddd .Lchacha20_consts(%rip),%xmm0 327 paddd 0+48(%rbp),%xmm4 328 329 pand .Lclamp(%rip),%xmm0 330 movdqa %xmm0,0+0(%rbp) 331 movdqa %xmm4,0+16(%rbp) 332 333 movq %r8,%r8 334 call poly_hash_ad_internal 335.Lopen_sse_main_loop: 336 cmpq $256,%rbx 337 jb .Lopen_sse_tail 338 339 movdqa .Lchacha20_consts(%rip),%xmm0 340 movdqa 0+48(%rbp),%xmm4 341 movdqa 0+64(%rbp),%xmm8 342 movdqa %xmm0,%xmm1 343 movdqa %xmm4,%xmm5 344 movdqa %xmm8,%xmm9 345 movdqa %xmm0,%xmm2 346 movdqa %xmm4,%xmm6 347 movdqa %xmm8,%xmm10 348 movdqa %xmm0,%xmm3 349 movdqa %xmm4,%xmm7 350 movdqa %xmm8,%xmm11 351 movdqa 0+96(%rbp),%xmm15 352 paddd .Lsse_inc(%rip),%xmm15 353 movdqa %xmm15,%xmm14 354 paddd .Lsse_inc(%rip),%xmm14 355 movdqa %xmm14,%xmm13 356 paddd .Lsse_inc(%rip),%xmm13 357 movdqa %xmm13,%xmm12 358 paddd .Lsse_inc(%rip),%xmm12 359 movdqa %xmm12,0+96(%rbp) 360 movdqa %xmm13,0+112(%rbp) 361 movdqa %xmm14,0+128(%rbp) 362 movdqa %xmm15,0+144(%rbp) 363 364 365 366 movq $4,%rcx 367 movq %rsi,%r8 368.Lopen_sse_main_loop_rounds: 369 movdqa %xmm8,0+80(%rbp) 370 movdqa .Lrol16(%rip),%xmm8 371 paddd %xmm7,%xmm3 372 paddd %xmm6,%xmm2 373 paddd %xmm5,%xmm1 374 paddd %xmm4,%xmm0 375 pxor %xmm3,%xmm15 376 pxor %xmm2,%xmm14 377 pxor %xmm1,%xmm13 378 pxor %xmm0,%xmm12 379.byte 102,69,15,56,0,248 380.byte 102,69,15,56,0,240 381.byte 102,69,15,56,0,232 382.byte 102,69,15,56,0,224 383 movdqa 0+80(%rbp),%xmm8 384 paddd %xmm15,%xmm11 385 paddd %xmm14,%xmm10 386 paddd %xmm13,%xmm9 387 paddd %xmm12,%xmm8 388 pxor %xmm11,%xmm7 389 addq 0+0(%r8),%r10 390 adcq 8+0(%r8),%r11 391 adcq $1,%r12 392 393 leaq 16(%r8),%r8 394 pxor %xmm10,%xmm6 395 pxor %xmm9,%xmm5 396 pxor %xmm8,%xmm4 397 movdqa %xmm8,0+80(%rbp) 398 movdqa %xmm7,%xmm8 399 psrld $20,%xmm8 400 pslld $32-20,%xmm7 401 pxor %xmm8,%xmm7 402 movdqa %xmm6,%xmm8 403 psrld $20,%xmm8 404 pslld $32-20,%xmm6 405 pxor %xmm8,%xmm6 406 movdqa %xmm5,%xmm8 407 psrld $20,%xmm8 408 pslld $32-20,%xmm5 409 pxor %xmm8,%xmm5 410 movdqa %xmm4,%xmm8 411 psrld $20,%xmm8 412 pslld $32-20,%xmm4 413 pxor %xmm8,%xmm4 414 movq 0+0+0(%rbp),%rax 415 movq %rax,%r15 416 mulq %r10 417 movq %rax,%r13 418 movq %rdx,%r14 419 movq 0+0+0(%rbp),%rax 420 mulq %r11 421 imulq %r12,%r15 422 addq %rax,%r14 423 adcq %rdx,%r15 424 movdqa .Lrol8(%rip),%xmm8 425 paddd %xmm7,%xmm3 426 paddd %xmm6,%xmm2 427 paddd %xmm5,%xmm1 428 paddd %xmm4,%xmm0 429 pxor %xmm3,%xmm15 430 pxor %xmm2,%xmm14 431 pxor %xmm1,%xmm13 432 pxor %xmm0,%xmm12 433.byte 102,69,15,56,0,248 434.byte 102,69,15,56,0,240 435.byte 102,69,15,56,0,232 436.byte 102,69,15,56,0,224 437 movdqa 0+80(%rbp),%xmm8 438 paddd %xmm15,%xmm11 439 paddd %xmm14,%xmm10 440 paddd %xmm13,%xmm9 441 paddd %xmm12,%xmm8 442 pxor %xmm11,%xmm7 443 pxor %xmm10,%xmm6 444 movq 8+0+0(%rbp),%rax 445 movq %rax,%r9 446 mulq %r10 447 addq %rax,%r14 448 adcq $0,%rdx 449 movq %rdx,%r10 450 movq 8+0+0(%rbp),%rax 451 mulq %r11 452 addq %rax,%r15 453 
adcq $0,%rdx 454 pxor %xmm9,%xmm5 455 pxor %xmm8,%xmm4 456 movdqa %xmm8,0+80(%rbp) 457 movdqa %xmm7,%xmm8 458 psrld $25,%xmm8 459 pslld $32-25,%xmm7 460 pxor %xmm8,%xmm7 461 movdqa %xmm6,%xmm8 462 psrld $25,%xmm8 463 pslld $32-25,%xmm6 464 pxor %xmm8,%xmm6 465 movdqa %xmm5,%xmm8 466 psrld $25,%xmm8 467 pslld $32-25,%xmm5 468 pxor %xmm8,%xmm5 469 movdqa %xmm4,%xmm8 470 psrld $25,%xmm8 471 pslld $32-25,%xmm4 472 pxor %xmm8,%xmm4 473 movdqa 0+80(%rbp),%xmm8 474 imulq %r12,%r9 475 addq %r10,%r15 476 adcq %rdx,%r9 477.byte 102,15,58,15,255,4 478.byte 102,69,15,58,15,219,8 479.byte 102,69,15,58,15,255,12 480.byte 102,15,58,15,246,4 481.byte 102,69,15,58,15,210,8 482.byte 102,69,15,58,15,246,12 483.byte 102,15,58,15,237,4 484.byte 102,69,15,58,15,201,8 485.byte 102,69,15,58,15,237,12 486.byte 102,15,58,15,228,4 487.byte 102,69,15,58,15,192,8 488.byte 102,69,15,58,15,228,12 489 movdqa %xmm8,0+80(%rbp) 490 movdqa .Lrol16(%rip),%xmm8 491 paddd %xmm7,%xmm3 492 paddd %xmm6,%xmm2 493 paddd %xmm5,%xmm1 494 paddd %xmm4,%xmm0 495 pxor %xmm3,%xmm15 496 pxor %xmm2,%xmm14 497 movq %r13,%r10 498 movq %r14,%r11 499 movq %r15,%r12 500 andq $3,%r12 501 movq %r15,%r13 502 andq $-4,%r13 503 movq %r9,%r14 504 shrdq $2,%r9,%r15 505 shrq $2,%r9 506 addq %r13,%r15 507 adcq %r14,%r9 508 addq %r15,%r10 509 adcq %r9,%r11 510 adcq $0,%r12 511 pxor %xmm1,%xmm13 512 pxor %xmm0,%xmm12 513.byte 102,69,15,56,0,248 514.byte 102,69,15,56,0,240 515.byte 102,69,15,56,0,232 516.byte 102,69,15,56,0,224 517 movdqa 0+80(%rbp),%xmm8 518 paddd %xmm15,%xmm11 519 paddd %xmm14,%xmm10 520 paddd %xmm13,%xmm9 521 paddd %xmm12,%xmm8 522 pxor %xmm11,%xmm7 523 pxor %xmm10,%xmm6 524 pxor %xmm9,%xmm5 525 pxor %xmm8,%xmm4 526 movdqa %xmm8,0+80(%rbp) 527 movdqa %xmm7,%xmm8 528 psrld $20,%xmm8 529 pslld $32-20,%xmm7 530 pxor %xmm8,%xmm7 531 movdqa %xmm6,%xmm8 532 psrld $20,%xmm8 533 pslld $32-20,%xmm6 534 pxor %xmm8,%xmm6 535 movdqa %xmm5,%xmm8 536 psrld $20,%xmm8 537 pslld $32-20,%xmm5 538 pxor %xmm8,%xmm5 539 movdqa %xmm4,%xmm8 540 psrld $20,%xmm8 541 pslld $32-20,%xmm4 542 pxor %xmm8,%xmm4 543 movdqa .Lrol8(%rip),%xmm8 544 paddd %xmm7,%xmm3 545 paddd %xmm6,%xmm2 546 paddd %xmm5,%xmm1 547 paddd %xmm4,%xmm0 548 pxor %xmm3,%xmm15 549 pxor %xmm2,%xmm14 550 pxor %xmm1,%xmm13 551 pxor %xmm0,%xmm12 552.byte 102,69,15,56,0,248 553.byte 102,69,15,56,0,240 554.byte 102,69,15,56,0,232 555.byte 102,69,15,56,0,224 556 movdqa 0+80(%rbp),%xmm8 557 paddd %xmm15,%xmm11 558 paddd %xmm14,%xmm10 559 paddd %xmm13,%xmm9 560 paddd %xmm12,%xmm8 561 pxor %xmm11,%xmm7 562 pxor %xmm10,%xmm6 563 pxor %xmm9,%xmm5 564 pxor %xmm8,%xmm4 565 movdqa %xmm8,0+80(%rbp) 566 movdqa %xmm7,%xmm8 567 psrld $25,%xmm8 568 pslld $32-25,%xmm7 569 pxor %xmm8,%xmm7 570 movdqa %xmm6,%xmm8 571 psrld $25,%xmm8 572 pslld $32-25,%xmm6 573 pxor %xmm8,%xmm6 574 movdqa %xmm5,%xmm8 575 psrld $25,%xmm8 576 pslld $32-25,%xmm5 577 pxor %xmm8,%xmm5 578 movdqa %xmm4,%xmm8 579 psrld $25,%xmm8 580 pslld $32-25,%xmm4 581 pxor %xmm8,%xmm4 582 movdqa 0+80(%rbp),%xmm8 583.byte 102,15,58,15,255,12 584.byte 102,69,15,58,15,219,8 585.byte 102,69,15,58,15,255,4 586.byte 102,15,58,15,246,12 587.byte 102,69,15,58,15,210,8 588.byte 102,69,15,58,15,246,4 589.byte 102,15,58,15,237,12 590.byte 102,69,15,58,15,201,8 591.byte 102,69,15,58,15,237,4 592.byte 102,15,58,15,228,12 593.byte 102,69,15,58,15,192,8 594.byte 102,69,15,58,15,228,4 595 596 decq %rcx 597 jge .Lopen_sse_main_loop_rounds 598 addq 0+0(%r8),%r10 599 adcq 8+0(%r8),%r11 600 adcq $1,%r12 601 movq 0+0+0(%rbp),%rax 602 movq %rax,%r15 603 mulq %r10 604 movq 
%rax,%r13 605 movq %rdx,%r14 606 movq 0+0+0(%rbp),%rax 607 mulq %r11 608 imulq %r12,%r15 609 addq %rax,%r14 610 adcq %rdx,%r15 611 movq 8+0+0(%rbp),%rax 612 movq %rax,%r9 613 mulq %r10 614 addq %rax,%r14 615 adcq $0,%rdx 616 movq %rdx,%r10 617 movq 8+0+0(%rbp),%rax 618 mulq %r11 619 addq %rax,%r15 620 adcq $0,%rdx 621 imulq %r12,%r9 622 addq %r10,%r15 623 adcq %rdx,%r9 624 movq %r13,%r10 625 movq %r14,%r11 626 movq %r15,%r12 627 andq $3,%r12 628 movq %r15,%r13 629 andq $-4,%r13 630 movq %r9,%r14 631 shrdq $2,%r9,%r15 632 shrq $2,%r9 633 addq %r13,%r15 634 adcq %r14,%r9 635 addq %r15,%r10 636 adcq %r9,%r11 637 adcq $0,%r12 638 639 leaq 16(%r8),%r8 640 cmpq $-6,%rcx 641 jg .Lopen_sse_main_loop_rounds 642 paddd .Lchacha20_consts(%rip),%xmm3 643 paddd 0+48(%rbp),%xmm7 644 paddd 0+64(%rbp),%xmm11 645 paddd 0+144(%rbp),%xmm15 646 paddd .Lchacha20_consts(%rip),%xmm2 647 paddd 0+48(%rbp),%xmm6 648 paddd 0+64(%rbp),%xmm10 649 paddd 0+128(%rbp),%xmm14 650 paddd .Lchacha20_consts(%rip),%xmm1 651 paddd 0+48(%rbp),%xmm5 652 paddd 0+64(%rbp),%xmm9 653 paddd 0+112(%rbp),%xmm13 654 paddd .Lchacha20_consts(%rip),%xmm0 655 paddd 0+48(%rbp),%xmm4 656 paddd 0+64(%rbp),%xmm8 657 paddd 0+96(%rbp),%xmm12 658 movdqa %xmm12,0+80(%rbp) 659 movdqu 0 + 0(%rsi),%xmm12 660 pxor %xmm3,%xmm12 661 movdqu %xmm12,0 + 0(%rdi) 662 movdqu 16 + 0(%rsi),%xmm12 663 pxor %xmm7,%xmm12 664 movdqu %xmm12,16 + 0(%rdi) 665 movdqu 32 + 0(%rsi),%xmm12 666 pxor %xmm11,%xmm12 667 movdqu %xmm12,32 + 0(%rdi) 668 movdqu 48 + 0(%rsi),%xmm12 669 pxor %xmm15,%xmm12 670 movdqu %xmm12,48 + 0(%rdi) 671 movdqu 0 + 64(%rsi),%xmm3 672 movdqu 16 + 64(%rsi),%xmm7 673 movdqu 32 + 64(%rsi),%xmm11 674 movdqu 48 + 64(%rsi),%xmm15 675 pxor %xmm3,%xmm2 676 pxor %xmm7,%xmm6 677 pxor %xmm11,%xmm10 678 pxor %xmm14,%xmm15 679 movdqu %xmm2,0 + 64(%rdi) 680 movdqu %xmm6,16 + 64(%rdi) 681 movdqu %xmm10,32 + 64(%rdi) 682 movdqu %xmm15,48 + 64(%rdi) 683 movdqu 0 + 128(%rsi),%xmm3 684 movdqu 16 + 128(%rsi),%xmm7 685 movdqu 32 + 128(%rsi),%xmm11 686 movdqu 48 + 128(%rsi),%xmm15 687 pxor %xmm3,%xmm1 688 pxor %xmm7,%xmm5 689 pxor %xmm11,%xmm9 690 pxor %xmm13,%xmm15 691 movdqu %xmm1,0 + 128(%rdi) 692 movdqu %xmm5,16 + 128(%rdi) 693 movdqu %xmm9,32 + 128(%rdi) 694 movdqu %xmm15,48 + 128(%rdi) 695 movdqu 0 + 192(%rsi),%xmm3 696 movdqu 16 + 192(%rsi),%xmm7 697 movdqu 32 + 192(%rsi),%xmm11 698 movdqu 48 + 192(%rsi),%xmm15 699 pxor %xmm3,%xmm0 700 pxor %xmm7,%xmm4 701 pxor %xmm11,%xmm8 702 pxor 0+80(%rbp),%xmm15 703 movdqu %xmm0,0 + 192(%rdi) 704 movdqu %xmm4,16 + 192(%rdi) 705 movdqu %xmm8,32 + 192(%rdi) 706 movdqu %xmm15,48 + 192(%rdi) 707 708 leaq 256(%rsi),%rsi 709 leaq 256(%rdi),%rdi 710 subq $256,%rbx 711 jmp .Lopen_sse_main_loop 712.Lopen_sse_tail: 713 714 testq %rbx,%rbx 715 jz .Lopen_sse_finalize 716 cmpq $192,%rbx 717 ja .Lopen_sse_tail_256 718 cmpq $128,%rbx 719 ja .Lopen_sse_tail_192 720 cmpq $64,%rbx 721 ja .Lopen_sse_tail_128 722 movdqa .Lchacha20_consts(%rip),%xmm0 723 movdqa 0+48(%rbp),%xmm4 724 movdqa 0+64(%rbp),%xmm8 725 movdqa 0+96(%rbp),%xmm12 726 paddd .Lsse_inc(%rip),%xmm12 727 movdqa %xmm12,0+96(%rbp) 728 729 xorq %r8,%r8 730 movq %rbx,%rcx 731 cmpq $16,%rcx 732 jb .Lopen_sse_tail_64_rounds 733.Lopen_sse_tail_64_rounds_and_x1hash: 734 addq 0+0(%rsi,%r8,1),%r10 735 adcq 8+0(%rsi,%r8,1),%r11 736 adcq $1,%r12 737 movq 0+0+0(%rbp),%rax 738 movq %rax,%r15 739 mulq %r10 740 movq %rax,%r13 741 movq %rdx,%r14 742 movq 0+0+0(%rbp),%rax 743 mulq %r11 744 imulq %r12,%r15 745 addq %rax,%r14 746 adcq %rdx,%r15 747 movq 8+0+0(%rbp),%rax 748 movq %rax,%r9 749 mulq %r10 
 addq %rax,%r14
 adcq $0,%rdx
 movq %rdx,%r10
 movq 8+0+0(%rbp),%rax
 mulq %r11
 addq %rax,%r15
 adcq $0,%rdx
 imulq %r12,%r9
 addq %r10,%r15
 adcq %rdx,%r9
 movq %r13,%r10
 movq %r14,%r11
 movq %r15,%r12
 andq $3,%r12
 movq %r15,%r13
 andq $-4,%r13
 movq %r9,%r14
 shrdq $2,%r9,%r15
 shrq $2,%r9
 addq %r13,%r15
 adcq %r14,%r9
 addq %r15,%r10
 adcq %r9,%r11
 adcq $0,%r12

 subq $16,%rcx
.Lopen_sse_tail_64_rounds:
 addq $16,%r8
 paddd %xmm4,%xmm0
 pxor %xmm0,%xmm12
 pshufb .Lrol16(%rip),%xmm12
 paddd %xmm12,%xmm8
 pxor %xmm8,%xmm4
 movdqa %xmm4,%xmm3
 pslld $12,%xmm3
 psrld $20,%xmm4
 pxor %xmm3,%xmm4
 paddd %xmm4,%xmm0
 pxor %xmm0,%xmm12
 pshufb .Lrol8(%rip),%xmm12
 paddd %xmm12,%xmm8
 pxor %xmm8,%xmm4
 movdqa %xmm4,%xmm3
 pslld $7,%xmm3
 psrld $25,%xmm4
 pxor %xmm3,%xmm4
.byte 102,15,58,15,228,4
.byte 102,69,15,58,15,192,8
.byte 102,69,15,58,15,228,12
 paddd %xmm4,%xmm0
 pxor %xmm0,%xmm12
 pshufb .Lrol16(%rip),%xmm12
 paddd %xmm12,%xmm8
 pxor %xmm8,%xmm4
 movdqa %xmm4,%xmm3
 pslld $12,%xmm3
 psrld $20,%xmm4
 pxor %xmm3,%xmm4
 paddd %xmm4,%xmm0
 pxor %xmm0,%xmm12
 pshufb .Lrol8(%rip),%xmm12
 paddd %xmm12,%xmm8
 pxor %xmm8,%xmm4
 movdqa %xmm4,%xmm3
 pslld $7,%xmm3
 psrld $25,%xmm4
 pxor %xmm3,%xmm4
.byte 102,15,58,15,228,12
.byte 102,69,15,58,15,192,8
.byte 102,69,15,58,15,228,4

 cmpq $16,%rcx
 jae .Lopen_sse_tail_64_rounds_and_x1hash
 cmpq $160,%r8
 jne .Lopen_sse_tail_64_rounds
 paddd .Lchacha20_consts(%rip),%xmm0
 paddd 0+48(%rbp),%xmm4
 paddd 0+64(%rbp),%xmm8
 paddd 0+96(%rbp),%xmm12

 jmp .Lopen_sse_tail_64_dec_loop

.Lopen_sse_tail_128:
 movdqa .Lchacha20_consts(%rip),%xmm0
 movdqa 0+48(%rbp),%xmm4
 movdqa 0+64(%rbp),%xmm8
 movdqa %xmm0,%xmm1
 movdqa %xmm4,%xmm5
 movdqa %xmm8,%xmm9
 movdqa 0+96(%rbp),%xmm13
 paddd .Lsse_inc(%rip),%xmm13
 movdqa %xmm13,%xmm12
 paddd .Lsse_inc(%rip),%xmm12
 movdqa %xmm12,0+96(%rbp)
 movdqa %xmm13,0+112(%rbp)

 movq %rbx,%rcx
 andq $-16,%rcx
 xorq %r8,%r8
.Lopen_sse_tail_128_rounds_and_x1hash:
 addq 0+0(%rsi,%r8,1),%r10
 adcq 8+0(%rsi,%r8,1),%r11
 adcq $1,%r12
 movq 0+0+0(%rbp),%rax
 movq %rax,%r15
 mulq %r10
 movq %rax,%r13
 movq %rdx,%r14
 movq 0+0+0(%rbp),%rax
 mulq %r11
 imulq %r12,%r15
 addq %rax,%r14
 adcq %rdx,%r15
 movq 8+0+0(%rbp),%rax
 movq %rax,%r9
 mulq %r10
 addq %rax,%r14
 adcq $0,%rdx
 movq %rdx,%r10
 movq 8+0+0(%rbp),%rax
 mulq %r11
 addq %rax,%r15
 adcq $0,%rdx
 imulq %r12,%r9
 addq %r10,%r15
 adcq %rdx,%r9
 movq %r13,%r10
 movq %r14,%r11
 movq %r15,%r12
 andq $3,%r12
 movq %r15,%r13
 andq $-4,%r13
 movq %r9,%r14
 shrdq $2,%r9,%r15
 shrq $2,%r9
 addq %r13,%r15
 adcq %r14,%r9
 addq %r15,%r10
 adcq %r9,%r11
 adcq $0,%r12

.Lopen_sse_tail_128_rounds:
 addq $16,%r8
 paddd %xmm4,%xmm0
 pxor %xmm0,%xmm12
 pshufb .Lrol16(%rip),%xmm12
 paddd %xmm12,%xmm8
 pxor %xmm8,%xmm4
 movdqa %xmm4,%xmm3
 pslld $12,%xmm3
 psrld $20,%xmm4
 pxor %xmm3,%xmm4
 paddd %xmm4,%xmm0
 pxor %xmm0,%xmm12
 pshufb .Lrol8(%rip),%xmm12
 paddd %xmm12,%xmm8
 pxor %xmm8,%xmm4
 movdqa %xmm4,%xmm3
 pslld $7,%xmm3
 psrld $25,%xmm4
 pxor %xmm3,%xmm4
.byte 102,15,58,15,228,4
.byte 102,69,15,58,15,192,8
913.byte 102,69,15,58,15,228,12 914 paddd %xmm5,%xmm1 915 pxor %xmm1,%xmm13 916 pshufb .Lrol16(%rip),%xmm13 917 paddd %xmm13,%xmm9 918 pxor %xmm9,%xmm5 919 movdqa %xmm5,%xmm3 920 pslld $12,%xmm3 921 psrld $20,%xmm5 922 pxor %xmm3,%xmm5 923 paddd %xmm5,%xmm1 924 pxor %xmm1,%xmm13 925 pshufb .Lrol8(%rip),%xmm13 926 paddd %xmm13,%xmm9 927 pxor %xmm9,%xmm5 928 movdqa %xmm5,%xmm3 929 pslld $7,%xmm3 930 psrld $25,%xmm5 931 pxor %xmm3,%xmm5 932.byte 102,15,58,15,237,4 933.byte 102,69,15,58,15,201,8 934.byte 102,69,15,58,15,237,12 935 paddd %xmm4,%xmm0 936 pxor %xmm0,%xmm12 937 pshufb .Lrol16(%rip),%xmm12 938 paddd %xmm12,%xmm8 939 pxor %xmm8,%xmm4 940 movdqa %xmm4,%xmm3 941 pslld $12,%xmm3 942 psrld $20,%xmm4 943 pxor %xmm3,%xmm4 944 paddd %xmm4,%xmm0 945 pxor %xmm0,%xmm12 946 pshufb .Lrol8(%rip),%xmm12 947 paddd %xmm12,%xmm8 948 pxor %xmm8,%xmm4 949 movdqa %xmm4,%xmm3 950 pslld $7,%xmm3 951 psrld $25,%xmm4 952 pxor %xmm3,%xmm4 953.byte 102,15,58,15,228,12 954.byte 102,69,15,58,15,192,8 955.byte 102,69,15,58,15,228,4 956 paddd %xmm5,%xmm1 957 pxor %xmm1,%xmm13 958 pshufb .Lrol16(%rip),%xmm13 959 paddd %xmm13,%xmm9 960 pxor %xmm9,%xmm5 961 movdqa %xmm5,%xmm3 962 pslld $12,%xmm3 963 psrld $20,%xmm5 964 pxor %xmm3,%xmm5 965 paddd %xmm5,%xmm1 966 pxor %xmm1,%xmm13 967 pshufb .Lrol8(%rip),%xmm13 968 paddd %xmm13,%xmm9 969 pxor %xmm9,%xmm5 970 movdqa %xmm5,%xmm3 971 pslld $7,%xmm3 972 psrld $25,%xmm5 973 pxor %xmm3,%xmm5 974.byte 102,15,58,15,237,12 975.byte 102,69,15,58,15,201,8 976.byte 102,69,15,58,15,237,4 977 978 cmpq %rcx,%r8 979 jb .Lopen_sse_tail_128_rounds_and_x1hash 980 cmpq $160,%r8 981 jne .Lopen_sse_tail_128_rounds 982 paddd .Lchacha20_consts(%rip),%xmm1 983 paddd 0+48(%rbp),%xmm5 984 paddd 0+64(%rbp),%xmm9 985 paddd 0+112(%rbp),%xmm13 986 paddd .Lchacha20_consts(%rip),%xmm0 987 paddd 0+48(%rbp),%xmm4 988 paddd 0+64(%rbp),%xmm8 989 paddd 0+96(%rbp),%xmm12 990 movdqu 0 + 0(%rsi),%xmm3 991 movdqu 16 + 0(%rsi),%xmm7 992 movdqu 32 + 0(%rsi),%xmm11 993 movdqu 48 + 0(%rsi),%xmm15 994 pxor %xmm3,%xmm1 995 pxor %xmm7,%xmm5 996 pxor %xmm11,%xmm9 997 pxor %xmm13,%xmm15 998 movdqu %xmm1,0 + 0(%rdi) 999 movdqu %xmm5,16 + 0(%rdi) 1000 movdqu %xmm9,32 + 0(%rdi) 1001 movdqu %xmm15,48 + 0(%rdi) 1002 1003 subq $64,%rbx 1004 leaq 64(%rsi),%rsi 1005 leaq 64(%rdi),%rdi 1006 jmp .Lopen_sse_tail_64_dec_loop 1007 1008.Lopen_sse_tail_192: 1009 movdqa .Lchacha20_consts(%rip),%xmm0 1010 movdqa 0+48(%rbp),%xmm4 1011 movdqa 0+64(%rbp),%xmm8 1012 movdqa %xmm0,%xmm1 1013 movdqa %xmm4,%xmm5 1014 movdqa %xmm8,%xmm9 1015 movdqa %xmm0,%xmm2 1016 movdqa %xmm4,%xmm6 1017 movdqa %xmm8,%xmm10 1018 movdqa 0+96(%rbp),%xmm14 1019 paddd .Lsse_inc(%rip),%xmm14 1020 movdqa %xmm14,%xmm13 1021 paddd .Lsse_inc(%rip),%xmm13 1022 movdqa %xmm13,%xmm12 1023 paddd .Lsse_inc(%rip),%xmm12 1024 movdqa %xmm12,0+96(%rbp) 1025 movdqa %xmm13,0+112(%rbp) 1026 movdqa %xmm14,0+128(%rbp) 1027 1028 movq %rbx,%rcx 1029 movq $160,%r8 1030 cmpq $160,%rcx 1031 cmovgq %r8,%rcx 1032 andq $-16,%rcx 1033 xorq %r8,%r8 1034.Lopen_sse_tail_192_rounds_and_x1hash: 1035 addq 0+0(%rsi,%r8,1),%r10 1036 adcq 8+0(%rsi,%r8,1),%r11 1037 adcq $1,%r12 1038 movq 0+0+0(%rbp),%rax 1039 movq %rax,%r15 1040 mulq %r10 1041 movq %rax,%r13 1042 movq %rdx,%r14 1043 movq 0+0+0(%rbp),%rax 1044 mulq %r11 1045 imulq %r12,%r15 1046 addq %rax,%r14 1047 adcq %rdx,%r15 1048 movq 8+0+0(%rbp),%rax 1049 movq %rax,%r9 1050 mulq %r10 1051 addq %rax,%r14 1052 adcq $0,%rdx 1053 movq %rdx,%r10 1054 movq 8+0+0(%rbp),%rax 1055 mulq %r11 1056 addq %rax,%r15 1057 adcq $0,%rdx 1058 imulq %r12,%r9 1059 
addq %r10,%r15 1060 adcq %rdx,%r9 1061 movq %r13,%r10 1062 movq %r14,%r11 1063 movq %r15,%r12 1064 andq $3,%r12 1065 movq %r15,%r13 1066 andq $-4,%r13 1067 movq %r9,%r14 1068 shrdq $2,%r9,%r15 1069 shrq $2,%r9 1070 addq %r13,%r15 1071 adcq %r14,%r9 1072 addq %r15,%r10 1073 adcq %r9,%r11 1074 adcq $0,%r12 1075 1076.Lopen_sse_tail_192_rounds: 1077 addq $16,%r8 1078 paddd %xmm4,%xmm0 1079 pxor %xmm0,%xmm12 1080 pshufb .Lrol16(%rip),%xmm12 1081 paddd %xmm12,%xmm8 1082 pxor %xmm8,%xmm4 1083 movdqa %xmm4,%xmm3 1084 pslld $12,%xmm3 1085 psrld $20,%xmm4 1086 pxor %xmm3,%xmm4 1087 paddd %xmm4,%xmm0 1088 pxor %xmm0,%xmm12 1089 pshufb .Lrol8(%rip),%xmm12 1090 paddd %xmm12,%xmm8 1091 pxor %xmm8,%xmm4 1092 movdqa %xmm4,%xmm3 1093 pslld $7,%xmm3 1094 psrld $25,%xmm4 1095 pxor %xmm3,%xmm4 1096.byte 102,15,58,15,228,4 1097.byte 102,69,15,58,15,192,8 1098.byte 102,69,15,58,15,228,12 1099 paddd %xmm5,%xmm1 1100 pxor %xmm1,%xmm13 1101 pshufb .Lrol16(%rip),%xmm13 1102 paddd %xmm13,%xmm9 1103 pxor %xmm9,%xmm5 1104 movdqa %xmm5,%xmm3 1105 pslld $12,%xmm3 1106 psrld $20,%xmm5 1107 pxor %xmm3,%xmm5 1108 paddd %xmm5,%xmm1 1109 pxor %xmm1,%xmm13 1110 pshufb .Lrol8(%rip),%xmm13 1111 paddd %xmm13,%xmm9 1112 pxor %xmm9,%xmm5 1113 movdqa %xmm5,%xmm3 1114 pslld $7,%xmm3 1115 psrld $25,%xmm5 1116 pxor %xmm3,%xmm5 1117.byte 102,15,58,15,237,4 1118.byte 102,69,15,58,15,201,8 1119.byte 102,69,15,58,15,237,12 1120 paddd %xmm6,%xmm2 1121 pxor %xmm2,%xmm14 1122 pshufb .Lrol16(%rip),%xmm14 1123 paddd %xmm14,%xmm10 1124 pxor %xmm10,%xmm6 1125 movdqa %xmm6,%xmm3 1126 pslld $12,%xmm3 1127 psrld $20,%xmm6 1128 pxor %xmm3,%xmm6 1129 paddd %xmm6,%xmm2 1130 pxor %xmm2,%xmm14 1131 pshufb .Lrol8(%rip),%xmm14 1132 paddd %xmm14,%xmm10 1133 pxor %xmm10,%xmm6 1134 movdqa %xmm6,%xmm3 1135 pslld $7,%xmm3 1136 psrld $25,%xmm6 1137 pxor %xmm3,%xmm6 1138.byte 102,15,58,15,246,4 1139.byte 102,69,15,58,15,210,8 1140.byte 102,69,15,58,15,246,12 1141 paddd %xmm4,%xmm0 1142 pxor %xmm0,%xmm12 1143 pshufb .Lrol16(%rip),%xmm12 1144 paddd %xmm12,%xmm8 1145 pxor %xmm8,%xmm4 1146 movdqa %xmm4,%xmm3 1147 pslld $12,%xmm3 1148 psrld $20,%xmm4 1149 pxor %xmm3,%xmm4 1150 paddd %xmm4,%xmm0 1151 pxor %xmm0,%xmm12 1152 pshufb .Lrol8(%rip),%xmm12 1153 paddd %xmm12,%xmm8 1154 pxor %xmm8,%xmm4 1155 movdqa %xmm4,%xmm3 1156 pslld $7,%xmm3 1157 psrld $25,%xmm4 1158 pxor %xmm3,%xmm4 1159.byte 102,15,58,15,228,12 1160.byte 102,69,15,58,15,192,8 1161.byte 102,69,15,58,15,228,4 1162 paddd %xmm5,%xmm1 1163 pxor %xmm1,%xmm13 1164 pshufb .Lrol16(%rip),%xmm13 1165 paddd %xmm13,%xmm9 1166 pxor %xmm9,%xmm5 1167 movdqa %xmm5,%xmm3 1168 pslld $12,%xmm3 1169 psrld $20,%xmm5 1170 pxor %xmm3,%xmm5 1171 paddd %xmm5,%xmm1 1172 pxor %xmm1,%xmm13 1173 pshufb .Lrol8(%rip),%xmm13 1174 paddd %xmm13,%xmm9 1175 pxor %xmm9,%xmm5 1176 movdqa %xmm5,%xmm3 1177 pslld $7,%xmm3 1178 psrld $25,%xmm5 1179 pxor %xmm3,%xmm5 1180.byte 102,15,58,15,237,12 1181.byte 102,69,15,58,15,201,8 1182.byte 102,69,15,58,15,237,4 1183 paddd %xmm6,%xmm2 1184 pxor %xmm2,%xmm14 1185 pshufb .Lrol16(%rip),%xmm14 1186 paddd %xmm14,%xmm10 1187 pxor %xmm10,%xmm6 1188 movdqa %xmm6,%xmm3 1189 pslld $12,%xmm3 1190 psrld $20,%xmm6 1191 pxor %xmm3,%xmm6 1192 paddd %xmm6,%xmm2 1193 pxor %xmm2,%xmm14 1194 pshufb .Lrol8(%rip),%xmm14 1195 paddd %xmm14,%xmm10 1196 pxor %xmm10,%xmm6 1197 movdqa %xmm6,%xmm3 1198 pslld $7,%xmm3 1199 psrld $25,%xmm6 1200 pxor %xmm3,%xmm6 1201.byte 102,15,58,15,246,12 1202.byte 102,69,15,58,15,210,8 1203.byte 102,69,15,58,15,246,4 1204 1205 cmpq %rcx,%r8 1206 jb .Lopen_sse_tail_192_rounds_and_x1hash 1207 
cmpq $160,%r8 1208 jne .Lopen_sse_tail_192_rounds 1209 cmpq $176,%rbx 1210 jb .Lopen_sse_tail_192_finish 1211 addq 0+160(%rsi),%r10 1212 adcq 8+160(%rsi),%r11 1213 adcq $1,%r12 1214 movq 0+0+0(%rbp),%rax 1215 movq %rax,%r15 1216 mulq %r10 1217 movq %rax,%r13 1218 movq %rdx,%r14 1219 movq 0+0+0(%rbp),%rax 1220 mulq %r11 1221 imulq %r12,%r15 1222 addq %rax,%r14 1223 adcq %rdx,%r15 1224 movq 8+0+0(%rbp),%rax 1225 movq %rax,%r9 1226 mulq %r10 1227 addq %rax,%r14 1228 adcq $0,%rdx 1229 movq %rdx,%r10 1230 movq 8+0+0(%rbp),%rax 1231 mulq %r11 1232 addq %rax,%r15 1233 adcq $0,%rdx 1234 imulq %r12,%r9 1235 addq %r10,%r15 1236 adcq %rdx,%r9 1237 movq %r13,%r10 1238 movq %r14,%r11 1239 movq %r15,%r12 1240 andq $3,%r12 1241 movq %r15,%r13 1242 andq $-4,%r13 1243 movq %r9,%r14 1244 shrdq $2,%r9,%r15 1245 shrq $2,%r9 1246 addq %r13,%r15 1247 adcq %r14,%r9 1248 addq %r15,%r10 1249 adcq %r9,%r11 1250 adcq $0,%r12 1251 1252 cmpq $192,%rbx 1253 jb .Lopen_sse_tail_192_finish 1254 addq 0+176(%rsi),%r10 1255 adcq 8+176(%rsi),%r11 1256 adcq $1,%r12 1257 movq 0+0+0(%rbp),%rax 1258 movq %rax,%r15 1259 mulq %r10 1260 movq %rax,%r13 1261 movq %rdx,%r14 1262 movq 0+0+0(%rbp),%rax 1263 mulq %r11 1264 imulq %r12,%r15 1265 addq %rax,%r14 1266 adcq %rdx,%r15 1267 movq 8+0+0(%rbp),%rax 1268 movq %rax,%r9 1269 mulq %r10 1270 addq %rax,%r14 1271 adcq $0,%rdx 1272 movq %rdx,%r10 1273 movq 8+0+0(%rbp),%rax 1274 mulq %r11 1275 addq %rax,%r15 1276 adcq $0,%rdx 1277 imulq %r12,%r9 1278 addq %r10,%r15 1279 adcq %rdx,%r9 1280 movq %r13,%r10 1281 movq %r14,%r11 1282 movq %r15,%r12 1283 andq $3,%r12 1284 movq %r15,%r13 1285 andq $-4,%r13 1286 movq %r9,%r14 1287 shrdq $2,%r9,%r15 1288 shrq $2,%r9 1289 addq %r13,%r15 1290 adcq %r14,%r9 1291 addq %r15,%r10 1292 adcq %r9,%r11 1293 adcq $0,%r12 1294 1295.Lopen_sse_tail_192_finish: 1296 paddd .Lchacha20_consts(%rip),%xmm2 1297 paddd 0+48(%rbp),%xmm6 1298 paddd 0+64(%rbp),%xmm10 1299 paddd 0+128(%rbp),%xmm14 1300 paddd .Lchacha20_consts(%rip),%xmm1 1301 paddd 0+48(%rbp),%xmm5 1302 paddd 0+64(%rbp),%xmm9 1303 paddd 0+112(%rbp),%xmm13 1304 paddd .Lchacha20_consts(%rip),%xmm0 1305 paddd 0+48(%rbp),%xmm4 1306 paddd 0+64(%rbp),%xmm8 1307 paddd 0+96(%rbp),%xmm12 1308 movdqu 0 + 0(%rsi),%xmm3 1309 movdqu 16 + 0(%rsi),%xmm7 1310 movdqu 32 + 0(%rsi),%xmm11 1311 movdqu 48 + 0(%rsi),%xmm15 1312 pxor %xmm3,%xmm2 1313 pxor %xmm7,%xmm6 1314 pxor %xmm11,%xmm10 1315 pxor %xmm14,%xmm15 1316 movdqu %xmm2,0 + 0(%rdi) 1317 movdqu %xmm6,16 + 0(%rdi) 1318 movdqu %xmm10,32 + 0(%rdi) 1319 movdqu %xmm15,48 + 0(%rdi) 1320 movdqu 0 + 64(%rsi),%xmm3 1321 movdqu 16 + 64(%rsi),%xmm7 1322 movdqu 32 + 64(%rsi),%xmm11 1323 movdqu 48 + 64(%rsi),%xmm15 1324 pxor %xmm3,%xmm1 1325 pxor %xmm7,%xmm5 1326 pxor %xmm11,%xmm9 1327 pxor %xmm13,%xmm15 1328 movdqu %xmm1,0 + 64(%rdi) 1329 movdqu %xmm5,16 + 64(%rdi) 1330 movdqu %xmm9,32 + 64(%rdi) 1331 movdqu %xmm15,48 + 64(%rdi) 1332 1333 subq $128,%rbx 1334 leaq 128(%rsi),%rsi 1335 leaq 128(%rdi),%rdi 1336 jmp .Lopen_sse_tail_64_dec_loop 1337 1338.Lopen_sse_tail_256: 1339 movdqa .Lchacha20_consts(%rip),%xmm0 1340 movdqa 0+48(%rbp),%xmm4 1341 movdqa 0+64(%rbp),%xmm8 1342 movdqa %xmm0,%xmm1 1343 movdqa %xmm4,%xmm5 1344 movdqa %xmm8,%xmm9 1345 movdqa %xmm0,%xmm2 1346 movdqa %xmm4,%xmm6 1347 movdqa %xmm8,%xmm10 1348 movdqa %xmm0,%xmm3 1349 movdqa %xmm4,%xmm7 1350 movdqa %xmm8,%xmm11 1351 movdqa 0+96(%rbp),%xmm15 1352 paddd .Lsse_inc(%rip),%xmm15 1353 movdqa %xmm15,%xmm14 1354 paddd .Lsse_inc(%rip),%xmm14 1355 movdqa %xmm14,%xmm13 1356 paddd .Lsse_inc(%rip),%xmm13 1357 movdqa 
%xmm13,%xmm12 1358 paddd .Lsse_inc(%rip),%xmm12 1359 movdqa %xmm12,0+96(%rbp) 1360 movdqa %xmm13,0+112(%rbp) 1361 movdqa %xmm14,0+128(%rbp) 1362 movdqa %xmm15,0+144(%rbp) 1363 1364 xorq %r8,%r8 1365.Lopen_sse_tail_256_rounds_and_x1hash: 1366 addq 0+0(%rsi,%r8,1),%r10 1367 adcq 8+0(%rsi,%r8,1),%r11 1368 adcq $1,%r12 1369 movdqa %xmm11,0+80(%rbp) 1370 paddd %xmm4,%xmm0 1371 pxor %xmm0,%xmm12 1372 pshufb .Lrol16(%rip),%xmm12 1373 paddd %xmm12,%xmm8 1374 pxor %xmm8,%xmm4 1375 movdqa %xmm4,%xmm11 1376 pslld $12,%xmm11 1377 psrld $20,%xmm4 1378 pxor %xmm11,%xmm4 1379 paddd %xmm4,%xmm0 1380 pxor %xmm0,%xmm12 1381 pshufb .Lrol8(%rip),%xmm12 1382 paddd %xmm12,%xmm8 1383 pxor %xmm8,%xmm4 1384 movdqa %xmm4,%xmm11 1385 pslld $7,%xmm11 1386 psrld $25,%xmm4 1387 pxor %xmm11,%xmm4 1388.byte 102,15,58,15,228,4 1389.byte 102,69,15,58,15,192,8 1390.byte 102,69,15,58,15,228,12 1391 paddd %xmm5,%xmm1 1392 pxor %xmm1,%xmm13 1393 pshufb .Lrol16(%rip),%xmm13 1394 paddd %xmm13,%xmm9 1395 pxor %xmm9,%xmm5 1396 movdqa %xmm5,%xmm11 1397 pslld $12,%xmm11 1398 psrld $20,%xmm5 1399 pxor %xmm11,%xmm5 1400 paddd %xmm5,%xmm1 1401 pxor %xmm1,%xmm13 1402 pshufb .Lrol8(%rip),%xmm13 1403 paddd %xmm13,%xmm9 1404 pxor %xmm9,%xmm5 1405 movdqa %xmm5,%xmm11 1406 pslld $7,%xmm11 1407 psrld $25,%xmm5 1408 pxor %xmm11,%xmm5 1409.byte 102,15,58,15,237,4 1410.byte 102,69,15,58,15,201,8 1411.byte 102,69,15,58,15,237,12 1412 paddd %xmm6,%xmm2 1413 pxor %xmm2,%xmm14 1414 pshufb .Lrol16(%rip),%xmm14 1415 paddd %xmm14,%xmm10 1416 pxor %xmm10,%xmm6 1417 movdqa %xmm6,%xmm11 1418 pslld $12,%xmm11 1419 psrld $20,%xmm6 1420 pxor %xmm11,%xmm6 1421 paddd %xmm6,%xmm2 1422 pxor %xmm2,%xmm14 1423 pshufb .Lrol8(%rip),%xmm14 1424 paddd %xmm14,%xmm10 1425 pxor %xmm10,%xmm6 1426 movdqa %xmm6,%xmm11 1427 pslld $7,%xmm11 1428 psrld $25,%xmm6 1429 pxor %xmm11,%xmm6 1430.byte 102,15,58,15,246,4 1431.byte 102,69,15,58,15,210,8 1432.byte 102,69,15,58,15,246,12 1433 movdqa 0+80(%rbp),%xmm11 1434 movq 0+0+0(%rbp),%rax 1435 movq %rax,%r15 1436 mulq %r10 1437 movq %rax,%r13 1438 movq %rdx,%r14 1439 movq 0+0+0(%rbp),%rax 1440 mulq %r11 1441 imulq %r12,%r15 1442 addq %rax,%r14 1443 adcq %rdx,%r15 1444 movdqa %xmm9,0+80(%rbp) 1445 paddd %xmm7,%xmm3 1446 pxor %xmm3,%xmm15 1447 pshufb .Lrol16(%rip),%xmm15 1448 paddd %xmm15,%xmm11 1449 pxor %xmm11,%xmm7 1450 movdqa %xmm7,%xmm9 1451 pslld $12,%xmm9 1452 psrld $20,%xmm7 1453 pxor %xmm9,%xmm7 1454 paddd %xmm7,%xmm3 1455 pxor %xmm3,%xmm15 1456 pshufb .Lrol8(%rip),%xmm15 1457 paddd %xmm15,%xmm11 1458 pxor %xmm11,%xmm7 1459 movdqa %xmm7,%xmm9 1460 pslld $7,%xmm9 1461 psrld $25,%xmm7 1462 pxor %xmm9,%xmm7 1463.byte 102,15,58,15,255,4 1464.byte 102,69,15,58,15,219,8 1465.byte 102,69,15,58,15,255,12 1466 movdqa 0+80(%rbp),%xmm9 1467 movq 8+0+0(%rbp),%rax 1468 movq %rax,%r9 1469 mulq %r10 1470 addq %rax,%r14 1471 adcq $0,%rdx 1472 movq %rdx,%r10 1473 movq 8+0+0(%rbp),%rax 1474 mulq %r11 1475 addq %rax,%r15 1476 adcq $0,%rdx 1477 movdqa %xmm11,0+80(%rbp) 1478 paddd %xmm4,%xmm0 1479 pxor %xmm0,%xmm12 1480 pshufb .Lrol16(%rip),%xmm12 1481 paddd %xmm12,%xmm8 1482 pxor %xmm8,%xmm4 1483 movdqa %xmm4,%xmm11 1484 pslld $12,%xmm11 1485 psrld $20,%xmm4 1486 pxor %xmm11,%xmm4 1487 paddd %xmm4,%xmm0 1488 pxor %xmm0,%xmm12 1489 pshufb .Lrol8(%rip),%xmm12 1490 paddd %xmm12,%xmm8 1491 pxor %xmm8,%xmm4 1492 movdqa %xmm4,%xmm11 1493 pslld $7,%xmm11 1494 psrld $25,%xmm4 1495 pxor %xmm11,%xmm4 1496.byte 102,15,58,15,228,12 1497.byte 102,69,15,58,15,192,8 1498.byte 102,69,15,58,15,228,4 1499 paddd %xmm5,%xmm1 1500 pxor %xmm1,%xmm13 1501 pshufb 
.Lrol16(%rip),%xmm13 1502 paddd %xmm13,%xmm9 1503 pxor %xmm9,%xmm5 1504 movdqa %xmm5,%xmm11 1505 pslld $12,%xmm11 1506 psrld $20,%xmm5 1507 pxor %xmm11,%xmm5 1508 paddd %xmm5,%xmm1 1509 pxor %xmm1,%xmm13 1510 pshufb .Lrol8(%rip),%xmm13 1511 paddd %xmm13,%xmm9 1512 pxor %xmm9,%xmm5 1513 movdqa %xmm5,%xmm11 1514 pslld $7,%xmm11 1515 psrld $25,%xmm5 1516 pxor %xmm11,%xmm5 1517.byte 102,15,58,15,237,12 1518.byte 102,69,15,58,15,201,8 1519.byte 102,69,15,58,15,237,4 1520 imulq %r12,%r9 1521 addq %r10,%r15 1522 adcq %rdx,%r9 1523 paddd %xmm6,%xmm2 1524 pxor %xmm2,%xmm14 1525 pshufb .Lrol16(%rip),%xmm14 1526 paddd %xmm14,%xmm10 1527 pxor %xmm10,%xmm6 1528 movdqa %xmm6,%xmm11 1529 pslld $12,%xmm11 1530 psrld $20,%xmm6 1531 pxor %xmm11,%xmm6 1532 paddd %xmm6,%xmm2 1533 pxor %xmm2,%xmm14 1534 pshufb .Lrol8(%rip),%xmm14 1535 paddd %xmm14,%xmm10 1536 pxor %xmm10,%xmm6 1537 movdqa %xmm6,%xmm11 1538 pslld $7,%xmm11 1539 psrld $25,%xmm6 1540 pxor %xmm11,%xmm6 1541.byte 102,15,58,15,246,12 1542.byte 102,69,15,58,15,210,8 1543.byte 102,69,15,58,15,246,4 1544 movdqa 0+80(%rbp),%xmm11 1545 movq %r13,%r10 1546 movq %r14,%r11 1547 movq %r15,%r12 1548 andq $3,%r12 1549 movq %r15,%r13 1550 andq $-4,%r13 1551 movq %r9,%r14 1552 shrdq $2,%r9,%r15 1553 shrq $2,%r9 1554 addq %r13,%r15 1555 adcq %r14,%r9 1556 addq %r15,%r10 1557 adcq %r9,%r11 1558 adcq $0,%r12 1559 movdqa %xmm9,0+80(%rbp) 1560 paddd %xmm7,%xmm3 1561 pxor %xmm3,%xmm15 1562 pshufb .Lrol16(%rip),%xmm15 1563 paddd %xmm15,%xmm11 1564 pxor %xmm11,%xmm7 1565 movdqa %xmm7,%xmm9 1566 pslld $12,%xmm9 1567 psrld $20,%xmm7 1568 pxor %xmm9,%xmm7 1569 paddd %xmm7,%xmm3 1570 pxor %xmm3,%xmm15 1571 pshufb .Lrol8(%rip),%xmm15 1572 paddd %xmm15,%xmm11 1573 pxor %xmm11,%xmm7 1574 movdqa %xmm7,%xmm9 1575 pslld $7,%xmm9 1576 psrld $25,%xmm7 1577 pxor %xmm9,%xmm7 1578.byte 102,15,58,15,255,12 1579.byte 102,69,15,58,15,219,8 1580.byte 102,69,15,58,15,255,4 1581 movdqa 0+80(%rbp),%xmm9 1582 1583 addq $16,%r8 1584 cmpq $160,%r8 1585 jb .Lopen_sse_tail_256_rounds_and_x1hash 1586 1587 movq %rbx,%rcx 1588 andq $-16,%rcx 1589.Lopen_sse_tail_256_hash: 1590 addq 0+0(%rsi,%r8,1),%r10 1591 adcq 8+0(%rsi,%r8,1),%r11 1592 adcq $1,%r12 1593 movq 0+0+0(%rbp),%rax 1594 movq %rax,%r15 1595 mulq %r10 1596 movq %rax,%r13 1597 movq %rdx,%r14 1598 movq 0+0+0(%rbp),%rax 1599 mulq %r11 1600 imulq %r12,%r15 1601 addq %rax,%r14 1602 adcq %rdx,%r15 1603 movq 8+0+0(%rbp),%rax 1604 movq %rax,%r9 1605 mulq %r10 1606 addq %rax,%r14 1607 adcq $0,%rdx 1608 movq %rdx,%r10 1609 movq 8+0+0(%rbp),%rax 1610 mulq %r11 1611 addq %rax,%r15 1612 adcq $0,%rdx 1613 imulq %r12,%r9 1614 addq %r10,%r15 1615 adcq %rdx,%r9 1616 movq %r13,%r10 1617 movq %r14,%r11 1618 movq %r15,%r12 1619 andq $3,%r12 1620 movq %r15,%r13 1621 andq $-4,%r13 1622 movq %r9,%r14 1623 shrdq $2,%r9,%r15 1624 shrq $2,%r9 1625 addq %r13,%r15 1626 adcq %r14,%r9 1627 addq %r15,%r10 1628 adcq %r9,%r11 1629 adcq $0,%r12 1630 1631 addq $16,%r8 1632 cmpq %rcx,%r8 1633 jb .Lopen_sse_tail_256_hash 1634 paddd .Lchacha20_consts(%rip),%xmm3 1635 paddd 0+48(%rbp),%xmm7 1636 paddd 0+64(%rbp),%xmm11 1637 paddd 0+144(%rbp),%xmm15 1638 paddd .Lchacha20_consts(%rip),%xmm2 1639 paddd 0+48(%rbp),%xmm6 1640 paddd 0+64(%rbp),%xmm10 1641 paddd 0+128(%rbp),%xmm14 1642 paddd .Lchacha20_consts(%rip),%xmm1 1643 paddd 0+48(%rbp),%xmm5 1644 paddd 0+64(%rbp),%xmm9 1645 paddd 0+112(%rbp),%xmm13 1646 paddd .Lchacha20_consts(%rip),%xmm0 1647 paddd 0+48(%rbp),%xmm4 1648 paddd 0+64(%rbp),%xmm8 1649 paddd 0+96(%rbp),%xmm12 1650 movdqa %xmm12,0+80(%rbp) 1651 movdqu 0 + 
0(%rsi),%xmm12 1652 pxor %xmm3,%xmm12 1653 movdqu %xmm12,0 + 0(%rdi) 1654 movdqu 16 + 0(%rsi),%xmm12 1655 pxor %xmm7,%xmm12 1656 movdqu %xmm12,16 + 0(%rdi) 1657 movdqu 32 + 0(%rsi),%xmm12 1658 pxor %xmm11,%xmm12 1659 movdqu %xmm12,32 + 0(%rdi) 1660 movdqu 48 + 0(%rsi),%xmm12 1661 pxor %xmm15,%xmm12 1662 movdqu %xmm12,48 + 0(%rdi) 1663 movdqu 0 + 64(%rsi),%xmm3 1664 movdqu 16 + 64(%rsi),%xmm7 1665 movdqu 32 + 64(%rsi),%xmm11 1666 movdqu 48 + 64(%rsi),%xmm15 1667 pxor %xmm3,%xmm2 1668 pxor %xmm7,%xmm6 1669 pxor %xmm11,%xmm10 1670 pxor %xmm14,%xmm15 1671 movdqu %xmm2,0 + 64(%rdi) 1672 movdqu %xmm6,16 + 64(%rdi) 1673 movdqu %xmm10,32 + 64(%rdi) 1674 movdqu %xmm15,48 + 64(%rdi) 1675 movdqu 0 + 128(%rsi),%xmm3 1676 movdqu 16 + 128(%rsi),%xmm7 1677 movdqu 32 + 128(%rsi),%xmm11 1678 movdqu 48 + 128(%rsi),%xmm15 1679 pxor %xmm3,%xmm1 1680 pxor %xmm7,%xmm5 1681 pxor %xmm11,%xmm9 1682 pxor %xmm13,%xmm15 1683 movdqu %xmm1,0 + 128(%rdi) 1684 movdqu %xmm5,16 + 128(%rdi) 1685 movdqu %xmm9,32 + 128(%rdi) 1686 movdqu %xmm15,48 + 128(%rdi) 1687 1688 movdqa 0+80(%rbp),%xmm12 1689 subq $192,%rbx 1690 leaq 192(%rsi),%rsi 1691 leaq 192(%rdi),%rdi 1692 1693 1694.Lopen_sse_tail_64_dec_loop: 1695 cmpq $16,%rbx 1696 jb .Lopen_sse_tail_16_init 1697 subq $16,%rbx 1698 movdqu (%rsi),%xmm3 1699 pxor %xmm3,%xmm0 1700 movdqu %xmm0,(%rdi) 1701 leaq 16(%rsi),%rsi 1702 leaq 16(%rdi),%rdi 1703 movdqa %xmm4,%xmm0 1704 movdqa %xmm8,%xmm4 1705 movdqa %xmm12,%xmm8 1706 jmp .Lopen_sse_tail_64_dec_loop 1707.Lopen_sse_tail_16_init: 1708 movdqa %xmm0,%xmm1 1709 1710 1711.Lopen_sse_tail_16: 1712 testq %rbx,%rbx 1713 jz .Lopen_sse_finalize 1714 1715 1716 1717 pxor %xmm3,%xmm3 1718 leaq -1(%rsi,%rbx,1),%rsi 1719 movq %rbx,%r8 1720.Lopen_sse_tail_16_compose: 1721 pslldq $1,%xmm3 1722 pinsrb $0,(%rsi),%xmm3 1723 subq $1,%rsi 1724 subq $1,%r8 1725 jnz .Lopen_sse_tail_16_compose 1726 1727.byte 102,73,15,126,221 1728 pextrq $1,%xmm3,%r14 1729 1730 pxor %xmm1,%xmm3 1731 1732 1733.Lopen_sse_tail_16_extract: 1734 pextrb $0,%xmm3,(%rdi) 1735 psrldq $1,%xmm3 1736 addq $1,%rdi 1737 subq $1,%rbx 1738 jne .Lopen_sse_tail_16_extract 1739 1740 addq %r13,%r10 1741 adcq %r14,%r11 1742 adcq $1,%r12 1743 movq 0+0+0(%rbp),%rax 1744 movq %rax,%r15 1745 mulq %r10 1746 movq %rax,%r13 1747 movq %rdx,%r14 1748 movq 0+0+0(%rbp),%rax 1749 mulq %r11 1750 imulq %r12,%r15 1751 addq %rax,%r14 1752 adcq %rdx,%r15 1753 movq 8+0+0(%rbp),%rax 1754 movq %rax,%r9 1755 mulq %r10 1756 addq %rax,%r14 1757 adcq $0,%rdx 1758 movq %rdx,%r10 1759 movq 8+0+0(%rbp),%rax 1760 mulq %r11 1761 addq %rax,%r15 1762 adcq $0,%rdx 1763 imulq %r12,%r9 1764 addq %r10,%r15 1765 adcq %rdx,%r9 1766 movq %r13,%r10 1767 movq %r14,%r11 1768 movq %r15,%r12 1769 andq $3,%r12 1770 movq %r15,%r13 1771 andq $-4,%r13 1772 movq %r9,%r14 1773 shrdq $2,%r9,%r15 1774 shrq $2,%r9 1775 addq %r13,%r15 1776 adcq %r14,%r9 1777 addq %r15,%r10 1778 adcq %r9,%r11 1779 adcq $0,%r12 1780 1781 1782.Lopen_sse_finalize: 1783 addq 0+0+32(%rbp),%r10 1784 adcq 8+0+32(%rbp),%r11 1785 adcq $1,%r12 1786 movq 0+0+0(%rbp),%rax 1787 movq %rax,%r15 1788 mulq %r10 1789 movq %rax,%r13 1790 movq %rdx,%r14 1791 movq 0+0+0(%rbp),%rax 1792 mulq %r11 1793 imulq %r12,%r15 1794 addq %rax,%r14 1795 adcq %rdx,%r15 1796 movq 8+0+0(%rbp),%rax 1797 movq %rax,%r9 1798 mulq %r10 1799 addq %rax,%r14 1800 adcq $0,%rdx 1801 movq %rdx,%r10 1802 movq 8+0+0(%rbp),%rax 1803 mulq %r11 1804 addq %rax,%r15 1805 adcq $0,%rdx 1806 imulq %r12,%r9 1807 addq %r10,%r15 1808 adcq %rdx,%r9 1809 movq %r13,%r10 1810 movq %r14,%r11 1811 movq %r15,%r12 1812 andq 
$3,%r12 1813 movq %r15,%r13 1814 andq $-4,%r13 1815 movq %r9,%r14 1816 shrdq $2,%r9,%r15 1817 shrq $2,%r9 1818 addq %r13,%r15 1819 adcq %r14,%r9 1820 addq %r15,%r10 1821 adcq %r9,%r11 1822 adcq $0,%r12 1823 1824 1825 movq %r10,%r13 1826 movq %r11,%r14 1827 movq %r12,%r15 1828 subq $-5,%r10 1829 sbbq $-1,%r11 1830 sbbq $3,%r12 1831 cmovcq %r13,%r10 1832 cmovcq %r14,%r11 1833 cmovcq %r15,%r12 1834 1835 addq 0+0+16(%rbp),%r10 1836 adcq 8+0+16(%rbp),%r11 1837 1838.cfi_remember_state 1839 addq $288 + 0 + 32,%rsp 1840.cfi_adjust_cfa_offset -(288 + 32) 1841 1842 popq %r9 1843.cfi_adjust_cfa_offset -8 1844.cfi_restore %r9 1845 movq %r10,(%r9) 1846 movq %r11,8(%r9) 1847 popq %r15 1848.cfi_adjust_cfa_offset -8 1849.cfi_restore %r15 1850 popq %r14 1851.cfi_adjust_cfa_offset -8 1852.cfi_restore %r14 1853 popq %r13 1854.cfi_adjust_cfa_offset -8 1855.cfi_restore %r13 1856 popq %r12 1857.cfi_adjust_cfa_offset -8 1858.cfi_restore %r12 1859 popq %rbx 1860.cfi_adjust_cfa_offset -8 1861.cfi_restore %rbx 1862 popq %rbp 1863.cfi_adjust_cfa_offset -8 1864.cfi_restore %rbp 1865 ret 1866 1867.Lopen_sse_128: 1868.cfi_restore_state 1869 movdqu .Lchacha20_consts(%rip),%xmm0 1870 movdqa %xmm0,%xmm1 1871 movdqa %xmm0,%xmm2 1872 movdqu 0(%r9),%xmm4 1873 movdqa %xmm4,%xmm5 1874 movdqa %xmm4,%xmm6 1875 movdqu 16(%r9),%xmm8 1876 movdqa %xmm8,%xmm9 1877 movdqa %xmm8,%xmm10 1878 movdqu 32(%r9),%xmm12 1879 movdqa %xmm12,%xmm13 1880 paddd .Lsse_inc(%rip),%xmm13 1881 movdqa %xmm13,%xmm14 1882 paddd .Lsse_inc(%rip),%xmm14 1883 movdqa %xmm4,%xmm7 1884 movdqa %xmm8,%xmm11 1885 movdqa %xmm13,%xmm15 1886 movq $10,%r10 1887 1888.Lopen_sse_128_rounds: 1889 paddd %xmm4,%xmm0 1890 pxor %xmm0,%xmm12 1891 pshufb .Lrol16(%rip),%xmm12 1892 paddd %xmm12,%xmm8 1893 pxor %xmm8,%xmm4 1894 movdqa %xmm4,%xmm3 1895 pslld $12,%xmm3 1896 psrld $20,%xmm4 1897 pxor %xmm3,%xmm4 1898 paddd %xmm4,%xmm0 1899 pxor %xmm0,%xmm12 1900 pshufb .Lrol8(%rip),%xmm12 1901 paddd %xmm12,%xmm8 1902 pxor %xmm8,%xmm4 1903 movdqa %xmm4,%xmm3 1904 pslld $7,%xmm3 1905 psrld $25,%xmm4 1906 pxor %xmm3,%xmm4 1907.byte 102,15,58,15,228,4 1908.byte 102,69,15,58,15,192,8 1909.byte 102,69,15,58,15,228,12 1910 paddd %xmm5,%xmm1 1911 pxor %xmm1,%xmm13 1912 pshufb .Lrol16(%rip),%xmm13 1913 paddd %xmm13,%xmm9 1914 pxor %xmm9,%xmm5 1915 movdqa %xmm5,%xmm3 1916 pslld $12,%xmm3 1917 psrld $20,%xmm5 1918 pxor %xmm3,%xmm5 1919 paddd %xmm5,%xmm1 1920 pxor %xmm1,%xmm13 1921 pshufb .Lrol8(%rip),%xmm13 1922 paddd %xmm13,%xmm9 1923 pxor %xmm9,%xmm5 1924 movdqa %xmm5,%xmm3 1925 pslld $7,%xmm3 1926 psrld $25,%xmm5 1927 pxor %xmm3,%xmm5 1928.byte 102,15,58,15,237,4 1929.byte 102,69,15,58,15,201,8 1930.byte 102,69,15,58,15,237,12 1931 paddd %xmm6,%xmm2 1932 pxor %xmm2,%xmm14 1933 pshufb .Lrol16(%rip),%xmm14 1934 paddd %xmm14,%xmm10 1935 pxor %xmm10,%xmm6 1936 movdqa %xmm6,%xmm3 1937 pslld $12,%xmm3 1938 psrld $20,%xmm6 1939 pxor %xmm3,%xmm6 1940 paddd %xmm6,%xmm2 1941 pxor %xmm2,%xmm14 1942 pshufb .Lrol8(%rip),%xmm14 1943 paddd %xmm14,%xmm10 1944 pxor %xmm10,%xmm6 1945 movdqa %xmm6,%xmm3 1946 pslld $7,%xmm3 1947 psrld $25,%xmm6 1948 pxor %xmm3,%xmm6 1949.byte 102,15,58,15,246,4 1950.byte 102,69,15,58,15,210,8 1951.byte 102,69,15,58,15,246,12 1952 paddd %xmm4,%xmm0 1953 pxor %xmm0,%xmm12 1954 pshufb .Lrol16(%rip),%xmm12 1955 paddd %xmm12,%xmm8 1956 pxor %xmm8,%xmm4 1957 movdqa %xmm4,%xmm3 1958 pslld $12,%xmm3 1959 psrld $20,%xmm4 1960 pxor %xmm3,%xmm4 1961 paddd %xmm4,%xmm0 1962 pxor %xmm0,%xmm12 1963 pshufb .Lrol8(%rip),%xmm12 1964 paddd %xmm12,%xmm8 1965 pxor %xmm8,%xmm4 1966 movdqa %xmm4,%xmm3 
1967 pslld $7,%xmm3 1968 psrld $25,%xmm4 1969 pxor %xmm3,%xmm4 1970.byte 102,15,58,15,228,12 1971.byte 102,69,15,58,15,192,8 1972.byte 102,69,15,58,15,228,4 1973 paddd %xmm5,%xmm1 1974 pxor %xmm1,%xmm13 1975 pshufb .Lrol16(%rip),%xmm13 1976 paddd %xmm13,%xmm9 1977 pxor %xmm9,%xmm5 1978 movdqa %xmm5,%xmm3 1979 pslld $12,%xmm3 1980 psrld $20,%xmm5 1981 pxor %xmm3,%xmm5 1982 paddd %xmm5,%xmm1 1983 pxor %xmm1,%xmm13 1984 pshufb .Lrol8(%rip),%xmm13 1985 paddd %xmm13,%xmm9 1986 pxor %xmm9,%xmm5 1987 movdqa %xmm5,%xmm3 1988 pslld $7,%xmm3 1989 psrld $25,%xmm5 1990 pxor %xmm3,%xmm5 1991.byte 102,15,58,15,237,12 1992.byte 102,69,15,58,15,201,8 1993.byte 102,69,15,58,15,237,4 1994 paddd %xmm6,%xmm2 1995 pxor %xmm2,%xmm14 1996 pshufb .Lrol16(%rip),%xmm14 1997 paddd %xmm14,%xmm10 1998 pxor %xmm10,%xmm6 1999 movdqa %xmm6,%xmm3 2000 pslld $12,%xmm3 2001 psrld $20,%xmm6 2002 pxor %xmm3,%xmm6 2003 paddd %xmm6,%xmm2 2004 pxor %xmm2,%xmm14 2005 pshufb .Lrol8(%rip),%xmm14 2006 paddd %xmm14,%xmm10 2007 pxor %xmm10,%xmm6 2008 movdqa %xmm6,%xmm3 2009 pslld $7,%xmm3 2010 psrld $25,%xmm6 2011 pxor %xmm3,%xmm6 2012.byte 102,15,58,15,246,12 2013.byte 102,69,15,58,15,210,8 2014.byte 102,69,15,58,15,246,4 2015 2016 decq %r10 2017 jnz .Lopen_sse_128_rounds 2018 paddd .Lchacha20_consts(%rip),%xmm0 2019 paddd .Lchacha20_consts(%rip),%xmm1 2020 paddd .Lchacha20_consts(%rip),%xmm2 2021 paddd %xmm7,%xmm4 2022 paddd %xmm7,%xmm5 2023 paddd %xmm7,%xmm6 2024 paddd %xmm11,%xmm9 2025 paddd %xmm11,%xmm10 2026 paddd %xmm15,%xmm13 2027 paddd .Lsse_inc(%rip),%xmm15 2028 paddd %xmm15,%xmm14 2029 2030 pand .Lclamp(%rip),%xmm0 2031 movdqa %xmm0,0+0(%rbp) 2032 movdqa %xmm4,0+16(%rbp) 2033 2034 movq %r8,%r8 2035 call poly_hash_ad_internal 2036.Lopen_sse_128_xor_hash: 2037 cmpq $16,%rbx 2038 jb .Lopen_sse_tail_16 2039 subq $16,%rbx 2040 addq 0+0(%rsi),%r10 2041 adcq 8+0(%rsi),%r11 2042 adcq $1,%r12 2043 2044 2045 movdqu 0(%rsi),%xmm3 2046 pxor %xmm3,%xmm1 2047 movdqu %xmm1,0(%rdi) 2048 leaq 16(%rsi),%rsi 2049 leaq 16(%rdi),%rdi 2050 movq 0+0+0(%rbp),%rax 2051 movq %rax,%r15 2052 mulq %r10 2053 movq %rax,%r13 2054 movq %rdx,%r14 2055 movq 0+0+0(%rbp),%rax 2056 mulq %r11 2057 imulq %r12,%r15 2058 addq %rax,%r14 2059 adcq %rdx,%r15 2060 movq 8+0+0(%rbp),%rax 2061 movq %rax,%r9 2062 mulq %r10 2063 addq %rax,%r14 2064 adcq $0,%rdx 2065 movq %rdx,%r10 2066 movq 8+0+0(%rbp),%rax 2067 mulq %r11 2068 addq %rax,%r15 2069 adcq $0,%rdx 2070 imulq %r12,%r9 2071 addq %r10,%r15 2072 adcq %rdx,%r9 2073 movq %r13,%r10 2074 movq %r14,%r11 2075 movq %r15,%r12 2076 andq $3,%r12 2077 movq %r15,%r13 2078 andq $-4,%r13 2079 movq %r9,%r14 2080 shrdq $2,%r9,%r15 2081 shrq $2,%r9 2082 addq %r13,%r15 2083 adcq %r14,%r9 2084 addq %r15,%r10 2085 adcq %r9,%r11 2086 adcq $0,%r12 2087 2088 2089 movdqa %xmm5,%xmm1 2090 movdqa %xmm9,%xmm5 2091 movdqa %xmm13,%xmm9 2092 movdqa %xmm2,%xmm13 2093 movdqa %xmm6,%xmm2 2094 movdqa %xmm10,%xmm6 2095 movdqa %xmm14,%xmm10 2096 jmp .Lopen_sse_128_xor_hash 2097.size chacha20_poly1305_open, .-chacha20_poly1305_open 2098.cfi_endproc 2099 2100 2101 2102 2103 2104 2105 2106.globl chacha20_poly1305_seal 2107.hidden chacha20_poly1305_seal 2108.type chacha20_poly1305_seal,@function 2109.align 64 2110chacha20_poly1305_seal: 2111.cfi_startproc 2112_CET_ENDBR 2113 pushq %rbp 2114.cfi_adjust_cfa_offset 8 2115.cfi_offset %rbp,-16 2116 pushq %rbx 2117.cfi_adjust_cfa_offset 8 2118.cfi_offset %rbx,-24 2119 pushq %r12 2120.cfi_adjust_cfa_offset 8 2121.cfi_offset %r12,-32 2122 pushq %r13 2123.cfi_adjust_cfa_offset 8 2124.cfi_offset %r13,-40 2125 pushq 
%r14 2126.cfi_adjust_cfa_offset 8 2127.cfi_offset %r14,-48 2128 pushq %r15 2129.cfi_adjust_cfa_offset 8 2130.cfi_offset %r15,-56 2131 2132 2133 pushq %r9 2134.cfi_adjust_cfa_offset 8 2135.cfi_offset %r9,-64 2136 subq $288 + 0 + 32,%rsp 2137.cfi_adjust_cfa_offset 288 + 32 2138 leaq 32(%rsp),%rbp 2139 andq $-32,%rbp 2140 2141 movq 56(%r9),%rbx 2142 addq %rdx,%rbx 2143 movq %r8,0+0+32(%rbp) 2144 movq %rbx,8+0+32(%rbp) 2145 movq %rdx,%rbx 2146 2147 movl OPENSSL_ia32cap_P+8(%rip),%eax 2148 andl $288,%eax 2149 xorl $288,%eax 2150 jz chacha20_poly1305_seal_avx2 2151 2152 cmpq $128,%rbx 2153 jbe .Lseal_sse_128 2154 2155 movdqa .Lchacha20_consts(%rip),%xmm0 2156 movdqu 0(%r9),%xmm4 2157 movdqu 16(%r9),%xmm8 2158 movdqu 32(%r9),%xmm12 2159 2160 movdqa %xmm0,%xmm1 2161 movdqa %xmm0,%xmm2 2162 movdqa %xmm0,%xmm3 2163 movdqa %xmm4,%xmm5 2164 movdqa %xmm4,%xmm6 2165 movdqa %xmm4,%xmm7 2166 movdqa %xmm8,%xmm9 2167 movdqa %xmm8,%xmm10 2168 movdqa %xmm8,%xmm11 2169 movdqa %xmm12,%xmm15 2170 paddd .Lsse_inc(%rip),%xmm12 2171 movdqa %xmm12,%xmm14 2172 paddd .Lsse_inc(%rip),%xmm12 2173 movdqa %xmm12,%xmm13 2174 paddd .Lsse_inc(%rip),%xmm12 2175 2176 movdqa %xmm4,0+48(%rbp) 2177 movdqa %xmm8,0+64(%rbp) 2178 movdqa %xmm12,0+96(%rbp) 2179 movdqa %xmm13,0+112(%rbp) 2180 movdqa %xmm14,0+128(%rbp) 2181 movdqa %xmm15,0+144(%rbp) 2182 movq $10,%r10 2183.Lseal_sse_init_rounds: 2184 movdqa %xmm8,0+80(%rbp) 2185 movdqa .Lrol16(%rip),%xmm8 2186 paddd %xmm7,%xmm3 2187 paddd %xmm6,%xmm2 2188 paddd %xmm5,%xmm1 2189 paddd %xmm4,%xmm0 2190 pxor %xmm3,%xmm15 2191 pxor %xmm2,%xmm14 2192 pxor %xmm1,%xmm13 2193 pxor %xmm0,%xmm12 2194.byte 102,69,15,56,0,248 2195.byte 102,69,15,56,0,240 2196.byte 102,69,15,56,0,232 2197.byte 102,69,15,56,0,224 2198 movdqa 0+80(%rbp),%xmm8 2199 paddd %xmm15,%xmm11 2200 paddd %xmm14,%xmm10 2201 paddd %xmm13,%xmm9 2202 paddd %xmm12,%xmm8 2203 pxor %xmm11,%xmm7 2204 pxor %xmm10,%xmm6 2205 pxor %xmm9,%xmm5 2206 pxor %xmm8,%xmm4 2207 movdqa %xmm8,0+80(%rbp) 2208 movdqa %xmm7,%xmm8 2209 psrld $20,%xmm8 2210 pslld $32-20,%xmm7 2211 pxor %xmm8,%xmm7 2212 movdqa %xmm6,%xmm8 2213 psrld $20,%xmm8 2214 pslld $32-20,%xmm6 2215 pxor %xmm8,%xmm6 2216 movdqa %xmm5,%xmm8 2217 psrld $20,%xmm8 2218 pslld $32-20,%xmm5 2219 pxor %xmm8,%xmm5 2220 movdqa %xmm4,%xmm8 2221 psrld $20,%xmm8 2222 pslld $32-20,%xmm4 2223 pxor %xmm8,%xmm4 2224 movdqa .Lrol8(%rip),%xmm8 2225 paddd %xmm7,%xmm3 2226 paddd %xmm6,%xmm2 2227 paddd %xmm5,%xmm1 2228 paddd %xmm4,%xmm0 2229 pxor %xmm3,%xmm15 2230 pxor %xmm2,%xmm14 2231 pxor %xmm1,%xmm13 2232 pxor %xmm0,%xmm12 2233.byte 102,69,15,56,0,248 2234.byte 102,69,15,56,0,240 2235.byte 102,69,15,56,0,232 2236.byte 102,69,15,56,0,224 2237 movdqa 0+80(%rbp),%xmm8 2238 paddd %xmm15,%xmm11 2239 paddd %xmm14,%xmm10 2240 paddd %xmm13,%xmm9 2241 paddd %xmm12,%xmm8 2242 pxor %xmm11,%xmm7 2243 pxor %xmm10,%xmm6 2244 pxor %xmm9,%xmm5 2245 pxor %xmm8,%xmm4 2246 movdqa %xmm8,0+80(%rbp) 2247 movdqa %xmm7,%xmm8 2248 psrld $25,%xmm8 2249 pslld $32-25,%xmm7 2250 pxor %xmm8,%xmm7 2251 movdqa %xmm6,%xmm8 2252 psrld $25,%xmm8 2253 pslld $32-25,%xmm6 2254 pxor %xmm8,%xmm6 2255 movdqa %xmm5,%xmm8 2256 psrld $25,%xmm8 2257 pslld $32-25,%xmm5 2258 pxor %xmm8,%xmm5 2259 movdqa %xmm4,%xmm8 2260 psrld $25,%xmm8 2261 pslld $32-25,%xmm4 2262 pxor %xmm8,%xmm4 2263 movdqa 0+80(%rbp),%xmm8 2264.byte 102,15,58,15,255,4 2265.byte 102,69,15,58,15,219,8 2266.byte 102,69,15,58,15,255,12 2267.byte 102,15,58,15,246,4 2268.byte 102,69,15,58,15,210,8 2269.byte 102,69,15,58,15,246,12 2270.byte 102,15,58,15,237,4 2271.byte 
102,69,15,58,15,201,8 2272.byte 102,69,15,58,15,237,12 2273.byte 102,15,58,15,228,4 2274.byte 102,69,15,58,15,192,8 2275.byte 102,69,15,58,15,228,12 2276 movdqa %xmm8,0+80(%rbp) 2277 movdqa .Lrol16(%rip),%xmm8 2278 paddd %xmm7,%xmm3 2279 paddd %xmm6,%xmm2 2280 paddd %xmm5,%xmm1 2281 paddd %xmm4,%xmm0 2282 pxor %xmm3,%xmm15 2283 pxor %xmm2,%xmm14 2284 pxor %xmm1,%xmm13 2285 pxor %xmm0,%xmm12 2286.byte 102,69,15,56,0,248 2287.byte 102,69,15,56,0,240 2288.byte 102,69,15,56,0,232 2289.byte 102,69,15,56,0,224 2290 movdqa 0+80(%rbp),%xmm8 2291 paddd %xmm15,%xmm11 2292 paddd %xmm14,%xmm10 2293 paddd %xmm13,%xmm9 2294 paddd %xmm12,%xmm8 2295 pxor %xmm11,%xmm7 2296 pxor %xmm10,%xmm6 2297 pxor %xmm9,%xmm5 2298 pxor %xmm8,%xmm4 2299 movdqa %xmm8,0+80(%rbp) 2300 movdqa %xmm7,%xmm8 2301 psrld $20,%xmm8 2302 pslld $32-20,%xmm7 2303 pxor %xmm8,%xmm7 2304 movdqa %xmm6,%xmm8 2305 psrld $20,%xmm8 2306 pslld $32-20,%xmm6 2307 pxor %xmm8,%xmm6 2308 movdqa %xmm5,%xmm8 2309 psrld $20,%xmm8 2310 pslld $32-20,%xmm5 2311 pxor %xmm8,%xmm5 2312 movdqa %xmm4,%xmm8 2313 psrld $20,%xmm8 2314 pslld $32-20,%xmm4 2315 pxor %xmm8,%xmm4 2316 movdqa .Lrol8(%rip),%xmm8 2317 paddd %xmm7,%xmm3 2318 paddd %xmm6,%xmm2 2319 paddd %xmm5,%xmm1 2320 paddd %xmm4,%xmm0 2321 pxor %xmm3,%xmm15 2322 pxor %xmm2,%xmm14 2323 pxor %xmm1,%xmm13 2324 pxor %xmm0,%xmm12 2325.byte 102,69,15,56,0,248 2326.byte 102,69,15,56,0,240 2327.byte 102,69,15,56,0,232 2328.byte 102,69,15,56,0,224 2329 movdqa 0+80(%rbp),%xmm8 2330 paddd %xmm15,%xmm11 2331 paddd %xmm14,%xmm10 2332 paddd %xmm13,%xmm9 2333 paddd %xmm12,%xmm8 2334 pxor %xmm11,%xmm7 2335 pxor %xmm10,%xmm6 2336 pxor %xmm9,%xmm5 2337 pxor %xmm8,%xmm4 2338 movdqa %xmm8,0+80(%rbp) 2339 movdqa %xmm7,%xmm8 2340 psrld $25,%xmm8 2341 pslld $32-25,%xmm7 2342 pxor %xmm8,%xmm7 2343 movdqa %xmm6,%xmm8 2344 psrld $25,%xmm8 2345 pslld $32-25,%xmm6 2346 pxor %xmm8,%xmm6 2347 movdqa %xmm5,%xmm8 2348 psrld $25,%xmm8 2349 pslld $32-25,%xmm5 2350 pxor %xmm8,%xmm5 2351 movdqa %xmm4,%xmm8 2352 psrld $25,%xmm8 2353 pslld $32-25,%xmm4 2354 pxor %xmm8,%xmm4 2355 movdqa 0+80(%rbp),%xmm8 2356.byte 102,15,58,15,255,12 2357.byte 102,69,15,58,15,219,8 2358.byte 102,69,15,58,15,255,4 2359.byte 102,15,58,15,246,12 2360.byte 102,69,15,58,15,210,8 2361.byte 102,69,15,58,15,246,4 2362.byte 102,15,58,15,237,12 2363.byte 102,69,15,58,15,201,8 2364.byte 102,69,15,58,15,237,4 2365.byte 102,15,58,15,228,12 2366.byte 102,69,15,58,15,192,8 2367.byte 102,69,15,58,15,228,4 2368 2369 decq %r10 2370 jnz .Lseal_sse_init_rounds 2371 paddd .Lchacha20_consts(%rip),%xmm3 2372 paddd 0+48(%rbp),%xmm7 2373 paddd 0+64(%rbp),%xmm11 2374 paddd 0+144(%rbp),%xmm15 2375 paddd .Lchacha20_consts(%rip),%xmm2 2376 paddd 0+48(%rbp),%xmm6 2377 paddd 0+64(%rbp),%xmm10 2378 paddd 0+128(%rbp),%xmm14 2379 paddd .Lchacha20_consts(%rip),%xmm1 2380 paddd 0+48(%rbp),%xmm5 2381 paddd 0+64(%rbp),%xmm9 2382 paddd 0+112(%rbp),%xmm13 2383 paddd .Lchacha20_consts(%rip),%xmm0 2384 paddd 0+48(%rbp),%xmm4 2385 paddd 0+64(%rbp),%xmm8 2386 paddd 0+96(%rbp),%xmm12 2387 2388 2389 pand .Lclamp(%rip),%xmm3 2390 movdqa %xmm3,0+0(%rbp) 2391 movdqa %xmm7,0+16(%rbp) 2392 2393 movq %r8,%r8 2394 call poly_hash_ad_internal 2395 movdqu 0 + 0(%rsi),%xmm3 2396 movdqu 16 + 0(%rsi),%xmm7 2397 movdqu 32 + 0(%rsi),%xmm11 2398 movdqu 48 + 0(%rsi),%xmm15 2399 pxor %xmm3,%xmm2 2400 pxor %xmm7,%xmm6 2401 pxor %xmm11,%xmm10 2402 pxor %xmm14,%xmm15 2403 movdqu %xmm2,0 + 0(%rdi) 2404 movdqu %xmm6,16 + 0(%rdi) 2405 movdqu %xmm10,32 + 0(%rdi) 2406 movdqu %xmm15,48 + 0(%rdi) 2407 movdqu 0 + 64(%rsi),%xmm3 
2408 movdqu 16 + 64(%rsi),%xmm7 2409 movdqu 32 + 64(%rsi),%xmm11 2410 movdqu 48 + 64(%rsi),%xmm15 2411 pxor %xmm3,%xmm1 2412 pxor %xmm7,%xmm5 2413 pxor %xmm11,%xmm9 2414 pxor %xmm13,%xmm15 2415 movdqu %xmm1,0 + 64(%rdi) 2416 movdqu %xmm5,16 + 64(%rdi) 2417 movdqu %xmm9,32 + 64(%rdi) 2418 movdqu %xmm15,48 + 64(%rdi) 2419 2420 cmpq $192,%rbx 2421 ja .Lseal_sse_main_init 2422 movq $128,%rcx 2423 subq $128,%rbx 2424 leaq 128(%rsi),%rsi 2425 jmp .Lseal_sse_128_tail_hash 2426.Lseal_sse_main_init: 2427 movdqu 0 + 128(%rsi),%xmm3 2428 movdqu 16 + 128(%rsi),%xmm7 2429 movdqu 32 + 128(%rsi),%xmm11 2430 movdqu 48 + 128(%rsi),%xmm15 2431 pxor %xmm3,%xmm0 2432 pxor %xmm7,%xmm4 2433 pxor %xmm11,%xmm8 2434 pxor %xmm12,%xmm15 2435 movdqu %xmm0,0 + 128(%rdi) 2436 movdqu %xmm4,16 + 128(%rdi) 2437 movdqu %xmm8,32 + 128(%rdi) 2438 movdqu %xmm15,48 + 128(%rdi) 2439 2440 movq $192,%rcx 2441 subq $192,%rbx 2442 leaq 192(%rsi),%rsi 2443 movq $2,%rcx 2444 movq $8,%r8 2445 cmpq $64,%rbx 2446 jbe .Lseal_sse_tail_64 2447 cmpq $128,%rbx 2448 jbe .Lseal_sse_tail_128 2449 cmpq $192,%rbx 2450 jbe .Lseal_sse_tail_192 2451 2452.Lseal_sse_main_loop: 2453 movdqa .Lchacha20_consts(%rip),%xmm0 2454 movdqa 0+48(%rbp),%xmm4 2455 movdqa 0+64(%rbp),%xmm8 2456 movdqa %xmm0,%xmm1 2457 movdqa %xmm4,%xmm5 2458 movdqa %xmm8,%xmm9 2459 movdqa %xmm0,%xmm2 2460 movdqa %xmm4,%xmm6 2461 movdqa %xmm8,%xmm10 2462 movdqa %xmm0,%xmm3 2463 movdqa %xmm4,%xmm7 2464 movdqa %xmm8,%xmm11 2465 movdqa 0+96(%rbp),%xmm15 2466 paddd .Lsse_inc(%rip),%xmm15 2467 movdqa %xmm15,%xmm14 2468 paddd .Lsse_inc(%rip),%xmm14 2469 movdqa %xmm14,%xmm13 2470 paddd .Lsse_inc(%rip),%xmm13 2471 movdqa %xmm13,%xmm12 2472 paddd .Lsse_inc(%rip),%xmm12 2473 movdqa %xmm12,0+96(%rbp) 2474 movdqa %xmm13,0+112(%rbp) 2475 movdqa %xmm14,0+128(%rbp) 2476 movdqa %xmm15,0+144(%rbp) 2477 2478.align 32 2479.Lseal_sse_main_rounds: 2480 movdqa %xmm8,0+80(%rbp) 2481 movdqa .Lrol16(%rip),%xmm8 2482 paddd %xmm7,%xmm3 2483 paddd %xmm6,%xmm2 2484 paddd %xmm5,%xmm1 2485 paddd %xmm4,%xmm0 2486 pxor %xmm3,%xmm15 2487 pxor %xmm2,%xmm14 2488 pxor %xmm1,%xmm13 2489 pxor %xmm0,%xmm12 2490.byte 102,69,15,56,0,248 2491.byte 102,69,15,56,0,240 2492.byte 102,69,15,56,0,232 2493.byte 102,69,15,56,0,224 2494 movdqa 0+80(%rbp),%xmm8 2495 paddd %xmm15,%xmm11 2496 paddd %xmm14,%xmm10 2497 paddd %xmm13,%xmm9 2498 paddd %xmm12,%xmm8 2499 pxor %xmm11,%xmm7 2500 addq 0+0(%rdi),%r10 2501 adcq 8+0(%rdi),%r11 2502 adcq $1,%r12 2503 pxor %xmm10,%xmm6 2504 pxor %xmm9,%xmm5 2505 pxor %xmm8,%xmm4 2506 movdqa %xmm8,0+80(%rbp) 2507 movdqa %xmm7,%xmm8 2508 psrld $20,%xmm8 2509 pslld $32-20,%xmm7 2510 pxor %xmm8,%xmm7 2511 movdqa %xmm6,%xmm8 2512 psrld $20,%xmm8 2513 pslld $32-20,%xmm6 2514 pxor %xmm8,%xmm6 2515 movdqa %xmm5,%xmm8 2516 psrld $20,%xmm8 2517 pslld $32-20,%xmm5 2518 pxor %xmm8,%xmm5 2519 movdqa %xmm4,%xmm8 2520 psrld $20,%xmm8 2521 pslld $32-20,%xmm4 2522 pxor %xmm8,%xmm4 2523 movq 0+0+0(%rbp),%rax 2524 movq %rax,%r15 2525 mulq %r10 2526 movq %rax,%r13 2527 movq %rdx,%r14 2528 movq 0+0+0(%rbp),%rax 2529 mulq %r11 2530 imulq %r12,%r15 2531 addq %rax,%r14 2532 adcq %rdx,%r15 2533 movdqa .Lrol8(%rip),%xmm8 2534 paddd %xmm7,%xmm3 2535 paddd %xmm6,%xmm2 2536 paddd %xmm5,%xmm1 2537 paddd %xmm4,%xmm0 2538 pxor %xmm3,%xmm15 2539 pxor %xmm2,%xmm14 2540 pxor %xmm1,%xmm13 2541 pxor %xmm0,%xmm12 2542.byte 102,69,15,56,0,248 2543.byte 102,69,15,56,0,240 2544.byte 102,69,15,56,0,232 2545.byte 102,69,15,56,0,224 2546 movdqa 0+80(%rbp),%xmm8 2547 paddd %xmm15,%xmm11 2548 paddd %xmm14,%xmm10 2549 paddd %xmm13,%xmm9 2550 paddd 
%xmm12,%xmm8 2551 pxor %xmm11,%xmm7 2552 pxor %xmm10,%xmm6 2553 movq 8+0+0(%rbp),%rax 2554 movq %rax,%r9 2555 mulq %r10 2556 addq %rax,%r14 2557 adcq $0,%rdx 2558 movq %rdx,%r10 2559 movq 8+0+0(%rbp),%rax 2560 mulq %r11 2561 addq %rax,%r15 2562 adcq $0,%rdx 2563 pxor %xmm9,%xmm5 2564 pxor %xmm8,%xmm4 2565 movdqa %xmm8,0+80(%rbp) 2566 movdqa %xmm7,%xmm8 2567 psrld $25,%xmm8 2568 pslld $32-25,%xmm7 2569 pxor %xmm8,%xmm7 2570 movdqa %xmm6,%xmm8 2571 psrld $25,%xmm8 2572 pslld $32-25,%xmm6 2573 pxor %xmm8,%xmm6 2574 movdqa %xmm5,%xmm8 2575 psrld $25,%xmm8 2576 pslld $32-25,%xmm5 2577 pxor %xmm8,%xmm5 2578 movdqa %xmm4,%xmm8 2579 psrld $25,%xmm8 2580 pslld $32-25,%xmm4 2581 pxor %xmm8,%xmm4 2582 movdqa 0+80(%rbp),%xmm8 2583 imulq %r12,%r9 2584 addq %r10,%r15 2585 adcq %rdx,%r9 2586.byte 102,15,58,15,255,4 2587.byte 102,69,15,58,15,219,8 2588.byte 102,69,15,58,15,255,12 2589.byte 102,15,58,15,246,4 2590.byte 102,69,15,58,15,210,8 2591.byte 102,69,15,58,15,246,12 2592.byte 102,15,58,15,237,4 2593.byte 102,69,15,58,15,201,8 2594.byte 102,69,15,58,15,237,12 2595.byte 102,15,58,15,228,4 2596.byte 102,69,15,58,15,192,8 2597.byte 102,69,15,58,15,228,12 2598 movdqa %xmm8,0+80(%rbp) 2599 movdqa .Lrol16(%rip),%xmm8 2600 paddd %xmm7,%xmm3 2601 paddd %xmm6,%xmm2 2602 paddd %xmm5,%xmm1 2603 paddd %xmm4,%xmm0 2604 pxor %xmm3,%xmm15 2605 pxor %xmm2,%xmm14 2606 movq %r13,%r10 2607 movq %r14,%r11 2608 movq %r15,%r12 2609 andq $3,%r12 2610 movq %r15,%r13 2611 andq $-4,%r13 2612 movq %r9,%r14 2613 shrdq $2,%r9,%r15 2614 shrq $2,%r9 2615 addq %r13,%r15 2616 adcq %r14,%r9 2617 addq %r15,%r10 2618 adcq %r9,%r11 2619 adcq $0,%r12 2620 pxor %xmm1,%xmm13 2621 pxor %xmm0,%xmm12 2622.byte 102,69,15,56,0,248 2623.byte 102,69,15,56,0,240 2624.byte 102,69,15,56,0,232 2625.byte 102,69,15,56,0,224 2626 movdqa 0+80(%rbp),%xmm8 2627 paddd %xmm15,%xmm11 2628 paddd %xmm14,%xmm10 2629 paddd %xmm13,%xmm9 2630 paddd %xmm12,%xmm8 2631 pxor %xmm11,%xmm7 2632 pxor %xmm10,%xmm6 2633 pxor %xmm9,%xmm5 2634 pxor %xmm8,%xmm4 2635 movdqa %xmm8,0+80(%rbp) 2636 movdqa %xmm7,%xmm8 2637 psrld $20,%xmm8 2638 pslld $32-20,%xmm7 2639 pxor %xmm8,%xmm7 2640 movdqa %xmm6,%xmm8 2641 psrld $20,%xmm8 2642 pslld $32-20,%xmm6 2643 pxor %xmm8,%xmm6 2644 movdqa %xmm5,%xmm8 2645 psrld $20,%xmm8 2646 pslld $32-20,%xmm5 2647 pxor %xmm8,%xmm5 2648 movdqa %xmm4,%xmm8 2649 psrld $20,%xmm8 2650 pslld $32-20,%xmm4 2651 pxor %xmm8,%xmm4 2652 movdqa .Lrol8(%rip),%xmm8 2653 paddd %xmm7,%xmm3 2654 paddd %xmm6,%xmm2 2655 paddd %xmm5,%xmm1 2656 paddd %xmm4,%xmm0 2657 pxor %xmm3,%xmm15 2658 pxor %xmm2,%xmm14 2659 pxor %xmm1,%xmm13 2660 pxor %xmm0,%xmm12 2661.byte 102,69,15,56,0,248 2662.byte 102,69,15,56,0,240 2663.byte 102,69,15,56,0,232 2664.byte 102,69,15,56,0,224 2665 movdqa 0+80(%rbp),%xmm8 2666 paddd %xmm15,%xmm11 2667 paddd %xmm14,%xmm10 2668 paddd %xmm13,%xmm9 2669 paddd %xmm12,%xmm8 2670 pxor %xmm11,%xmm7 2671 pxor %xmm10,%xmm6 2672 pxor %xmm9,%xmm5 2673 pxor %xmm8,%xmm4 2674 movdqa %xmm8,0+80(%rbp) 2675 movdqa %xmm7,%xmm8 2676 psrld $25,%xmm8 2677 pslld $32-25,%xmm7 2678 pxor %xmm8,%xmm7 2679 movdqa %xmm6,%xmm8 2680 psrld $25,%xmm8 2681 pslld $32-25,%xmm6 2682 pxor %xmm8,%xmm6 2683 movdqa %xmm5,%xmm8 2684 psrld $25,%xmm8 2685 pslld $32-25,%xmm5 2686 pxor %xmm8,%xmm5 2687 movdqa %xmm4,%xmm8 2688 psrld $25,%xmm8 2689 pslld $32-25,%xmm4 2690 pxor %xmm8,%xmm4 2691 movdqa 0+80(%rbp),%xmm8 2692.byte 102,15,58,15,255,12 2693.byte 102,69,15,58,15,219,8 2694.byte 102,69,15,58,15,255,4 2695.byte 102,15,58,15,246,12 2696.byte 102,69,15,58,15,210,8 2697.byte 
102,69,15,58,15,246,4 2698.byte 102,15,58,15,237,12 2699.byte 102,69,15,58,15,201,8 2700.byte 102,69,15,58,15,237,4 2701.byte 102,15,58,15,228,12 2702.byte 102,69,15,58,15,192,8 2703.byte 102,69,15,58,15,228,4 2704 2705 leaq 16(%rdi),%rdi 2706 decq %r8 2707 jge .Lseal_sse_main_rounds 2708 addq 0+0(%rdi),%r10 2709 adcq 8+0(%rdi),%r11 2710 adcq $1,%r12 2711 movq 0+0+0(%rbp),%rax 2712 movq %rax,%r15 2713 mulq %r10 2714 movq %rax,%r13 2715 movq %rdx,%r14 2716 movq 0+0+0(%rbp),%rax 2717 mulq %r11 2718 imulq %r12,%r15 2719 addq %rax,%r14 2720 adcq %rdx,%r15 2721 movq 8+0+0(%rbp),%rax 2722 movq %rax,%r9 2723 mulq %r10 2724 addq %rax,%r14 2725 adcq $0,%rdx 2726 movq %rdx,%r10 2727 movq 8+0+0(%rbp),%rax 2728 mulq %r11 2729 addq %rax,%r15 2730 adcq $0,%rdx 2731 imulq %r12,%r9 2732 addq %r10,%r15 2733 adcq %rdx,%r9 2734 movq %r13,%r10 2735 movq %r14,%r11 2736 movq %r15,%r12 2737 andq $3,%r12 2738 movq %r15,%r13 2739 andq $-4,%r13 2740 movq %r9,%r14 2741 shrdq $2,%r9,%r15 2742 shrq $2,%r9 2743 addq %r13,%r15 2744 adcq %r14,%r9 2745 addq %r15,%r10 2746 adcq %r9,%r11 2747 adcq $0,%r12 2748 2749 leaq 16(%rdi),%rdi 2750 decq %rcx 2751 jg .Lseal_sse_main_rounds 2752 paddd .Lchacha20_consts(%rip),%xmm3 2753 paddd 0+48(%rbp),%xmm7 2754 paddd 0+64(%rbp),%xmm11 2755 paddd 0+144(%rbp),%xmm15 2756 paddd .Lchacha20_consts(%rip),%xmm2 2757 paddd 0+48(%rbp),%xmm6 2758 paddd 0+64(%rbp),%xmm10 2759 paddd 0+128(%rbp),%xmm14 2760 paddd .Lchacha20_consts(%rip),%xmm1 2761 paddd 0+48(%rbp),%xmm5 2762 paddd 0+64(%rbp),%xmm9 2763 paddd 0+112(%rbp),%xmm13 2764 paddd .Lchacha20_consts(%rip),%xmm0 2765 paddd 0+48(%rbp),%xmm4 2766 paddd 0+64(%rbp),%xmm8 2767 paddd 0+96(%rbp),%xmm12 2768 2769 movdqa %xmm14,0+80(%rbp) 2770 movdqa %xmm14,0+80(%rbp) 2771 movdqu 0 + 0(%rsi),%xmm14 2772 pxor %xmm3,%xmm14 2773 movdqu %xmm14,0 + 0(%rdi) 2774 movdqu 16 + 0(%rsi),%xmm14 2775 pxor %xmm7,%xmm14 2776 movdqu %xmm14,16 + 0(%rdi) 2777 movdqu 32 + 0(%rsi),%xmm14 2778 pxor %xmm11,%xmm14 2779 movdqu %xmm14,32 + 0(%rdi) 2780 movdqu 48 + 0(%rsi),%xmm14 2781 pxor %xmm15,%xmm14 2782 movdqu %xmm14,48 + 0(%rdi) 2783 2784 movdqa 0+80(%rbp),%xmm14 2785 movdqu 0 + 64(%rsi),%xmm3 2786 movdqu 16 + 64(%rsi),%xmm7 2787 movdqu 32 + 64(%rsi),%xmm11 2788 movdqu 48 + 64(%rsi),%xmm15 2789 pxor %xmm3,%xmm2 2790 pxor %xmm7,%xmm6 2791 pxor %xmm11,%xmm10 2792 pxor %xmm14,%xmm15 2793 movdqu %xmm2,0 + 64(%rdi) 2794 movdqu %xmm6,16 + 64(%rdi) 2795 movdqu %xmm10,32 + 64(%rdi) 2796 movdqu %xmm15,48 + 64(%rdi) 2797 movdqu 0 + 128(%rsi),%xmm3 2798 movdqu 16 + 128(%rsi),%xmm7 2799 movdqu 32 + 128(%rsi),%xmm11 2800 movdqu 48 + 128(%rsi),%xmm15 2801 pxor %xmm3,%xmm1 2802 pxor %xmm7,%xmm5 2803 pxor %xmm11,%xmm9 2804 pxor %xmm13,%xmm15 2805 movdqu %xmm1,0 + 128(%rdi) 2806 movdqu %xmm5,16 + 128(%rdi) 2807 movdqu %xmm9,32 + 128(%rdi) 2808 movdqu %xmm15,48 + 128(%rdi) 2809 2810 cmpq $256,%rbx 2811 ja .Lseal_sse_main_loop_xor 2812 2813 movq $192,%rcx 2814 subq $192,%rbx 2815 leaq 192(%rsi),%rsi 2816 jmp .Lseal_sse_128_tail_hash 2817.Lseal_sse_main_loop_xor: 2818 movdqu 0 + 192(%rsi),%xmm3 2819 movdqu 16 + 192(%rsi),%xmm7 2820 movdqu 32 + 192(%rsi),%xmm11 2821 movdqu 48 + 192(%rsi),%xmm15 2822 pxor %xmm3,%xmm0 2823 pxor %xmm7,%xmm4 2824 pxor %xmm11,%xmm8 2825 pxor %xmm12,%xmm15 2826 movdqu %xmm0,0 + 192(%rdi) 2827 movdqu %xmm4,16 + 192(%rdi) 2828 movdqu %xmm8,32 + 192(%rdi) 2829 movdqu %xmm15,48 + 192(%rdi) 2830 2831 leaq 256(%rsi),%rsi 2832 subq $256,%rbx 2833 movq $6,%rcx 2834 movq $4,%r8 2835 cmpq $192,%rbx 2836 jg .Lseal_sse_main_loop 2837 movq %rbx,%rcx 2838 testq %rbx,%rbx 2839 
je .Lseal_sse_128_tail_hash 2840 movq $6,%rcx 2841 cmpq $128,%rbx 2842 ja .Lseal_sse_tail_192 2843 cmpq $64,%rbx 2844 ja .Lseal_sse_tail_128 2845 2846.Lseal_sse_tail_64: 2847 movdqa .Lchacha20_consts(%rip),%xmm0 2848 movdqa 0+48(%rbp),%xmm4 2849 movdqa 0+64(%rbp),%xmm8 2850 movdqa 0+96(%rbp),%xmm12 2851 paddd .Lsse_inc(%rip),%xmm12 2852 movdqa %xmm12,0+96(%rbp) 2853 2854.Lseal_sse_tail_64_rounds_and_x2hash: 2855 addq 0+0(%rdi),%r10 2856 adcq 8+0(%rdi),%r11 2857 adcq $1,%r12 2858 movq 0+0+0(%rbp),%rax 2859 movq %rax,%r15 2860 mulq %r10 2861 movq %rax,%r13 2862 movq %rdx,%r14 2863 movq 0+0+0(%rbp),%rax 2864 mulq %r11 2865 imulq %r12,%r15 2866 addq %rax,%r14 2867 adcq %rdx,%r15 2868 movq 8+0+0(%rbp),%rax 2869 movq %rax,%r9 2870 mulq %r10 2871 addq %rax,%r14 2872 adcq $0,%rdx 2873 movq %rdx,%r10 2874 movq 8+0+0(%rbp),%rax 2875 mulq %r11 2876 addq %rax,%r15 2877 adcq $0,%rdx 2878 imulq %r12,%r9 2879 addq %r10,%r15 2880 adcq %rdx,%r9 2881 movq %r13,%r10 2882 movq %r14,%r11 2883 movq %r15,%r12 2884 andq $3,%r12 2885 movq %r15,%r13 2886 andq $-4,%r13 2887 movq %r9,%r14 2888 shrdq $2,%r9,%r15 2889 shrq $2,%r9 2890 addq %r13,%r15 2891 adcq %r14,%r9 2892 addq %r15,%r10 2893 adcq %r9,%r11 2894 adcq $0,%r12 2895 2896 leaq 16(%rdi),%rdi 2897.Lseal_sse_tail_64_rounds_and_x1hash: 2898 paddd %xmm4,%xmm0 2899 pxor %xmm0,%xmm12 2900 pshufb .Lrol16(%rip),%xmm12 2901 paddd %xmm12,%xmm8 2902 pxor %xmm8,%xmm4 2903 movdqa %xmm4,%xmm3 2904 pslld $12,%xmm3 2905 psrld $20,%xmm4 2906 pxor %xmm3,%xmm4 2907 paddd %xmm4,%xmm0 2908 pxor %xmm0,%xmm12 2909 pshufb .Lrol8(%rip),%xmm12 2910 paddd %xmm12,%xmm8 2911 pxor %xmm8,%xmm4 2912 movdqa %xmm4,%xmm3 2913 pslld $7,%xmm3 2914 psrld $25,%xmm4 2915 pxor %xmm3,%xmm4 2916.byte 102,15,58,15,228,4 2917.byte 102,69,15,58,15,192,8 2918.byte 102,69,15,58,15,228,12 2919 paddd %xmm4,%xmm0 2920 pxor %xmm0,%xmm12 2921 pshufb .Lrol16(%rip),%xmm12 2922 paddd %xmm12,%xmm8 2923 pxor %xmm8,%xmm4 2924 movdqa %xmm4,%xmm3 2925 pslld $12,%xmm3 2926 psrld $20,%xmm4 2927 pxor %xmm3,%xmm4 2928 paddd %xmm4,%xmm0 2929 pxor %xmm0,%xmm12 2930 pshufb .Lrol8(%rip),%xmm12 2931 paddd %xmm12,%xmm8 2932 pxor %xmm8,%xmm4 2933 movdqa %xmm4,%xmm3 2934 pslld $7,%xmm3 2935 psrld $25,%xmm4 2936 pxor %xmm3,%xmm4 2937.byte 102,15,58,15,228,12 2938.byte 102,69,15,58,15,192,8 2939.byte 102,69,15,58,15,228,4 2940 addq 0+0(%rdi),%r10 2941 adcq 8+0(%rdi),%r11 2942 adcq $1,%r12 2943 movq 0+0+0(%rbp),%rax 2944 movq %rax,%r15 2945 mulq %r10 2946 movq %rax,%r13 2947 movq %rdx,%r14 2948 movq 0+0+0(%rbp),%rax 2949 mulq %r11 2950 imulq %r12,%r15 2951 addq %rax,%r14 2952 adcq %rdx,%r15 2953 movq 8+0+0(%rbp),%rax 2954 movq %rax,%r9 2955 mulq %r10 2956 addq %rax,%r14 2957 adcq $0,%rdx 2958 movq %rdx,%r10 2959 movq 8+0+0(%rbp),%rax 2960 mulq %r11 2961 addq %rax,%r15 2962 adcq $0,%rdx 2963 imulq %r12,%r9 2964 addq %r10,%r15 2965 adcq %rdx,%r9 2966 movq %r13,%r10 2967 movq %r14,%r11 2968 movq %r15,%r12 2969 andq $3,%r12 2970 movq %r15,%r13 2971 andq $-4,%r13 2972 movq %r9,%r14 2973 shrdq $2,%r9,%r15 2974 shrq $2,%r9 2975 addq %r13,%r15 2976 adcq %r14,%r9 2977 addq %r15,%r10 2978 adcq %r9,%r11 2979 adcq $0,%r12 2980 2981 leaq 16(%rdi),%rdi 2982 decq %rcx 2983 jg .Lseal_sse_tail_64_rounds_and_x2hash 2984 decq %r8 2985 jge .Lseal_sse_tail_64_rounds_and_x1hash 2986 paddd .Lchacha20_consts(%rip),%xmm0 2987 paddd 0+48(%rbp),%xmm4 2988 paddd 0+64(%rbp),%xmm8 2989 paddd 0+96(%rbp),%xmm12 2990 2991 jmp .Lseal_sse_128_tail_xor 2992 2993.Lseal_sse_tail_128: 2994 movdqa .Lchacha20_consts(%rip),%xmm0 2995 movdqa 0+48(%rbp),%xmm4 2996 movdqa 
0+64(%rbp),%xmm8 2997 movdqa %xmm0,%xmm1 2998 movdqa %xmm4,%xmm5 2999 movdqa %xmm8,%xmm9 3000 movdqa 0+96(%rbp),%xmm13 3001 paddd .Lsse_inc(%rip),%xmm13 3002 movdqa %xmm13,%xmm12 3003 paddd .Lsse_inc(%rip),%xmm12 3004 movdqa %xmm12,0+96(%rbp) 3005 movdqa %xmm13,0+112(%rbp) 3006 3007.Lseal_sse_tail_128_rounds_and_x2hash: 3008 addq 0+0(%rdi),%r10 3009 adcq 8+0(%rdi),%r11 3010 adcq $1,%r12 3011 movq 0+0+0(%rbp),%rax 3012 movq %rax,%r15 3013 mulq %r10 3014 movq %rax,%r13 3015 movq %rdx,%r14 3016 movq 0+0+0(%rbp),%rax 3017 mulq %r11 3018 imulq %r12,%r15 3019 addq %rax,%r14 3020 adcq %rdx,%r15 3021 movq 8+0+0(%rbp),%rax 3022 movq %rax,%r9 3023 mulq %r10 3024 addq %rax,%r14 3025 adcq $0,%rdx 3026 movq %rdx,%r10 3027 movq 8+0+0(%rbp),%rax 3028 mulq %r11 3029 addq %rax,%r15 3030 adcq $0,%rdx 3031 imulq %r12,%r9 3032 addq %r10,%r15 3033 adcq %rdx,%r9 3034 movq %r13,%r10 3035 movq %r14,%r11 3036 movq %r15,%r12 3037 andq $3,%r12 3038 movq %r15,%r13 3039 andq $-4,%r13 3040 movq %r9,%r14 3041 shrdq $2,%r9,%r15 3042 shrq $2,%r9 3043 addq %r13,%r15 3044 adcq %r14,%r9 3045 addq %r15,%r10 3046 adcq %r9,%r11 3047 adcq $0,%r12 3048 3049 leaq 16(%rdi),%rdi 3050.Lseal_sse_tail_128_rounds_and_x1hash: 3051 paddd %xmm4,%xmm0 3052 pxor %xmm0,%xmm12 3053 pshufb .Lrol16(%rip),%xmm12 3054 paddd %xmm12,%xmm8 3055 pxor %xmm8,%xmm4 3056 movdqa %xmm4,%xmm3 3057 pslld $12,%xmm3 3058 psrld $20,%xmm4 3059 pxor %xmm3,%xmm4 3060 paddd %xmm4,%xmm0 3061 pxor %xmm0,%xmm12 3062 pshufb .Lrol8(%rip),%xmm12 3063 paddd %xmm12,%xmm8 3064 pxor %xmm8,%xmm4 3065 movdqa %xmm4,%xmm3 3066 pslld $7,%xmm3 3067 psrld $25,%xmm4 3068 pxor %xmm3,%xmm4 3069.byte 102,15,58,15,228,4 3070.byte 102,69,15,58,15,192,8 3071.byte 102,69,15,58,15,228,12 3072 paddd %xmm5,%xmm1 3073 pxor %xmm1,%xmm13 3074 pshufb .Lrol16(%rip),%xmm13 3075 paddd %xmm13,%xmm9 3076 pxor %xmm9,%xmm5 3077 movdqa %xmm5,%xmm3 3078 pslld $12,%xmm3 3079 psrld $20,%xmm5 3080 pxor %xmm3,%xmm5 3081 paddd %xmm5,%xmm1 3082 pxor %xmm1,%xmm13 3083 pshufb .Lrol8(%rip),%xmm13 3084 paddd %xmm13,%xmm9 3085 pxor %xmm9,%xmm5 3086 movdqa %xmm5,%xmm3 3087 pslld $7,%xmm3 3088 psrld $25,%xmm5 3089 pxor %xmm3,%xmm5 3090.byte 102,15,58,15,237,4 3091.byte 102,69,15,58,15,201,8 3092.byte 102,69,15,58,15,237,12 3093 addq 0+0(%rdi),%r10 3094 adcq 8+0(%rdi),%r11 3095 adcq $1,%r12 3096 movq 0+0+0(%rbp),%rax 3097 movq %rax,%r15 3098 mulq %r10 3099 movq %rax,%r13 3100 movq %rdx,%r14 3101 movq 0+0+0(%rbp),%rax 3102 mulq %r11 3103 imulq %r12,%r15 3104 addq %rax,%r14 3105 adcq %rdx,%r15 3106 movq 8+0+0(%rbp),%rax 3107 movq %rax,%r9 3108 mulq %r10 3109 addq %rax,%r14 3110 adcq $0,%rdx 3111 movq %rdx,%r10 3112 movq 8+0+0(%rbp),%rax 3113 mulq %r11 3114 addq %rax,%r15 3115 adcq $0,%rdx 3116 imulq %r12,%r9 3117 addq %r10,%r15 3118 adcq %rdx,%r9 3119 movq %r13,%r10 3120 movq %r14,%r11 3121 movq %r15,%r12 3122 andq $3,%r12 3123 movq %r15,%r13 3124 andq $-4,%r13 3125 movq %r9,%r14 3126 shrdq $2,%r9,%r15 3127 shrq $2,%r9 3128 addq %r13,%r15 3129 adcq %r14,%r9 3130 addq %r15,%r10 3131 adcq %r9,%r11 3132 adcq $0,%r12 3133 paddd %xmm4,%xmm0 3134 pxor %xmm0,%xmm12 3135 pshufb .Lrol16(%rip),%xmm12 3136 paddd %xmm12,%xmm8 3137 pxor %xmm8,%xmm4 3138 movdqa %xmm4,%xmm3 3139 pslld $12,%xmm3 3140 psrld $20,%xmm4 3141 pxor %xmm3,%xmm4 3142 paddd %xmm4,%xmm0 3143 pxor %xmm0,%xmm12 3144 pshufb .Lrol8(%rip),%xmm12 3145 paddd %xmm12,%xmm8 3146 pxor %xmm8,%xmm4 3147 movdqa %xmm4,%xmm3 3148 pslld $7,%xmm3 3149 psrld $25,%xmm4 3150 pxor %xmm3,%xmm4 3151.byte 102,15,58,15,228,12 3152.byte 102,69,15,58,15,192,8 3153.byte 102,69,15,58,15,228,4 
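// Second half of the ChaCha20 double round, repeated for the second 64-byte block (xmm1/xmm5/xmm9/xmm13).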
3154 paddd %xmm5,%xmm1 3155 pxor %xmm1,%xmm13 3156 pshufb .Lrol16(%rip),%xmm13 3157 paddd %xmm13,%xmm9 3158 pxor %xmm9,%xmm5 3159 movdqa %xmm5,%xmm3 3160 pslld $12,%xmm3 3161 psrld $20,%xmm5 3162 pxor %xmm3,%xmm5 3163 paddd %xmm5,%xmm1 3164 pxor %xmm1,%xmm13 3165 pshufb .Lrol8(%rip),%xmm13 3166 paddd %xmm13,%xmm9 3167 pxor %xmm9,%xmm5 3168 movdqa %xmm5,%xmm3 3169 pslld $7,%xmm3 3170 psrld $25,%xmm5 3171 pxor %xmm3,%xmm5 3172.byte 102,15,58,15,237,12 3173.byte 102,69,15,58,15,201,8 3174.byte 102,69,15,58,15,237,4 3175 3176 leaq 16(%rdi),%rdi 3177 decq %rcx 3178 jg .Lseal_sse_tail_128_rounds_and_x2hash 3179 decq %r8 3180 jge .Lseal_sse_tail_128_rounds_and_x1hash 3181 paddd .Lchacha20_consts(%rip),%xmm1 3182 paddd 0+48(%rbp),%xmm5 3183 paddd 0+64(%rbp),%xmm9 3184 paddd 0+112(%rbp),%xmm13 3185 paddd .Lchacha20_consts(%rip),%xmm0 3186 paddd 0+48(%rbp),%xmm4 3187 paddd 0+64(%rbp),%xmm8 3188 paddd 0+96(%rbp),%xmm12 3189 movdqu 0 + 0(%rsi),%xmm3 3190 movdqu 16 + 0(%rsi),%xmm7 3191 movdqu 32 + 0(%rsi),%xmm11 3192 movdqu 48 + 0(%rsi),%xmm15 3193 pxor %xmm3,%xmm1 3194 pxor %xmm7,%xmm5 3195 pxor %xmm11,%xmm9 3196 pxor %xmm13,%xmm15 3197 movdqu %xmm1,0 + 0(%rdi) 3198 movdqu %xmm5,16 + 0(%rdi) 3199 movdqu %xmm9,32 + 0(%rdi) 3200 movdqu %xmm15,48 + 0(%rdi) 3201 3202 movq $64,%rcx 3203 subq $64,%rbx 3204 leaq 64(%rsi),%rsi 3205 jmp .Lseal_sse_128_tail_hash 3206 3207.Lseal_sse_tail_192: 3208 movdqa .Lchacha20_consts(%rip),%xmm0 3209 movdqa 0+48(%rbp),%xmm4 3210 movdqa 0+64(%rbp),%xmm8 3211 movdqa %xmm0,%xmm1 3212 movdqa %xmm4,%xmm5 3213 movdqa %xmm8,%xmm9 3214 movdqa %xmm0,%xmm2 3215 movdqa %xmm4,%xmm6 3216 movdqa %xmm8,%xmm10 3217 movdqa 0+96(%rbp),%xmm14 3218 paddd .Lsse_inc(%rip),%xmm14 3219 movdqa %xmm14,%xmm13 3220 paddd .Lsse_inc(%rip),%xmm13 3221 movdqa %xmm13,%xmm12 3222 paddd .Lsse_inc(%rip),%xmm12 3223 movdqa %xmm12,0+96(%rbp) 3224 movdqa %xmm13,0+112(%rbp) 3225 movdqa %xmm14,0+128(%rbp) 3226 3227.Lseal_sse_tail_192_rounds_and_x2hash: 3228 addq 0+0(%rdi),%r10 3229 adcq 8+0(%rdi),%r11 3230 adcq $1,%r12 3231 movq 0+0+0(%rbp),%rax 3232 movq %rax,%r15 3233 mulq %r10 3234 movq %rax,%r13 3235 movq %rdx,%r14 3236 movq 0+0+0(%rbp),%rax 3237 mulq %r11 3238 imulq %r12,%r15 3239 addq %rax,%r14 3240 adcq %rdx,%r15 3241 movq 8+0+0(%rbp),%rax 3242 movq %rax,%r9 3243 mulq %r10 3244 addq %rax,%r14 3245 adcq $0,%rdx 3246 movq %rdx,%r10 3247 movq 8+0+0(%rbp),%rax 3248 mulq %r11 3249 addq %rax,%r15 3250 adcq $0,%rdx 3251 imulq %r12,%r9 3252 addq %r10,%r15 3253 adcq %rdx,%r9 3254 movq %r13,%r10 3255 movq %r14,%r11 3256 movq %r15,%r12 3257 andq $3,%r12 3258 movq %r15,%r13 3259 andq $-4,%r13 3260 movq %r9,%r14 3261 shrdq $2,%r9,%r15 3262 shrq $2,%r9 3263 addq %r13,%r15 3264 adcq %r14,%r9 3265 addq %r15,%r10 3266 adcq %r9,%r11 3267 adcq $0,%r12 3268 3269 leaq 16(%rdi),%rdi 3270.Lseal_sse_tail_192_rounds_and_x1hash: 3271 paddd %xmm4,%xmm0 3272 pxor %xmm0,%xmm12 3273 pshufb .Lrol16(%rip),%xmm12 3274 paddd %xmm12,%xmm8 3275 pxor %xmm8,%xmm4 3276 movdqa %xmm4,%xmm3 3277 pslld $12,%xmm3 3278 psrld $20,%xmm4 3279 pxor %xmm3,%xmm4 3280 paddd %xmm4,%xmm0 3281 pxor %xmm0,%xmm12 3282 pshufb .Lrol8(%rip),%xmm12 3283 paddd %xmm12,%xmm8 3284 pxor %xmm8,%xmm4 3285 movdqa %xmm4,%xmm3 3286 pslld $7,%xmm3 3287 psrld $25,%xmm4 3288 pxor %xmm3,%xmm4 3289.byte 102,15,58,15,228,4 3290.byte 102,69,15,58,15,192,8 3291.byte 102,69,15,58,15,228,12 3292 paddd %xmm5,%xmm1 3293 pxor %xmm1,%xmm13 3294 pshufb .Lrol16(%rip),%xmm13 3295 paddd %xmm13,%xmm9 3296 pxor %xmm9,%xmm5 3297 movdqa %xmm5,%xmm3 3298 pslld $12,%xmm3 3299 psrld $20,%xmm5 3300 
pxor %xmm3,%xmm5 3301 paddd %xmm5,%xmm1 3302 pxor %xmm1,%xmm13 3303 pshufb .Lrol8(%rip),%xmm13 3304 paddd %xmm13,%xmm9 3305 pxor %xmm9,%xmm5 3306 movdqa %xmm5,%xmm3 3307 pslld $7,%xmm3 3308 psrld $25,%xmm5 3309 pxor %xmm3,%xmm5 3310.byte 102,15,58,15,237,4 3311.byte 102,69,15,58,15,201,8 3312.byte 102,69,15,58,15,237,12 3313 paddd %xmm6,%xmm2 3314 pxor %xmm2,%xmm14 3315 pshufb .Lrol16(%rip),%xmm14 3316 paddd %xmm14,%xmm10 3317 pxor %xmm10,%xmm6 3318 movdqa %xmm6,%xmm3 3319 pslld $12,%xmm3 3320 psrld $20,%xmm6 3321 pxor %xmm3,%xmm6 3322 paddd %xmm6,%xmm2 3323 pxor %xmm2,%xmm14 3324 pshufb .Lrol8(%rip),%xmm14 3325 paddd %xmm14,%xmm10 3326 pxor %xmm10,%xmm6 3327 movdqa %xmm6,%xmm3 3328 pslld $7,%xmm3 3329 psrld $25,%xmm6 3330 pxor %xmm3,%xmm6 3331.byte 102,15,58,15,246,4 3332.byte 102,69,15,58,15,210,8 3333.byte 102,69,15,58,15,246,12 3334 addq 0+0(%rdi),%r10 3335 adcq 8+0(%rdi),%r11 3336 adcq $1,%r12 3337 movq 0+0+0(%rbp),%rax 3338 movq %rax,%r15 3339 mulq %r10 3340 movq %rax,%r13 3341 movq %rdx,%r14 3342 movq 0+0+0(%rbp),%rax 3343 mulq %r11 3344 imulq %r12,%r15 3345 addq %rax,%r14 3346 adcq %rdx,%r15 3347 movq 8+0+0(%rbp),%rax 3348 movq %rax,%r9 3349 mulq %r10 3350 addq %rax,%r14 3351 adcq $0,%rdx 3352 movq %rdx,%r10 3353 movq 8+0+0(%rbp),%rax 3354 mulq %r11 3355 addq %rax,%r15 3356 adcq $0,%rdx 3357 imulq %r12,%r9 3358 addq %r10,%r15 3359 adcq %rdx,%r9 3360 movq %r13,%r10 3361 movq %r14,%r11 3362 movq %r15,%r12 3363 andq $3,%r12 3364 movq %r15,%r13 3365 andq $-4,%r13 3366 movq %r9,%r14 3367 shrdq $2,%r9,%r15 3368 shrq $2,%r9 3369 addq %r13,%r15 3370 adcq %r14,%r9 3371 addq %r15,%r10 3372 adcq %r9,%r11 3373 adcq $0,%r12 3374 paddd %xmm4,%xmm0 3375 pxor %xmm0,%xmm12 3376 pshufb .Lrol16(%rip),%xmm12 3377 paddd %xmm12,%xmm8 3378 pxor %xmm8,%xmm4 3379 movdqa %xmm4,%xmm3 3380 pslld $12,%xmm3 3381 psrld $20,%xmm4 3382 pxor %xmm3,%xmm4 3383 paddd %xmm4,%xmm0 3384 pxor %xmm0,%xmm12 3385 pshufb .Lrol8(%rip),%xmm12 3386 paddd %xmm12,%xmm8 3387 pxor %xmm8,%xmm4 3388 movdqa %xmm4,%xmm3 3389 pslld $7,%xmm3 3390 psrld $25,%xmm4 3391 pxor %xmm3,%xmm4 3392.byte 102,15,58,15,228,12 3393.byte 102,69,15,58,15,192,8 3394.byte 102,69,15,58,15,228,4 3395 paddd %xmm5,%xmm1 3396 pxor %xmm1,%xmm13 3397 pshufb .Lrol16(%rip),%xmm13 3398 paddd %xmm13,%xmm9 3399 pxor %xmm9,%xmm5 3400 movdqa %xmm5,%xmm3 3401 pslld $12,%xmm3 3402 psrld $20,%xmm5 3403 pxor %xmm3,%xmm5 3404 paddd %xmm5,%xmm1 3405 pxor %xmm1,%xmm13 3406 pshufb .Lrol8(%rip),%xmm13 3407 paddd %xmm13,%xmm9 3408 pxor %xmm9,%xmm5 3409 movdqa %xmm5,%xmm3 3410 pslld $7,%xmm3 3411 psrld $25,%xmm5 3412 pxor %xmm3,%xmm5 3413.byte 102,15,58,15,237,12 3414.byte 102,69,15,58,15,201,8 3415.byte 102,69,15,58,15,237,4 3416 paddd %xmm6,%xmm2 3417 pxor %xmm2,%xmm14 3418 pshufb .Lrol16(%rip),%xmm14 3419 paddd %xmm14,%xmm10 3420 pxor %xmm10,%xmm6 3421 movdqa %xmm6,%xmm3 3422 pslld $12,%xmm3 3423 psrld $20,%xmm6 3424 pxor %xmm3,%xmm6 3425 paddd %xmm6,%xmm2 3426 pxor %xmm2,%xmm14 3427 pshufb .Lrol8(%rip),%xmm14 3428 paddd %xmm14,%xmm10 3429 pxor %xmm10,%xmm6 3430 movdqa %xmm6,%xmm3 3431 pslld $7,%xmm3 3432 psrld $25,%xmm6 3433 pxor %xmm3,%xmm6 3434.byte 102,15,58,15,246,12 3435.byte 102,69,15,58,15,210,8 3436.byte 102,69,15,58,15,246,4 3437 3438 leaq 16(%rdi),%rdi 3439 decq %rcx 3440 jg .Lseal_sse_tail_192_rounds_and_x2hash 3441 decq %r8 3442 jge .Lseal_sse_tail_192_rounds_and_x1hash 3443 paddd .Lchacha20_consts(%rip),%xmm2 3444 paddd 0+48(%rbp),%xmm6 3445 paddd 0+64(%rbp),%xmm10 3446 paddd 0+128(%rbp),%xmm14 3447 paddd .Lchacha20_consts(%rip),%xmm1 3448 paddd 0+48(%rbp),%xmm5 
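// Finish adding the saved initial state back into blocks 1 and 0, xor the first 128 bytes with the input,
// then fall through to .Lseal_sse_128_tail_hash; any remaining partial block is handled at .Lseal_sse_128_tail_xor.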
3449 paddd 0+64(%rbp),%xmm9 3450 paddd 0+112(%rbp),%xmm13 3451 paddd .Lchacha20_consts(%rip),%xmm0 3452 paddd 0+48(%rbp),%xmm4 3453 paddd 0+64(%rbp),%xmm8 3454 paddd 0+96(%rbp),%xmm12 3455 movdqu 0 + 0(%rsi),%xmm3 3456 movdqu 16 + 0(%rsi),%xmm7 3457 movdqu 32 + 0(%rsi),%xmm11 3458 movdqu 48 + 0(%rsi),%xmm15 3459 pxor %xmm3,%xmm2 3460 pxor %xmm7,%xmm6 3461 pxor %xmm11,%xmm10 3462 pxor %xmm14,%xmm15 3463 movdqu %xmm2,0 + 0(%rdi) 3464 movdqu %xmm6,16 + 0(%rdi) 3465 movdqu %xmm10,32 + 0(%rdi) 3466 movdqu %xmm15,48 + 0(%rdi) 3467 movdqu 0 + 64(%rsi),%xmm3 3468 movdqu 16 + 64(%rsi),%xmm7 3469 movdqu 32 + 64(%rsi),%xmm11 3470 movdqu 48 + 64(%rsi),%xmm15 3471 pxor %xmm3,%xmm1 3472 pxor %xmm7,%xmm5 3473 pxor %xmm11,%xmm9 3474 pxor %xmm13,%xmm15 3475 movdqu %xmm1,0 + 64(%rdi) 3476 movdqu %xmm5,16 + 64(%rdi) 3477 movdqu %xmm9,32 + 64(%rdi) 3478 movdqu %xmm15,48 + 64(%rdi) 3479 3480 movq $128,%rcx 3481 subq $128,%rbx 3482 leaq 128(%rsi),%rsi 3483 3484.Lseal_sse_128_tail_hash: 3485 cmpq $16,%rcx 3486 jb .Lseal_sse_128_tail_xor 3487 addq 0+0(%rdi),%r10 3488 adcq 8+0(%rdi),%r11 3489 adcq $1,%r12 3490 movq 0+0+0(%rbp),%rax 3491 movq %rax,%r15 3492 mulq %r10 3493 movq %rax,%r13 3494 movq %rdx,%r14 3495 movq 0+0+0(%rbp),%rax 3496 mulq %r11 3497 imulq %r12,%r15 3498 addq %rax,%r14 3499 adcq %rdx,%r15 3500 movq 8+0+0(%rbp),%rax 3501 movq %rax,%r9 3502 mulq %r10 3503 addq %rax,%r14 3504 adcq $0,%rdx 3505 movq %rdx,%r10 3506 movq 8+0+0(%rbp),%rax 3507 mulq %r11 3508 addq %rax,%r15 3509 adcq $0,%rdx 3510 imulq %r12,%r9 3511 addq %r10,%r15 3512 adcq %rdx,%r9 3513 movq %r13,%r10 3514 movq %r14,%r11 3515 movq %r15,%r12 3516 andq $3,%r12 3517 movq %r15,%r13 3518 andq $-4,%r13 3519 movq %r9,%r14 3520 shrdq $2,%r9,%r15 3521 shrq $2,%r9 3522 addq %r13,%r15 3523 adcq %r14,%r9 3524 addq %r15,%r10 3525 adcq %r9,%r11 3526 adcq $0,%r12 3527 3528 subq $16,%rcx 3529 leaq 16(%rdi),%rdi 3530 jmp .Lseal_sse_128_tail_hash 3531 3532.Lseal_sse_128_tail_xor: 3533 cmpq $16,%rbx 3534 jb .Lseal_sse_tail_16 3535 subq $16,%rbx 3536 3537 movdqu 0(%rsi),%xmm3 3538 pxor %xmm3,%xmm0 3539 movdqu %xmm0,0(%rdi) 3540 3541 addq 0(%rdi),%r10 3542 adcq 8(%rdi),%r11 3543 adcq $1,%r12 3544 leaq 16(%rsi),%rsi 3545 leaq 16(%rdi),%rdi 3546 movq 0+0+0(%rbp),%rax 3547 movq %rax,%r15 3548 mulq %r10 3549 movq %rax,%r13 3550 movq %rdx,%r14 3551 movq 0+0+0(%rbp),%rax 3552 mulq %r11 3553 imulq %r12,%r15 3554 addq %rax,%r14 3555 adcq %rdx,%r15 3556 movq 8+0+0(%rbp),%rax 3557 movq %rax,%r9 3558 mulq %r10 3559 addq %rax,%r14 3560 adcq $0,%rdx 3561 movq %rdx,%r10 3562 movq 8+0+0(%rbp),%rax 3563 mulq %r11 3564 addq %rax,%r15 3565 adcq $0,%rdx 3566 imulq %r12,%r9 3567 addq %r10,%r15 3568 adcq %rdx,%r9 3569 movq %r13,%r10 3570 movq %r14,%r11 3571 movq %r15,%r12 3572 andq $3,%r12 3573 movq %r15,%r13 3574 andq $-4,%r13 3575 movq %r9,%r14 3576 shrdq $2,%r9,%r15 3577 shrq $2,%r9 3578 addq %r13,%r15 3579 adcq %r14,%r9 3580 addq %r15,%r10 3581 adcq %r9,%r11 3582 adcq $0,%r12 3583 3584 3585 movdqa %xmm4,%xmm0 3586 movdqa %xmm8,%xmm4 3587 movdqa %xmm12,%xmm8 3588 movdqa %xmm1,%xmm12 3589 movdqa %xmm5,%xmm1 3590 movdqa %xmm9,%xmm5 3591 movdqa %xmm13,%xmm9 3592 jmp .Lseal_sse_128_tail_xor 3593 3594.Lseal_sse_tail_16: 3595 testq %rbx,%rbx 3596 jz .Lprocess_blocks_of_extra_in 3597 3598 movq %rbx,%r8 3599 movq %rbx,%rcx 3600 leaq -1(%rsi,%rbx,1),%rsi 3601 pxor %xmm15,%xmm15 3602.Lseal_sse_tail_16_compose: 3603 pslldq $1,%xmm15 3604 pinsrb $0,(%rsi),%xmm15 3605 leaq -1(%rsi),%rsi 3606 decq %rcx 3607 jne .Lseal_sse_tail_16_compose 3608 3609 3610 pxor %xmm0,%xmm15 3611 3612 3613 
movq %rbx,%rcx 3614 movdqu %xmm15,%xmm0 3615.Lseal_sse_tail_16_extract: 3616 pextrb $0,%xmm0,(%rdi) 3617 psrldq $1,%xmm0 3618 addq $1,%rdi 3619 subq $1,%rcx 3620 jnz .Lseal_sse_tail_16_extract 3621 3622 3623 3624 3625 3626 3627 3628 3629 movq 288 + 0 + 32(%rsp),%r9 3630 movq 56(%r9),%r14 3631 movq 48(%r9),%r13 3632 testq %r14,%r14 3633 jz .Lprocess_partial_block 3634 3635 movq $16,%r15 3636 subq %rbx,%r15 3637 cmpq %r15,%r14 3638 3639 jge .Lload_extra_in 3640 movq %r14,%r15 3641 3642.Lload_extra_in: 3643 3644 3645 leaq -1(%r13,%r15,1),%rsi 3646 3647 3648 addq %r15,%r13 3649 subq %r15,%r14 3650 movq %r13,48(%r9) 3651 movq %r14,56(%r9) 3652 3653 3654 3655 addq %r15,%r8 3656 3657 3658 pxor %xmm11,%xmm11 3659.Lload_extra_load_loop: 3660 pslldq $1,%xmm11 3661 pinsrb $0,(%rsi),%xmm11 3662 leaq -1(%rsi),%rsi 3663 subq $1,%r15 3664 jnz .Lload_extra_load_loop 3665 3666 3667 3668 3669 movq %rbx,%r15 3670 3671.Lload_extra_shift_loop: 3672 pslldq $1,%xmm11 3673 subq $1,%r15 3674 jnz .Lload_extra_shift_loop 3675 3676 3677 3678 3679 leaq .Land_masks(%rip),%r15 3680 shlq $4,%rbx 3681 pand -16(%r15,%rbx,1),%xmm15 3682 3683 3684 por %xmm11,%xmm15 3685 3686 3687 3688.byte 102,77,15,126,253 3689 pextrq $1,%xmm15,%r14 3690 addq %r13,%r10 3691 adcq %r14,%r11 3692 adcq $1,%r12 3693 movq 0+0+0(%rbp),%rax 3694 movq %rax,%r15 3695 mulq %r10 3696 movq %rax,%r13 3697 movq %rdx,%r14 3698 movq 0+0+0(%rbp),%rax 3699 mulq %r11 3700 imulq %r12,%r15 3701 addq %rax,%r14 3702 adcq %rdx,%r15 3703 movq 8+0+0(%rbp),%rax 3704 movq %rax,%r9 3705 mulq %r10 3706 addq %rax,%r14 3707 adcq $0,%rdx 3708 movq %rdx,%r10 3709 movq 8+0+0(%rbp),%rax 3710 mulq %r11 3711 addq %rax,%r15 3712 adcq $0,%rdx 3713 imulq %r12,%r9 3714 addq %r10,%r15 3715 adcq %rdx,%r9 3716 movq %r13,%r10 3717 movq %r14,%r11 3718 movq %r15,%r12 3719 andq $3,%r12 3720 movq %r15,%r13 3721 andq $-4,%r13 3722 movq %r9,%r14 3723 shrdq $2,%r9,%r15 3724 shrq $2,%r9 3725 addq %r13,%r15 3726 adcq %r14,%r9 3727 addq %r15,%r10 3728 adcq %r9,%r11 3729 adcq $0,%r12 3730 3731 3732.Lprocess_blocks_of_extra_in: 3733 3734 movq 288+32+0 (%rsp),%r9 3735 movq 48(%r9),%rsi 3736 movq 56(%r9),%r8 3737 movq %r8,%rcx 3738 shrq $4,%r8 3739 3740.Lprocess_extra_hash_loop: 3741 jz process_extra_in_trailer 3742 addq 0+0(%rsi),%r10 3743 adcq 8+0(%rsi),%r11 3744 adcq $1,%r12 3745 movq 0+0+0(%rbp),%rax 3746 movq %rax,%r15 3747 mulq %r10 3748 movq %rax,%r13 3749 movq %rdx,%r14 3750 movq 0+0+0(%rbp),%rax 3751 mulq %r11 3752 imulq %r12,%r15 3753 addq %rax,%r14 3754 adcq %rdx,%r15 3755 movq 8+0+0(%rbp),%rax 3756 movq %rax,%r9 3757 mulq %r10 3758 addq %rax,%r14 3759 adcq $0,%rdx 3760 movq %rdx,%r10 3761 movq 8+0+0(%rbp),%rax 3762 mulq %r11 3763 addq %rax,%r15 3764 adcq $0,%rdx 3765 imulq %r12,%r9 3766 addq %r10,%r15 3767 adcq %rdx,%r9 3768 movq %r13,%r10 3769 movq %r14,%r11 3770 movq %r15,%r12 3771 andq $3,%r12 3772 movq %r15,%r13 3773 andq $-4,%r13 3774 movq %r9,%r14 3775 shrdq $2,%r9,%r15 3776 shrq $2,%r9 3777 addq %r13,%r15 3778 adcq %r14,%r9 3779 addq %r15,%r10 3780 adcq %r9,%r11 3781 adcq $0,%r12 3782 3783 leaq 16(%rsi),%rsi 3784 subq $1,%r8 3785 jmp .Lprocess_extra_hash_loop 3786process_extra_in_trailer: 3787 andq $15,%rcx 3788 movq %rcx,%rbx 3789 jz .Ldo_length_block 3790 leaq -1(%rsi,%rcx,1),%rsi 3791 3792.Lprocess_extra_in_trailer_load: 3793 pslldq $1,%xmm15 3794 pinsrb $0,(%rsi),%xmm15 3795 leaq -1(%rsi),%rsi 3796 subq $1,%rcx 3797 jnz .Lprocess_extra_in_trailer_load 3798 3799.Lprocess_partial_block: 3800 3801 leaq .Land_masks(%rip),%r15 3802 shlq $4,%rbx 3803 pand -16(%r15,%rbx,1),%xmm15 
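// Fold the masked partial block into the Poly1305 accumulator (r10:r11:r12) and run one multiply-and-reduce step with the clamped key at 0(%rbp).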
3804.byte 102,77,15,126,253 3805 pextrq $1,%xmm15,%r14 3806 addq %r13,%r10 3807 adcq %r14,%r11 3808 adcq $1,%r12 3809 movq 0+0+0(%rbp),%rax 3810 movq %rax,%r15 3811 mulq %r10 3812 movq %rax,%r13 3813 movq %rdx,%r14 3814 movq 0+0+0(%rbp),%rax 3815 mulq %r11 3816 imulq %r12,%r15 3817 addq %rax,%r14 3818 adcq %rdx,%r15 3819 movq 8+0+0(%rbp),%rax 3820 movq %rax,%r9 3821 mulq %r10 3822 addq %rax,%r14 3823 adcq $0,%rdx 3824 movq %rdx,%r10 3825 movq 8+0+0(%rbp),%rax 3826 mulq %r11 3827 addq %rax,%r15 3828 adcq $0,%rdx 3829 imulq %r12,%r9 3830 addq %r10,%r15 3831 adcq %rdx,%r9 3832 movq %r13,%r10 3833 movq %r14,%r11 3834 movq %r15,%r12 3835 andq $3,%r12 3836 movq %r15,%r13 3837 andq $-4,%r13 3838 movq %r9,%r14 3839 shrdq $2,%r9,%r15 3840 shrq $2,%r9 3841 addq %r13,%r15 3842 adcq %r14,%r9 3843 addq %r15,%r10 3844 adcq %r9,%r11 3845 adcq $0,%r12 3846 3847 3848.Ldo_length_block: 3849 addq 0+0+32(%rbp),%r10 3850 adcq 8+0+32(%rbp),%r11 3851 adcq $1,%r12 3852 movq 0+0+0(%rbp),%rax 3853 movq %rax,%r15 3854 mulq %r10 3855 movq %rax,%r13 3856 movq %rdx,%r14 3857 movq 0+0+0(%rbp),%rax 3858 mulq %r11 3859 imulq %r12,%r15 3860 addq %rax,%r14 3861 adcq %rdx,%r15 3862 movq 8+0+0(%rbp),%rax 3863 movq %rax,%r9 3864 mulq %r10 3865 addq %rax,%r14 3866 adcq $0,%rdx 3867 movq %rdx,%r10 3868 movq 8+0+0(%rbp),%rax 3869 mulq %r11 3870 addq %rax,%r15 3871 adcq $0,%rdx 3872 imulq %r12,%r9 3873 addq %r10,%r15 3874 adcq %rdx,%r9 3875 movq %r13,%r10 3876 movq %r14,%r11 3877 movq %r15,%r12 3878 andq $3,%r12 3879 movq %r15,%r13 3880 andq $-4,%r13 3881 movq %r9,%r14 3882 shrdq $2,%r9,%r15 3883 shrq $2,%r9 3884 addq %r13,%r15 3885 adcq %r14,%r9 3886 addq %r15,%r10 3887 adcq %r9,%r11 3888 adcq $0,%r12 3889 3890 3891 movq %r10,%r13 3892 movq %r11,%r14 3893 movq %r12,%r15 3894 subq $-5,%r10 3895 sbbq $-1,%r11 3896 sbbq $3,%r12 3897 cmovcq %r13,%r10 3898 cmovcq %r14,%r11 3899 cmovcq %r15,%r12 3900 3901 addq 0+0+16(%rbp),%r10 3902 adcq 8+0+16(%rbp),%r11 3903 3904.cfi_remember_state 3905 addq $288 + 0 + 32,%rsp 3906.cfi_adjust_cfa_offset -(288 + 32) 3907 3908 popq %r9 3909.cfi_adjust_cfa_offset -8 3910.cfi_restore %r9 3911 movq %r10,(%r9) 3912 movq %r11,8(%r9) 3913 popq %r15 3914.cfi_adjust_cfa_offset -8 3915.cfi_restore %r15 3916 popq %r14 3917.cfi_adjust_cfa_offset -8 3918.cfi_restore %r14 3919 popq %r13 3920.cfi_adjust_cfa_offset -8 3921.cfi_restore %r13 3922 popq %r12 3923.cfi_adjust_cfa_offset -8 3924.cfi_restore %r12 3925 popq %rbx 3926.cfi_adjust_cfa_offset -8 3927.cfi_restore %rbx 3928 popq %rbp 3929.cfi_adjust_cfa_offset -8 3930.cfi_restore %rbp 3931 ret 3932 3933.Lseal_sse_128: 3934.cfi_restore_state 3935 movdqu .Lchacha20_consts(%rip),%xmm0 3936 movdqa %xmm0,%xmm1 3937 movdqa %xmm0,%xmm2 3938 movdqu 0(%r9),%xmm4 3939 movdqa %xmm4,%xmm5 3940 movdqa %xmm4,%xmm6 3941 movdqu 16(%r9),%xmm8 3942 movdqa %xmm8,%xmm9 3943 movdqa %xmm8,%xmm10 3944 movdqu 32(%r9),%xmm14 3945 movdqa %xmm14,%xmm12 3946 paddd .Lsse_inc(%rip),%xmm12 3947 movdqa %xmm12,%xmm13 3948 paddd .Lsse_inc(%rip),%xmm13 3949 movdqa %xmm4,%xmm7 3950 movdqa %xmm8,%xmm11 3951 movdqa %xmm12,%xmm15 3952 movq $10,%r10 3953 3954.Lseal_sse_128_rounds: 3955 paddd %xmm4,%xmm0 3956 pxor %xmm0,%xmm12 3957 pshufb .Lrol16(%rip),%xmm12 3958 paddd %xmm12,%xmm8 3959 pxor %xmm8,%xmm4 3960 movdqa %xmm4,%xmm3 3961 pslld $12,%xmm3 3962 psrld $20,%xmm4 3963 pxor %xmm3,%xmm4 3964 paddd %xmm4,%xmm0 3965 pxor %xmm0,%xmm12 3966 pshufb .Lrol8(%rip),%xmm12 3967 paddd %xmm12,%xmm8 3968 pxor %xmm8,%xmm4 3969 movdqa %xmm4,%xmm3 3970 pslld $7,%xmm3 3971 psrld $25,%xmm4 3972 pxor %xmm3,%xmm4 
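// palignr rotations move block 0 into diagonal form; blocks 1 and 2 receive the same column round below.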
3973.byte 102,15,58,15,228,4 3974.byte 102,69,15,58,15,192,8 3975.byte 102,69,15,58,15,228,12 3976 paddd %xmm5,%xmm1 3977 pxor %xmm1,%xmm13 3978 pshufb .Lrol16(%rip),%xmm13 3979 paddd %xmm13,%xmm9 3980 pxor %xmm9,%xmm5 3981 movdqa %xmm5,%xmm3 3982 pslld $12,%xmm3 3983 psrld $20,%xmm5 3984 pxor %xmm3,%xmm5 3985 paddd %xmm5,%xmm1 3986 pxor %xmm1,%xmm13 3987 pshufb .Lrol8(%rip),%xmm13 3988 paddd %xmm13,%xmm9 3989 pxor %xmm9,%xmm5 3990 movdqa %xmm5,%xmm3 3991 pslld $7,%xmm3 3992 psrld $25,%xmm5 3993 pxor %xmm3,%xmm5 3994.byte 102,15,58,15,237,4 3995.byte 102,69,15,58,15,201,8 3996.byte 102,69,15,58,15,237,12 3997 paddd %xmm6,%xmm2 3998 pxor %xmm2,%xmm14 3999 pshufb .Lrol16(%rip),%xmm14 4000 paddd %xmm14,%xmm10 4001 pxor %xmm10,%xmm6 4002 movdqa %xmm6,%xmm3 4003 pslld $12,%xmm3 4004 psrld $20,%xmm6 4005 pxor %xmm3,%xmm6 4006 paddd %xmm6,%xmm2 4007 pxor %xmm2,%xmm14 4008 pshufb .Lrol8(%rip),%xmm14 4009 paddd %xmm14,%xmm10 4010 pxor %xmm10,%xmm6 4011 movdqa %xmm6,%xmm3 4012 pslld $7,%xmm3 4013 psrld $25,%xmm6 4014 pxor %xmm3,%xmm6 4015.byte 102,15,58,15,246,4 4016.byte 102,69,15,58,15,210,8 4017.byte 102,69,15,58,15,246,12 4018 paddd %xmm4,%xmm0 4019 pxor %xmm0,%xmm12 4020 pshufb .Lrol16(%rip),%xmm12 4021 paddd %xmm12,%xmm8 4022 pxor %xmm8,%xmm4 4023 movdqa %xmm4,%xmm3 4024 pslld $12,%xmm3 4025 psrld $20,%xmm4 4026 pxor %xmm3,%xmm4 4027 paddd %xmm4,%xmm0 4028 pxor %xmm0,%xmm12 4029 pshufb .Lrol8(%rip),%xmm12 4030 paddd %xmm12,%xmm8 4031 pxor %xmm8,%xmm4 4032 movdqa %xmm4,%xmm3 4033 pslld $7,%xmm3 4034 psrld $25,%xmm4 4035 pxor %xmm3,%xmm4 4036.byte 102,15,58,15,228,12 4037.byte 102,69,15,58,15,192,8 4038.byte 102,69,15,58,15,228,4 4039 paddd %xmm5,%xmm1 4040 pxor %xmm1,%xmm13 4041 pshufb .Lrol16(%rip),%xmm13 4042 paddd %xmm13,%xmm9 4043 pxor %xmm9,%xmm5 4044 movdqa %xmm5,%xmm3 4045 pslld $12,%xmm3 4046 psrld $20,%xmm5 4047 pxor %xmm3,%xmm5 4048 paddd %xmm5,%xmm1 4049 pxor %xmm1,%xmm13 4050 pshufb .Lrol8(%rip),%xmm13 4051 paddd %xmm13,%xmm9 4052 pxor %xmm9,%xmm5 4053 movdqa %xmm5,%xmm3 4054 pslld $7,%xmm3 4055 psrld $25,%xmm5 4056 pxor %xmm3,%xmm5 4057.byte 102,15,58,15,237,12 4058.byte 102,69,15,58,15,201,8 4059.byte 102,69,15,58,15,237,4 4060 paddd %xmm6,%xmm2 4061 pxor %xmm2,%xmm14 4062 pshufb .Lrol16(%rip),%xmm14 4063 paddd %xmm14,%xmm10 4064 pxor %xmm10,%xmm6 4065 movdqa %xmm6,%xmm3 4066 pslld $12,%xmm3 4067 psrld $20,%xmm6 4068 pxor %xmm3,%xmm6 4069 paddd %xmm6,%xmm2 4070 pxor %xmm2,%xmm14 4071 pshufb .Lrol8(%rip),%xmm14 4072 paddd %xmm14,%xmm10 4073 pxor %xmm10,%xmm6 4074 movdqa %xmm6,%xmm3 4075 pslld $7,%xmm3 4076 psrld $25,%xmm6 4077 pxor %xmm3,%xmm6 4078.byte 102,15,58,15,246,12 4079.byte 102,69,15,58,15,210,8 4080.byte 102,69,15,58,15,246,4 4081 4082 decq %r10 4083 jnz .Lseal_sse_128_rounds 4084 paddd .Lchacha20_consts(%rip),%xmm0 4085 paddd .Lchacha20_consts(%rip),%xmm1 4086 paddd .Lchacha20_consts(%rip),%xmm2 4087 paddd %xmm7,%xmm4 4088 paddd %xmm7,%xmm5 4089 paddd %xmm7,%xmm6 4090 paddd %xmm11,%xmm8 4091 paddd %xmm11,%xmm9 4092 paddd %xmm15,%xmm12 4093 paddd .Lsse_inc(%rip),%xmm15 4094 paddd %xmm15,%xmm13 4095 4096 pand .Lclamp(%rip),%xmm2 4097 movdqa %xmm2,0+0(%rbp) 4098 movdqa %xmm6,0+16(%rbp) 4099 4100 movq %r8,%r8 4101 call poly_hash_ad_internal 4102 jmp .Lseal_sse_128_tail_xor 4103.size chacha20_poly1305_seal, .-chacha20_poly1305_seal 4104.cfi_endproc 4105 4106 4107.type chacha20_poly1305_open_avx2,@function 4108.align 64 4109chacha20_poly1305_open_avx2: 4110.cfi_startproc 4111 4112 4113.cfi_adjust_cfa_offset 8 4114.cfi_offset %rbp,-16 4115.cfi_adjust_cfa_offset 8 
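// No pushes happen here: these CFI directives restate the stack frame already built by chacha20_poly1305_open, which branches to this AVX2 path.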
4116.cfi_offset %rbx,-24 4117.cfi_adjust_cfa_offset 8 4118.cfi_offset %r12,-32 4119.cfi_adjust_cfa_offset 8 4120.cfi_offset %r13,-40 4121.cfi_adjust_cfa_offset 8 4122.cfi_offset %r14,-48 4123.cfi_adjust_cfa_offset 8 4124.cfi_offset %r15,-56 4125.cfi_adjust_cfa_offset 8 4126.cfi_offset %r9,-64 4127.cfi_adjust_cfa_offset 288 + 32 4128 4129 vzeroupper 4130 vmovdqa .Lchacha20_consts(%rip),%ymm0 4131 vbroadcasti128 0(%r9),%ymm4 4132 vbroadcasti128 16(%r9),%ymm8 4133 vbroadcasti128 32(%r9),%ymm12 4134 vpaddd .Lavx2_init(%rip),%ymm12,%ymm12 4135 cmpq $192,%rbx 4136 jbe .Lopen_avx2_192 4137 cmpq $320,%rbx 4138 jbe .Lopen_avx2_320 4139 4140 vmovdqa %ymm4,0+64(%rbp) 4141 vmovdqa %ymm8,0+96(%rbp) 4142 vmovdqa %ymm12,0+160(%rbp) 4143 movq $10,%r10 4144.Lopen_avx2_init_rounds: 4145 vpaddd %ymm4,%ymm0,%ymm0 4146 vpxor %ymm0,%ymm12,%ymm12 4147 vpshufb .Lrol16(%rip),%ymm12,%ymm12 4148 vpaddd %ymm12,%ymm8,%ymm8 4149 vpxor %ymm8,%ymm4,%ymm4 4150 vpsrld $20,%ymm4,%ymm3 4151 vpslld $12,%ymm4,%ymm4 4152 vpxor %ymm3,%ymm4,%ymm4 4153 vpaddd %ymm4,%ymm0,%ymm0 4154 vpxor %ymm0,%ymm12,%ymm12 4155 vpshufb .Lrol8(%rip),%ymm12,%ymm12 4156 vpaddd %ymm12,%ymm8,%ymm8 4157 vpxor %ymm8,%ymm4,%ymm4 4158 vpslld $7,%ymm4,%ymm3 4159 vpsrld $25,%ymm4,%ymm4 4160 vpxor %ymm3,%ymm4,%ymm4 4161 vpalignr $12,%ymm12,%ymm12,%ymm12 4162 vpalignr $8,%ymm8,%ymm8,%ymm8 4163 vpalignr $4,%ymm4,%ymm4,%ymm4 4164 vpaddd %ymm4,%ymm0,%ymm0 4165 vpxor %ymm0,%ymm12,%ymm12 4166 vpshufb .Lrol16(%rip),%ymm12,%ymm12 4167 vpaddd %ymm12,%ymm8,%ymm8 4168 vpxor %ymm8,%ymm4,%ymm4 4169 vpsrld $20,%ymm4,%ymm3 4170 vpslld $12,%ymm4,%ymm4 4171 vpxor %ymm3,%ymm4,%ymm4 4172 vpaddd %ymm4,%ymm0,%ymm0 4173 vpxor %ymm0,%ymm12,%ymm12 4174 vpshufb .Lrol8(%rip),%ymm12,%ymm12 4175 vpaddd %ymm12,%ymm8,%ymm8 4176 vpxor %ymm8,%ymm4,%ymm4 4177 vpslld $7,%ymm4,%ymm3 4178 vpsrld $25,%ymm4,%ymm4 4179 vpxor %ymm3,%ymm4,%ymm4 4180 vpalignr $4,%ymm12,%ymm12,%ymm12 4181 vpalignr $8,%ymm8,%ymm8,%ymm8 4182 vpalignr $12,%ymm4,%ymm4,%ymm4 4183 4184 decq %r10 4185 jne .Lopen_avx2_init_rounds 4186 vpaddd .Lchacha20_consts(%rip),%ymm0,%ymm0 4187 vpaddd 0+64(%rbp),%ymm4,%ymm4 4188 vpaddd 0+96(%rbp),%ymm8,%ymm8 4189 vpaddd 0+160(%rbp),%ymm12,%ymm12 4190 4191 vperm2i128 $0x02,%ymm0,%ymm4,%ymm3 4192 4193 vpand .Lclamp(%rip),%ymm3,%ymm3 4194 vmovdqa %ymm3,0+0(%rbp) 4195 4196 vperm2i128 $0x13,%ymm0,%ymm4,%ymm0 4197 vperm2i128 $0x13,%ymm8,%ymm12,%ymm4 4198 4199 movq %r8,%r8 4200 call poly_hash_ad_internal 4201 4202 xorq %rcx,%rcx 4203.Lopen_avx2_init_hash: 4204 addq 0+0(%rsi,%rcx,1),%r10 4205 adcq 8+0(%rsi,%rcx,1),%r11 4206 adcq $1,%r12 4207 movq 0+0+0(%rbp),%rax 4208 movq %rax,%r15 4209 mulq %r10 4210 movq %rax,%r13 4211 movq %rdx,%r14 4212 movq 0+0+0(%rbp),%rax 4213 mulq %r11 4214 imulq %r12,%r15 4215 addq %rax,%r14 4216 adcq %rdx,%r15 4217 movq 8+0+0(%rbp),%rax 4218 movq %rax,%r9 4219 mulq %r10 4220 addq %rax,%r14 4221 adcq $0,%rdx 4222 movq %rdx,%r10 4223 movq 8+0+0(%rbp),%rax 4224 mulq %r11 4225 addq %rax,%r15 4226 adcq $0,%rdx 4227 imulq %r12,%r9 4228 addq %r10,%r15 4229 adcq %rdx,%r9 4230 movq %r13,%r10 4231 movq %r14,%r11 4232 movq %r15,%r12 4233 andq $3,%r12 4234 movq %r15,%r13 4235 andq $-4,%r13 4236 movq %r9,%r14 4237 shrdq $2,%r9,%r15 4238 shrq $2,%r9 4239 addq %r13,%r15 4240 adcq %r14,%r9 4241 addq %r15,%r10 4242 adcq %r9,%r11 4243 adcq $0,%r12 4244 4245 addq $16,%rcx 4246 cmpq $64,%rcx 4247 jne .Lopen_avx2_init_hash 4248 4249 vpxor 0(%rsi),%ymm0,%ymm0 4250 vpxor 32(%rsi),%ymm4,%ymm4 4251 4252 vmovdqu %ymm0,0(%rdi) 4253 vmovdqu %ymm4,32(%rdi) 4254 leaq 64(%rsi),%rsi 4255 leaq 
64(%rdi),%rdi 4256 subq $64,%rbx 4257.Lopen_avx2_main_loop: 4258 4259 cmpq $512,%rbx 4260 jb .Lopen_avx2_main_loop_done 4261 vmovdqa .Lchacha20_consts(%rip),%ymm0 4262 vmovdqa 0+64(%rbp),%ymm4 4263 vmovdqa 0+96(%rbp),%ymm8 4264 vmovdqa %ymm0,%ymm1 4265 vmovdqa %ymm4,%ymm5 4266 vmovdqa %ymm8,%ymm9 4267 vmovdqa %ymm0,%ymm2 4268 vmovdqa %ymm4,%ymm6 4269 vmovdqa %ymm8,%ymm10 4270 vmovdqa %ymm0,%ymm3 4271 vmovdqa %ymm4,%ymm7 4272 vmovdqa %ymm8,%ymm11 4273 vmovdqa .Lavx2_inc(%rip),%ymm12 4274 vpaddd 0+160(%rbp),%ymm12,%ymm15 4275 vpaddd %ymm15,%ymm12,%ymm14 4276 vpaddd %ymm14,%ymm12,%ymm13 4277 vpaddd %ymm13,%ymm12,%ymm12 4278 vmovdqa %ymm15,0+256(%rbp) 4279 vmovdqa %ymm14,0+224(%rbp) 4280 vmovdqa %ymm13,0+192(%rbp) 4281 vmovdqa %ymm12,0+160(%rbp) 4282 4283 xorq %rcx,%rcx 4284.Lopen_avx2_main_loop_rounds: 4285 addq 0+0(%rsi,%rcx,1),%r10 4286 adcq 8+0(%rsi,%rcx,1),%r11 4287 adcq $1,%r12 4288 vmovdqa %ymm8,0+128(%rbp) 4289 vmovdqa .Lrol16(%rip),%ymm8 4290 vpaddd %ymm7,%ymm3,%ymm3 4291 vpaddd %ymm6,%ymm2,%ymm2 4292 vpaddd %ymm5,%ymm1,%ymm1 4293 vpaddd %ymm4,%ymm0,%ymm0 4294 vpxor %ymm3,%ymm15,%ymm15 4295 vpxor %ymm2,%ymm14,%ymm14 4296 vpxor %ymm1,%ymm13,%ymm13 4297 vpxor %ymm0,%ymm12,%ymm12 4298 movq 0+0+0(%rbp),%rdx 4299 movq %rdx,%r15 4300 mulxq %r10,%r13,%r14 4301 mulxq %r11,%rax,%rdx 4302 imulq %r12,%r15 4303 addq %rax,%r14 4304 adcq %rdx,%r15 4305 vpshufb %ymm8,%ymm15,%ymm15 4306 vpshufb %ymm8,%ymm14,%ymm14 4307 vpshufb %ymm8,%ymm13,%ymm13 4308 vpshufb %ymm8,%ymm12,%ymm12 4309 vpaddd %ymm15,%ymm11,%ymm11 4310 vpaddd %ymm14,%ymm10,%ymm10 4311 vpaddd %ymm13,%ymm9,%ymm9 4312 vpaddd 0+128(%rbp),%ymm12,%ymm8 4313 vpxor %ymm11,%ymm7,%ymm7 4314 movq 8+0+0(%rbp),%rdx 4315 mulxq %r10,%r10,%rax 4316 addq %r10,%r14 4317 mulxq %r11,%r11,%r9 4318 adcq %r11,%r15 4319 adcq $0,%r9 4320 imulq %r12,%rdx 4321 vpxor %ymm10,%ymm6,%ymm6 4322 vpxor %ymm9,%ymm5,%ymm5 4323 vpxor %ymm8,%ymm4,%ymm4 4324 vmovdqa %ymm8,0+128(%rbp) 4325 vpsrld $20,%ymm7,%ymm8 4326 vpslld $32-20,%ymm7,%ymm7 4327 vpxor %ymm8,%ymm7,%ymm7 4328 vpsrld $20,%ymm6,%ymm8 4329 vpslld $32-20,%ymm6,%ymm6 4330 vpxor %ymm8,%ymm6,%ymm6 4331 vpsrld $20,%ymm5,%ymm8 4332 vpslld $32-20,%ymm5,%ymm5 4333 addq %rax,%r15 4334 adcq %rdx,%r9 4335 vpxor %ymm8,%ymm5,%ymm5 4336 vpsrld $20,%ymm4,%ymm8 4337 vpslld $32-20,%ymm4,%ymm4 4338 vpxor %ymm8,%ymm4,%ymm4 4339 vmovdqa .Lrol8(%rip),%ymm8 4340 vpaddd %ymm7,%ymm3,%ymm3 4341 vpaddd %ymm6,%ymm2,%ymm2 4342 vpaddd %ymm5,%ymm1,%ymm1 4343 vpaddd %ymm4,%ymm0,%ymm0 4344 vpxor %ymm3,%ymm15,%ymm15 4345 movq %r13,%r10 4346 movq %r14,%r11 4347 movq %r15,%r12 4348 andq $3,%r12 4349 movq %r15,%r13 4350 andq $-4,%r13 4351 movq %r9,%r14 4352 shrdq $2,%r9,%r15 4353 shrq $2,%r9 4354 addq %r13,%r15 4355 adcq %r14,%r9 4356 addq %r15,%r10 4357 adcq %r9,%r11 4358 adcq $0,%r12 4359 vpxor %ymm2,%ymm14,%ymm14 4360 vpxor %ymm1,%ymm13,%ymm13 4361 vpxor %ymm0,%ymm12,%ymm12 4362 vpshufb %ymm8,%ymm15,%ymm15 4363 vpshufb %ymm8,%ymm14,%ymm14 4364 vpshufb %ymm8,%ymm13,%ymm13 4365 vpshufb %ymm8,%ymm12,%ymm12 4366 vpaddd %ymm15,%ymm11,%ymm11 4367 vpaddd %ymm14,%ymm10,%ymm10 4368 addq 0+16(%rsi,%rcx,1),%r10 4369 adcq 8+16(%rsi,%rcx,1),%r11 4370 adcq $1,%r12 4371 vpaddd %ymm13,%ymm9,%ymm9 4372 vpaddd 0+128(%rbp),%ymm12,%ymm8 4373 vpxor %ymm11,%ymm7,%ymm7 4374 vpxor %ymm10,%ymm6,%ymm6 4375 vpxor %ymm9,%ymm5,%ymm5 4376 vpxor %ymm8,%ymm4,%ymm4 4377 vmovdqa %ymm8,0+128(%rbp) 4378 vpsrld $25,%ymm7,%ymm8 4379 movq 0+0+0(%rbp),%rdx 4380 movq %rdx,%r15 4381 mulxq %r10,%r13,%r14 4382 mulxq %r11,%rax,%rdx 4383 imulq %r12,%r15 4384 addq %rax,%r14 4385 adcq 
%rdx,%r15 4386 vpslld $32-25,%ymm7,%ymm7 4387 vpxor %ymm8,%ymm7,%ymm7 4388 vpsrld $25,%ymm6,%ymm8 4389 vpslld $32-25,%ymm6,%ymm6 4390 vpxor %ymm8,%ymm6,%ymm6 4391 vpsrld $25,%ymm5,%ymm8 4392 vpslld $32-25,%ymm5,%ymm5 4393 vpxor %ymm8,%ymm5,%ymm5 4394 vpsrld $25,%ymm4,%ymm8 4395 vpslld $32-25,%ymm4,%ymm4 4396 vpxor %ymm8,%ymm4,%ymm4 4397 vmovdqa 0+128(%rbp),%ymm8 4398 vpalignr $4,%ymm7,%ymm7,%ymm7 4399 vpalignr $8,%ymm11,%ymm11,%ymm11 4400 vpalignr $12,%ymm15,%ymm15,%ymm15 4401 vpalignr $4,%ymm6,%ymm6,%ymm6 4402 vpalignr $8,%ymm10,%ymm10,%ymm10 4403 vpalignr $12,%ymm14,%ymm14,%ymm14 4404 movq 8+0+0(%rbp),%rdx 4405 mulxq %r10,%r10,%rax 4406 addq %r10,%r14 4407 mulxq %r11,%r11,%r9 4408 adcq %r11,%r15 4409 adcq $0,%r9 4410 imulq %r12,%rdx 4411 vpalignr $4,%ymm5,%ymm5,%ymm5 4412 vpalignr $8,%ymm9,%ymm9,%ymm9 4413 vpalignr $12,%ymm13,%ymm13,%ymm13 4414 vpalignr $4,%ymm4,%ymm4,%ymm4 4415 vpalignr $8,%ymm8,%ymm8,%ymm8 4416 vpalignr $12,%ymm12,%ymm12,%ymm12 4417 vmovdqa %ymm8,0+128(%rbp) 4418 vmovdqa .Lrol16(%rip),%ymm8 4419 vpaddd %ymm7,%ymm3,%ymm3 4420 vpaddd %ymm6,%ymm2,%ymm2 4421 vpaddd %ymm5,%ymm1,%ymm1 4422 vpaddd %ymm4,%ymm0,%ymm0 4423 vpxor %ymm3,%ymm15,%ymm15 4424 vpxor %ymm2,%ymm14,%ymm14 4425 vpxor %ymm1,%ymm13,%ymm13 4426 vpxor %ymm0,%ymm12,%ymm12 4427 vpshufb %ymm8,%ymm15,%ymm15 4428 vpshufb %ymm8,%ymm14,%ymm14 4429 addq %rax,%r15 4430 adcq %rdx,%r9 4431 vpshufb %ymm8,%ymm13,%ymm13 4432 vpshufb %ymm8,%ymm12,%ymm12 4433 vpaddd %ymm15,%ymm11,%ymm11 4434 vpaddd %ymm14,%ymm10,%ymm10 4435 vpaddd %ymm13,%ymm9,%ymm9 4436 vpaddd 0+128(%rbp),%ymm12,%ymm8 4437 vpxor %ymm11,%ymm7,%ymm7 4438 vpxor %ymm10,%ymm6,%ymm6 4439 vpxor %ymm9,%ymm5,%ymm5 4440 movq %r13,%r10 4441 movq %r14,%r11 4442 movq %r15,%r12 4443 andq $3,%r12 4444 movq %r15,%r13 4445 andq $-4,%r13 4446 movq %r9,%r14 4447 shrdq $2,%r9,%r15 4448 shrq $2,%r9 4449 addq %r13,%r15 4450 adcq %r14,%r9 4451 addq %r15,%r10 4452 adcq %r9,%r11 4453 adcq $0,%r12 4454 vpxor %ymm8,%ymm4,%ymm4 4455 vmovdqa %ymm8,0+128(%rbp) 4456 vpsrld $20,%ymm7,%ymm8 4457 vpslld $32-20,%ymm7,%ymm7 4458 vpxor %ymm8,%ymm7,%ymm7 4459 vpsrld $20,%ymm6,%ymm8 4460 vpslld $32-20,%ymm6,%ymm6 4461 vpxor %ymm8,%ymm6,%ymm6 4462 addq 0+32(%rsi,%rcx,1),%r10 4463 adcq 8+32(%rsi,%rcx,1),%r11 4464 adcq $1,%r12 4465 4466 leaq 48(%rcx),%rcx 4467 vpsrld $20,%ymm5,%ymm8 4468 vpslld $32-20,%ymm5,%ymm5 4469 vpxor %ymm8,%ymm5,%ymm5 4470 vpsrld $20,%ymm4,%ymm8 4471 vpslld $32-20,%ymm4,%ymm4 4472 vpxor %ymm8,%ymm4,%ymm4 4473 vmovdqa .Lrol8(%rip),%ymm8 4474 vpaddd %ymm7,%ymm3,%ymm3 4475 vpaddd %ymm6,%ymm2,%ymm2 4476 vpaddd %ymm5,%ymm1,%ymm1 4477 vpaddd %ymm4,%ymm0,%ymm0 4478 vpxor %ymm3,%ymm15,%ymm15 4479 vpxor %ymm2,%ymm14,%ymm14 4480 vpxor %ymm1,%ymm13,%ymm13 4481 vpxor %ymm0,%ymm12,%ymm12 4482 vpshufb %ymm8,%ymm15,%ymm15 4483 vpshufb %ymm8,%ymm14,%ymm14 4484 vpshufb %ymm8,%ymm13,%ymm13 4485 movq 0+0+0(%rbp),%rdx 4486 movq %rdx,%r15 4487 mulxq %r10,%r13,%r14 4488 mulxq %r11,%rax,%rdx 4489 imulq %r12,%r15 4490 addq %rax,%r14 4491 adcq %rdx,%r15 4492 vpshufb %ymm8,%ymm12,%ymm12 4493 vpaddd %ymm15,%ymm11,%ymm11 4494 vpaddd %ymm14,%ymm10,%ymm10 4495 vpaddd %ymm13,%ymm9,%ymm9 4496 vpaddd 0+128(%rbp),%ymm12,%ymm8 4497 vpxor %ymm11,%ymm7,%ymm7 4498 vpxor %ymm10,%ymm6,%ymm6 4499 vpxor %ymm9,%ymm5,%ymm5 4500 movq 8+0+0(%rbp),%rdx 4501 mulxq %r10,%r10,%rax 4502 addq %r10,%r14 4503 mulxq %r11,%r11,%r9 4504 adcq %r11,%r15 4505 adcq $0,%r9 4506 imulq %r12,%rdx 4507 vpxor %ymm8,%ymm4,%ymm4 4508 vmovdqa %ymm8,0+128(%rbp) 4509 vpsrld $25,%ymm7,%ymm8 4510 vpslld $32-25,%ymm7,%ymm7 4511 vpxor 
%ymm8,%ymm7,%ymm7 4512 vpsrld $25,%ymm6,%ymm8 4513 vpslld $32-25,%ymm6,%ymm6 4514 vpxor %ymm8,%ymm6,%ymm6 4515 addq %rax,%r15 4516 adcq %rdx,%r9 4517 vpsrld $25,%ymm5,%ymm8 4518 vpslld $32-25,%ymm5,%ymm5 4519 vpxor %ymm8,%ymm5,%ymm5 4520 vpsrld $25,%ymm4,%ymm8 4521 vpslld $32-25,%ymm4,%ymm4 4522 vpxor %ymm8,%ymm4,%ymm4 4523 vmovdqa 0+128(%rbp),%ymm8 4524 vpalignr $12,%ymm7,%ymm7,%ymm7 4525 vpalignr $8,%ymm11,%ymm11,%ymm11 4526 vpalignr $4,%ymm15,%ymm15,%ymm15 4527 vpalignr $12,%ymm6,%ymm6,%ymm6 4528 vpalignr $8,%ymm10,%ymm10,%ymm10 4529 vpalignr $4,%ymm14,%ymm14,%ymm14 4530 vpalignr $12,%ymm5,%ymm5,%ymm5 4531 vpalignr $8,%ymm9,%ymm9,%ymm9 4532 vpalignr $4,%ymm13,%ymm13,%ymm13 4533 vpalignr $12,%ymm4,%ymm4,%ymm4 4534 vpalignr $8,%ymm8,%ymm8,%ymm8 4535 movq %r13,%r10 4536 movq %r14,%r11 4537 movq %r15,%r12 4538 andq $3,%r12 4539 movq %r15,%r13 4540 andq $-4,%r13 4541 movq %r9,%r14 4542 shrdq $2,%r9,%r15 4543 shrq $2,%r9 4544 addq %r13,%r15 4545 adcq %r14,%r9 4546 addq %r15,%r10 4547 adcq %r9,%r11 4548 adcq $0,%r12 4549 vpalignr $4,%ymm12,%ymm12,%ymm12 4550 4551 cmpq $60*8,%rcx 4552 jne .Lopen_avx2_main_loop_rounds 4553 vpaddd .Lchacha20_consts(%rip),%ymm3,%ymm3 4554 vpaddd 0+64(%rbp),%ymm7,%ymm7 4555 vpaddd 0+96(%rbp),%ymm11,%ymm11 4556 vpaddd 0+256(%rbp),%ymm15,%ymm15 4557 vpaddd .Lchacha20_consts(%rip),%ymm2,%ymm2 4558 vpaddd 0+64(%rbp),%ymm6,%ymm6 4559 vpaddd 0+96(%rbp),%ymm10,%ymm10 4560 vpaddd 0+224(%rbp),%ymm14,%ymm14 4561 vpaddd .Lchacha20_consts(%rip),%ymm1,%ymm1 4562 vpaddd 0+64(%rbp),%ymm5,%ymm5 4563 vpaddd 0+96(%rbp),%ymm9,%ymm9 4564 vpaddd 0+192(%rbp),%ymm13,%ymm13 4565 vpaddd .Lchacha20_consts(%rip),%ymm0,%ymm0 4566 vpaddd 0+64(%rbp),%ymm4,%ymm4 4567 vpaddd 0+96(%rbp),%ymm8,%ymm8 4568 vpaddd 0+160(%rbp),%ymm12,%ymm12 4569 4570 vmovdqa %ymm0,0+128(%rbp) 4571 addq 0+60*8(%rsi),%r10 4572 adcq 8+60*8(%rsi),%r11 4573 adcq $1,%r12 4574 vperm2i128 $0x02,%ymm3,%ymm7,%ymm0 4575 vperm2i128 $0x13,%ymm3,%ymm7,%ymm7 4576 vperm2i128 $0x02,%ymm11,%ymm15,%ymm3 4577 vperm2i128 $0x13,%ymm11,%ymm15,%ymm11 4578 vpxor 0+0(%rsi),%ymm0,%ymm0 4579 vpxor 32+0(%rsi),%ymm3,%ymm3 4580 vpxor 64+0(%rsi),%ymm7,%ymm7 4581 vpxor 96+0(%rsi),%ymm11,%ymm11 4582 vmovdqu %ymm0,0+0(%rdi) 4583 vmovdqu %ymm3,32+0(%rdi) 4584 vmovdqu %ymm7,64+0(%rdi) 4585 vmovdqu %ymm11,96+0(%rdi) 4586 4587 vmovdqa 0+128(%rbp),%ymm0 4588 movq 0+0+0(%rbp),%rax 4589 movq %rax,%r15 4590 mulq %r10 4591 movq %rax,%r13 4592 movq %rdx,%r14 4593 movq 0+0+0(%rbp),%rax 4594 mulq %r11 4595 imulq %r12,%r15 4596 addq %rax,%r14 4597 adcq %rdx,%r15 4598 movq 8+0+0(%rbp),%rax 4599 movq %rax,%r9 4600 mulq %r10 4601 addq %rax,%r14 4602 adcq $0,%rdx 4603 movq %rdx,%r10 4604 movq 8+0+0(%rbp),%rax 4605 mulq %r11 4606 addq %rax,%r15 4607 adcq $0,%rdx 4608 imulq %r12,%r9 4609 addq %r10,%r15 4610 adcq %rdx,%r9 4611 movq %r13,%r10 4612 movq %r14,%r11 4613 movq %r15,%r12 4614 andq $3,%r12 4615 movq %r15,%r13 4616 andq $-4,%r13 4617 movq %r9,%r14 4618 shrdq $2,%r9,%r15 4619 shrq $2,%r9 4620 addq %r13,%r15 4621 adcq %r14,%r9 4622 addq %r15,%r10 4623 adcq %r9,%r11 4624 adcq $0,%r12 4625 vperm2i128 $0x02,%ymm2,%ymm6,%ymm3 4626 vperm2i128 $0x13,%ymm2,%ymm6,%ymm6 4627 vperm2i128 $0x02,%ymm10,%ymm14,%ymm2 4628 vperm2i128 $0x13,%ymm10,%ymm14,%ymm10 4629 vpxor 0+128(%rsi),%ymm3,%ymm3 4630 vpxor 32+128(%rsi),%ymm2,%ymm2 4631 vpxor 64+128(%rsi),%ymm6,%ymm6 4632 vpxor 96+128(%rsi),%ymm10,%ymm10 4633 vmovdqu %ymm3,0+128(%rdi) 4634 vmovdqu %ymm2,32+128(%rdi) 4635 vmovdqu %ymm6,64+128(%rdi) 4636 vmovdqu %ymm10,96+128(%rdi) 4637 addq 0+60*8+16(%rsi),%r10 4638 adcq 
8+60*8+16(%rsi),%r11 4639 adcq $1,%r12 4640 vperm2i128 $0x02,%ymm1,%ymm5,%ymm3 4641 vperm2i128 $0x13,%ymm1,%ymm5,%ymm5 4642 vperm2i128 $0x02,%ymm9,%ymm13,%ymm1 4643 vperm2i128 $0x13,%ymm9,%ymm13,%ymm9 4644 vpxor 0+256(%rsi),%ymm3,%ymm3 4645 vpxor 32+256(%rsi),%ymm1,%ymm1 4646 vpxor 64+256(%rsi),%ymm5,%ymm5 4647 vpxor 96+256(%rsi),%ymm9,%ymm9 4648 vmovdqu %ymm3,0+256(%rdi) 4649 vmovdqu %ymm1,32+256(%rdi) 4650 vmovdqu %ymm5,64+256(%rdi) 4651 vmovdqu %ymm9,96+256(%rdi) 4652 movq 0+0+0(%rbp),%rax 4653 movq %rax,%r15 4654 mulq %r10 4655 movq %rax,%r13 4656 movq %rdx,%r14 4657 movq 0+0+0(%rbp),%rax 4658 mulq %r11 4659 imulq %r12,%r15 4660 addq %rax,%r14 4661 adcq %rdx,%r15 4662 movq 8+0+0(%rbp),%rax 4663 movq %rax,%r9 4664 mulq %r10 4665 addq %rax,%r14 4666 adcq $0,%rdx 4667 movq %rdx,%r10 4668 movq 8+0+0(%rbp),%rax 4669 mulq %r11 4670 addq %rax,%r15 4671 adcq $0,%rdx 4672 imulq %r12,%r9 4673 addq %r10,%r15 4674 adcq %rdx,%r9 4675 movq %r13,%r10 4676 movq %r14,%r11 4677 movq %r15,%r12 4678 andq $3,%r12 4679 movq %r15,%r13 4680 andq $-4,%r13 4681 movq %r9,%r14 4682 shrdq $2,%r9,%r15 4683 shrq $2,%r9 4684 addq %r13,%r15 4685 adcq %r14,%r9 4686 addq %r15,%r10 4687 adcq %r9,%r11 4688 adcq $0,%r12 4689 vperm2i128 $0x02,%ymm0,%ymm4,%ymm3 4690 vperm2i128 $0x13,%ymm0,%ymm4,%ymm4 4691 vperm2i128 $0x02,%ymm8,%ymm12,%ymm0 4692 vperm2i128 $0x13,%ymm8,%ymm12,%ymm8 4693 vpxor 0+384(%rsi),%ymm3,%ymm3 4694 vpxor 32+384(%rsi),%ymm0,%ymm0 4695 vpxor 64+384(%rsi),%ymm4,%ymm4 4696 vpxor 96+384(%rsi),%ymm8,%ymm8 4697 vmovdqu %ymm3,0+384(%rdi) 4698 vmovdqu %ymm0,32+384(%rdi) 4699 vmovdqu %ymm4,64+384(%rdi) 4700 vmovdqu %ymm8,96+384(%rdi) 4701 4702 leaq 512(%rsi),%rsi 4703 leaq 512(%rdi),%rdi 4704 subq $512,%rbx 4705 jmp .Lopen_avx2_main_loop 4706.Lopen_avx2_main_loop_done: 4707 testq %rbx,%rbx 4708 vzeroupper 4709 je .Lopen_sse_finalize 4710 4711 cmpq $384,%rbx 4712 ja .Lopen_avx2_tail_512 4713 cmpq $256,%rbx 4714 ja .Lopen_avx2_tail_384 4715 cmpq $128,%rbx 4716 ja .Lopen_avx2_tail_256 4717 vmovdqa .Lchacha20_consts(%rip),%ymm0 4718 vmovdqa 0+64(%rbp),%ymm4 4719 vmovdqa 0+96(%rbp),%ymm8 4720 vmovdqa .Lavx2_inc(%rip),%ymm12 4721 vpaddd 0+160(%rbp),%ymm12,%ymm12 4722 vmovdqa %ymm12,0+160(%rbp) 4723 4724 xorq %r8,%r8 4725 movq %rbx,%rcx 4726 andq $-16,%rcx 4727 testq %rcx,%rcx 4728 je .Lopen_avx2_tail_128_rounds 4729.Lopen_avx2_tail_128_rounds_and_x1hash: 4730 addq 0+0(%rsi,%r8,1),%r10 4731 adcq 8+0(%rsi,%r8,1),%r11 4732 adcq $1,%r12 4733 movq 0+0+0(%rbp),%rax 4734 movq %rax,%r15 4735 mulq %r10 4736 movq %rax,%r13 4737 movq %rdx,%r14 4738 movq 0+0+0(%rbp),%rax 4739 mulq %r11 4740 imulq %r12,%r15 4741 addq %rax,%r14 4742 adcq %rdx,%r15 4743 movq 8+0+0(%rbp),%rax 4744 movq %rax,%r9 4745 mulq %r10 4746 addq %rax,%r14 4747 adcq $0,%rdx 4748 movq %rdx,%r10 4749 movq 8+0+0(%rbp),%rax 4750 mulq %r11 4751 addq %rax,%r15 4752 adcq $0,%rdx 4753 imulq %r12,%r9 4754 addq %r10,%r15 4755 adcq %rdx,%r9 4756 movq %r13,%r10 4757 movq %r14,%r11 4758 movq %r15,%r12 4759 andq $3,%r12 4760 movq %r15,%r13 4761 andq $-4,%r13 4762 movq %r9,%r14 4763 shrdq $2,%r9,%r15 4764 shrq $2,%r9 4765 addq %r13,%r15 4766 adcq %r14,%r9 4767 addq %r15,%r10 4768 adcq %r9,%r11 4769 adcq $0,%r12 4770 4771.Lopen_avx2_tail_128_rounds: 4772 addq $16,%r8 4773 vpaddd %ymm4,%ymm0,%ymm0 4774 vpxor %ymm0,%ymm12,%ymm12 4775 vpshufb .Lrol16(%rip),%ymm12,%ymm12 4776 vpaddd %ymm12,%ymm8,%ymm8 4777 vpxor %ymm8,%ymm4,%ymm4 4778 vpsrld $20,%ymm4,%ymm3 4779 vpslld $12,%ymm4,%ymm4 4780 vpxor %ymm3,%ymm4,%ymm4 4781 vpaddd %ymm4,%ymm0,%ymm0 4782 vpxor %ymm0,%ymm12,%ymm12 4783 
vpshufb .Lrol8(%rip),%ymm12,%ymm12 4784 vpaddd %ymm12,%ymm8,%ymm8 4785 vpxor %ymm8,%ymm4,%ymm4 4786 vpslld $7,%ymm4,%ymm3 4787 vpsrld $25,%ymm4,%ymm4 4788 vpxor %ymm3,%ymm4,%ymm4 4789 vpalignr $12,%ymm12,%ymm12,%ymm12 4790 vpalignr $8,%ymm8,%ymm8,%ymm8 4791 vpalignr $4,%ymm4,%ymm4,%ymm4 4792 vpaddd %ymm4,%ymm0,%ymm0 4793 vpxor %ymm0,%ymm12,%ymm12 4794 vpshufb .Lrol16(%rip),%ymm12,%ymm12 4795 vpaddd %ymm12,%ymm8,%ymm8 4796 vpxor %ymm8,%ymm4,%ymm4 4797 vpsrld $20,%ymm4,%ymm3 4798 vpslld $12,%ymm4,%ymm4 4799 vpxor %ymm3,%ymm4,%ymm4 4800 vpaddd %ymm4,%ymm0,%ymm0 4801 vpxor %ymm0,%ymm12,%ymm12 4802 vpshufb .Lrol8(%rip),%ymm12,%ymm12 4803 vpaddd %ymm12,%ymm8,%ymm8 4804 vpxor %ymm8,%ymm4,%ymm4 4805 vpslld $7,%ymm4,%ymm3 4806 vpsrld $25,%ymm4,%ymm4 4807 vpxor %ymm3,%ymm4,%ymm4 4808 vpalignr $4,%ymm12,%ymm12,%ymm12 4809 vpalignr $8,%ymm8,%ymm8,%ymm8 4810 vpalignr $12,%ymm4,%ymm4,%ymm4 4811 4812 cmpq %rcx,%r8 4813 jb .Lopen_avx2_tail_128_rounds_and_x1hash 4814 cmpq $160,%r8 4815 jne .Lopen_avx2_tail_128_rounds 4816 vpaddd .Lchacha20_consts(%rip),%ymm0,%ymm0 4817 vpaddd 0+64(%rbp),%ymm4,%ymm4 4818 vpaddd 0+96(%rbp),%ymm8,%ymm8 4819 vpaddd 0+160(%rbp),%ymm12,%ymm12 4820 vperm2i128 $0x13,%ymm0,%ymm4,%ymm3 4821 vperm2i128 $0x02,%ymm0,%ymm4,%ymm0 4822 vperm2i128 $0x02,%ymm8,%ymm12,%ymm4 4823 vperm2i128 $0x13,%ymm8,%ymm12,%ymm12 4824 vmovdqa %ymm3,%ymm8 4825 4826 jmp .Lopen_avx2_tail_128_xor 4827 4828.Lopen_avx2_tail_256: 4829 vmovdqa .Lchacha20_consts(%rip),%ymm0 4830 vmovdqa 0+64(%rbp),%ymm4 4831 vmovdqa 0+96(%rbp),%ymm8 4832 vmovdqa %ymm0,%ymm1 4833 vmovdqa %ymm4,%ymm5 4834 vmovdqa %ymm8,%ymm9 4835 vmovdqa .Lavx2_inc(%rip),%ymm12 4836 vpaddd 0+160(%rbp),%ymm12,%ymm13 4837 vpaddd %ymm13,%ymm12,%ymm12 4838 vmovdqa %ymm12,0+160(%rbp) 4839 vmovdqa %ymm13,0+192(%rbp) 4840 4841 movq %rbx,0+128(%rbp) 4842 movq %rbx,%rcx 4843 subq $128,%rcx 4844 shrq $4,%rcx 4845 movq $10,%r8 4846 cmpq $10,%rcx 4847 cmovgq %r8,%rcx 4848 movq %rsi,%rbx 4849 xorq %r8,%r8 4850.Lopen_avx2_tail_256_rounds_and_x1hash: 4851 addq 0+0(%rbx),%r10 4852 adcq 8+0(%rbx),%r11 4853 adcq $1,%r12 4854 movq 0+0+0(%rbp),%rdx 4855 movq %rdx,%r15 4856 mulxq %r10,%r13,%r14 4857 mulxq %r11,%rax,%rdx 4858 imulq %r12,%r15 4859 addq %rax,%r14 4860 adcq %rdx,%r15 4861 movq 8+0+0(%rbp),%rdx 4862 mulxq %r10,%r10,%rax 4863 addq %r10,%r14 4864 mulxq %r11,%r11,%r9 4865 adcq %r11,%r15 4866 adcq $0,%r9 4867 imulq %r12,%rdx 4868 addq %rax,%r15 4869 adcq %rdx,%r9 4870 movq %r13,%r10 4871 movq %r14,%r11 4872 movq %r15,%r12 4873 andq $3,%r12 4874 movq %r15,%r13 4875 andq $-4,%r13 4876 movq %r9,%r14 4877 shrdq $2,%r9,%r15 4878 shrq $2,%r9 4879 addq %r13,%r15 4880 adcq %r14,%r9 4881 addq %r15,%r10 4882 adcq %r9,%r11 4883 adcq $0,%r12 4884 4885 leaq 16(%rbx),%rbx 4886.Lopen_avx2_tail_256_rounds: 4887 vpaddd %ymm4,%ymm0,%ymm0 4888 vpxor %ymm0,%ymm12,%ymm12 4889 vpshufb .Lrol16(%rip),%ymm12,%ymm12 4890 vpaddd %ymm12,%ymm8,%ymm8 4891 vpxor %ymm8,%ymm4,%ymm4 4892 vpsrld $20,%ymm4,%ymm3 4893 vpslld $12,%ymm4,%ymm4 4894 vpxor %ymm3,%ymm4,%ymm4 4895 vpaddd %ymm4,%ymm0,%ymm0 4896 vpxor %ymm0,%ymm12,%ymm12 4897 vpshufb .Lrol8(%rip),%ymm12,%ymm12 4898 vpaddd %ymm12,%ymm8,%ymm8 4899 vpxor %ymm8,%ymm4,%ymm4 4900 vpslld $7,%ymm4,%ymm3 4901 vpsrld $25,%ymm4,%ymm4 4902 vpxor %ymm3,%ymm4,%ymm4 4903 vpalignr $12,%ymm12,%ymm12,%ymm12 4904 vpalignr $8,%ymm8,%ymm8,%ymm8 4905 vpalignr $4,%ymm4,%ymm4,%ymm4 4906 vpaddd %ymm5,%ymm1,%ymm1 4907 vpxor %ymm1,%ymm13,%ymm13 4908 vpshufb .Lrol16(%rip),%ymm13,%ymm13 4909 vpaddd %ymm13,%ymm9,%ymm9 4910 vpxor %ymm9,%ymm5,%ymm5 4911 vpsrld 
$20,%ymm5,%ymm3 4912 vpslld $12,%ymm5,%ymm5 4913 vpxor %ymm3,%ymm5,%ymm5 4914 vpaddd %ymm5,%ymm1,%ymm1 4915 vpxor %ymm1,%ymm13,%ymm13 4916 vpshufb .Lrol8(%rip),%ymm13,%ymm13 4917 vpaddd %ymm13,%ymm9,%ymm9 4918 vpxor %ymm9,%ymm5,%ymm5 4919 vpslld $7,%ymm5,%ymm3 4920 vpsrld $25,%ymm5,%ymm5 4921 vpxor %ymm3,%ymm5,%ymm5 4922 vpalignr $12,%ymm13,%ymm13,%ymm13 4923 vpalignr $8,%ymm9,%ymm9,%ymm9 4924 vpalignr $4,%ymm5,%ymm5,%ymm5 4925 4926 incq %r8 4927 vpaddd %ymm4,%ymm0,%ymm0 4928 vpxor %ymm0,%ymm12,%ymm12 4929 vpshufb .Lrol16(%rip),%ymm12,%ymm12 4930 vpaddd %ymm12,%ymm8,%ymm8 4931 vpxor %ymm8,%ymm4,%ymm4 4932 vpsrld $20,%ymm4,%ymm3 4933 vpslld $12,%ymm4,%ymm4 4934 vpxor %ymm3,%ymm4,%ymm4 4935 vpaddd %ymm4,%ymm0,%ymm0 4936 vpxor %ymm0,%ymm12,%ymm12 4937 vpshufb .Lrol8(%rip),%ymm12,%ymm12 4938 vpaddd %ymm12,%ymm8,%ymm8 4939 vpxor %ymm8,%ymm4,%ymm4 4940 vpslld $7,%ymm4,%ymm3 4941 vpsrld $25,%ymm4,%ymm4 4942 vpxor %ymm3,%ymm4,%ymm4 4943 vpalignr $4,%ymm12,%ymm12,%ymm12 4944 vpalignr $8,%ymm8,%ymm8,%ymm8 4945 vpalignr $12,%ymm4,%ymm4,%ymm4 4946 vpaddd %ymm5,%ymm1,%ymm1 4947 vpxor %ymm1,%ymm13,%ymm13 4948 vpshufb .Lrol16(%rip),%ymm13,%ymm13 4949 vpaddd %ymm13,%ymm9,%ymm9 4950 vpxor %ymm9,%ymm5,%ymm5 4951 vpsrld $20,%ymm5,%ymm3 4952 vpslld $12,%ymm5,%ymm5 4953 vpxor %ymm3,%ymm5,%ymm5 4954 vpaddd %ymm5,%ymm1,%ymm1 4955 vpxor %ymm1,%ymm13,%ymm13 4956 vpshufb .Lrol8(%rip),%ymm13,%ymm13 4957 vpaddd %ymm13,%ymm9,%ymm9 4958 vpxor %ymm9,%ymm5,%ymm5 4959 vpslld $7,%ymm5,%ymm3 4960 vpsrld $25,%ymm5,%ymm5 4961 vpxor %ymm3,%ymm5,%ymm5 4962 vpalignr $4,%ymm13,%ymm13,%ymm13 4963 vpalignr $8,%ymm9,%ymm9,%ymm9 4964 vpalignr $12,%ymm5,%ymm5,%ymm5 4965 vpaddd %ymm6,%ymm2,%ymm2 4966 vpxor %ymm2,%ymm14,%ymm14 4967 vpshufb .Lrol16(%rip),%ymm14,%ymm14 4968 vpaddd %ymm14,%ymm10,%ymm10 4969 vpxor %ymm10,%ymm6,%ymm6 4970 vpsrld $20,%ymm6,%ymm3 4971 vpslld $12,%ymm6,%ymm6 4972 vpxor %ymm3,%ymm6,%ymm6 4973 vpaddd %ymm6,%ymm2,%ymm2 4974 vpxor %ymm2,%ymm14,%ymm14 4975 vpshufb .Lrol8(%rip),%ymm14,%ymm14 4976 vpaddd %ymm14,%ymm10,%ymm10 4977 vpxor %ymm10,%ymm6,%ymm6 4978 vpslld $7,%ymm6,%ymm3 4979 vpsrld $25,%ymm6,%ymm6 4980 vpxor %ymm3,%ymm6,%ymm6 4981 vpalignr $4,%ymm14,%ymm14,%ymm14 4982 vpalignr $8,%ymm10,%ymm10,%ymm10 4983 vpalignr $12,%ymm6,%ymm6,%ymm6 4984 4985 cmpq %rcx,%r8 4986 jb .Lopen_avx2_tail_256_rounds_and_x1hash 4987 cmpq $10,%r8 4988 jne .Lopen_avx2_tail_256_rounds 4989 movq %rbx,%r8 4990 subq %rsi,%rbx 4991 movq %rbx,%rcx 4992 movq 0+128(%rbp),%rbx 4993.Lopen_avx2_tail_256_hash: 4994 addq $16,%rcx 4995 cmpq %rbx,%rcx 4996 jg .Lopen_avx2_tail_256_done 4997 addq 0+0(%r8),%r10 4998 adcq 8+0(%r8),%r11 4999 adcq $1,%r12 5000 movq 0+0+0(%rbp),%rdx 5001 movq %rdx,%r15 5002 mulxq %r10,%r13,%r14 5003 mulxq %r11,%rax,%rdx 5004 imulq %r12,%r15 5005 addq %rax,%r14 5006 adcq %rdx,%r15 5007 movq 8+0+0(%rbp),%rdx 5008 mulxq %r10,%r10,%rax 5009 addq %r10,%r14 5010 mulxq %r11,%r11,%r9 5011 adcq %r11,%r15 5012 adcq $0,%r9 5013 imulq %r12,%rdx 5014 addq %rax,%r15 5015 adcq %rdx,%r9 5016 movq %r13,%r10 5017 movq %r14,%r11 5018 movq %r15,%r12 5019 andq $3,%r12 5020 movq %r15,%r13 5021 andq $-4,%r13 5022 movq %r9,%r14 5023 shrdq $2,%r9,%r15 5024 shrq $2,%r9 5025 addq %r13,%r15 5026 adcq %r14,%r9 5027 addq %r15,%r10 5028 adcq %r9,%r11 5029 adcq $0,%r12 5030 5031 leaq 16(%r8),%r8 5032 jmp .Lopen_avx2_tail_256_hash 5033.Lopen_avx2_tail_256_done: 5034 vpaddd .Lchacha20_consts(%rip),%ymm1,%ymm1 5035 vpaddd 0+64(%rbp),%ymm5,%ymm5 5036 vpaddd 0+96(%rbp),%ymm9,%ymm9 5037 vpaddd 0+192(%rbp),%ymm13,%ymm13 5038 vpaddd 
.Lchacha20_consts(%rip),%ymm0,%ymm0 5039 vpaddd 0+64(%rbp),%ymm4,%ymm4 5040 vpaddd 0+96(%rbp),%ymm8,%ymm8 5041 vpaddd 0+160(%rbp),%ymm12,%ymm12 5042 vperm2i128 $0x02,%ymm1,%ymm5,%ymm3 5043 vperm2i128 $0x13,%ymm1,%ymm5,%ymm5 5044 vperm2i128 $0x02,%ymm9,%ymm13,%ymm1 5045 vperm2i128 $0x13,%ymm9,%ymm13,%ymm9 5046 vpxor 0+0(%rsi),%ymm3,%ymm3 5047 vpxor 32+0(%rsi),%ymm1,%ymm1 5048 vpxor 64+0(%rsi),%ymm5,%ymm5 5049 vpxor 96+0(%rsi),%ymm9,%ymm9 5050 vmovdqu %ymm3,0+0(%rdi) 5051 vmovdqu %ymm1,32+0(%rdi) 5052 vmovdqu %ymm5,64+0(%rdi) 5053 vmovdqu %ymm9,96+0(%rdi) 5054 vperm2i128 $0x13,%ymm0,%ymm4,%ymm3 5055 vperm2i128 $0x02,%ymm0,%ymm4,%ymm0 5056 vperm2i128 $0x02,%ymm8,%ymm12,%ymm4 5057 vperm2i128 $0x13,%ymm8,%ymm12,%ymm12 5058 vmovdqa %ymm3,%ymm8 5059 5060 leaq 128(%rsi),%rsi 5061 leaq 128(%rdi),%rdi 5062 subq $128,%rbx 5063 jmp .Lopen_avx2_tail_128_xor 5064 5065.Lopen_avx2_tail_384: 5066 vmovdqa .Lchacha20_consts(%rip),%ymm0 5067 vmovdqa 0+64(%rbp),%ymm4 5068 vmovdqa 0+96(%rbp),%ymm8 5069 vmovdqa %ymm0,%ymm1 5070 vmovdqa %ymm4,%ymm5 5071 vmovdqa %ymm8,%ymm9 5072 vmovdqa %ymm0,%ymm2 5073 vmovdqa %ymm4,%ymm6 5074 vmovdqa %ymm8,%ymm10 5075 vmovdqa .Lavx2_inc(%rip),%ymm12 5076 vpaddd 0+160(%rbp),%ymm12,%ymm14 5077 vpaddd %ymm14,%ymm12,%ymm13 5078 vpaddd %ymm13,%ymm12,%ymm12 5079 vmovdqa %ymm12,0+160(%rbp) 5080 vmovdqa %ymm13,0+192(%rbp) 5081 vmovdqa %ymm14,0+224(%rbp) 5082 5083 movq %rbx,0+128(%rbp) 5084 movq %rbx,%rcx 5085 subq $256,%rcx 5086 shrq $4,%rcx 5087 addq $6,%rcx 5088 movq $10,%r8 5089 cmpq $10,%rcx 5090 cmovgq %r8,%rcx 5091 movq %rsi,%rbx 5092 xorq %r8,%r8 5093.Lopen_avx2_tail_384_rounds_and_x2hash: 5094 addq 0+0(%rbx),%r10 5095 adcq 8+0(%rbx),%r11 5096 adcq $1,%r12 5097 movq 0+0+0(%rbp),%rdx 5098 movq %rdx,%r15 5099 mulxq %r10,%r13,%r14 5100 mulxq %r11,%rax,%rdx 5101 imulq %r12,%r15 5102 addq %rax,%r14 5103 adcq %rdx,%r15 5104 movq 8+0+0(%rbp),%rdx 5105 mulxq %r10,%r10,%rax 5106 addq %r10,%r14 5107 mulxq %r11,%r11,%r9 5108 adcq %r11,%r15 5109 adcq $0,%r9 5110 imulq %r12,%rdx 5111 addq %rax,%r15 5112 adcq %rdx,%r9 5113 movq %r13,%r10 5114 movq %r14,%r11 5115 movq %r15,%r12 5116 andq $3,%r12 5117 movq %r15,%r13 5118 andq $-4,%r13 5119 movq %r9,%r14 5120 shrdq $2,%r9,%r15 5121 shrq $2,%r9 5122 addq %r13,%r15 5123 adcq %r14,%r9 5124 addq %r15,%r10 5125 adcq %r9,%r11 5126 adcq $0,%r12 5127 5128 leaq 16(%rbx),%rbx 5129.Lopen_avx2_tail_384_rounds_and_x1hash: 5130 vpaddd %ymm6,%ymm2,%ymm2 5131 vpxor %ymm2,%ymm14,%ymm14 5132 vpshufb .Lrol16(%rip),%ymm14,%ymm14 5133 vpaddd %ymm14,%ymm10,%ymm10 5134 vpxor %ymm10,%ymm6,%ymm6 5135 vpsrld $20,%ymm6,%ymm3 5136 vpslld $12,%ymm6,%ymm6 5137 vpxor %ymm3,%ymm6,%ymm6 5138 vpaddd %ymm6,%ymm2,%ymm2 5139 vpxor %ymm2,%ymm14,%ymm14 5140 vpshufb .Lrol8(%rip),%ymm14,%ymm14 5141 vpaddd %ymm14,%ymm10,%ymm10 5142 vpxor %ymm10,%ymm6,%ymm6 5143 vpslld $7,%ymm6,%ymm3 5144 vpsrld $25,%ymm6,%ymm6 5145 vpxor %ymm3,%ymm6,%ymm6 5146 vpalignr $12,%ymm14,%ymm14,%ymm14 5147 vpalignr $8,%ymm10,%ymm10,%ymm10 5148 vpalignr $4,%ymm6,%ymm6,%ymm6 5149 vpaddd %ymm5,%ymm1,%ymm1 5150 vpxor %ymm1,%ymm13,%ymm13 5151 vpshufb .Lrol16(%rip),%ymm13,%ymm13 5152 vpaddd %ymm13,%ymm9,%ymm9 5153 vpxor %ymm9,%ymm5,%ymm5 5154 vpsrld $20,%ymm5,%ymm3 5155 vpslld $12,%ymm5,%ymm5 5156 vpxor %ymm3,%ymm5,%ymm5 5157 vpaddd %ymm5,%ymm1,%ymm1 5158 vpxor %ymm1,%ymm13,%ymm13 5159 vpshufb .Lrol8(%rip),%ymm13,%ymm13 5160 vpaddd %ymm13,%ymm9,%ymm9 5161 vpxor %ymm9,%ymm5,%ymm5 5162 vpslld $7,%ymm5,%ymm3 5163 vpsrld $25,%ymm5,%ymm5 5164 vpxor %ymm3,%ymm5,%ymm5 5165 vpalignr $12,%ymm13,%ymm13,%ymm13 5166 vpalignr 
$8,%ymm9,%ymm9,%ymm9 5167 vpalignr $4,%ymm5,%ymm5,%ymm5 5168 vpaddd %ymm4,%ymm0,%ymm0 5169 vpxor %ymm0,%ymm12,%ymm12 5170 vpshufb .Lrol16(%rip),%ymm12,%ymm12 5171 vpaddd %ymm12,%ymm8,%ymm8 5172 vpxor %ymm8,%ymm4,%ymm4 5173 vpsrld $20,%ymm4,%ymm3 5174 vpslld $12,%ymm4,%ymm4 5175 vpxor %ymm3,%ymm4,%ymm4 5176 vpaddd %ymm4,%ymm0,%ymm0 5177 vpxor %ymm0,%ymm12,%ymm12 5178 vpshufb .Lrol8(%rip),%ymm12,%ymm12 5179 vpaddd %ymm12,%ymm8,%ymm8 5180 vpxor %ymm8,%ymm4,%ymm4 5181 vpslld $7,%ymm4,%ymm3 5182 vpsrld $25,%ymm4,%ymm4 5183 vpxor %ymm3,%ymm4,%ymm4 5184 vpalignr $12,%ymm12,%ymm12,%ymm12 5185 vpalignr $8,%ymm8,%ymm8,%ymm8 5186 vpalignr $4,%ymm4,%ymm4,%ymm4 5187 addq 0+0(%rbx),%r10 5188 adcq 8+0(%rbx),%r11 5189 adcq $1,%r12 5190 movq 0+0+0(%rbp),%rax 5191 movq %rax,%r15 5192 mulq %r10 5193 movq %rax,%r13 5194 movq %rdx,%r14 5195 movq 0+0+0(%rbp),%rax 5196 mulq %r11 5197 imulq %r12,%r15 5198 addq %rax,%r14 5199 adcq %rdx,%r15 5200 movq 8+0+0(%rbp),%rax 5201 movq %rax,%r9 5202 mulq %r10 5203 addq %rax,%r14 5204 adcq $0,%rdx 5205 movq %rdx,%r10 5206 movq 8+0+0(%rbp),%rax 5207 mulq %r11 5208 addq %rax,%r15 5209 adcq $0,%rdx 5210 imulq %r12,%r9 5211 addq %r10,%r15 5212 adcq %rdx,%r9 5213 movq %r13,%r10 5214 movq %r14,%r11 5215 movq %r15,%r12 5216 andq $3,%r12 5217 movq %r15,%r13 5218 andq $-4,%r13 5219 movq %r9,%r14 5220 shrdq $2,%r9,%r15 5221 shrq $2,%r9 5222 addq %r13,%r15 5223 adcq %r14,%r9 5224 addq %r15,%r10 5225 adcq %r9,%r11 5226 adcq $0,%r12 5227 5228 leaq 16(%rbx),%rbx 5229 incq %r8 5230 vpaddd %ymm6,%ymm2,%ymm2 5231 vpxor %ymm2,%ymm14,%ymm14 5232 vpshufb .Lrol16(%rip),%ymm14,%ymm14 5233 vpaddd %ymm14,%ymm10,%ymm10 5234 vpxor %ymm10,%ymm6,%ymm6 5235 vpsrld $20,%ymm6,%ymm3 5236 vpslld $12,%ymm6,%ymm6 5237 vpxor %ymm3,%ymm6,%ymm6 5238 vpaddd %ymm6,%ymm2,%ymm2 5239 vpxor %ymm2,%ymm14,%ymm14 5240 vpshufb .Lrol8(%rip),%ymm14,%ymm14 5241 vpaddd %ymm14,%ymm10,%ymm10 5242 vpxor %ymm10,%ymm6,%ymm6 5243 vpslld $7,%ymm6,%ymm3 5244 vpsrld $25,%ymm6,%ymm6 5245 vpxor %ymm3,%ymm6,%ymm6 5246 vpalignr $4,%ymm14,%ymm14,%ymm14 5247 vpalignr $8,%ymm10,%ymm10,%ymm10 5248 vpalignr $12,%ymm6,%ymm6,%ymm6 5249 vpaddd %ymm5,%ymm1,%ymm1 5250 vpxor %ymm1,%ymm13,%ymm13 5251 vpshufb .Lrol16(%rip),%ymm13,%ymm13 5252 vpaddd %ymm13,%ymm9,%ymm9 5253 vpxor %ymm9,%ymm5,%ymm5 5254 vpsrld $20,%ymm5,%ymm3 5255 vpslld $12,%ymm5,%ymm5 5256 vpxor %ymm3,%ymm5,%ymm5 5257 vpaddd %ymm5,%ymm1,%ymm1 5258 vpxor %ymm1,%ymm13,%ymm13 5259 vpshufb .Lrol8(%rip),%ymm13,%ymm13 5260 vpaddd %ymm13,%ymm9,%ymm9 5261 vpxor %ymm9,%ymm5,%ymm5 5262 vpslld $7,%ymm5,%ymm3 5263 vpsrld $25,%ymm5,%ymm5 5264 vpxor %ymm3,%ymm5,%ymm5 5265 vpalignr $4,%ymm13,%ymm13,%ymm13 5266 vpalignr $8,%ymm9,%ymm9,%ymm9 5267 vpalignr $12,%ymm5,%ymm5,%ymm5 5268 vpaddd %ymm4,%ymm0,%ymm0 5269 vpxor %ymm0,%ymm12,%ymm12 5270 vpshufb .Lrol16(%rip),%ymm12,%ymm12 5271 vpaddd %ymm12,%ymm8,%ymm8 5272 vpxor %ymm8,%ymm4,%ymm4 5273 vpsrld $20,%ymm4,%ymm3 5274 vpslld $12,%ymm4,%ymm4 5275 vpxor %ymm3,%ymm4,%ymm4 5276 vpaddd %ymm4,%ymm0,%ymm0 5277 vpxor %ymm0,%ymm12,%ymm12 5278 vpshufb .Lrol8(%rip),%ymm12,%ymm12 5279 vpaddd %ymm12,%ymm8,%ymm8 5280 vpxor %ymm8,%ymm4,%ymm4 5281 vpslld $7,%ymm4,%ymm3 5282 vpsrld $25,%ymm4,%ymm4 5283 vpxor %ymm3,%ymm4,%ymm4 5284 vpalignr $4,%ymm12,%ymm12,%ymm12 5285 vpalignr $8,%ymm8,%ymm8,%ymm8 5286 vpalignr $12,%ymm4,%ymm4,%ymm4 5287 5288 cmpq %rcx,%r8 5289 jb .Lopen_avx2_tail_384_rounds_and_x2hash 5290 cmpq $10,%r8 5291 jne .Lopen_avx2_tail_384_rounds_and_x1hash 5292 movq %rbx,%r8 5293 subq %rsi,%rbx 5294 movq %rbx,%rcx 5295 movq 0+128(%rbp),%rbx 
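// Poly1305: absorb any remaining full 16-byte ciphertext blocks of this tail
// before the ChaCha20 state is finalized and the keystream is XORed in below.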
.Lopen_avx2_384_tail_hash:
	addq $16,%rcx
	cmpq %rbx,%rcx
	jg .Lopen_avx2_384_tail_done
	addq 0+0(%r8),%r10
	adcq 8+0(%r8),%r11
	adcq $1,%r12
	movq 0+0+0(%rbp),%rdx
	movq %rdx,%r15
	mulxq %r10,%r13,%r14
	mulxq %r11,%rax,%rdx
	imulq %r12,%r15
	addq %rax,%r14
	adcq %rdx,%r15
	movq 8+0+0(%rbp),%rdx
	mulxq %r10,%r10,%rax
	addq %r10,%r14
	mulxq %r11,%r11,%r9
	adcq %r11,%r15
	adcq $0,%r9
	imulq %r12,%rdx
	addq %rax,%r15
	adcq %rdx,%r9
	movq %r13,%r10
	movq %r14,%r11
	movq %r15,%r12
	andq $3,%r12
	movq %r15,%r13
	andq $-4,%r13
	movq %r9,%r14
	shrdq $2,%r9,%r15
	shrq $2,%r9
	addq %r13,%r15
	adcq %r14,%r9
	addq %r15,%r10
	adcq %r9,%r11
	adcq $0,%r12

	leaq 16(%r8),%r8
	jmp .Lopen_avx2_384_tail_hash
.Lopen_avx2_384_tail_done:
	vpaddd .Lchacha20_consts(%rip),%ymm2,%ymm2
	vpaddd 0+64(%rbp),%ymm6,%ymm6
	vpaddd 0+96(%rbp),%ymm10,%ymm10
	vpaddd 0+224(%rbp),%ymm14,%ymm14
	vpaddd .Lchacha20_consts(%rip),%ymm1,%ymm1
	vpaddd 0+64(%rbp),%ymm5,%ymm5
	vpaddd 0+96(%rbp),%ymm9,%ymm9
	vpaddd 0+192(%rbp),%ymm13,%ymm13
	vpaddd .Lchacha20_consts(%rip),%ymm0,%ymm0
	vpaddd 0+64(%rbp),%ymm4,%ymm4
	vpaddd 0+96(%rbp),%ymm8,%ymm8
	vpaddd 0+160(%rbp),%ymm12,%ymm12
	vperm2i128 $0x02,%ymm2,%ymm6,%ymm3
	vperm2i128 $0x13,%ymm2,%ymm6,%ymm6
	vperm2i128 $0x02,%ymm10,%ymm14,%ymm2
	vperm2i128 $0x13,%ymm10,%ymm14,%ymm10
	vpxor 0+0(%rsi),%ymm3,%ymm3
	vpxor 32+0(%rsi),%ymm2,%ymm2
	vpxor 64+0(%rsi),%ymm6,%ymm6
	vpxor 96+0(%rsi),%ymm10,%ymm10
	vmovdqu %ymm3,0+0(%rdi)
	vmovdqu %ymm2,32+0(%rdi)
	vmovdqu %ymm6,64+0(%rdi)
	vmovdqu %ymm10,96+0(%rdi)
	vperm2i128 $0x02,%ymm1,%ymm5,%ymm3
	vperm2i128 $0x13,%ymm1,%ymm5,%ymm5
	vperm2i128 $0x02,%ymm9,%ymm13,%ymm1
	vperm2i128 $0x13,%ymm9,%ymm13,%ymm9
	vpxor 0+128(%rsi),%ymm3,%ymm3
	vpxor 32+128(%rsi),%ymm1,%ymm1
	vpxor 64+128(%rsi),%ymm5,%ymm5
	vpxor 96+128(%rsi),%ymm9,%ymm9
	vmovdqu %ymm3,0+128(%rdi)
	vmovdqu %ymm1,32+128(%rdi)
	vmovdqu %ymm5,64+128(%rdi)
	vmovdqu %ymm9,96+128(%rdi)
	vperm2i128 $0x13,%ymm0,%ymm4,%ymm3
	vperm2i128 $0x02,%ymm0,%ymm4,%ymm0
	vperm2i128 $0x02,%ymm8,%ymm12,%ymm4
	vperm2i128 $0x13,%ymm8,%ymm12,%ymm12
	vmovdqa %ymm3,%ymm8

	leaq 256(%rsi),%rsi
	leaq 256(%rdi),%rdi
	subq $256,%rbx
	jmp .Lopen_avx2_tail_128_xor

.Lopen_avx2_tail_512:
	vmovdqa .Lchacha20_consts(%rip),%ymm0
	vmovdqa 0+64(%rbp),%ymm4
	vmovdqa 0+96(%rbp),%ymm8
	vmovdqa %ymm0,%ymm1
	vmovdqa %ymm4,%ymm5
	vmovdqa %ymm8,%ymm9
	vmovdqa %ymm0,%ymm2
	vmovdqa %ymm4,%ymm6
	vmovdqa %ymm8,%ymm10
	vmovdqa %ymm0,%ymm3
	vmovdqa %ymm4,%ymm7
	vmovdqa %ymm8,%ymm11
	vmovdqa .Lavx2_inc(%rip),%ymm12
	vpaddd 0+160(%rbp),%ymm12,%ymm15
	vpaddd %ymm15,%ymm12,%ymm14
	vpaddd %ymm14,%ymm12,%ymm13
	vpaddd %ymm13,%ymm12,%ymm12
	vmovdqa %ymm15,0+256(%rbp)
	vmovdqa %ymm14,0+224(%rbp)
	vmovdqa %ymm13,0+192(%rbp)
	vmovdqa %ymm12,0+160(%rbp)

	xorq %rcx,%rcx
	movq %rsi,%r8
.Lopen_avx2_tail_512_rounds_and_x2hash:
	addq 0+0(%r8),%r10
	adcq 8+0(%r8),%r11
	adcq $1,%r12
	movq 0+0+0(%rbp),%rax
	movq %rax,%r15
	mulq %r10
	movq %rax,%r13
	movq %rdx,%r14
	movq 0+0+0(%rbp),%rax
	mulq %r11
	imulq %r12,%r15
	addq %rax,%r14
	adcq %rdx,%r15
	movq 8+0+0(%rbp),%rax
	movq %rax,%r9
	mulq %r10
	addq %rax,%r14
	adcq $0,%rdx
	movq %rdx,%r10
	movq 8+0+0(%rbp),%rax
	mulq %r11
	addq %rax,%r15
	adcq $0,%rdx
	imulq %r12,%r9
	addq %r10,%r15
	adcq %rdx,%r9
	movq %r13,%r10
	movq %r14,%r11
	movq %r15,%r12
	andq $3,%r12
	movq %r15,%r13
	andq $-4,%r13
	movq %r9,%r14
	shrdq $2,%r9,%r15
	shrq $2,%r9
	addq %r13,%r15
	adcq %r14,%r9
	addq %r15,%r10
	adcq %r9,%r11
	adcq $0,%r12

	leaq 16(%r8),%r8
.Lopen_avx2_tail_512_rounds_and_x1hash:
	vmovdqa %ymm8,0+128(%rbp)
	vmovdqa .Lrol16(%rip),%ymm8
	vpaddd %ymm7,%ymm3,%ymm3
	vpaddd %ymm6,%ymm2,%ymm2
	vpaddd %ymm5,%ymm1,%ymm1
	vpaddd %ymm4,%ymm0,%ymm0
	vpxor %ymm3,%ymm15,%ymm15
	vpxor %ymm2,%ymm14,%ymm14
	vpxor %ymm1,%ymm13,%ymm13
	vpxor %ymm0,%ymm12,%ymm12
	vpshufb %ymm8,%ymm15,%ymm15
	vpshufb %ymm8,%ymm14,%ymm14
	vpshufb %ymm8,%ymm13,%ymm13
	vpshufb %ymm8,%ymm12,%ymm12
	vpaddd %ymm15,%ymm11,%ymm11
	vpaddd %ymm14,%ymm10,%ymm10
	vpaddd %ymm13,%ymm9,%ymm9
	vpaddd 0+128(%rbp),%ymm12,%ymm8
	vpxor %ymm11,%ymm7,%ymm7
	vpxor %ymm10,%ymm6,%ymm6
	vpxor %ymm9,%ymm5,%ymm5
	vpxor %ymm8,%ymm4,%ymm4
	vmovdqa %ymm8,0+128(%rbp)
	vpsrld $20,%ymm7,%ymm8
	vpslld $32-20,%ymm7,%ymm7
	vpxor %ymm8,%ymm7,%ymm7
	vpsrld $20,%ymm6,%ymm8
	vpslld $32-20,%ymm6,%ymm6
	vpxor %ymm8,%ymm6,%ymm6
	vpsrld $20,%ymm5,%ymm8
	vpslld $32-20,%ymm5,%ymm5
	vpxor %ymm8,%ymm5,%ymm5
	vpsrld $20,%ymm4,%ymm8
	vpslld $32-20,%ymm4,%ymm4
	vpxor %ymm8,%ymm4,%ymm4
	vmovdqa .Lrol8(%rip),%ymm8
	vpaddd %ymm7,%ymm3,%ymm3
	addq 0+0(%r8),%r10
	adcq 8+0(%r8),%r11
	adcq $1,%r12
	movq 0+0+0(%rbp),%rdx
	movq %rdx,%r15
	mulxq %r10,%r13,%r14
	mulxq %r11,%rax,%rdx
	imulq %r12,%r15
	addq %rax,%r14
	adcq %rdx,%r15
	movq 8+0+0(%rbp),%rdx
	mulxq %r10,%r10,%rax
	addq %r10,%r14
	mulxq %r11,%r11,%r9
	adcq %r11,%r15
	adcq $0,%r9
	imulq %r12,%rdx
	addq %rax,%r15
	adcq %rdx,%r9
	movq %r13,%r10
	movq %r14,%r11
	movq %r15,%r12
	andq $3,%r12
	movq %r15,%r13
	andq $-4,%r13
	movq %r9,%r14
	shrdq $2,%r9,%r15
	shrq $2,%r9
	addq %r13,%r15
	adcq %r14,%r9
	addq %r15,%r10
	adcq %r9,%r11
	adcq $0,%r12
	vpaddd %ymm6,%ymm2,%ymm2
	vpaddd %ymm5,%ymm1,%ymm1
	vpaddd %ymm4,%ymm0,%ymm0
	vpxor %ymm3,%ymm15,%ymm15
	vpxor %ymm2,%ymm14,%ymm14
	vpxor %ymm1,%ymm13,%ymm13
	vpxor %ymm0,%ymm12,%ymm12
	vpshufb %ymm8,%ymm15,%ymm15
	vpshufb %ymm8,%ymm14,%ymm14
	vpshufb %ymm8,%ymm13,%ymm13
	vpshufb %ymm8,%ymm12,%ymm12
	vpaddd %ymm15,%ymm11,%ymm11
	vpaddd %ymm14,%ymm10,%ymm10
	vpaddd %ymm13,%ymm9,%ymm9
	vpaddd 0+128(%rbp),%ymm12,%ymm8
	vpxor %ymm11,%ymm7,%ymm7
	vpxor %ymm10,%ymm6,%ymm6
	vpxor %ymm9,%ymm5,%ymm5
	vpxor %ymm8,%ymm4,%ymm4
	vmovdqa %ymm8,0+128(%rbp)
	vpsrld $25,%ymm7,%ymm8
	vpslld $32-25,%ymm7,%ymm7
	vpxor %ymm8,%ymm7,%ymm7
	vpsrld $25,%ymm6,%ymm8
	vpslld $32-25,%ymm6,%ymm6
	vpxor %ymm8,%ymm6,%ymm6
	vpsrld $25,%ymm5,%ymm8
	vpslld $32-25,%ymm5,%ymm5
	vpxor %ymm8,%ymm5,%ymm5
	vpsrld $25,%ymm4,%ymm8
	vpslld $32-25,%ymm4,%ymm4
	vpxor %ymm8,%ymm4,%ymm4
	vmovdqa 0+128(%rbp),%ymm8
	vpalignr $4,%ymm7,%ymm7,%ymm7
	vpalignr $8,%ymm11,%ymm11,%ymm11
	vpalignr $12,%ymm15,%ymm15,%ymm15
vpalignr $4,%ymm6,%ymm6,%ymm6 5560 vpalignr $8,%ymm10,%ymm10,%ymm10 5561 vpalignr $12,%ymm14,%ymm14,%ymm14 5562 vpalignr $4,%ymm5,%ymm5,%ymm5 5563 vpalignr $8,%ymm9,%ymm9,%ymm9 5564 vpalignr $12,%ymm13,%ymm13,%ymm13 5565 vpalignr $4,%ymm4,%ymm4,%ymm4 5566 vpalignr $8,%ymm8,%ymm8,%ymm8 5567 vpalignr $12,%ymm12,%ymm12,%ymm12 5568 vmovdqa %ymm8,0+128(%rbp) 5569 vmovdqa .Lrol16(%rip),%ymm8 5570 vpaddd %ymm7,%ymm3,%ymm3 5571 addq 0+16(%r8),%r10 5572 adcq 8+16(%r8),%r11 5573 adcq $1,%r12 5574 movq 0+0+0(%rbp),%rdx 5575 movq %rdx,%r15 5576 mulxq %r10,%r13,%r14 5577 mulxq %r11,%rax,%rdx 5578 imulq %r12,%r15 5579 addq %rax,%r14 5580 adcq %rdx,%r15 5581 movq 8+0+0(%rbp),%rdx 5582 mulxq %r10,%r10,%rax 5583 addq %r10,%r14 5584 mulxq %r11,%r11,%r9 5585 adcq %r11,%r15 5586 adcq $0,%r9 5587 imulq %r12,%rdx 5588 addq %rax,%r15 5589 adcq %rdx,%r9 5590 movq %r13,%r10 5591 movq %r14,%r11 5592 movq %r15,%r12 5593 andq $3,%r12 5594 movq %r15,%r13 5595 andq $-4,%r13 5596 movq %r9,%r14 5597 shrdq $2,%r9,%r15 5598 shrq $2,%r9 5599 addq %r13,%r15 5600 adcq %r14,%r9 5601 addq %r15,%r10 5602 adcq %r9,%r11 5603 adcq $0,%r12 5604 5605 leaq 32(%r8),%r8 5606 vpaddd %ymm6,%ymm2,%ymm2 5607 vpaddd %ymm5,%ymm1,%ymm1 5608 vpaddd %ymm4,%ymm0,%ymm0 5609 vpxor %ymm3,%ymm15,%ymm15 5610 vpxor %ymm2,%ymm14,%ymm14 5611 vpxor %ymm1,%ymm13,%ymm13 5612 vpxor %ymm0,%ymm12,%ymm12 5613 vpshufb %ymm8,%ymm15,%ymm15 5614 vpshufb %ymm8,%ymm14,%ymm14 5615 vpshufb %ymm8,%ymm13,%ymm13 5616 vpshufb %ymm8,%ymm12,%ymm12 5617 vpaddd %ymm15,%ymm11,%ymm11 5618 vpaddd %ymm14,%ymm10,%ymm10 5619 vpaddd %ymm13,%ymm9,%ymm9 5620 vpaddd 0+128(%rbp),%ymm12,%ymm8 5621 vpxor %ymm11,%ymm7,%ymm7 5622 vpxor %ymm10,%ymm6,%ymm6 5623 vpxor %ymm9,%ymm5,%ymm5 5624 vpxor %ymm8,%ymm4,%ymm4 5625 vmovdqa %ymm8,0+128(%rbp) 5626 vpsrld $20,%ymm7,%ymm8 5627 vpslld $32-20,%ymm7,%ymm7 5628 vpxor %ymm8,%ymm7,%ymm7 5629 vpsrld $20,%ymm6,%ymm8 5630 vpslld $32-20,%ymm6,%ymm6 5631 vpxor %ymm8,%ymm6,%ymm6 5632 vpsrld $20,%ymm5,%ymm8 5633 vpslld $32-20,%ymm5,%ymm5 5634 vpxor %ymm8,%ymm5,%ymm5 5635 vpsrld $20,%ymm4,%ymm8 5636 vpslld $32-20,%ymm4,%ymm4 5637 vpxor %ymm8,%ymm4,%ymm4 5638 vmovdqa .Lrol8(%rip),%ymm8 5639 vpaddd %ymm7,%ymm3,%ymm3 5640 vpaddd %ymm6,%ymm2,%ymm2 5641 vpaddd %ymm5,%ymm1,%ymm1 5642 vpaddd %ymm4,%ymm0,%ymm0 5643 vpxor %ymm3,%ymm15,%ymm15 5644 vpxor %ymm2,%ymm14,%ymm14 5645 vpxor %ymm1,%ymm13,%ymm13 5646 vpxor %ymm0,%ymm12,%ymm12 5647 vpshufb %ymm8,%ymm15,%ymm15 5648 vpshufb %ymm8,%ymm14,%ymm14 5649 vpshufb %ymm8,%ymm13,%ymm13 5650 vpshufb %ymm8,%ymm12,%ymm12 5651 vpaddd %ymm15,%ymm11,%ymm11 5652 vpaddd %ymm14,%ymm10,%ymm10 5653 vpaddd %ymm13,%ymm9,%ymm9 5654 vpaddd 0+128(%rbp),%ymm12,%ymm8 5655 vpxor %ymm11,%ymm7,%ymm7 5656 vpxor %ymm10,%ymm6,%ymm6 5657 vpxor %ymm9,%ymm5,%ymm5 5658 vpxor %ymm8,%ymm4,%ymm4 5659 vmovdqa %ymm8,0+128(%rbp) 5660 vpsrld $25,%ymm7,%ymm8 5661 vpslld $32-25,%ymm7,%ymm7 5662 vpxor %ymm8,%ymm7,%ymm7 5663 vpsrld $25,%ymm6,%ymm8 5664 vpslld $32-25,%ymm6,%ymm6 5665 vpxor %ymm8,%ymm6,%ymm6 5666 vpsrld $25,%ymm5,%ymm8 5667 vpslld $32-25,%ymm5,%ymm5 5668 vpxor %ymm8,%ymm5,%ymm5 5669 vpsrld $25,%ymm4,%ymm8 5670 vpslld $32-25,%ymm4,%ymm4 5671 vpxor %ymm8,%ymm4,%ymm4 5672 vmovdqa 0+128(%rbp),%ymm8 5673 vpalignr $12,%ymm7,%ymm7,%ymm7 5674 vpalignr $8,%ymm11,%ymm11,%ymm11 5675 vpalignr $4,%ymm15,%ymm15,%ymm15 5676 vpalignr $12,%ymm6,%ymm6,%ymm6 5677 vpalignr $8,%ymm10,%ymm10,%ymm10 5678 vpalignr $4,%ymm14,%ymm14,%ymm14 5679 vpalignr $12,%ymm5,%ymm5,%ymm5 5680 vpalignr $8,%ymm9,%ymm9,%ymm9 5681 vpalignr $4,%ymm13,%ymm13,%ymm13 5682 vpalignr 
$12,%ymm4,%ymm4,%ymm4 5683 vpalignr $8,%ymm8,%ymm8,%ymm8 5684 vpalignr $4,%ymm12,%ymm12,%ymm12 5685 5686 incq %rcx 5687 cmpq $4,%rcx 5688 jl .Lopen_avx2_tail_512_rounds_and_x2hash 5689 cmpq $10,%rcx 5690 jne .Lopen_avx2_tail_512_rounds_and_x1hash 5691 movq %rbx,%rcx 5692 subq $384,%rcx 5693 andq $-16,%rcx 5694.Lopen_avx2_tail_512_hash: 5695 testq %rcx,%rcx 5696 je .Lopen_avx2_tail_512_done 5697 addq 0+0(%r8),%r10 5698 adcq 8+0(%r8),%r11 5699 adcq $1,%r12 5700 movq 0+0+0(%rbp),%rdx 5701 movq %rdx,%r15 5702 mulxq %r10,%r13,%r14 5703 mulxq %r11,%rax,%rdx 5704 imulq %r12,%r15 5705 addq %rax,%r14 5706 adcq %rdx,%r15 5707 movq 8+0+0(%rbp),%rdx 5708 mulxq %r10,%r10,%rax 5709 addq %r10,%r14 5710 mulxq %r11,%r11,%r9 5711 adcq %r11,%r15 5712 adcq $0,%r9 5713 imulq %r12,%rdx 5714 addq %rax,%r15 5715 adcq %rdx,%r9 5716 movq %r13,%r10 5717 movq %r14,%r11 5718 movq %r15,%r12 5719 andq $3,%r12 5720 movq %r15,%r13 5721 andq $-4,%r13 5722 movq %r9,%r14 5723 shrdq $2,%r9,%r15 5724 shrq $2,%r9 5725 addq %r13,%r15 5726 adcq %r14,%r9 5727 addq %r15,%r10 5728 adcq %r9,%r11 5729 adcq $0,%r12 5730 5731 leaq 16(%r8),%r8 5732 subq $16,%rcx 5733 jmp .Lopen_avx2_tail_512_hash 5734.Lopen_avx2_tail_512_done: 5735 vpaddd .Lchacha20_consts(%rip),%ymm3,%ymm3 5736 vpaddd 0+64(%rbp),%ymm7,%ymm7 5737 vpaddd 0+96(%rbp),%ymm11,%ymm11 5738 vpaddd 0+256(%rbp),%ymm15,%ymm15 5739 vpaddd .Lchacha20_consts(%rip),%ymm2,%ymm2 5740 vpaddd 0+64(%rbp),%ymm6,%ymm6 5741 vpaddd 0+96(%rbp),%ymm10,%ymm10 5742 vpaddd 0+224(%rbp),%ymm14,%ymm14 5743 vpaddd .Lchacha20_consts(%rip),%ymm1,%ymm1 5744 vpaddd 0+64(%rbp),%ymm5,%ymm5 5745 vpaddd 0+96(%rbp),%ymm9,%ymm9 5746 vpaddd 0+192(%rbp),%ymm13,%ymm13 5747 vpaddd .Lchacha20_consts(%rip),%ymm0,%ymm0 5748 vpaddd 0+64(%rbp),%ymm4,%ymm4 5749 vpaddd 0+96(%rbp),%ymm8,%ymm8 5750 vpaddd 0+160(%rbp),%ymm12,%ymm12 5751 5752 vmovdqa %ymm0,0+128(%rbp) 5753 vperm2i128 $0x02,%ymm3,%ymm7,%ymm0 5754 vperm2i128 $0x13,%ymm3,%ymm7,%ymm7 5755 vperm2i128 $0x02,%ymm11,%ymm15,%ymm3 5756 vperm2i128 $0x13,%ymm11,%ymm15,%ymm11 5757 vpxor 0+0(%rsi),%ymm0,%ymm0 5758 vpxor 32+0(%rsi),%ymm3,%ymm3 5759 vpxor 64+0(%rsi),%ymm7,%ymm7 5760 vpxor 96+0(%rsi),%ymm11,%ymm11 5761 vmovdqu %ymm0,0+0(%rdi) 5762 vmovdqu %ymm3,32+0(%rdi) 5763 vmovdqu %ymm7,64+0(%rdi) 5764 vmovdqu %ymm11,96+0(%rdi) 5765 5766 vmovdqa 0+128(%rbp),%ymm0 5767 vperm2i128 $0x02,%ymm2,%ymm6,%ymm3 5768 vperm2i128 $0x13,%ymm2,%ymm6,%ymm6 5769 vperm2i128 $0x02,%ymm10,%ymm14,%ymm2 5770 vperm2i128 $0x13,%ymm10,%ymm14,%ymm10 5771 vpxor 0+128(%rsi),%ymm3,%ymm3 5772 vpxor 32+128(%rsi),%ymm2,%ymm2 5773 vpxor 64+128(%rsi),%ymm6,%ymm6 5774 vpxor 96+128(%rsi),%ymm10,%ymm10 5775 vmovdqu %ymm3,0+128(%rdi) 5776 vmovdqu %ymm2,32+128(%rdi) 5777 vmovdqu %ymm6,64+128(%rdi) 5778 vmovdqu %ymm10,96+128(%rdi) 5779 vperm2i128 $0x02,%ymm1,%ymm5,%ymm3 5780 vperm2i128 $0x13,%ymm1,%ymm5,%ymm5 5781 vperm2i128 $0x02,%ymm9,%ymm13,%ymm1 5782 vperm2i128 $0x13,%ymm9,%ymm13,%ymm9 5783 vpxor 0+256(%rsi),%ymm3,%ymm3 5784 vpxor 32+256(%rsi),%ymm1,%ymm1 5785 vpxor 64+256(%rsi),%ymm5,%ymm5 5786 vpxor 96+256(%rsi),%ymm9,%ymm9 5787 vmovdqu %ymm3,0+256(%rdi) 5788 vmovdqu %ymm1,32+256(%rdi) 5789 vmovdqu %ymm5,64+256(%rdi) 5790 vmovdqu %ymm9,96+256(%rdi) 5791 vperm2i128 $0x13,%ymm0,%ymm4,%ymm3 5792 vperm2i128 $0x02,%ymm0,%ymm4,%ymm0 5793 vperm2i128 $0x02,%ymm8,%ymm12,%ymm4 5794 vperm2i128 $0x13,%ymm8,%ymm12,%ymm12 5795 vmovdqa %ymm3,%ymm8 5796 5797 leaq 384(%rsi),%rsi 5798 leaq 384(%rdi),%rdi 5799 subq $384,%rbx 5800.Lopen_avx2_tail_128_xor: 5801 cmpq $32,%rbx 5802 jb .Lopen_avx2_tail_32_xor 5803 subq $32,%rbx 
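// Consume the remaining tail 32 bytes at a time: XOR the input against the
// keystream held in %ymm0, then shift the queued keystream registers down
// (%ymm4 -> %ymm0, %ymm8 -> %ymm4, %ymm12 -> %ymm8) for the next iteration.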
5804 vpxor (%rsi),%ymm0,%ymm0 5805 vmovdqu %ymm0,(%rdi) 5806 leaq 32(%rsi),%rsi 5807 leaq 32(%rdi),%rdi 5808 vmovdqa %ymm4,%ymm0 5809 vmovdqa %ymm8,%ymm4 5810 vmovdqa %ymm12,%ymm8 5811 jmp .Lopen_avx2_tail_128_xor 5812.Lopen_avx2_tail_32_xor: 5813 cmpq $16,%rbx 5814 vmovdqa %xmm0,%xmm1 5815 jb .Lopen_avx2_exit 5816 subq $16,%rbx 5817 5818 vpxor (%rsi),%xmm0,%xmm1 5819 vmovdqu %xmm1,(%rdi) 5820 leaq 16(%rsi),%rsi 5821 leaq 16(%rdi),%rdi 5822 vperm2i128 $0x11,%ymm0,%ymm0,%ymm0 5823 vmovdqa %xmm0,%xmm1 5824.Lopen_avx2_exit: 5825 vzeroupper 5826 jmp .Lopen_sse_tail_16 5827 5828.Lopen_avx2_192: 5829 vmovdqa %ymm0,%ymm1 5830 vmovdqa %ymm0,%ymm2 5831 vmovdqa %ymm4,%ymm5 5832 vmovdqa %ymm4,%ymm6 5833 vmovdqa %ymm8,%ymm9 5834 vmovdqa %ymm8,%ymm10 5835 vpaddd .Lavx2_inc(%rip),%ymm12,%ymm13 5836 vmovdqa %ymm12,%ymm11 5837 vmovdqa %ymm13,%ymm15 5838 movq $10,%r10 5839.Lopen_avx2_192_rounds: 5840 vpaddd %ymm4,%ymm0,%ymm0 5841 vpxor %ymm0,%ymm12,%ymm12 5842 vpshufb .Lrol16(%rip),%ymm12,%ymm12 5843 vpaddd %ymm12,%ymm8,%ymm8 5844 vpxor %ymm8,%ymm4,%ymm4 5845 vpsrld $20,%ymm4,%ymm3 5846 vpslld $12,%ymm4,%ymm4 5847 vpxor %ymm3,%ymm4,%ymm4 5848 vpaddd %ymm4,%ymm0,%ymm0 5849 vpxor %ymm0,%ymm12,%ymm12 5850 vpshufb .Lrol8(%rip),%ymm12,%ymm12 5851 vpaddd %ymm12,%ymm8,%ymm8 5852 vpxor %ymm8,%ymm4,%ymm4 5853 vpslld $7,%ymm4,%ymm3 5854 vpsrld $25,%ymm4,%ymm4 5855 vpxor %ymm3,%ymm4,%ymm4 5856 vpalignr $12,%ymm12,%ymm12,%ymm12 5857 vpalignr $8,%ymm8,%ymm8,%ymm8 5858 vpalignr $4,%ymm4,%ymm4,%ymm4 5859 vpaddd %ymm5,%ymm1,%ymm1 5860 vpxor %ymm1,%ymm13,%ymm13 5861 vpshufb .Lrol16(%rip),%ymm13,%ymm13 5862 vpaddd %ymm13,%ymm9,%ymm9 5863 vpxor %ymm9,%ymm5,%ymm5 5864 vpsrld $20,%ymm5,%ymm3 5865 vpslld $12,%ymm5,%ymm5 5866 vpxor %ymm3,%ymm5,%ymm5 5867 vpaddd %ymm5,%ymm1,%ymm1 5868 vpxor %ymm1,%ymm13,%ymm13 5869 vpshufb .Lrol8(%rip),%ymm13,%ymm13 5870 vpaddd %ymm13,%ymm9,%ymm9 5871 vpxor %ymm9,%ymm5,%ymm5 5872 vpslld $7,%ymm5,%ymm3 5873 vpsrld $25,%ymm5,%ymm5 5874 vpxor %ymm3,%ymm5,%ymm5 5875 vpalignr $12,%ymm13,%ymm13,%ymm13 5876 vpalignr $8,%ymm9,%ymm9,%ymm9 5877 vpalignr $4,%ymm5,%ymm5,%ymm5 5878 vpaddd %ymm4,%ymm0,%ymm0 5879 vpxor %ymm0,%ymm12,%ymm12 5880 vpshufb .Lrol16(%rip),%ymm12,%ymm12 5881 vpaddd %ymm12,%ymm8,%ymm8 5882 vpxor %ymm8,%ymm4,%ymm4 5883 vpsrld $20,%ymm4,%ymm3 5884 vpslld $12,%ymm4,%ymm4 5885 vpxor %ymm3,%ymm4,%ymm4 5886 vpaddd %ymm4,%ymm0,%ymm0 5887 vpxor %ymm0,%ymm12,%ymm12 5888 vpshufb .Lrol8(%rip),%ymm12,%ymm12 5889 vpaddd %ymm12,%ymm8,%ymm8 5890 vpxor %ymm8,%ymm4,%ymm4 5891 vpslld $7,%ymm4,%ymm3 5892 vpsrld $25,%ymm4,%ymm4 5893 vpxor %ymm3,%ymm4,%ymm4 5894 vpalignr $4,%ymm12,%ymm12,%ymm12 5895 vpalignr $8,%ymm8,%ymm8,%ymm8 5896 vpalignr $12,%ymm4,%ymm4,%ymm4 5897 vpaddd %ymm5,%ymm1,%ymm1 5898 vpxor %ymm1,%ymm13,%ymm13 5899 vpshufb .Lrol16(%rip),%ymm13,%ymm13 5900 vpaddd %ymm13,%ymm9,%ymm9 5901 vpxor %ymm9,%ymm5,%ymm5 5902 vpsrld $20,%ymm5,%ymm3 5903 vpslld $12,%ymm5,%ymm5 5904 vpxor %ymm3,%ymm5,%ymm5 5905 vpaddd %ymm5,%ymm1,%ymm1 5906 vpxor %ymm1,%ymm13,%ymm13 5907 vpshufb .Lrol8(%rip),%ymm13,%ymm13 5908 vpaddd %ymm13,%ymm9,%ymm9 5909 vpxor %ymm9,%ymm5,%ymm5 5910 vpslld $7,%ymm5,%ymm3 5911 vpsrld $25,%ymm5,%ymm5 5912 vpxor %ymm3,%ymm5,%ymm5 5913 vpalignr $4,%ymm13,%ymm13,%ymm13 5914 vpalignr $8,%ymm9,%ymm9,%ymm9 5915 vpalignr $12,%ymm5,%ymm5,%ymm5 5916 5917 decq %r10 5918 jne .Lopen_avx2_192_rounds 5919 vpaddd %ymm2,%ymm0,%ymm0 5920 vpaddd %ymm2,%ymm1,%ymm1 5921 vpaddd %ymm6,%ymm4,%ymm4 5922 vpaddd %ymm6,%ymm5,%ymm5 5923 vpaddd %ymm10,%ymm8,%ymm8 5924 vpaddd %ymm10,%ymm9,%ymm9 5925 vpaddd 
%ymm11,%ymm12,%ymm12 5926 vpaddd %ymm15,%ymm13,%ymm13 5927 vperm2i128 $0x02,%ymm0,%ymm4,%ymm3 5928 5929 vpand .Lclamp(%rip),%ymm3,%ymm3 5930 vmovdqa %ymm3,0+0(%rbp) 5931 5932 vperm2i128 $0x13,%ymm0,%ymm4,%ymm0 5933 vperm2i128 $0x13,%ymm8,%ymm12,%ymm4 5934 vperm2i128 $0x02,%ymm1,%ymm5,%ymm8 5935 vperm2i128 $0x02,%ymm9,%ymm13,%ymm12 5936 vperm2i128 $0x13,%ymm1,%ymm5,%ymm1 5937 vperm2i128 $0x13,%ymm9,%ymm13,%ymm5 5938.Lopen_avx2_short: 5939 movq %r8,%r8 5940 call poly_hash_ad_internal 5941.Lopen_avx2_short_hash_and_xor_loop: 5942 cmpq $32,%rbx 5943 jb .Lopen_avx2_short_tail_32 5944 subq $32,%rbx 5945 addq 0+0(%rsi),%r10 5946 adcq 8+0(%rsi),%r11 5947 adcq $1,%r12 5948 movq 0+0+0(%rbp),%rax 5949 movq %rax,%r15 5950 mulq %r10 5951 movq %rax,%r13 5952 movq %rdx,%r14 5953 movq 0+0+0(%rbp),%rax 5954 mulq %r11 5955 imulq %r12,%r15 5956 addq %rax,%r14 5957 adcq %rdx,%r15 5958 movq 8+0+0(%rbp),%rax 5959 movq %rax,%r9 5960 mulq %r10 5961 addq %rax,%r14 5962 adcq $0,%rdx 5963 movq %rdx,%r10 5964 movq 8+0+0(%rbp),%rax 5965 mulq %r11 5966 addq %rax,%r15 5967 adcq $0,%rdx 5968 imulq %r12,%r9 5969 addq %r10,%r15 5970 adcq %rdx,%r9 5971 movq %r13,%r10 5972 movq %r14,%r11 5973 movq %r15,%r12 5974 andq $3,%r12 5975 movq %r15,%r13 5976 andq $-4,%r13 5977 movq %r9,%r14 5978 shrdq $2,%r9,%r15 5979 shrq $2,%r9 5980 addq %r13,%r15 5981 adcq %r14,%r9 5982 addq %r15,%r10 5983 adcq %r9,%r11 5984 adcq $0,%r12 5985 addq 0+16(%rsi),%r10 5986 adcq 8+16(%rsi),%r11 5987 adcq $1,%r12 5988 movq 0+0+0(%rbp),%rax 5989 movq %rax,%r15 5990 mulq %r10 5991 movq %rax,%r13 5992 movq %rdx,%r14 5993 movq 0+0+0(%rbp),%rax 5994 mulq %r11 5995 imulq %r12,%r15 5996 addq %rax,%r14 5997 adcq %rdx,%r15 5998 movq 8+0+0(%rbp),%rax 5999 movq %rax,%r9 6000 mulq %r10 6001 addq %rax,%r14 6002 adcq $0,%rdx 6003 movq %rdx,%r10 6004 movq 8+0+0(%rbp),%rax 6005 mulq %r11 6006 addq %rax,%r15 6007 adcq $0,%rdx 6008 imulq %r12,%r9 6009 addq %r10,%r15 6010 adcq %rdx,%r9 6011 movq %r13,%r10 6012 movq %r14,%r11 6013 movq %r15,%r12 6014 andq $3,%r12 6015 movq %r15,%r13 6016 andq $-4,%r13 6017 movq %r9,%r14 6018 shrdq $2,%r9,%r15 6019 shrq $2,%r9 6020 addq %r13,%r15 6021 adcq %r14,%r9 6022 addq %r15,%r10 6023 adcq %r9,%r11 6024 adcq $0,%r12 6025 6026 6027 vpxor (%rsi),%ymm0,%ymm0 6028 vmovdqu %ymm0,(%rdi) 6029 leaq 32(%rsi),%rsi 6030 leaq 32(%rdi),%rdi 6031 6032 vmovdqa %ymm4,%ymm0 6033 vmovdqa %ymm8,%ymm4 6034 vmovdqa %ymm12,%ymm8 6035 vmovdqa %ymm1,%ymm12 6036 vmovdqa %ymm5,%ymm1 6037 vmovdqa %ymm9,%ymm5 6038 vmovdqa %ymm13,%ymm9 6039 vmovdqa %ymm2,%ymm13 6040 vmovdqa %ymm6,%ymm2 6041 jmp .Lopen_avx2_short_hash_and_xor_loop 6042.Lopen_avx2_short_tail_32: 6043 cmpq $16,%rbx 6044 vmovdqa %xmm0,%xmm1 6045 jb .Lopen_avx2_short_tail_32_exit 6046 subq $16,%rbx 6047 addq 0+0(%rsi),%r10 6048 adcq 8+0(%rsi),%r11 6049 adcq $1,%r12 6050 movq 0+0+0(%rbp),%rax 6051 movq %rax,%r15 6052 mulq %r10 6053 movq %rax,%r13 6054 movq %rdx,%r14 6055 movq 0+0+0(%rbp),%rax 6056 mulq %r11 6057 imulq %r12,%r15 6058 addq %rax,%r14 6059 adcq %rdx,%r15 6060 movq 8+0+0(%rbp),%rax 6061 movq %rax,%r9 6062 mulq %r10 6063 addq %rax,%r14 6064 adcq $0,%rdx 6065 movq %rdx,%r10 6066 movq 8+0+0(%rbp),%rax 6067 mulq %r11 6068 addq %rax,%r15 6069 adcq $0,%rdx 6070 imulq %r12,%r9 6071 addq %r10,%r15 6072 adcq %rdx,%r9 6073 movq %r13,%r10 6074 movq %r14,%r11 6075 movq %r15,%r12 6076 andq $3,%r12 6077 movq %r15,%r13 6078 andq $-4,%r13 6079 movq %r9,%r14 6080 shrdq $2,%r9,%r15 6081 shrq $2,%r9 6082 addq %r13,%r15 6083 adcq %r14,%r9 6084 addq %r15,%r10 6085 adcq %r9,%r11 6086 adcq $0,%r12 6087 6088 vpxor 
(%rsi),%xmm0,%xmm3 6089 vmovdqu %xmm3,(%rdi) 6090 leaq 16(%rsi),%rsi 6091 leaq 16(%rdi),%rdi 6092 vextracti128 $1,%ymm0,%xmm1 6093.Lopen_avx2_short_tail_32_exit: 6094 vzeroupper 6095 jmp .Lopen_sse_tail_16 6096 6097.Lopen_avx2_320: 6098 vmovdqa %ymm0,%ymm1 6099 vmovdqa %ymm0,%ymm2 6100 vmovdqa %ymm4,%ymm5 6101 vmovdqa %ymm4,%ymm6 6102 vmovdqa %ymm8,%ymm9 6103 vmovdqa %ymm8,%ymm10 6104 vpaddd .Lavx2_inc(%rip),%ymm12,%ymm13 6105 vpaddd .Lavx2_inc(%rip),%ymm13,%ymm14 6106 vmovdqa %ymm4,%ymm7 6107 vmovdqa %ymm8,%ymm11 6108 vmovdqa %ymm12,0+160(%rbp) 6109 vmovdqa %ymm13,0+192(%rbp) 6110 vmovdqa %ymm14,0+224(%rbp) 6111 movq $10,%r10 6112.Lopen_avx2_320_rounds: 6113 vpaddd %ymm4,%ymm0,%ymm0 6114 vpxor %ymm0,%ymm12,%ymm12 6115 vpshufb .Lrol16(%rip),%ymm12,%ymm12 6116 vpaddd %ymm12,%ymm8,%ymm8 6117 vpxor %ymm8,%ymm4,%ymm4 6118 vpsrld $20,%ymm4,%ymm3 6119 vpslld $12,%ymm4,%ymm4 6120 vpxor %ymm3,%ymm4,%ymm4 6121 vpaddd %ymm4,%ymm0,%ymm0 6122 vpxor %ymm0,%ymm12,%ymm12 6123 vpshufb .Lrol8(%rip),%ymm12,%ymm12 6124 vpaddd %ymm12,%ymm8,%ymm8 6125 vpxor %ymm8,%ymm4,%ymm4 6126 vpslld $7,%ymm4,%ymm3 6127 vpsrld $25,%ymm4,%ymm4 6128 vpxor %ymm3,%ymm4,%ymm4 6129 vpalignr $12,%ymm12,%ymm12,%ymm12 6130 vpalignr $8,%ymm8,%ymm8,%ymm8 6131 vpalignr $4,%ymm4,%ymm4,%ymm4 6132 vpaddd %ymm5,%ymm1,%ymm1 6133 vpxor %ymm1,%ymm13,%ymm13 6134 vpshufb .Lrol16(%rip),%ymm13,%ymm13 6135 vpaddd %ymm13,%ymm9,%ymm9 6136 vpxor %ymm9,%ymm5,%ymm5 6137 vpsrld $20,%ymm5,%ymm3 6138 vpslld $12,%ymm5,%ymm5 6139 vpxor %ymm3,%ymm5,%ymm5 6140 vpaddd %ymm5,%ymm1,%ymm1 6141 vpxor %ymm1,%ymm13,%ymm13 6142 vpshufb .Lrol8(%rip),%ymm13,%ymm13 6143 vpaddd %ymm13,%ymm9,%ymm9 6144 vpxor %ymm9,%ymm5,%ymm5 6145 vpslld $7,%ymm5,%ymm3 6146 vpsrld $25,%ymm5,%ymm5 6147 vpxor %ymm3,%ymm5,%ymm5 6148 vpalignr $12,%ymm13,%ymm13,%ymm13 6149 vpalignr $8,%ymm9,%ymm9,%ymm9 6150 vpalignr $4,%ymm5,%ymm5,%ymm5 6151 vpaddd %ymm6,%ymm2,%ymm2 6152 vpxor %ymm2,%ymm14,%ymm14 6153 vpshufb .Lrol16(%rip),%ymm14,%ymm14 6154 vpaddd %ymm14,%ymm10,%ymm10 6155 vpxor %ymm10,%ymm6,%ymm6 6156 vpsrld $20,%ymm6,%ymm3 6157 vpslld $12,%ymm6,%ymm6 6158 vpxor %ymm3,%ymm6,%ymm6 6159 vpaddd %ymm6,%ymm2,%ymm2 6160 vpxor %ymm2,%ymm14,%ymm14 6161 vpshufb .Lrol8(%rip),%ymm14,%ymm14 6162 vpaddd %ymm14,%ymm10,%ymm10 6163 vpxor %ymm10,%ymm6,%ymm6 6164 vpslld $7,%ymm6,%ymm3 6165 vpsrld $25,%ymm6,%ymm6 6166 vpxor %ymm3,%ymm6,%ymm6 6167 vpalignr $12,%ymm14,%ymm14,%ymm14 6168 vpalignr $8,%ymm10,%ymm10,%ymm10 6169 vpalignr $4,%ymm6,%ymm6,%ymm6 6170 vpaddd %ymm4,%ymm0,%ymm0 6171 vpxor %ymm0,%ymm12,%ymm12 6172 vpshufb .Lrol16(%rip),%ymm12,%ymm12 6173 vpaddd %ymm12,%ymm8,%ymm8 6174 vpxor %ymm8,%ymm4,%ymm4 6175 vpsrld $20,%ymm4,%ymm3 6176 vpslld $12,%ymm4,%ymm4 6177 vpxor %ymm3,%ymm4,%ymm4 6178 vpaddd %ymm4,%ymm0,%ymm0 6179 vpxor %ymm0,%ymm12,%ymm12 6180 vpshufb .Lrol8(%rip),%ymm12,%ymm12 6181 vpaddd %ymm12,%ymm8,%ymm8 6182 vpxor %ymm8,%ymm4,%ymm4 6183 vpslld $7,%ymm4,%ymm3 6184 vpsrld $25,%ymm4,%ymm4 6185 vpxor %ymm3,%ymm4,%ymm4 6186 vpalignr $4,%ymm12,%ymm12,%ymm12 6187 vpalignr $8,%ymm8,%ymm8,%ymm8 6188 vpalignr $12,%ymm4,%ymm4,%ymm4 6189 vpaddd %ymm5,%ymm1,%ymm1 6190 vpxor %ymm1,%ymm13,%ymm13 6191 vpshufb .Lrol16(%rip),%ymm13,%ymm13 6192 vpaddd %ymm13,%ymm9,%ymm9 6193 vpxor %ymm9,%ymm5,%ymm5 6194 vpsrld $20,%ymm5,%ymm3 6195 vpslld $12,%ymm5,%ymm5 6196 vpxor %ymm3,%ymm5,%ymm5 6197 vpaddd %ymm5,%ymm1,%ymm1 6198 vpxor %ymm1,%ymm13,%ymm13 6199 vpshufb .Lrol8(%rip),%ymm13,%ymm13 6200 vpaddd %ymm13,%ymm9,%ymm9 6201 vpxor %ymm9,%ymm5,%ymm5 6202 vpslld $7,%ymm5,%ymm3 6203 vpsrld $25,%ymm5,%ymm5 6204 vpxor 
%ymm3,%ymm5,%ymm5 6205 vpalignr $4,%ymm13,%ymm13,%ymm13 6206 vpalignr $8,%ymm9,%ymm9,%ymm9 6207 vpalignr $12,%ymm5,%ymm5,%ymm5 6208 vpaddd %ymm6,%ymm2,%ymm2 6209 vpxor %ymm2,%ymm14,%ymm14 6210 vpshufb .Lrol16(%rip),%ymm14,%ymm14 6211 vpaddd %ymm14,%ymm10,%ymm10 6212 vpxor %ymm10,%ymm6,%ymm6 6213 vpsrld $20,%ymm6,%ymm3 6214 vpslld $12,%ymm6,%ymm6 6215 vpxor %ymm3,%ymm6,%ymm6 6216 vpaddd %ymm6,%ymm2,%ymm2 6217 vpxor %ymm2,%ymm14,%ymm14 6218 vpshufb .Lrol8(%rip),%ymm14,%ymm14 6219 vpaddd %ymm14,%ymm10,%ymm10 6220 vpxor %ymm10,%ymm6,%ymm6 6221 vpslld $7,%ymm6,%ymm3 6222 vpsrld $25,%ymm6,%ymm6 6223 vpxor %ymm3,%ymm6,%ymm6 6224 vpalignr $4,%ymm14,%ymm14,%ymm14 6225 vpalignr $8,%ymm10,%ymm10,%ymm10 6226 vpalignr $12,%ymm6,%ymm6,%ymm6 6227 6228 decq %r10 6229 jne .Lopen_avx2_320_rounds 6230 vpaddd .Lchacha20_consts(%rip),%ymm0,%ymm0 6231 vpaddd .Lchacha20_consts(%rip),%ymm1,%ymm1 6232 vpaddd .Lchacha20_consts(%rip),%ymm2,%ymm2 6233 vpaddd %ymm7,%ymm4,%ymm4 6234 vpaddd %ymm7,%ymm5,%ymm5 6235 vpaddd %ymm7,%ymm6,%ymm6 6236 vpaddd %ymm11,%ymm8,%ymm8 6237 vpaddd %ymm11,%ymm9,%ymm9 6238 vpaddd %ymm11,%ymm10,%ymm10 6239 vpaddd 0+160(%rbp),%ymm12,%ymm12 6240 vpaddd 0+192(%rbp),%ymm13,%ymm13 6241 vpaddd 0+224(%rbp),%ymm14,%ymm14 6242 vperm2i128 $0x02,%ymm0,%ymm4,%ymm3 6243 6244 vpand .Lclamp(%rip),%ymm3,%ymm3 6245 vmovdqa %ymm3,0+0(%rbp) 6246 6247 vperm2i128 $0x13,%ymm0,%ymm4,%ymm0 6248 vperm2i128 $0x13,%ymm8,%ymm12,%ymm4 6249 vperm2i128 $0x02,%ymm1,%ymm5,%ymm8 6250 vperm2i128 $0x02,%ymm9,%ymm13,%ymm12 6251 vperm2i128 $0x13,%ymm1,%ymm5,%ymm1 6252 vperm2i128 $0x13,%ymm9,%ymm13,%ymm5 6253 vperm2i128 $0x02,%ymm2,%ymm6,%ymm9 6254 vperm2i128 $0x02,%ymm10,%ymm14,%ymm13 6255 vperm2i128 $0x13,%ymm2,%ymm6,%ymm2 6256 vperm2i128 $0x13,%ymm10,%ymm14,%ymm6 6257 jmp .Lopen_avx2_short 6258.size chacha20_poly1305_open_avx2, .-chacha20_poly1305_open_avx2 6259.cfi_endproc 6260 6261 6262.type chacha20_poly1305_seal_avx2,@function 6263.align 64 6264chacha20_poly1305_seal_avx2: 6265.cfi_startproc 6266 6267 6268.cfi_adjust_cfa_offset 8 6269.cfi_offset %rbp,-16 6270.cfi_adjust_cfa_offset 8 6271.cfi_offset %rbx,-24 6272.cfi_adjust_cfa_offset 8 6273.cfi_offset %r12,-32 6274.cfi_adjust_cfa_offset 8 6275.cfi_offset %r13,-40 6276.cfi_adjust_cfa_offset 8 6277.cfi_offset %r14,-48 6278.cfi_adjust_cfa_offset 8 6279.cfi_offset %r15,-56 6280.cfi_adjust_cfa_offset 8 6281.cfi_offset %r9,-64 6282.cfi_adjust_cfa_offset 288 + 32 6283 6284 vzeroupper 6285 vmovdqa .Lchacha20_consts(%rip),%ymm0 6286 vbroadcasti128 0(%r9),%ymm4 6287 vbroadcasti128 16(%r9),%ymm8 6288 vbroadcasti128 32(%r9),%ymm12 6289 vpaddd .Lavx2_init(%rip),%ymm12,%ymm12 6290 cmpq $192,%rbx 6291 jbe .Lseal_avx2_192 6292 cmpq $320,%rbx 6293 jbe .Lseal_avx2_320 6294 vmovdqa %ymm0,%ymm1 6295 vmovdqa %ymm0,%ymm2 6296 vmovdqa %ymm0,%ymm3 6297 vmovdqa %ymm4,%ymm5 6298 vmovdqa %ymm4,%ymm6 6299 vmovdqa %ymm4,%ymm7 6300 vmovdqa %ymm4,0+64(%rbp) 6301 vmovdqa %ymm8,%ymm9 6302 vmovdqa %ymm8,%ymm10 6303 vmovdqa %ymm8,%ymm11 6304 vmovdqa %ymm8,0+96(%rbp) 6305 vmovdqa %ymm12,%ymm15 6306 vpaddd .Lavx2_inc(%rip),%ymm15,%ymm14 6307 vpaddd .Lavx2_inc(%rip),%ymm14,%ymm13 6308 vpaddd .Lavx2_inc(%rip),%ymm13,%ymm12 6309 vmovdqa %ymm12,0+160(%rbp) 6310 vmovdqa %ymm13,0+192(%rbp) 6311 vmovdqa %ymm14,0+224(%rbp) 6312 vmovdqa %ymm15,0+256(%rbp) 6313 movq $10,%r10 6314.Lseal_avx2_init_rounds: 6315 vmovdqa %ymm8,0+128(%rbp) 6316 vmovdqa .Lrol16(%rip),%ymm8 6317 vpaddd %ymm7,%ymm3,%ymm3 6318 vpaddd %ymm6,%ymm2,%ymm2 6319 vpaddd %ymm5,%ymm1,%ymm1 6320 vpaddd %ymm4,%ymm0,%ymm0 6321 vpxor %ymm3,%ymm15,%ymm15 
6322 vpxor %ymm2,%ymm14,%ymm14 6323 vpxor %ymm1,%ymm13,%ymm13 6324 vpxor %ymm0,%ymm12,%ymm12 6325 vpshufb %ymm8,%ymm15,%ymm15 6326 vpshufb %ymm8,%ymm14,%ymm14 6327 vpshufb %ymm8,%ymm13,%ymm13 6328 vpshufb %ymm8,%ymm12,%ymm12 6329 vpaddd %ymm15,%ymm11,%ymm11 6330 vpaddd %ymm14,%ymm10,%ymm10 6331 vpaddd %ymm13,%ymm9,%ymm9 6332 vpaddd 0+128(%rbp),%ymm12,%ymm8 6333 vpxor %ymm11,%ymm7,%ymm7 6334 vpxor %ymm10,%ymm6,%ymm6 6335 vpxor %ymm9,%ymm5,%ymm5 6336 vpxor %ymm8,%ymm4,%ymm4 6337 vmovdqa %ymm8,0+128(%rbp) 6338 vpsrld $20,%ymm7,%ymm8 6339 vpslld $32-20,%ymm7,%ymm7 6340 vpxor %ymm8,%ymm7,%ymm7 6341 vpsrld $20,%ymm6,%ymm8 6342 vpslld $32-20,%ymm6,%ymm6 6343 vpxor %ymm8,%ymm6,%ymm6 6344 vpsrld $20,%ymm5,%ymm8 6345 vpslld $32-20,%ymm5,%ymm5 6346 vpxor %ymm8,%ymm5,%ymm5 6347 vpsrld $20,%ymm4,%ymm8 6348 vpslld $32-20,%ymm4,%ymm4 6349 vpxor %ymm8,%ymm4,%ymm4 6350 vmovdqa .Lrol8(%rip),%ymm8 6351 vpaddd %ymm7,%ymm3,%ymm3 6352 vpaddd %ymm6,%ymm2,%ymm2 6353 vpaddd %ymm5,%ymm1,%ymm1 6354 vpaddd %ymm4,%ymm0,%ymm0 6355 vpxor %ymm3,%ymm15,%ymm15 6356 vpxor %ymm2,%ymm14,%ymm14 6357 vpxor %ymm1,%ymm13,%ymm13 6358 vpxor %ymm0,%ymm12,%ymm12 6359 vpshufb %ymm8,%ymm15,%ymm15 6360 vpshufb %ymm8,%ymm14,%ymm14 6361 vpshufb %ymm8,%ymm13,%ymm13 6362 vpshufb %ymm8,%ymm12,%ymm12 6363 vpaddd %ymm15,%ymm11,%ymm11 6364 vpaddd %ymm14,%ymm10,%ymm10 6365 vpaddd %ymm13,%ymm9,%ymm9 6366 vpaddd 0+128(%rbp),%ymm12,%ymm8 6367 vpxor %ymm11,%ymm7,%ymm7 6368 vpxor %ymm10,%ymm6,%ymm6 6369 vpxor %ymm9,%ymm5,%ymm5 6370 vpxor %ymm8,%ymm4,%ymm4 6371 vmovdqa %ymm8,0+128(%rbp) 6372 vpsrld $25,%ymm7,%ymm8 6373 vpslld $32-25,%ymm7,%ymm7 6374 vpxor %ymm8,%ymm7,%ymm7 6375 vpsrld $25,%ymm6,%ymm8 6376 vpslld $32-25,%ymm6,%ymm6 6377 vpxor %ymm8,%ymm6,%ymm6 6378 vpsrld $25,%ymm5,%ymm8 6379 vpslld $32-25,%ymm5,%ymm5 6380 vpxor %ymm8,%ymm5,%ymm5 6381 vpsrld $25,%ymm4,%ymm8 6382 vpslld $32-25,%ymm4,%ymm4 6383 vpxor %ymm8,%ymm4,%ymm4 6384 vmovdqa 0+128(%rbp),%ymm8 6385 vpalignr $4,%ymm7,%ymm7,%ymm7 6386 vpalignr $8,%ymm11,%ymm11,%ymm11 6387 vpalignr $12,%ymm15,%ymm15,%ymm15 6388 vpalignr $4,%ymm6,%ymm6,%ymm6 6389 vpalignr $8,%ymm10,%ymm10,%ymm10 6390 vpalignr $12,%ymm14,%ymm14,%ymm14 6391 vpalignr $4,%ymm5,%ymm5,%ymm5 6392 vpalignr $8,%ymm9,%ymm9,%ymm9 6393 vpalignr $12,%ymm13,%ymm13,%ymm13 6394 vpalignr $4,%ymm4,%ymm4,%ymm4 6395 vpalignr $8,%ymm8,%ymm8,%ymm8 6396 vpalignr $12,%ymm12,%ymm12,%ymm12 6397 vmovdqa %ymm8,0+128(%rbp) 6398 vmovdqa .Lrol16(%rip),%ymm8 6399 vpaddd %ymm7,%ymm3,%ymm3 6400 vpaddd %ymm6,%ymm2,%ymm2 6401 vpaddd %ymm5,%ymm1,%ymm1 6402 vpaddd %ymm4,%ymm0,%ymm0 6403 vpxor %ymm3,%ymm15,%ymm15 6404 vpxor %ymm2,%ymm14,%ymm14 6405 vpxor %ymm1,%ymm13,%ymm13 6406 vpxor %ymm0,%ymm12,%ymm12 6407 vpshufb %ymm8,%ymm15,%ymm15 6408 vpshufb %ymm8,%ymm14,%ymm14 6409 vpshufb %ymm8,%ymm13,%ymm13 6410 vpshufb %ymm8,%ymm12,%ymm12 6411 vpaddd %ymm15,%ymm11,%ymm11 6412 vpaddd %ymm14,%ymm10,%ymm10 6413 vpaddd %ymm13,%ymm9,%ymm9 6414 vpaddd 0+128(%rbp),%ymm12,%ymm8 6415 vpxor %ymm11,%ymm7,%ymm7 6416 vpxor %ymm10,%ymm6,%ymm6 6417 vpxor %ymm9,%ymm5,%ymm5 6418 vpxor %ymm8,%ymm4,%ymm4 6419 vmovdqa %ymm8,0+128(%rbp) 6420 vpsrld $20,%ymm7,%ymm8 6421 vpslld $32-20,%ymm7,%ymm7 6422 vpxor %ymm8,%ymm7,%ymm7 6423 vpsrld $20,%ymm6,%ymm8 6424 vpslld $32-20,%ymm6,%ymm6 6425 vpxor %ymm8,%ymm6,%ymm6 6426 vpsrld $20,%ymm5,%ymm8 6427 vpslld $32-20,%ymm5,%ymm5 6428 vpxor %ymm8,%ymm5,%ymm5 6429 vpsrld $20,%ymm4,%ymm8 6430 vpslld $32-20,%ymm4,%ymm4 6431 vpxor %ymm8,%ymm4,%ymm4 6432 vmovdqa .Lrol8(%rip),%ymm8 6433 vpaddd %ymm7,%ymm3,%ymm3 6434 vpaddd %ymm6,%ymm2,%ymm2 6435 vpaddd 
%ymm5,%ymm1,%ymm1 6436 vpaddd %ymm4,%ymm0,%ymm0 6437 vpxor %ymm3,%ymm15,%ymm15 6438 vpxor %ymm2,%ymm14,%ymm14 6439 vpxor %ymm1,%ymm13,%ymm13 6440 vpxor %ymm0,%ymm12,%ymm12 6441 vpshufb %ymm8,%ymm15,%ymm15 6442 vpshufb %ymm8,%ymm14,%ymm14 6443 vpshufb %ymm8,%ymm13,%ymm13 6444 vpshufb %ymm8,%ymm12,%ymm12 6445 vpaddd %ymm15,%ymm11,%ymm11 6446 vpaddd %ymm14,%ymm10,%ymm10 6447 vpaddd %ymm13,%ymm9,%ymm9 6448 vpaddd 0+128(%rbp),%ymm12,%ymm8 6449 vpxor %ymm11,%ymm7,%ymm7 6450 vpxor %ymm10,%ymm6,%ymm6 6451 vpxor %ymm9,%ymm5,%ymm5 6452 vpxor %ymm8,%ymm4,%ymm4 6453 vmovdqa %ymm8,0+128(%rbp) 6454 vpsrld $25,%ymm7,%ymm8 6455 vpslld $32-25,%ymm7,%ymm7 6456 vpxor %ymm8,%ymm7,%ymm7 6457 vpsrld $25,%ymm6,%ymm8 6458 vpslld $32-25,%ymm6,%ymm6 6459 vpxor %ymm8,%ymm6,%ymm6 6460 vpsrld $25,%ymm5,%ymm8 6461 vpslld $32-25,%ymm5,%ymm5 6462 vpxor %ymm8,%ymm5,%ymm5 6463 vpsrld $25,%ymm4,%ymm8 6464 vpslld $32-25,%ymm4,%ymm4 6465 vpxor %ymm8,%ymm4,%ymm4 6466 vmovdqa 0+128(%rbp),%ymm8 6467 vpalignr $12,%ymm7,%ymm7,%ymm7 6468 vpalignr $8,%ymm11,%ymm11,%ymm11 6469 vpalignr $4,%ymm15,%ymm15,%ymm15 6470 vpalignr $12,%ymm6,%ymm6,%ymm6 6471 vpalignr $8,%ymm10,%ymm10,%ymm10 6472 vpalignr $4,%ymm14,%ymm14,%ymm14 6473 vpalignr $12,%ymm5,%ymm5,%ymm5 6474 vpalignr $8,%ymm9,%ymm9,%ymm9 6475 vpalignr $4,%ymm13,%ymm13,%ymm13 6476 vpalignr $12,%ymm4,%ymm4,%ymm4 6477 vpalignr $8,%ymm8,%ymm8,%ymm8 6478 vpalignr $4,%ymm12,%ymm12,%ymm12 6479 6480 decq %r10 6481 jnz .Lseal_avx2_init_rounds 6482 vpaddd .Lchacha20_consts(%rip),%ymm3,%ymm3 6483 vpaddd 0+64(%rbp),%ymm7,%ymm7 6484 vpaddd 0+96(%rbp),%ymm11,%ymm11 6485 vpaddd 0+256(%rbp),%ymm15,%ymm15 6486 vpaddd .Lchacha20_consts(%rip),%ymm2,%ymm2 6487 vpaddd 0+64(%rbp),%ymm6,%ymm6 6488 vpaddd 0+96(%rbp),%ymm10,%ymm10 6489 vpaddd 0+224(%rbp),%ymm14,%ymm14 6490 vpaddd .Lchacha20_consts(%rip),%ymm1,%ymm1 6491 vpaddd 0+64(%rbp),%ymm5,%ymm5 6492 vpaddd 0+96(%rbp),%ymm9,%ymm9 6493 vpaddd 0+192(%rbp),%ymm13,%ymm13 6494 vpaddd .Lchacha20_consts(%rip),%ymm0,%ymm0 6495 vpaddd 0+64(%rbp),%ymm4,%ymm4 6496 vpaddd 0+96(%rbp),%ymm8,%ymm8 6497 vpaddd 0+160(%rbp),%ymm12,%ymm12 6498 6499 vperm2i128 $0x13,%ymm11,%ymm15,%ymm11 6500 vperm2i128 $0x02,%ymm3,%ymm7,%ymm15 6501 vperm2i128 $0x13,%ymm3,%ymm7,%ymm3 6502 vpand .Lclamp(%rip),%ymm15,%ymm15 6503 vmovdqa %ymm15,0+0(%rbp) 6504 movq %r8,%r8 6505 call poly_hash_ad_internal 6506 6507 vpxor 0(%rsi),%ymm3,%ymm3 6508 vpxor 32(%rsi),%ymm11,%ymm11 6509 vmovdqu %ymm3,0(%rdi) 6510 vmovdqu %ymm11,32(%rdi) 6511 vperm2i128 $0x02,%ymm2,%ymm6,%ymm15 6512 vperm2i128 $0x13,%ymm2,%ymm6,%ymm6 6513 vperm2i128 $0x02,%ymm10,%ymm14,%ymm2 6514 vperm2i128 $0x13,%ymm10,%ymm14,%ymm10 6515 vpxor 0+64(%rsi),%ymm15,%ymm15 6516 vpxor 32+64(%rsi),%ymm2,%ymm2 6517 vpxor 64+64(%rsi),%ymm6,%ymm6 6518 vpxor 96+64(%rsi),%ymm10,%ymm10 6519 vmovdqu %ymm15,0+64(%rdi) 6520 vmovdqu %ymm2,32+64(%rdi) 6521 vmovdqu %ymm6,64+64(%rdi) 6522 vmovdqu %ymm10,96+64(%rdi) 6523 vperm2i128 $0x02,%ymm1,%ymm5,%ymm15 6524 vperm2i128 $0x13,%ymm1,%ymm5,%ymm5 6525 vperm2i128 $0x02,%ymm9,%ymm13,%ymm1 6526 vperm2i128 $0x13,%ymm9,%ymm13,%ymm9 6527 vpxor 0+192(%rsi),%ymm15,%ymm15 6528 vpxor 32+192(%rsi),%ymm1,%ymm1 6529 vpxor 64+192(%rsi),%ymm5,%ymm5 6530 vpxor 96+192(%rsi),%ymm9,%ymm9 6531 vmovdqu %ymm15,0+192(%rdi) 6532 vmovdqu %ymm1,32+192(%rdi) 6533 vmovdqu %ymm5,64+192(%rdi) 6534 vmovdqu %ymm9,96+192(%rdi) 6535 vperm2i128 $0x13,%ymm0,%ymm4,%ymm15 6536 vperm2i128 $0x02,%ymm0,%ymm4,%ymm0 6537 vperm2i128 $0x02,%ymm8,%ymm12,%ymm4 6538 vperm2i128 $0x13,%ymm8,%ymm12,%ymm12 6539 vmovdqa %ymm15,%ymm8 6540 6541 leaq 
320(%rsi),%rsi 6542 subq $320,%rbx 6543 movq $320,%rcx 6544 cmpq $128,%rbx 6545 jbe .Lseal_avx2_short_hash_remainder 6546 vpxor 0(%rsi),%ymm0,%ymm0 6547 vpxor 32(%rsi),%ymm4,%ymm4 6548 vpxor 64(%rsi),%ymm8,%ymm8 6549 vpxor 96(%rsi),%ymm12,%ymm12 6550 vmovdqu %ymm0,320(%rdi) 6551 vmovdqu %ymm4,352(%rdi) 6552 vmovdqu %ymm8,384(%rdi) 6553 vmovdqu %ymm12,416(%rdi) 6554 leaq 128(%rsi),%rsi 6555 subq $128,%rbx 6556 movq $8,%rcx 6557 movq $2,%r8 6558 cmpq $128,%rbx 6559 jbe .Lseal_avx2_tail_128 6560 cmpq $256,%rbx 6561 jbe .Lseal_avx2_tail_256 6562 cmpq $384,%rbx 6563 jbe .Lseal_avx2_tail_384 6564 cmpq $512,%rbx 6565 jbe .Lseal_avx2_tail_512 6566 vmovdqa .Lchacha20_consts(%rip),%ymm0 6567 vmovdqa 0+64(%rbp),%ymm4 6568 vmovdqa 0+96(%rbp),%ymm8 6569 vmovdqa %ymm0,%ymm1 6570 vmovdqa %ymm4,%ymm5 6571 vmovdqa %ymm8,%ymm9 6572 vmovdqa %ymm0,%ymm2 6573 vmovdqa %ymm4,%ymm6 6574 vmovdqa %ymm8,%ymm10 6575 vmovdqa %ymm0,%ymm3 6576 vmovdqa %ymm4,%ymm7 6577 vmovdqa %ymm8,%ymm11 6578 vmovdqa .Lavx2_inc(%rip),%ymm12 6579 vpaddd 0+160(%rbp),%ymm12,%ymm15 6580 vpaddd %ymm15,%ymm12,%ymm14 6581 vpaddd %ymm14,%ymm12,%ymm13 6582 vpaddd %ymm13,%ymm12,%ymm12 6583 vmovdqa %ymm15,0+256(%rbp) 6584 vmovdqa %ymm14,0+224(%rbp) 6585 vmovdqa %ymm13,0+192(%rbp) 6586 vmovdqa %ymm12,0+160(%rbp) 6587 vmovdqa %ymm8,0+128(%rbp) 6588 vmovdqa .Lrol16(%rip),%ymm8 6589 vpaddd %ymm7,%ymm3,%ymm3 6590 vpaddd %ymm6,%ymm2,%ymm2 6591 vpaddd %ymm5,%ymm1,%ymm1 6592 vpaddd %ymm4,%ymm0,%ymm0 6593 vpxor %ymm3,%ymm15,%ymm15 6594 vpxor %ymm2,%ymm14,%ymm14 6595 vpxor %ymm1,%ymm13,%ymm13 6596 vpxor %ymm0,%ymm12,%ymm12 6597 vpshufb %ymm8,%ymm15,%ymm15 6598 vpshufb %ymm8,%ymm14,%ymm14 6599 vpshufb %ymm8,%ymm13,%ymm13 6600 vpshufb %ymm8,%ymm12,%ymm12 6601 vpaddd %ymm15,%ymm11,%ymm11 6602 vpaddd %ymm14,%ymm10,%ymm10 6603 vpaddd %ymm13,%ymm9,%ymm9 6604 vpaddd 0+128(%rbp),%ymm12,%ymm8 6605 vpxor %ymm11,%ymm7,%ymm7 6606 vpxor %ymm10,%ymm6,%ymm6 6607 vpxor %ymm9,%ymm5,%ymm5 6608 vpxor %ymm8,%ymm4,%ymm4 6609 vmovdqa %ymm8,0+128(%rbp) 6610 vpsrld $20,%ymm7,%ymm8 6611 vpslld $32-20,%ymm7,%ymm7 6612 vpxor %ymm8,%ymm7,%ymm7 6613 vpsrld $20,%ymm6,%ymm8 6614 vpslld $32-20,%ymm6,%ymm6 6615 vpxor %ymm8,%ymm6,%ymm6 6616 vpsrld $20,%ymm5,%ymm8 6617 vpslld $32-20,%ymm5,%ymm5 6618 vpxor %ymm8,%ymm5,%ymm5 6619 vpsrld $20,%ymm4,%ymm8 6620 vpslld $32-20,%ymm4,%ymm4 6621 vpxor %ymm8,%ymm4,%ymm4 6622 vmovdqa .Lrol8(%rip),%ymm8 6623 vpaddd %ymm7,%ymm3,%ymm3 6624 vpaddd %ymm6,%ymm2,%ymm2 6625 vpaddd %ymm5,%ymm1,%ymm1 6626 vpaddd %ymm4,%ymm0,%ymm0 6627 vpxor %ymm3,%ymm15,%ymm15 6628 vpxor %ymm2,%ymm14,%ymm14 6629 vpxor %ymm1,%ymm13,%ymm13 6630 vpxor %ymm0,%ymm12,%ymm12 6631 vpshufb %ymm8,%ymm15,%ymm15 6632 vpshufb %ymm8,%ymm14,%ymm14 6633 vpshufb %ymm8,%ymm13,%ymm13 6634 vpshufb %ymm8,%ymm12,%ymm12 6635 vpaddd %ymm15,%ymm11,%ymm11 6636 vpaddd %ymm14,%ymm10,%ymm10 6637 vpaddd %ymm13,%ymm9,%ymm9 6638 vpaddd 0+128(%rbp),%ymm12,%ymm8 6639 vpxor %ymm11,%ymm7,%ymm7 6640 vpxor %ymm10,%ymm6,%ymm6 6641 vpxor %ymm9,%ymm5,%ymm5 6642 vpxor %ymm8,%ymm4,%ymm4 6643 vmovdqa %ymm8,0+128(%rbp) 6644 vpsrld $25,%ymm7,%ymm8 6645 vpslld $32-25,%ymm7,%ymm7 6646 vpxor %ymm8,%ymm7,%ymm7 6647 vpsrld $25,%ymm6,%ymm8 6648 vpslld $32-25,%ymm6,%ymm6 6649 vpxor %ymm8,%ymm6,%ymm6 6650 vpsrld $25,%ymm5,%ymm8 6651 vpslld $32-25,%ymm5,%ymm5 6652 vpxor %ymm8,%ymm5,%ymm5 6653 vpsrld $25,%ymm4,%ymm8 6654 vpslld $32-25,%ymm4,%ymm4 6655 vpxor %ymm8,%ymm4,%ymm4 6656 vmovdqa 0+128(%rbp),%ymm8 6657 vpalignr $4,%ymm7,%ymm7,%ymm7 6658 vpalignr $8,%ymm11,%ymm11,%ymm11 6659 vpalignr $12,%ymm15,%ymm15,%ymm15 6660 vpalignr 
$4,%ymm6,%ymm6,%ymm6 6661 vpalignr $8,%ymm10,%ymm10,%ymm10 6662 vpalignr $12,%ymm14,%ymm14,%ymm14 6663 vpalignr $4,%ymm5,%ymm5,%ymm5 6664 vpalignr $8,%ymm9,%ymm9,%ymm9 6665 vpalignr $12,%ymm13,%ymm13,%ymm13 6666 vpalignr $4,%ymm4,%ymm4,%ymm4 6667 vpalignr $8,%ymm8,%ymm8,%ymm8 6668 vpalignr $12,%ymm12,%ymm12,%ymm12 6669 vmovdqa %ymm8,0+128(%rbp) 6670 vmovdqa .Lrol16(%rip),%ymm8 6671 vpaddd %ymm7,%ymm3,%ymm3 6672 vpaddd %ymm6,%ymm2,%ymm2 6673 vpaddd %ymm5,%ymm1,%ymm1 6674 vpaddd %ymm4,%ymm0,%ymm0 6675 vpxor %ymm3,%ymm15,%ymm15 6676 vpxor %ymm2,%ymm14,%ymm14 6677 vpxor %ymm1,%ymm13,%ymm13 6678 vpxor %ymm0,%ymm12,%ymm12 6679 vpshufb %ymm8,%ymm15,%ymm15 6680 vpshufb %ymm8,%ymm14,%ymm14 6681 vpshufb %ymm8,%ymm13,%ymm13 6682 vpshufb %ymm8,%ymm12,%ymm12 6683 vpaddd %ymm15,%ymm11,%ymm11 6684 vpaddd %ymm14,%ymm10,%ymm10 6685 vpaddd %ymm13,%ymm9,%ymm9 6686 vpaddd 0+128(%rbp),%ymm12,%ymm8 6687 vpxor %ymm11,%ymm7,%ymm7 6688 vpxor %ymm10,%ymm6,%ymm6 6689 vpxor %ymm9,%ymm5,%ymm5 6690 vpxor %ymm8,%ymm4,%ymm4 6691 vmovdqa %ymm8,0+128(%rbp) 6692 vpsrld $20,%ymm7,%ymm8 6693 vpslld $32-20,%ymm7,%ymm7 6694 vpxor %ymm8,%ymm7,%ymm7 6695 vpsrld $20,%ymm6,%ymm8 6696 vpslld $32-20,%ymm6,%ymm6 6697 vpxor %ymm8,%ymm6,%ymm6 6698 vpsrld $20,%ymm5,%ymm8 6699 vpslld $32-20,%ymm5,%ymm5 6700 vpxor %ymm8,%ymm5,%ymm5 6701 vpsrld $20,%ymm4,%ymm8 6702 vpslld $32-20,%ymm4,%ymm4 6703 vpxor %ymm8,%ymm4,%ymm4 6704 vmovdqa .Lrol8(%rip),%ymm8 6705 vpaddd %ymm7,%ymm3,%ymm3 6706 vpaddd %ymm6,%ymm2,%ymm2 6707 vpaddd %ymm5,%ymm1,%ymm1 6708 vpaddd %ymm4,%ymm0,%ymm0 6709 vpxor %ymm3,%ymm15,%ymm15 6710 vpxor %ymm2,%ymm14,%ymm14 6711 vpxor %ymm1,%ymm13,%ymm13 6712 vpxor %ymm0,%ymm12,%ymm12 6713 vpshufb %ymm8,%ymm15,%ymm15 6714 vpshufb %ymm8,%ymm14,%ymm14 6715 vpshufb %ymm8,%ymm13,%ymm13 6716 vpshufb %ymm8,%ymm12,%ymm12 6717 vpaddd %ymm15,%ymm11,%ymm11 6718 vpaddd %ymm14,%ymm10,%ymm10 6719 vpaddd %ymm13,%ymm9,%ymm9 6720 vpaddd 0+128(%rbp),%ymm12,%ymm8 6721 vpxor %ymm11,%ymm7,%ymm7 6722 vpxor %ymm10,%ymm6,%ymm6 6723 vpxor %ymm9,%ymm5,%ymm5 6724 vpxor %ymm8,%ymm4,%ymm4 6725 vmovdqa %ymm8,0+128(%rbp) 6726 vpsrld $25,%ymm7,%ymm8 6727 vpslld $32-25,%ymm7,%ymm7 6728 vpxor %ymm8,%ymm7,%ymm7 6729 vpsrld $25,%ymm6,%ymm8 6730 vpslld $32-25,%ymm6,%ymm6 6731 vpxor %ymm8,%ymm6,%ymm6 6732 vpsrld $25,%ymm5,%ymm8 6733 vpslld $32-25,%ymm5,%ymm5 6734 vpxor %ymm8,%ymm5,%ymm5 6735 vpsrld $25,%ymm4,%ymm8 6736 vpslld $32-25,%ymm4,%ymm4 6737 vpxor %ymm8,%ymm4,%ymm4 6738 vmovdqa 0+128(%rbp),%ymm8 6739 vpalignr $12,%ymm7,%ymm7,%ymm7 6740 vpalignr $8,%ymm11,%ymm11,%ymm11 6741 vpalignr $4,%ymm15,%ymm15,%ymm15 6742 vpalignr $12,%ymm6,%ymm6,%ymm6 6743 vpalignr $8,%ymm10,%ymm10,%ymm10 6744 vpalignr $4,%ymm14,%ymm14,%ymm14 6745 vpalignr $12,%ymm5,%ymm5,%ymm5 6746 vpalignr $8,%ymm9,%ymm9,%ymm9 6747 vpalignr $4,%ymm13,%ymm13,%ymm13 6748 vpalignr $12,%ymm4,%ymm4,%ymm4 6749 vpalignr $8,%ymm8,%ymm8,%ymm8 6750 vpalignr $4,%ymm12,%ymm12,%ymm12 6751 vmovdqa %ymm8,0+128(%rbp) 6752 vmovdqa .Lrol16(%rip),%ymm8 6753 vpaddd %ymm7,%ymm3,%ymm3 6754 vpaddd %ymm6,%ymm2,%ymm2 6755 vpaddd %ymm5,%ymm1,%ymm1 6756 vpaddd %ymm4,%ymm0,%ymm0 6757 vpxor %ymm3,%ymm15,%ymm15 6758 vpxor %ymm2,%ymm14,%ymm14 6759 vpxor %ymm1,%ymm13,%ymm13 6760 vpxor %ymm0,%ymm12,%ymm12 6761 vpshufb %ymm8,%ymm15,%ymm15 6762 vpshufb %ymm8,%ymm14,%ymm14 6763 vpshufb %ymm8,%ymm13,%ymm13 6764 vpshufb %ymm8,%ymm12,%ymm12 6765 vpaddd %ymm15,%ymm11,%ymm11 6766 vpaddd %ymm14,%ymm10,%ymm10 6767 vpaddd %ymm13,%ymm9,%ymm9 6768 vpaddd 0+128(%rbp),%ymm12,%ymm8 6769 vpxor %ymm11,%ymm7,%ymm7 6770 vpxor %ymm10,%ymm6,%ymm6 6771 vpxor 
%ymm9,%ymm5,%ymm5 6772 vpxor %ymm8,%ymm4,%ymm4 6773 vmovdqa %ymm8,0+128(%rbp) 6774 vpsrld $20,%ymm7,%ymm8 6775 vpslld $32-20,%ymm7,%ymm7 6776 vpxor %ymm8,%ymm7,%ymm7 6777 vpsrld $20,%ymm6,%ymm8 6778 vpslld $32-20,%ymm6,%ymm6 6779 vpxor %ymm8,%ymm6,%ymm6 6780 vpsrld $20,%ymm5,%ymm8 6781 vpslld $32-20,%ymm5,%ymm5 6782 vpxor %ymm8,%ymm5,%ymm5 6783 vpsrld $20,%ymm4,%ymm8 6784 vpslld $32-20,%ymm4,%ymm4 6785 vpxor %ymm8,%ymm4,%ymm4 6786 vmovdqa .Lrol8(%rip),%ymm8 6787 vpaddd %ymm7,%ymm3,%ymm3 6788 vpaddd %ymm6,%ymm2,%ymm2 6789 vpaddd %ymm5,%ymm1,%ymm1 6790 vpaddd %ymm4,%ymm0,%ymm0 6791 vpxor %ymm3,%ymm15,%ymm15 6792 6793 subq $16,%rdi 6794 movq $9,%rcx 6795 jmp .Lseal_avx2_main_loop_rounds_entry 6796.align 32 6797.Lseal_avx2_main_loop: 6798 vmovdqa .Lchacha20_consts(%rip),%ymm0 6799 vmovdqa 0+64(%rbp),%ymm4 6800 vmovdqa 0+96(%rbp),%ymm8 6801 vmovdqa %ymm0,%ymm1 6802 vmovdqa %ymm4,%ymm5 6803 vmovdqa %ymm8,%ymm9 6804 vmovdqa %ymm0,%ymm2 6805 vmovdqa %ymm4,%ymm6 6806 vmovdqa %ymm8,%ymm10 6807 vmovdqa %ymm0,%ymm3 6808 vmovdqa %ymm4,%ymm7 6809 vmovdqa %ymm8,%ymm11 6810 vmovdqa .Lavx2_inc(%rip),%ymm12 6811 vpaddd 0+160(%rbp),%ymm12,%ymm15 6812 vpaddd %ymm15,%ymm12,%ymm14 6813 vpaddd %ymm14,%ymm12,%ymm13 6814 vpaddd %ymm13,%ymm12,%ymm12 6815 vmovdqa %ymm15,0+256(%rbp) 6816 vmovdqa %ymm14,0+224(%rbp) 6817 vmovdqa %ymm13,0+192(%rbp) 6818 vmovdqa %ymm12,0+160(%rbp) 6819 6820 movq $10,%rcx 6821.align 32 6822.Lseal_avx2_main_loop_rounds: 6823 addq 0+0(%rdi),%r10 6824 adcq 8+0(%rdi),%r11 6825 adcq $1,%r12 6826 vmovdqa %ymm8,0+128(%rbp) 6827 vmovdqa .Lrol16(%rip),%ymm8 6828 vpaddd %ymm7,%ymm3,%ymm3 6829 vpaddd %ymm6,%ymm2,%ymm2 6830 vpaddd %ymm5,%ymm1,%ymm1 6831 vpaddd %ymm4,%ymm0,%ymm0 6832 vpxor %ymm3,%ymm15,%ymm15 6833 vpxor %ymm2,%ymm14,%ymm14 6834 vpxor %ymm1,%ymm13,%ymm13 6835 vpxor %ymm0,%ymm12,%ymm12 6836 movq 0+0+0(%rbp),%rdx 6837 movq %rdx,%r15 6838 mulxq %r10,%r13,%r14 6839 mulxq %r11,%rax,%rdx 6840 imulq %r12,%r15 6841 addq %rax,%r14 6842 adcq %rdx,%r15 6843 vpshufb %ymm8,%ymm15,%ymm15 6844 vpshufb %ymm8,%ymm14,%ymm14 6845 vpshufb %ymm8,%ymm13,%ymm13 6846 vpshufb %ymm8,%ymm12,%ymm12 6847 vpaddd %ymm15,%ymm11,%ymm11 6848 vpaddd %ymm14,%ymm10,%ymm10 6849 vpaddd %ymm13,%ymm9,%ymm9 6850 vpaddd 0+128(%rbp),%ymm12,%ymm8 6851 vpxor %ymm11,%ymm7,%ymm7 6852 movq 8+0+0(%rbp),%rdx 6853 mulxq %r10,%r10,%rax 6854 addq %r10,%r14 6855 mulxq %r11,%r11,%r9 6856 adcq %r11,%r15 6857 adcq $0,%r9 6858 imulq %r12,%rdx 6859 vpxor %ymm10,%ymm6,%ymm6 6860 vpxor %ymm9,%ymm5,%ymm5 6861 vpxor %ymm8,%ymm4,%ymm4 6862 vmovdqa %ymm8,0+128(%rbp) 6863 vpsrld $20,%ymm7,%ymm8 6864 vpslld $32-20,%ymm7,%ymm7 6865 vpxor %ymm8,%ymm7,%ymm7 6866 vpsrld $20,%ymm6,%ymm8 6867 vpslld $32-20,%ymm6,%ymm6 6868 vpxor %ymm8,%ymm6,%ymm6 6869 vpsrld $20,%ymm5,%ymm8 6870 vpslld $32-20,%ymm5,%ymm5 6871 addq %rax,%r15 6872 adcq %rdx,%r9 6873 vpxor %ymm8,%ymm5,%ymm5 6874 vpsrld $20,%ymm4,%ymm8 6875 vpslld $32-20,%ymm4,%ymm4 6876 vpxor %ymm8,%ymm4,%ymm4 6877 vmovdqa .Lrol8(%rip),%ymm8 6878 vpaddd %ymm7,%ymm3,%ymm3 6879 vpaddd %ymm6,%ymm2,%ymm2 6880 vpaddd %ymm5,%ymm1,%ymm1 6881 vpaddd %ymm4,%ymm0,%ymm0 6882 vpxor %ymm3,%ymm15,%ymm15 6883 movq %r13,%r10 6884 movq %r14,%r11 6885 movq %r15,%r12 6886 andq $3,%r12 6887 movq %r15,%r13 6888 andq $-4,%r13 6889 movq %r9,%r14 6890 shrdq $2,%r9,%r15 6891 shrq $2,%r9 6892 addq %r13,%r15 6893 adcq %r14,%r9 6894 addq %r15,%r10 6895 adcq %r9,%r11 6896 adcq $0,%r12 6897 6898.Lseal_avx2_main_loop_rounds_entry: 6899 vpxor %ymm2,%ymm14,%ymm14 6900 vpxor %ymm1,%ymm13,%ymm13 6901 vpxor %ymm0,%ymm12,%ymm12 6902 vpshufb 
%ymm8,%ymm15,%ymm15 6903 vpshufb %ymm8,%ymm14,%ymm14 6904 vpshufb %ymm8,%ymm13,%ymm13 6905 vpshufb %ymm8,%ymm12,%ymm12 6906 vpaddd %ymm15,%ymm11,%ymm11 6907 vpaddd %ymm14,%ymm10,%ymm10 6908 addq 0+16(%rdi),%r10 6909 adcq 8+16(%rdi),%r11 6910 adcq $1,%r12 6911 vpaddd %ymm13,%ymm9,%ymm9 6912 vpaddd 0+128(%rbp),%ymm12,%ymm8 6913 vpxor %ymm11,%ymm7,%ymm7 6914 vpxor %ymm10,%ymm6,%ymm6 6915 vpxor %ymm9,%ymm5,%ymm5 6916 vpxor %ymm8,%ymm4,%ymm4 6917 vmovdqa %ymm8,0+128(%rbp) 6918 vpsrld $25,%ymm7,%ymm8 6919 movq 0+0+0(%rbp),%rdx 6920 movq %rdx,%r15 6921 mulxq %r10,%r13,%r14 6922 mulxq %r11,%rax,%rdx 6923 imulq %r12,%r15 6924 addq %rax,%r14 6925 adcq %rdx,%r15 6926 vpslld $32-25,%ymm7,%ymm7 6927 vpxor %ymm8,%ymm7,%ymm7 6928 vpsrld $25,%ymm6,%ymm8 6929 vpslld $32-25,%ymm6,%ymm6 6930 vpxor %ymm8,%ymm6,%ymm6 6931 vpsrld $25,%ymm5,%ymm8 6932 vpslld $32-25,%ymm5,%ymm5 6933 vpxor %ymm8,%ymm5,%ymm5 6934 vpsrld $25,%ymm4,%ymm8 6935 vpslld $32-25,%ymm4,%ymm4 6936 vpxor %ymm8,%ymm4,%ymm4 6937 vmovdqa 0+128(%rbp),%ymm8 6938 vpalignr $4,%ymm7,%ymm7,%ymm7 6939 vpalignr $8,%ymm11,%ymm11,%ymm11 6940 vpalignr $12,%ymm15,%ymm15,%ymm15 6941 vpalignr $4,%ymm6,%ymm6,%ymm6 6942 vpalignr $8,%ymm10,%ymm10,%ymm10 6943 vpalignr $12,%ymm14,%ymm14,%ymm14 6944 movq 8+0+0(%rbp),%rdx 6945 mulxq %r10,%r10,%rax 6946 addq %r10,%r14 6947 mulxq %r11,%r11,%r9 6948 adcq %r11,%r15 6949 adcq $0,%r9 6950 imulq %r12,%rdx 6951 vpalignr $4,%ymm5,%ymm5,%ymm5 6952 vpalignr $8,%ymm9,%ymm9,%ymm9 6953 vpalignr $12,%ymm13,%ymm13,%ymm13 6954 vpalignr $4,%ymm4,%ymm4,%ymm4 6955 vpalignr $8,%ymm8,%ymm8,%ymm8 6956 vpalignr $12,%ymm12,%ymm12,%ymm12 6957 vmovdqa %ymm8,0+128(%rbp) 6958 vmovdqa .Lrol16(%rip),%ymm8 6959 vpaddd %ymm7,%ymm3,%ymm3 6960 vpaddd %ymm6,%ymm2,%ymm2 6961 vpaddd %ymm5,%ymm1,%ymm1 6962 vpaddd %ymm4,%ymm0,%ymm0 6963 vpxor %ymm3,%ymm15,%ymm15 6964 vpxor %ymm2,%ymm14,%ymm14 6965 vpxor %ymm1,%ymm13,%ymm13 6966 vpxor %ymm0,%ymm12,%ymm12 6967 vpshufb %ymm8,%ymm15,%ymm15 6968 vpshufb %ymm8,%ymm14,%ymm14 6969 addq %rax,%r15 6970 adcq %rdx,%r9 6971 vpshufb %ymm8,%ymm13,%ymm13 6972 vpshufb %ymm8,%ymm12,%ymm12 6973 vpaddd %ymm15,%ymm11,%ymm11 6974 vpaddd %ymm14,%ymm10,%ymm10 6975 vpaddd %ymm13,%ymm9,%ymm9 6976 vpaddd 0+128(%rbp),%ymm12,%ymm8 6977 vpxor %ymm11,%ymm7,%ymm7 6978 vpxor %ymm10,%ymm6,%ymm6 6979 vpxor %ymm9,%ymm5,%ymm5 6980 movq %r13,%r10 6981 movq %r14,%r11 6982 movq %r15,%r12 6983 andq $3,%r12 6984 movq %r15,%r13 6985 andq $-4,%r13 6986 movq %r9,%r14 6987 shrdq $2,%r9,%r15 6988 shrq $2,%r9 6989 addq %r13,%r15 6990 adcq %r14,%r9 6991 addq %r15,%r10 6992 adcq %r9,%r11 6993 adcq $0,%r12 6994 vpxor %ymm8,%ymm4,%ymm4 6995 vmovdqa %ymm8,0+128(%rbp) 6996 vpsrld $20,%ymm7,%ymm8 6997 vpslld $32-20,%ymm7,%ymm7 6998 vpxor %ymm8,%ymm7,%ymm7 6999 vpsrld $20,%ymm6,%ymm8 7000 vpslld $32-20,%ymm6,%ymm6 7001 vpxor %ymm8,%ymm6,%ymm6 7002 addq 0+32(%rdi),%r10 7003 adcq 8+32(%rdi),%r11 7004 adcq $1,%r12 7005 7006 leaq 48(%rdi),%rdi 7007 vpsrld $20,%ymm5,%ymm8 7008 vpslld $32-20,%ymm5,%ymm5 7009 vpxor %ymm8,%ymm5,%ymm5 7010 vpsrld $20,%ymm4,%ymm8 7011 vpslld $32-20,%ymm4,%ymm4 7012 vpxor %ymm8,%ymm4,%ymm4 7013 vmovdqa .Lrol8(%rip),%ymm8 7014 vpaddd %ymm7,%ymm3,%ymm3 7015 vpaddd %ymm6,%ymm2,%ymm2 7016 vpaddd %ymm5,%ymm1,%ymm1 7017 vpaddd %ymm4,%ymm0,%ymm0 7018 vpxor %ymm3,%ymm15,%ymm15 7019 vpxor %ymm2,%ymm14,%ymm14 7020 vpxor %ymm1,%ymm13,%ymm13 7021 vpxor %ymm0,%ymm12,%ymm12 7022 vpshufb %ymm8,%ymm15,%ymm15 7023 vpshufb %ymm8,%ymm14,%ymm14 7024 vpshufb %ymm8,%ymm13,%ymm13 7025 movq 0+0+0(%rbp),%rdx 7026 movq %rdx,%r15 7027 mulxq %r10,%r13,%r14 7028 
mulxq %r11,%rax,%rdx 7029 imulq %r12,%r15 7030 addq %rax,%r14 7031 adcq %rdx,%r15 7032 vpshufb %ymm8,%ymm12,%ymm12 7033 vpaddd %ymm15,%ymm11,%ymm11 7034 vpaddd %ymm14,%ymm10,%ymm10 7035 vpaddd %ymm13,%ymm9,%ymm9 7036 vpaddd 0+128(%rbp),%ymm12,%ymm8 7037 vpxor %ymm11,%ymm7,%ymm7 7038 vpxor %ymm10,%ymm6,%ymm6 7039 vpxor %ymm9,%ymm5,%ymm5 7040 movq 8+0+0(%rbp),%rdx 7041 mulxq %r10,%r10,%rax 7042 addq %r10,%r14 7043 mulxq %r11,%r11,%r9 7044 adcq %r11,%r15 7045 adcq $0,%r9 7046 imulq %r12,%rdx 7047 vpxor %ymm8,%ymm4,%ymm4 7048 vmovdqa %ymm8,0+128(%rbp) 7049 vpsrld $25,%ymm7,%ymm8 7050 vpslld $32-25,%ymm7,%ymm7 7051 vpxor %ymm8,%ymm7,%ymm7 7052 vpsrld $25,%ymm6,%ymm8 7053 vpslld $32-25,%ymm6,%ymm6 7054 vpxor %ymm8,%ymm6,%ymm6 7055 addq %rax,%r15 7056 adcq %rdx,%r9 7057 vpsrld $25,%ymm5,%ymm8 7058 vpslld $32-25,%ymm5,%ymm5 7059 vpxor %ymm8,%ymm5,%ymm5 7060 vpsrld $25,%ymm4,%ymm8 7061 vpslld $32-25,%ymm4,%ymm4 7062 vpxor %ymm8,%ymm4,%ymm4 7063 vmovdqa 0+128(%rbp),%ymm8 7064 vpalignr $12,%ymm7,%ymm7,%ymm7 7065 vpalignr $8,%ymm11,%ymm11,%ymm11 7066 vpalignr $4,%ymm15,%ymm15,%ymm15 7067 vpalignr $12,%ymm6,%ymm6,%ymm6 7068 vpalignr $8,%ymm10,%ymm10,%ymm10 7069 vpalignr $4,%ymm14,%ymm14,%ymm14 7070 vpalignr $12,%ymm5,%ymm5,%ymm5 7071 vpalignr $8,%ymm9,%ymm9,%ymm9 7072 vpalignr $4,%ymm13,%ymm13,%ymm13 7073 vpalignr $12,%ymm4,%ymm4,%ymm4 7074 vpalignr $8,%ymm8,%ymm8,%ymm8 7075 movq %r13,%r10 7076 movq %r14,%r11 7077 movq %r15,%r12 7078 andq $3,%r12 7079 movq %r15,%r13 7080 andq $-4,%r13 7081 movq %r9,%r14 7082 shrdq $2,%r9,%r15 7083 shrq $2,%r9 7084 addq %r13,%r15 7085 adcq %r14,%r9 7086 addq %r15,%r10 7087 adcq %r9,%r11 7088 adcq $0,%r12 7089 vpalignr $4,%ymm12,%ymm12,%ymm12 7090 7091 decq %rcx 7092 jne .Lseal_avx2_main_loop_rounds 7093 vpaddd .Lchacha20_consts(%rip),%ymm3,%ymm3 7094 vpaddd 0+64(%rbp),%ymm7,%ymm7 7095 vpaddd 0+96(%rbp),%ymm11,%ymm11 7096 vpaddd 0+256(%rbp),%ymm15,%ymm15 7097 vpaddd .Lchacha20_consts(%rip),%ymm2,%ymm2 7098 vpaddd 0+64(%rbp),%ymm6,%ymm6 7099 vpaddd 0+96(%rbp),%ymm10,%ymm10 7100 vpaddd 0+224(%rbp),%ymm14,%ymm14 7101 vpaddd .Lchacha20_consts(%rip),%ymm1,%ymm1 7102 vpaddd 0+64(%rbp),%ymm5,%ymm5 7103 vpaddd 0+96(%rbp),%ymm9,%ymm9 7104 vpaddd 0+192(%rbp),%ymm13,%ymm13 7105 vpaddd .Lchacha20_consts(%rip),%ymm0,%ymm0 7106 vpaddd 0+64(%rbp),%ymm4,%ymm4 7107 vpaddd 0+96(%rbp),%ymm8,%ymm8 7108 vpaddd 0+160(%rbp),%ymm12,%ymm12 7109 7110 vmovdqa %ymm0,0+128(%rbp) 7111 addq 0+0(%rdi),%r10 7112 adcq 8+0(%rdi),%r11 7113 adcq $1,%r12 7114 movq 0+0+0(%rbp),%rdx 7115 movq %rdx,%r15 7116 mulxq %r10,%r13,%r14 7117 mulxq %r11,%rax,%rdx 7118 imulq %r12,%r15 7119 addq %rax,%r14 7120 adcq %rdx,%r15 7121 movq 8+0+0(%rbp),%rdx 7122 mulxq %r10,%r10,%rax 7123 addq %r10,%r14 7124 mulxq %r11,%r11,%r9 7125 adcq %r11,%r15 7126 adcq $0,%r9 7127 imulq %r12,%rdx 7128 addq %rax,%r15 7129 adcq %rdx,%r9 7130 movq %r13,%r10 7131 movq %r14,%r11 7132 movq %r15,%r12 7133 andq $3,%r12 7134 movq %r15,%r13 7135 andq $-4,%r13 7136 movq %r9,%r14 7137 shrdq $2,%r9,%r15 7138 shrq $2,%r9 7139 addq %r13,%r15 7140 adcq %r14,%r9 7141 addq %r15,%r10 7142 adcq %r9,%r11 7143 adcq $0,%r12 7144 addq 0+16(%rdi),%r10 7145 adcq 8+16(%rdi),%r11 7146 adcq $1,%r12 7147 movq 0+0+0(%rbp),%rdx 7148 movq %rdx,%r15 7149 mulxq %r10,%r13,%r14 7150 mulxq %r11,%rax,%rdx 7151 imulq %r12,%r15 7152 addq %rax,%r14 7153 adcq %rdx,%r15 7154 movq 8+0+0(%rbp),%rdx 7155 mulxq %r10,%r10,%rax 7156 addq %r10,%r14 7157 mulxq %r11,%r11,%r9 7158 adcq %r11,%r15 7159 adcq $0,%r9 7160 imulq %r12,%rdx 7161 addq %rax,%r15 7162 adcq %rdx,%r9 7163 movq %r13,%r10 
7164 movq %r14,%r11 7165 movq %r15,%r12 7166 andq $3,%r12 7167 movq %r15,%r13 7168 andq $-4,%r13 7169 movq %r9,%r14 7170 shrdq $2,%r9,%r15 7171 shrq $2,%r9 7172 addq %r13,%r15 7173 adcq %r14,%r9 7174 addq %r15,%r10 7175 adcq %r9,%r11 7176 adcq $0,%r12 7177 7178 leaq 32(%rdi),%rdi 7179 vperm2i128 $0x02,%ymm3,%ymm7,%ymm0 7180 vperm2i128 $0x13,%ymm3,%ymm7,%ymm7 7181 vperm2i128 $0x02,%ymm11,%ymm15,%ymm3 7182 vperm2i128 $0x13,%ymm11,%ymm15,%ymm11 7183 vpxor 0+0(%rsi),%ymm0,%ymm0 7184 vpxor 32+0(%rsi),%ymm3,%ymm3 7185 vpxor 64+0(%rsi),%ymm7,%ymm7 7186 vpxor 96+0(%rsi),%ymm11,%ymm11 7187 vmovdqu %ymm0,0+0(%rdi) 7188 vmovdqu %ymm3,32+0(%rdi) 7189 vmovdqu %ymm7,64+0(%rdi) 7190 vmovdqu %ymm11,96+0(%rdi) 7191 7192 vmovdqa 0+128(%rbp),%ymm0 7193 vperm2i128 $0x02,%ymm2,%ymm6,%ymm3 7194 vperm2i128 $0x13,%ymm2,%ymm6,%ymm6 7195 vperm2i128 $0x02,%ymm10,%ymm14,%ymm2 7196 vperm2i128 $0x13,%ymm10,%ymm14,%ymm10 7197 vpxor 0+128(%rsi),%ymm3,%ymm3 7198 vpxor 32+128(%rsi),%ymm2,%ymm2 7199 vpxor 64+128(%rsi),%ymm6,%ymm6 7200 vpxor 96+128(%rsi),%ymm10,%ymm10 7201 vmovdqu %ymm3,0+128(%rdi) 7202 vmovdqu %ymm2,32+128(%rdi) 7203 vmovdqu %ymm6,64+128(%rdi) 7204 vmovdqu %ymm10,96+128(%rdi) 7205 vperm2i128 $0x02,%ymm1,%ymm5,%ymm3 7206 vperm2i128 $0x13,%ymm1,%ymm5,%ymm5 7207 vperm2i128 $0x02,%ymm9,%ymm13,%ymm1 7208 vperm2i128 $0x13,%ymm9,%ymm13,%ymm9 7209 vpxor 0+256(%rsi),%ymm3,%ymm3 7210 vpxor 32+256(%rsi),%ymm1,%ymm1 7211 vpxor 64+256(%rsi),%ymm5,%ymm5 7212 vpxor 96+256(%rsi),%ymm9,%ymm9 7213 vmovdqu %ymm3,0+256(%rdi) 7214 vmovdqu %ymm1,32+256(%rdi) 7215 vmovdqu %ymm5,64+256(%rdi) 7216 vmovdqu %ymm9,96+256(%rdi) 7217 vperm2i128 $0x02,%ymm0,%ymm4,%ymm3 7218 vperm2i128 $0x13,%ymm0,%ymm4,%ymm4 7219 vperm2i128 $0x02,%ymm8,%ymm12,%ymm0 7220 vperm2i128 $0x13,%ymm8,%ymm12,%ymm8 7221 vpxor 0+384(%rsi),%ymm3,%ymm3 7222 vpxor 32+384(%rsi),%ymm0,%ymm0 7223 vpxor 64+384(%rsi),%ymm4,%ymm4 7224 vpxor 96+384(%rsi),%ymm8,%ymm8 7225 vmovdqu %ymm3,0+384(%rdi) 7226 vmovdqu %ymm0,32+384(%rdi) 7227 vmovdqu %ymm4,64+384(%rdi) 7228 vmovdqu %ymm8,96+384(%rdi) 7229 7230 leaq 512(%rsi),%rsi 7231 subq $512,%rbx 7232 cmpq $512,%rbx 7233 jg .Lseal_avx2_main_loop 7234 7235 addq 0+0(%rdi),%r10 7236 adcq 8+0(%rdi),%r11 7237 adcq $1,%r12 7238 movq 0+0+0(%rbp),%rdx 7239 movq %rdx,%r15 7240 mulxq %r10,%r13,%r14 7241 mulxq %r11,%rax,%rdx 7242 imulq %r12,%r15 7243 addq %rax,%r14 7244 adcq %rdx,%r15 7245 movq 8+0+0(%rbp),%rdx 7246 mulxq %r10,%r10,%rax 7247 addq %r10,%r14 7248 mulxq %r11,%r11,%r9 7249 adcq %r11,%r15 7250 adcq $0,%r9 7251 imulq %r12,%rdx 7252 addq %rax,%r15 7253 adcq %rdx,%r9 7254 movq %r13,%r10 7255 movq %r14,%r11 7256 movq %r15,%r12 7257 andq $3,%r12 7258 movq %r15,%r13 7259 andq $-4,%r13 7260 movq %r9,%r14 7261 shrdq $2,%r9,%r15 7262 shrq $2,%r9 7263 addq %r13,%r15 7264 adcq %r14,%r9 7265 addq %r15,%r10 7266 adcq %r9,%r11 7267 adcq $0,%r12 7268 addq 0+16(%rdi),%r10 7269 adcq 8+16(%rdi),%r11 7270 adcq $1,%r12 7271 movq 0+0+0(%rbp),%rdx 7272 movq %rdx,%r15 7273 mulxq %r10,%r13,%r14 7274 mulxq %r11,%rax,%rdx 7275 imulq %r12,%r15 7276 addq %rax,%r14 7277 adcq %rdx,%r15 7278 movq 8+0+0(%rbp),%rdx 7279 mulxq %r10,%r10,%rax 7280 addq %r10,%r14 7281 mulxq %r11,%r11,%r9 7282 adcq %r11,%r15 7283 adcq $0,%r9 7284 imulq %r12,%rdx 7285 addq %rax,%r15 7286 adcq %rdx,%r9 7287 movq %r13,%r10 7288 movq %r14,%r11 7289 movq %r15,%r12 7290 andq $3,%r12 7291 movq %r15,%r13 7292 andq $-4,%r13 7293 movq %r9,%r14 7294 shrdq $2,%r9,%r15 7295 shrq $2,%r9 7296 addq %r13,%r15 7297 adcq %r14,%r9 7298 addq %r15,%r10 7299 adcq %r9,%r11 7300 adcq $0,%r12 7301 7302 leaq 
32(%rdi),%rdi 7303 movq $10,%rcx 7304 xorq %r8,%r8 7305 7306 cmpq $384,%rbx 7307 ja .Lseal_avx2_tail_512 7308 cmpq $256,%rbx 7309 ja .Lseal_avx2_tail_384 7310 cmpq $128,%rbx 7311 ja .Lseal_avx2_tail_256 7312 7313.Lseal_avx2_tail_128: 7314 vmovdqa .Lchacha20_consts(%rip),%ymm0 7315 vmovdqa 0+64(%rbp),%ymm4 7316 vmovdqa 0+96(%rbp),%ymm8 7317 vmovdqa .Lavx2_inc(%rip),%ymm12 7318 vpaddd 0+160(%rbp),%ymm12,%ymm12 7319 vmovdqa %ymm12,0+160(%rbp) 7320 7321.Lseal_avx2_tail_128_rounds_and_3xhash: 7322 addq 0+0(%rdi),%r10 7323 adcq 8+0(%rdi),%r11 7324 adcq $1,%r12 7325 movq 0+0+0(%rbp),%rdx 7326 movq %rdx,%r15 7327 mulxq %r10,%r13,%r14 7328 mulxq %r11,%rax,%rdx 7329 imulq %r12,%r15 7330 addq %rax,%r14 7331 adcq %rdx,%r15 7332 movq 8+0+0(%rbp),%rdx 7333 mulxq %r10,%r10,%rax 7334 addq %r10,%r14 7335 mulxq %r11,%r11,%r9 7336 adcq %r11,%r15 7337 adcq $0,%r9 7338 imulq %r12,%rdx 7339 addq %rax,%r15 7340 adcq %rdx,%r9 7341 movq %r13,%r10 7342 movq %r14,%r11 7343 movq %r15,%r12 7344 andq $3,%r12 7345 movq %r15,%r13 7346 andq $-4,%r13 7347 movq %r9,%r14 7348 shrdq $2,%r9,%r15 7349 shrq $2,%r9 7350 addq %r13,%r15 7351 adcq %r14,%r9 7352 addq %r15,%r10 7353 adcq %r9,%r11 7354 adcq $0,%r12 7355 7356 leaq 16(%rdi),%rdi 7357.Lseal_avx2_tail_128_rounds_and_2xhash: 7358 vpaddd %ymm4,%ymm0,%ymm0 7359 vpxor %ymm0,%ymm12,%ymm12 7360 vpshufb .Lrol16(%rip),%ymm12,%ymm12 7361 vpaddd %ymm12,%ymm8,%ymm8 7362 vpxor %ymm8,%ymm4,%ymm4 7363 vpsrld $20,%ymm4,%ymm3 7364 vpslld $12,%ymm4,%ymm4 7365 vpxor %ymm3,%ymm4,%ymm4 7366 vpaddd %ymm4,%ymm0,%ymm0 7367 vpxor %ymm0,%ymm12,%ymm12 7368 vpshufb .Lrol8(%rip),%ymm12,%ymm12 7369 vpaddd %ymm12,%ymm8,%ymm8 7370 vpxor %ymm8,%ymm4,%ymm4 7371 vpslld $7,%ymm4,%ymm3 7372 vpsrld $25,%ymm4,%ymm4 7373 vpxor %ymm3,%ymm4,%ymm4 7374 vpalignr $12,%ymm12,%ymm12,%ymm12 7375 vpalignr $8,%ymm8,%ymm8,%ymm8 7376 vpalignr $4,%ymm4,%ymm4,%ymm4 7377 addq 0+0(%rdi),%r10 7378 adcq 8+0(%rdi),%r11 7379 adcq $1,%r12 7380 movq 0+0+0(%rbp),%rdx 7381 movq %rdx,%r15 7382 mulxq %r10,%r13,%r14 7383 mulxq %r11,%rax,%rdx 7384 imulq %r12,%r15 7385 addq %rax,%r14 7386 adcq %rdx,%r15 7387 movq 8+0+0(%rbp),%rdx 7388 mulxq %r10,%r10,%rax 7389 addq %r10,%r14 7390 mulxq %r11,%r11,%r9 7391 adcq %r11,%r15 7392 adcq $0,%r9 7393 imulq %r12,%rdx 7394 addq %rax,%r15 7395 adcq %rdx,%r9 7396 movq %r13,%r10 7397 movq %r14,%r11 7398 movq %r15,%r12 7399 andq $3,%r12 7400 movq %r15,%r13 7401 andq $-4,%r13 7402 movq %r9,%r14 7403 shrdq $2,%r9,%r15 7404 shrq $2,%r9 7405 addq %r13,%r15 7406 adcq %r14,%r9 7407 addq %r15,%r10 7408 adcq %r9,%r11 7409 adcq $0,%r12 7410 vpaddd %ymm4,%ymm0,%ymm0 7411 vpxor %ymm0,%ymm12,%ymm12 7412 vpshufb .Lrol16(%rip),%ymm12,%ymm12 7413 vpaddd %ymm12,%ymm8,%ymm8 7414 vpxor %ymm8,%ymm4,%ymm4 7415 vpsrld $20,%ymm4,%ymm3 7416 vpslld $12,%ymm4,%ymm4 7417 vpxor %ymm3,%ymm4,%ymm4 7418 vpaddd %ymm4,%ymm0,%ymm0 7419 vpxor %ymm0,%ymm12,%ymm12 7420 vpshufb .Lrol8(%rip),%ymm12,%ymm12 7421 vpaddd %ymm12,%ymm8,%ymm8 7422 vpxor %ymm8,%ymm4,%ymm4 7423 vpslld $7,%ymm4,%ymm3 7424 vpsrld $25,%ymm4,%ymm4 7425 vpxor %ymm3,%ymm4,%ymm4 7426 vpalignr $4,%ymm12,%ymm12,%ymm12 7427 vpalignr $8,%ymm8,%ymm8,%ymm8 7428 vpalignr $12,%ymm4,%ymm4,%ymm4 7429 addq 0+16(%rdi),%r10 7430 adcq 8+16(%rdi),%r11 7431 adcq $1,%r12 7432 movq 0+0+0(%rbp),%rdx 7433 movq %rdx,%r15 7434 mulxq %r10,%r13,%r14 7435 mulxq %r11,%rax,%rdx 7436 imulq %r12,%r15 7437 addq %rax,%r14 7438 adcq %rdx,%r15 7439 movq 8+0+0(%rbp),%rdx 7440 mulxq %r10,%r10,%rax 7441 addq %r10,%r14 7442 mulxq %r11,%r11,%r9 7443 adcq %r11,%r15 7444 adcq $0,%r9 7445 imulq %r12,%rdx 7446 addq 
%rax,%r15 7447 adcq %rdx,%r9 7448 movq %r13,%r10 7449 movq %r14,%r11 7450 movq %r15,%r12 7451 andq $3,%r12 7452 movq %r15,%r13 7453 andq $-4,%r13 7454 movq %r9,%r14 7455 shrdq $2,%r9,%r15 7456 shrq $2,%r9 7457 addq %r13,%r15 7458 adcq %r14,%r9 7459 addq %r15,%r10 7460 adcq %r9,%r11 7461 adcq $0,%r12 7462 7463 leaq 32(%rdi),%rdi 7464 decq %rcx 7465 jg .Lseal_avx2_tail_128_rounds_and_3xhash 7466 decq %r8 7467 jge .Lseal_avx2_tail_128_rounds_and_2xhash 7468 vpaddd .Lchacha20_consts(%rip),%ymm0,%ymm0 7469 vpaddd 0+64(%rbp),%ymm4,%ymm4 7470 vpaddd 0+96(%rbp),%ymm8,%ymm8 7471 vpaddd 0+160(%rbp),%ymm12,%ymm12 7472 vperm2i128 $0x13,%ymm0,%ymm4,%ymm3 7473 vperm2i128 $0x02,%ymm0,%ymm4,%ymm0 7474 vperm2i128 $0x02,%ymm8,%ymm12,%ymm4 7475 vperm2i128 $0x13,%ymm8,%ymm12,%ymm12 7476 vmovdqa %ymm3,%ymm8 7477 7478 jmp .Lseal_avx2_short_loop 7479 7480.Lseal_avx2_tail_256: 7481 vmovdqa .Lchacha20_consts(%rip),%ymm0 7482 vmovdqa 0+64(%rbp),%ymm4 7483 vmovdqa 0+96(%rbp),%ymm8 7484 vmovdqa %ymm0,%ymm1 7485 vmovdqa %ymm4,%ymm5 7486 vmovdqa %ymm8,%ymm9 7487 vmovdqa .Lavx2_inc(%rip),%ymm12 7488 vpaddd 0+160(%rbp),%ymm12,%ymm13 7489 vpaddd %ymm13,%ymm12,%ymm12 7490 vmovdqa %ymm12,0+160(%rbp) 7491 vmovdqa %ymm13,0+192(%rbp) 7492 7493.Lseal_avx2_tail_256_rounds_and_3xhash: 7494 addq 0+0(%rdi),%r10 7495 adcq 8+0(%rdi),%r11 7496 adcq $1,%r12 7497 movq 0+0+0(%rbp),%rax 7498 movq %rax,%r15 7499 mulq %r10 7500 movq %rax,%r13 7501 movq %rdx,%r14 7502 movq 0+0+0(%rbp),%rax 7503 mulq %r11 7504 imulq %r12,%r15 7505 addq %rax,%r14 7506 adcq %rdx,%r15 7507 movq 8+0+0(%rbp),%rax 7508 movq %rax,%r9 7509 mulq %r10 7510 addq %rax,%r14 7511 adcq $0,%rdx 7512 movq %rdx,%r10 7513 movq 8+0+0(%rbp),%rax 7514 mulq %r11 7515 addq %rax,%r15 7516 adcq $0,%rdx 7517 imulq %r12,%r9 7518 addq %r10,%r15 7519 adcq %rdx,%r9 7520 movq %r13,%r10 7521 movq %r14,%r11 7522 movq %r15,%r12 7523 andq $3,%r12 7524 movq %r15,%r13 7525 andq $-4,%r13 7526 movq %r9,%r14 7527 shrdq $2,%r9,%r15 7528 shrq $2,%r9 7529 addq %r13,%r15 7530 adcq %r14,%r9 7531 addq %r15,%r10 7532 adcq %r9,%r11 7533 adcq $0,%r12 7534 7535 leaq 16(%rdi),%rdi 7536.Lseal_avx2_tail_256_rounds_and_2xhash: 7537 vpaddd %ymm4,%ymm0,%ymm0 7538 vpxor %ymm0,%ymm12,%ymm12 7539 vpshufb .Lrol16(%rip),%ymm12,%ymm12 7540 vpaddd %ymm12,%ymm8,%ymm8 7541 vpxor %ymm8,%ymm4,%ymm4 7542 vpsrld $20,%ymm4,%ymm3 7543 vpslld $12,%ymm4,%ymm4 7544 vpxor %ymm3,%ymm4,%ymm4 7545 vpaddd %ymm4,%ymm0,%ymm0 7546 vpxor %ymm0,%ymm12,%ymm12 7547 vpshufb .Lrol8(%rip),%ymm12,%ymm12 7548 vpaddd %ymm12,%ymm8,%ymm8 7549 vpxor %ymm8,%ymm4,%ymm4 7550 vpslld $7,%ymm4,%ymm3 7551 vpsrld $25,%ymm4,%ymm4 7552 vpxor %ymm3,%ymm4,%ymm4 7553 vpalignr $12,%ymm12,%ymm12,%ymm12 7554 vpalignr $8,%ymm8,%ymm8,%ymm8 7555 vpalignr $4,%ymm4,%ymm4,%ymm4 7556 vpaddd %ymm5,%ymm1,%ymm1 7557 vpxor %ymm1,%ymm13,%ymm13 7558 vpshufb .Lrol16(%rip),%ymm13,%ymm13 7559 vpaddd %ymm13,%ymm9,%ymm9 7560 vpxor %ymm9,%ymm5,%ymm5 7561 vpsrld $20,%ymm5,%ymm3 7562 vpslld $12,%ymm5,%ymm5 7563 vpxor %ymm3,%ymm5,%ymm5 7564 vpaddd %ymm5,%ymm1,%ymm1 7565 vpxor %ymm1,%ymm13,%ymm13 7566 vpshufb .Lrol8(%rip),%ymm13,%ymm13 7567 vpaddd %ymm13,%ymm9,%ymm9 7568 vpxor %ymm9,%ymm5,%ymm5 7569 vpslld $7,%ymm5,%ymm3 7570 vpsrld $25,%ymm5,%ymm5 7571 vpxor %ymm3,%ymm5,%ymm5 7572 vpalignr $12,%ymm13,%ymm13,%ymm13 7573 vpalignr $8,%ymm9,%ymm9,%ymm9 7574 vpalignr $4,%ymm5,%ymm5,%ymm5 7575 addq 0+0(%rdi),%r10 7576 adcq 8+0(%rdi),%r11 7577 adcq $1,%r12 7578 movq 0+0+0(%rbp),%rax 7579 movq %rax,%r15 7580 mulq %r10 7581 movq %rax,%r13 7582 movq %rdx,%r14 7583 movq 0+0+0(%rbp),%rax 7584 mulq %r11 
7585 imulq %r12,%r15 7586 addq %rax,%r14 7587 adcq %rdx,%r15 7588 movq 8+0+0(%rbp),%rax 7589 movq %rax,%r9 7590 mulq %r10 7591 addq %rax,%r14 7592 adcq $0,%rdx 7593 movq %rdx,%r10 7594 movq 8+0+0(%rbp),%rax 7595 mulq %r11 7596 addq %rax,%r15 7597 adcq $0,%rdx 7598 imulq %r12,%r9 7599 addq %r10,%r15 7600 adcq %rdx,%r9 7601 movq %r13,%r10 7602 movq %r14,%r11 7603 movq %r15,%r12 7604 andq $3,%r12 7605 movq %r15,%r13 7606 andq $-4,%r13 7607 movq %r9,%r14 7608 shrdq $2,%r9,%r15 7609 shrq $2,%r9 7610 addq %r13,%r15 7611 adcq %r14,%r9 7612 addq %r15,%r10 7613 adcq %r9,%r11 7614 adcq $0,%r12 7615 vpaddd %ymm4,%ymm0,%ymm0 7616 vpxor %ymm0,%ymm12,%ymm12 7617 vpshufb .Lrol16(%rip),%ymm12,%ymm12 7618 vpaddd %ymm12,%ymm8,%ymm8 7619 vpxor %ymm8,%ymm4,%ymm4 7620 vpsrld $20,%ymm4,%ymm3 7621 vpslld $12,%ymm4,%ymm4 7622 vpxor %ymm3,%ymm4,%ymm4 7623 vpaddd %ymm4,%ymm0,%ymm0 7624 vpxor %ymm0,%ymm12,%ymm12 7625 vpshufb .Lrol8(%rip),%ymm12,%ymm12 7626 vpaddd %ymm12,%ymm8,%ymm8 7627 vpxor %ymm8,%ymm4,%ymm4 7628 vpslld $7,%ymm4,%ymm3 7629 vpsrld $25,%ymm4,%ymm4 7630 vpxor %ymm3,%ymm4,%ymm4 7631 vpalignr $4,%ymm12,%ymm12,%ymm12 7632 vpalignr $8,%ymm8,%ymm8,%ymm8 7633 vpalignr $12,%ymm4,%ymm4,%ymm4 7634 vpaddd %ymm5,%ymm1,%ymm1 7635 vpxor %ymm1,%ymm13,%ymm13 7636 vpshufb .Lrol16(%rip),%ymm13,%ymm13 7637 vpaddd %ymm13,%ymm9,%ymm9 7638 vpxor %ymm9,%ymm5,%ymm5 7639 vpsrld $20,%ymm5,%ymm3 7640 vpslld $12,%ymm5,%ymm5 7641 vpxor %ymm3,%ymm5,%ymm5 7642 vpaddd %ymm5,%ymm1,%ymm1 7643 vpxor %ymm1,%ymm13,%ymm13 7644 vpshufb .Lrol8(%rip),%ymm13,%ymm13 7645 vpaddd %ymm13,%ymm9,%ymm9 7646 vpxor %ymm9,%ymm5,%ymm5 7647 vpslld $7,%ymm5,%ymm3 7648 vpsrld $25,%ymm5,%ymm5 7649 vpxor %ymm3,%ymm5,%ymm5 7650 vpalignr $4,%ymm13,%ymm13,%ymm13 7651 vpalignr $8,%ymm9,%ymm9,%ymm9 7652 vpalignr $12,%ymm5,%ymm5,%ymm5 7653 addq 0+16(%rdi),%r10 7654 adcq 8+16(%rdi),%r11 7655 adcq $1,%r12 7656 movq 0+0+0(%rbp),%rax 7657 movq %rax,%r15 7658 mulq %r10 7659 movq %rax,%r13 7660 movq %rdx,%r14 7661 movq 0+0+0(%rbp),%rax 7662 mulq %r11 7663 imulq %r12,%r15 7664 addq %rax,%r14 7665 adcq %rdx,%r15 7666 movq 8+0+0(%rbp),%rax 7667 movq %rax,%r9 7668 mulq %r10 7669 addq %rax,%r14 7670 adcq $0,%rdx 7671 movq %rdx,%r10 7672 movq 8+0+0(%rbp),%rax 7673 mulq %r11 7674 addq %rax,%r15 7675 adcq $0,%rdx 7676 imulq %r12,%r9 7677 addq %r10,%r15 7678 adcq %rdx,%r9 7679 movq %r13,%r10 7680 movq %r14,%r11 7681 movq %r15,%r12 7682 andq $3,%r12 7683 movq %r15,%r13 7684 andq $-4,%r13 7685 movq %r9,%r14 7686 shrdq $2,%r9,%r15 7687 shrq $2,%r9 7688 addq %r13,%r15 7689 adcq %r14,%r9 7690 addq %r15,%r10 7691 adcq %r9,%r11 7692 adcq $0,%r12 7693 7694 leaq 32(%rdi),%rdi 7695 decq %rcx 7696 jg .Lseal_avx2_tail_256_rounds_and_3xhash 7697 decq %r8 7698 jge .Lseal_avx2_tail_256_rounds_and_2xhash 7699 vpaddd .Lchacha20_consts(%rip),%ymm1,%ymm1 7700 vpaddd 0+64(%rbp),%ymm5,%ymm5 7701 vpaddd 0+96(%rbp),%ymm9,%ymm9 7702 vpaddd 0+192(%rbp),%ymm13,%ymm13 7703 vpaddd .Lchacha20_consts(%rip),%ymm0,%ymm0 7704 vpaddd 0+64(%rbp),%ymm4,%ymm4 7705 vpaddd 0+96(%rbp),%ymm8,%ymm8 7706 vpaddd 0+160(%rbp),%ymm12,%ymm12 7707 vperm2i128 $0x02,%ymm1,%ymm5,%ymm3 7708 vperm2i128 $0x13,%ymm1,%ymm5,%ymm5 7709 vperm2i128 $0x02,%ymm9,%ymm13,%ymm1 7710 vperm2i128 $0x13,%ymm9,%ymm13,%ymm9 7711 vpxor 0+0(%rsi),%ymm3,%ymm3 7712 vpxor 32+0(%rsi),%ymm1,%ymm1 7713 vpxor 64+0(%rsi),%ymm5,%ymm5 7714 vpxor 96+0(%rsi),%ymm9,%ymm9 7715 vmovdqu %ymm3,0+0(%rdi) 7716 vmovdqu %ymm1,32+0(%rdi) 7717 vmovdqu %ymm5,64+0(%rdi) 7718 vmovdqu %ymm9,96+0(%rdi) 7719 vperm2i128 $0x13,%ymm0,%ymm4,%ymm3 7720 vperm2i128 
$0x02,%ymm0,%ymm4,%ymm0 7721 vperm2i128 $0x02,%ymm8,%ymm12,%ymm4 7722 vperm2i128 $0x13,%ymm8,%ymm12,%ymm12 7723 vmovdqa %ymm3,%ymm8 7724 7725 movq $128,%rcx 7726 leaq 128(%rsi),%rsi 7727 subq $128,%rbx 7728 jmp .Lseal_avx2_short_hash_remainder 7729 7730.Lseal_avx2_tail_384: 7731 vmovdqa .Lchacha20_consts(%rip),%ymm0 7732 vmovdqa 0+64(%rbp),%ymm4 7733 vmovdqa 0+96(%rbp),%ymm8 7734 vmovdqa %ymm0,%ymm1 7735 vmovdqa %ymm4,%ymm5 7736 vmovdqa %ymm8,%ymm9 7737 vmovdqa %ymm0,%ymm2 7738 vmovdqa %ymm4,%ymm6 7739 vmovdqa %ymm8,%ymm10 7740 vmovdqa .Lavx2_inc(%rip),%ymm12 7741 vpaddd 0+160(%rbp),%ymm12,%ymm14 7742 vpaddd %ymm14,%ymm12,%ymm13 7743 vpaddd %ymm13,%ymm12,%ymm12 7744 vmovdqa %ymm12,0+160(%rbp) 7745 vmovdqa %ymm13,0+192(%rbp) 7746 vmovdqa %ymm14,0+224(%rbp) 7747 7748.Lseal_avx2_tail_384_rounds_and_3xhash: 7749 addq 0+0(%rdi),%r10 7750 adcq 8+0(%rdi),%r11 7751 adcq $1,%r12 7752 movq 0+0+0(%rbp),%rax 7753 movq %rax,%r15 7754 mulq %r10 7755 movq %rax,%r13 7756 movq %rdx,%r14 7757 movq 0+0+0(%rbp),%rax 7758 mulq %r11 7759 imulq %r12,%r15 7760 addq %rax,%r14 7761 adcq %rdx,%r15 7762 movq 8+0+0(%rbp),%rax 7763 movq %rax,%r9 7764 mulq %r10 7765 addq %rax,%r14 7766 adcq $0,%rdx 7767 movq %rdx,%r10 7768 movq 8+0+0(%rbp),%rax 7769 mulq %r11 7770 addq %rax,%r15 7771 adcq $0,%rdx 7772 imulq %r12,%r9 7773 addq %r10,%r15 7774 adcq %rdx,%r9 7775 movq %r13,%r10 7776 movq %r14,%r11 7777 movq %r15,%r12 7778 andq $3,%r12 7779 movq %r15,%r13 7780 andq $-4,%r13 7781 movq %r9,%r14 7782 shrdq $2,%r9,%r15 7783 shrq $2,%r9 7784 addq %r13,%r15 7785 adcq %r14,%r9 7786 addq %r15,%r10 7787 adcq %r9,%r11 7788 adcq $0,%r12 7789 7790 leaq 16(%rdi),%rdi 7791.Lseal_avx2_tail_384_rounds_and_2xhash: 7792 vpaddd %ymm4,%ymm0,%ymm0 7793 vpxor %ymm0,%ymm12,%ymm12 7794 vpshufb .Lrol16(%rip),%ymm12,%ymm12 7795 vpaddd %ymm12,%ymm8,%ymm8 7796 vpxor %ymm8,%ymm4,%ymm4 7797 vpsrld $20,%ymm4,%ymm3 7798 vpslld $12,%ymm4,%ymm4 7799 vpxor %ymm3,%ymm4,%ymm4 7800 vpaddd %ymm4,%ymm0,%ymm0 7801 vpxor %ymm0,%ymm12,%ymm12 7802 vpshufb .Lrol8(%rip),%ymm12,%ymm12 7803 vpaddd %ymm12,%ymm8,%ymm8 7804 vpxor %ymm8,%ymm4,%ymm4 7805 vpslld $7,%ymm4,%ymm3 7806 vpsrld $25,%ymm4,%ymm4 7807 vpxor %ymm3,%ymm4,%ymm4 7808 vpalignr $12,%ymm12,%ymm12,%ymm12 7809 vpalignr $8,%ymm8,%ymm8,%ymm8 7810 vpalignr $4,%ymm4,%ymm4,%ymm4 7811 vpaddd %ymm5,%ymm1,%ymm1 7812 vpxor %ymm1,%ymm13,%ymm13 7813 vpshufb .Lrol16(%rip),%ymm13,%ymm13 7814 vpaddd %ymm13,%ymm9,%ymm9 7815 vpxor %ymm9,%ymm5,%ymm5 7816 vpsrld $20,%ymm5,%ymm3 7817 vpslld $12,%ymm5,%ymm5 7818 vpxor %ymm3,%ymm5,%ymm5 7819 vpaddd %ymm5,%ymm1,%ymm1 7820 vpxor %ymm1,%ymm13,%ymm13 7821 vpshufb .Lrol8(%rip),%ymm13,%ymm13 7822 vpaddd %ymm13,%ymm9,%ymm9 7823 vpxor %ymm9,%ymm5,%ymm5 7824 vpslld $7,%ymm5,%ymm3 7825 vpsrld $25,%ymm5,%ymm5 7826 vpxor %ymm3,%ymm5,%ymm5 7827 vpalignr $12,%ymm13,%ymm13,%ymm13 7828 vpalignr $8,%ymm9,%ymm9,%ymm9 7829 vpalignr $4,%ymm5,%ymm5,%ymm5 7830 addq 0+0(%rdi),%r10 7831 adcq 8+0(%rdi),%r11 7832 adcq $1,%r12 7833 movq 0+0+0(%rbp),%rax 7834 movq %rax,%r15 7835 mulq %r10 7836 movq %rax,%r13 7837 movq %rdx,%r14 7838 movq 0+0+0(%rbp),%rax 7839 mulq %r11 7840 imulq %r12,%r15 7841 addq %rax,%r14 7842 adcq %rdx,%r15 7843 movq 8+0+0(%rbp),%rax 7844 movq %rax,%r9 7845 mulq %r10 7846 addq %rax,%r14 7847 adcq $0,%rdx 7848 movq %rdx,%r10 7849 movq 8+0+0(%rbp),%rax 7850 mulq %r11 7851 addq %rax,%r15 7852 adcq $0,%rdx 7853 imulq %r12,%r9 7854 addq %r10,%r15 7855 adcq %rdx,%r9 7856 movq %r13,%r10 7857 movq %r14,%r11 7858 movq %r15,%r12 7859 andq $3,%r12 7860 movq %r15,%r13 7861 andq $-4,%r13 7862 movq 
%r9,%r14 7863 shrdq $2,%r9,%r15 7864 shrq $2,%r9 7865 addq %r13,%r15 7866 adcq %r14,%r9 7867 addq %r15,%r10 7868 adcq %r9,%r11 7869 adcq $0,%r12 7870 vpaddd %ymm6,%ymm2,%ymm2 7871 vpxor %ymm2,%ymm14,%ymm14 7872 vpshufb .Lrol16(%rip),%ymm14,%ymm14 7873 vpaddd %ymm14,%ymm10,%ymm10 7874 vpxor %ymm10,%ymm6,%ymm6 7875 vpsrld $20,%ymm6,%ymm3 7876 vpslld $12,%ymm6,%ymm6 7877 vpxor %ymm3,%ymm6,%ymm6 7878 vpaddd %ymm6,%ymm2,%ymm2 7879 vpxor %ymm2,%ymm14,%ymm14 7880 vpshufb .Lrol8(%rip),%ymm14,%ymm14 7881 vpaddd %ymm14,%ymm10,%ymm10 7882 vpxor %ymm10,%ymm6,%ymm6 7883 vpslld $7,%ymm6,%ymm3 7884 vpsrld $25,%ymm6,%ymm6 7885 vpxor %ymm3,%ymm6,%ymm6 7886 vpalignr $12,%ymm14,%ymm14,%ymm14 7887 vpalignr $8,%ymm10,%ymm10,%ymm10 7888 vpalignr $4,%ymm6,%ymm6,%ymm6 7889 vpaddd %ymm4,%ymm0,%ymm0 7890 vpxor %ymm0,%ymm12,%ymm12 7891 vpshufb .Lrol16(%rip),%ymm12,%ymm12 7892 vpaddd %ymm12,%ymm8,%ymm8 7893 vpxor %ymm8,%ymm4,%ymm4 7894 vpsrld $20,%ymm4,%ymm3 7895 vpslld $12,%ymm4,%ymm4 7896 vpxor %ymm3,%ymm4,%ymm4 7897 vpaddd %ymm4,%ymm0,%ymm0 7898 vpxor %ymm0,%ymm12,%ymm12 7899 vpshufb .Lrol8(%rip),%ymm12,%ymm12 7900 vpaddd %ymm12,%ymm8,%ymm8 7901 vpxor %ymm8,%ymm4,%ymm4 7902 vpslld $7,%ymm4,%ymm3 7903 vpsrld $25,%ymm4,%ymm4 7904 vpxor %ymm3,%ymm4,%ymm4 7905 vpalignr $4,%ymm12,%ymm12,%ymm12 7906 vpalignr $8,%ymm8,%ymm8,%ymm8 7907 vpalignr $12,%ymm4,%ymm4,%ymm4 7908 addq 0+16(%rdi),%r10 7909 adcq 8+16(%rdi),%r11 7910 adcq $1,%r12 7911 movq 0+0+0(%rbp),%rax 7912 movq %rax,%r15 7913 mulq %r10 7914 movq %rax,%r13 7915 movq %rdx,%r14 7916 movq 0+0+0(%rbp),%rax 7917 mulq %r11 7918 imulq %r12,%r15 7919 addq %rax,%r14 7920 adcq %rdx,%r15 7921 movq 8+0+0(%rbp),%rax 7922 movq %rax,%r9 7923 mulq %r10 7924 addq %rax,%r14 7925 adcq $0,%rdx 7926 movq %rdx,%r10 7927 movq 8+0+0(%rbp),%rax 7928 mulq %r11 7929 addq %rax,%r15 7930 adcq $0,%rdx 7931 imulq %r12,%r9 7932 addq %r10,%r15 7933 adcq %rdx,%r9 7934 movq %r13,%r10 7935 movq %r14,%r11 7936 movq %r15,%r12 7937 andq $3,%r12 7938 movq %r15,%r13 7939 andq $-4,%r13 7940 movq %r9,%r14 7941 shrdq $2,%r9,%r15 7942 shrq $2,%r9 7943 addq %r13,%r15 7944 adcq %r14,%r9 7945 addq %r15,%r10 7946 adcq %r9,%r11 7947 adcq $0,%r12 7948 vpaddd %ymm5,%ymm1,%ymm1 7949 vpxor %ymm1,%ymm13,%ymm13 7950 vpshufb .Lrol16(%rip),%ymm13,%ymm13 7951 vpaddd %ymm13,%ymm9,%ymm9 7952 vpxor %ymm9,%ymm5,%ymm5 7953 vpsrld $20,%ymm5,%ymm3 7954 vpslld $12,%ymm5,%ymm5 7955 vpxor %ymm3,%ymm5,%ymm5 7956 vpaddd %ymm5,%ymm1,%ymm1 7957 vpxor %ymm1,%ymm13,%ymm13 7958 vpshufb .Lrol8(%rip),%ymm13,%ymm13 7959 vpaddd %ymm13,%ymm9,%ymm9 7960 vpxor %ymm9,%ymm5,%ymm5 7961 vpslld $7,%ymm5,%ymm3 7962 vpsrld $25,%ymm5,%ymm5 7963 vpxor %ymm3,%ymm5,%ymm5 7964 vpalignr $4,%ymm13,%ymm13,%ymm13 7965 vpalignr $8,%ymm9,%ymm9,%ymm9 7966 vpalignr $12,%ymm5,%ymm5,%ymm5 7967 vpaddd %ymm6,%ymm2,%ymm2 7968 vpxor %ymm2,%ymm14,%ymm14 7969 vpshufb .Lrol16(%rip),%ymm14,%ymm14 7970 vpaddd %ymm14,%ymm10,%ymm10 7971 vpxor %ymm10,%ymm6,%ymm6 7972 vpsrld $20,%ymm6,%ymm3 7973 vpslld $12,%ymm6,%ymm6 7974 vpxor %ymm3,%ymm6,%ymm6 7975 vpaddd %ymm6,%ymm2,%ymm2 7976 vpxor %ymm2,%ymm14,%ymm14 7977 vpshufb .Lrol8(%rip),%ymm14,%ymm14 7978 vpaddd %ymm14,%ymm10,%ymm10 7979 vpxor %ymm10,%ymm6,%ymm6 7980 vpslld $7,%ymm6,%ymm3 7981 vpsrld $25,%ymm6,%ymm6 7982 vpxor %ymm3,%ymm6,%ymm6 7983 vpalignr $4,%ymm14,%ymm14,%ymm14 7984 vpalignr $8,%ymm10,%ymm10,%ymm10 7985 vpalignr $12,%ymm6,%ymm6,%ymm6 7986 7987 leaq 32(%rdi),%rdi 7988 decq %rcx 7989 jg .Lseal_avx2_tail_384_rounds_and_3xhash 7990 decq %r8 7991 jge .Lseal_avx2_tail_384_rounds_and_2xhash 7992 vpaddd 
.Lchacha20_consts(%rip),%ymm2,%ymm2 7993 vpaddd 0+64(%rbp),%ymm6,%ymm6 7994 vpaddd 0+96(%rbp),%ymm10,%ymm10 7995 vpaddd 0+224(%rbp),%ymm14,%ymm14 7996 vpaddd .Lchacha20_consts(%rip),%ymm1,%ymm1 7997 vpaddd 0+64(%rbp),%ymm5,%ymm5 7998 vpaddd 0+96(%rbp),%ymm9,%ymm9 7999 vpaddd 0+192(%rbp),%ymm13,%ymm13 8000 vpaddd .Lchacha20_consts(%rip),%ymm0,%ymm0 8001 vpaddd 0+64(%rbp),%ymm4,%ymm4 8002 vpaddd 0+96(%rbp),%ymm8,%ymm8 8003 vpaddd 0+160(%rbp),%ymm12,%ymm12 8004 vperm2i128 $0x02,%ymm2,%ymm6,%ymm3 8005 vperm2i128 $0x13,%ymm2,%ymm6,%ymm6 8006 vperm2i128 $0x02,%ymm10,%ymm14,%ymm2 8007 vperm2i128 $0x13,%ymm10,%ymm14,%ymm10 8008 vpxor 0+0(%rsi),%ymm3,%ymm3 8009 vpxor 32+0(%rsi),%ymm2,%ymm2 8010 vpxor 64+0(%rsi),%ymm6,%ymm6 8011 vpxor 96+0(%rsi),%ymm10,%ymm10 8012 vmovdqu %ymm3,0+0(%rdi) 8013 vmovdqu %ymm2,32+0(%rdi) 8014 vmovdqu %ymm6,64+0(%rdi) 8015 vmovdqu %ymm10,96+0(%rdi) 8016 vperm2i128 $0x02,%ymm1,%ymm5,%ymm3 8017 vperm2i128 $0x13,%ymm1,%ymm5,%ymm5 8018 vperm2i128 $0x02,%ymm9,%ymm13,%ymm1 8019 vperm2i128 $0x13,%ymm9,%ymm13,%ymm9 8020 vpxor 0+128(%rsi),%ymm3,%ymm3 8021 vpxor 32+128(%rsi),%ymm1,%ymm1 8022 vpxor 64+128(%rsi),%ymm5,%ymm5 8023 vpxor 96+128(%rsi),%ymm9,%ymm9 8024 vmovdqu %ymm3,0+128(%rdi) 8025 vmovdqu %ymm1,32+128(%rdi) 8026 vmovdqu %ymm5,64+128(%rdi) 8027 vmovdqu %ymm9,96+128(%rdi) 8028 vperm2i128 $0x13,%ymm0,%ymm4,%ymm3 8029 vperm2i128 $0x02,%ymm0,%ymm4,%ymm0 8030 vperm2i128 $0x02,%ymm8,%ymm12,%ymm4 8031 vperm2i128 $0x13,%ymm8,%ymm12,%ymm12 8032 vmovdqa %ymm3,%ymm8 8033 8034 movq $256,%rcx 8035 leaq 256(%rsi),%rsi 8036 subq $256,%rbx 8037 jmp .Lseal_avx2_short_hash_remainder 8038 8039.Lseal_avx2_tail_512: 8040 vmovdqa .Lchacha20_consts(%rip),%ymm0 8041 vmovdqa 0+64(%rbp),%ymm4 8042 vmovdqa 0+96(%rbp),%ymm8 8043 vmovdqa %ymm0,%ymm1 8044 vmovdqa %ymm4,%ymm5 8045 vmovdqa %ymm8,%ymm9 8046 vmovdqa %ymm0,%ymm2 8047 vmovdqa %ymm4,%ymm6 8048 vmovdqa %ymm8,%ymm10 8049 vmovdqa %ymm0,%ymm3 8050 vmovdqa %ymm4,%ymm7 8051 vmovdqa %ymm8,%ymm11 8052 vmovdqa .Lavx2_inc(%rip),%ymm12 8053 vpaddd 0+160(%rbp),%ymm12,%ymm15 8054 vpaddd %ymm15,%ymm12,%ymm14 8055 vpaddd %ymm14,%ymm12,%ymm13 8056 vpaddd %ymm13,%ymm12,%ymm12 8057 vmovdqa %ymm15,0+256(%rbp) 8058 vmovdqa %ymm14,0+224(%rbp) 8059 vmovdqa %ymm13,0+192(%rbp) 8060 vmovdqa %ymm12,0+160(%rbp) 8061 8062.Lseal_avx2_tail_512_rounds_and_3xhash: 8063 addq 0+0(%rdi),%r10 8064 adcq 8+0(%rdi),%r11 8065 adcq $1,%r12 8066 movq 0+0+0(%rbp),%rdx 8067 movq %rdx,%r15 8068 mulxq %r10,%r13,%r14 8069 mulxq %r11,%rax,%rdx 8070 imulq %r12,%r15 8071 addq %rax,%r14 8072 adcq %rdx,%r15 8073 movq 8+0+0(%rbp),%rdx 8074 mulxq %r10,%r10,%rax 8075 addq %r10,%r14 8076 mulxq %r11,%r11,%r9 8077 adcq %r11,%r15 8078 adcq $0,%r9 8079 imulq %r12,%rdx 8080 addq %rax,%r15 8081 adcq %rdx,%r9 8082 movq %r13,%r10 8083 movq %r14,%r11 8084 movq %r15,%r12 8085 andq $3,%r12 8086 movq %r15,%r13 8087 andq $-4,%r13 8088 movq %r9,%r14 8089 shrdq $2,%r9,%r15 8090 shrq $2,%r9 8091 addq %r13,%r15 8092 adcq %r14,%r9 8093 addq %r15,%r10 8094 adcq %r9,%r11 8095 adcq $0,%r12 8096 8097 leaq 16(%rdi),%rdi 8098.Lseal_avx2_tail_512_rounds_and_2xhash: 8099 vmovdqa %ymm8,0+128(%rbp) 8100 vmovdqa .Lrol16(%rip),%ymm8 8101 vpaddd %ymm7,%ymm3,%ymm3 8102 vpaddd %ymm6,%ymm2,%ymm2 8103 vpaddd %ymm5,%ymm1,%ymm1 8104 vpaddd %ymm4,%ymm0,%ymm0 8105 vpxor %ymm3,%ymm15,%ymm15 8106 vpxor %ymm2,%ymm14,%ymm14 8107 vpxor %ymm1,%ymm13,%ymm13 8108 vpxor %ymm0,%ymm12,%ymm12 8109 vpshufb %ymm8,%ymm15,%ymm15 8110 vpshufb %ymm8,%ymm14,%ymm14 8111 vpshufb %ymm8,%ymm13,%ymm13 8112 vpshufb %ymm8,%ymm12,%ymm12 8113 vpaddd 
%ymm15,%ymm11,%ymm11 8114 vpaddd %ymm14,%ymm10,%ymm10 8115 vpaddd %ymm13,%ymm9,%ymm9 8116 vpaddd 0+128(%rbp),%ymm12,%ymm8 8117 vpxor %ymm11,%ymm7,%ymm7 8118 vpxor %ymm10,%ymm6,%ymm6 8119 addq 0+0(%rdi),%r10 8120 adcq 8+0(%rdi),%r11 8121 adcq $1,%r12 8122 vpxor %ymm9,%ymm5,%ymm5 8123 vpxor %ymm8,%ymm4,%ymm4 8124 vmovdqa %ymm8,0+128(%rbp) 8125 vpsrld $20,%ymm7,%ymm8 8126 vpslld $32-20,%ymm7,%ymm7 8127 vpxor %ymm8,%ymm7,%ymm7 8128 vpsrld $20,%ymm6,%ymm8 8129 vpslld $32-20,%ymm6,%ymm6 8130 vpxor %ymm8,%ymm6,%ymm6 8131 vpsrld $20,%ymm5,%ymm8 8132 vpslld $32-20,%ymm5,%ymm5 8133 vpxor %ymm8,%ymm5,%ymm5 8134 vpsrld $20,%ymm4,%ymm8 8135 vpslld $32-20,%ymm4,%ymm4 8136 vpxor %ymm8,%ymm4,%ymm4 8137 vmovdqa .Lrol8(%rip),%ymm8 8138 vpaddd %ymm7,%ymm3,%ymm3 8139 vpaddd %ymm6,%ymm2,%ymm2 8140 vpaddd %ymm5,%ymm1,%ymm1 8141 vpaddd %ymm4,%ymm0,%ymm0 8142 movq 0+0+0(%rbp),%rdx 8143 movq %rdx,%r15 8144 mulxq %r10,%r13,%r14 8145 mulxq %r11,%rax,%rdx 8146 imulq %r12,%r15 8147 addq %rax,%r14 8148 adcq %rdx,%r15 8149 vpxor %ymm3,%ymm15,%ymm15 8150 vpxor %ymm2,%ymm14,%ymm14 8151 vpxor %ymm1,%ymm13,%ymm13 8152 vpxor %ymm0,%ymm12,%ymm12 8153 vpshufb %ymm8,%ymm15,%ymm15 8154 vpshufb %ymm8,%ymm14,%ymm14 8155 vpshufb %ymm8,%ymm13,%ymm13 8156 vpshufb %ymm8,%ymm12,%ymm12 8157 vpaddd %ymm15,%ymm11,%ymm11 8158 vpaddd %ymm14,%ymm10,%ymm10 8159 vpaddd %ymm13,%ymm9,%ymm9 8160 vpaddd 0+128(%rbp),%ymm12,%ymm8 8161 vpxor %ymm11,%ymm7,%ymm7 8162 vpxor %ymm10,%ymm6,%ymm6 8163 vpxor %ymm9,%ymm5,%ymm5 8164 vpxor %ymm8,%ymm4,%ymm4 8165 vmovdqa %ymm8,0+128(%rbp) 8166 vpsrld $25,%ymm7,%ymm8 8167 vpslld $32-25,%ymm7,%ymm7 8168 vpxor %ymm8,%ymm7,%ymm7 8169 movq 8+0+0(%rbp),%rdx 8170 mulxq %r10,%r10,%rax 8171 addq %r10,%r14 8172 mulxq %r11,%r11,%r9 8173 adcq %r11,%r15 8174 adcq $0,%r9 8175 imulq %r12,%rdx 8176 vpsrld $25,%ymm6,%ymm8 8177 vpslld $32-25,%ymm6,%ymm6 8178 vpxor %ymm8,%ymm6,%ymm6 8179 vpsrld $25,%ymm5,%ymm8 8180 vpslld $32-25,%ymm5,%ymm5 8181 vpxor %ymm8,%ymm5,%ymm5 8182 vpsrld $25,%ymm4,%ymm8 8183 vpslld $32-25,%ymm4,%ymm4 8184 vpxor %ymm8,%ymm4,%ymm4 8185 vmovdqa 0+128(%rbp),%ymm8 8186 vpalignr $4,%ymm7,%ymm7,%ymm7 8187 vpalignr $8,%ymm11,%ymm11,%ymm11 8188 vpalignr $12,%ymm15,%ymm15,%ymm15 8189 vpalignr $4,%ymm6,%ymm6,%ymm6 8190 vpalignr $8,%ymm10,%ymm10,%ymm10 8191 vpalignr $12,%ymm14,%ymm14,%ymm14 8192 vpalignr $4,%ymm5,%ymm5,%ymm5 8193 vpalignr $8,%ymm9,%ymm9,%ymm9 8194 vpalignr $12,%ymm13,%ymm13,%ymm13 8195 vpalignr $4,%ymm4,%ymm4,%ymm4 8196 addq %rax,%r15 8197 adcq %rdx,%r9 8198 vpalignr $8,%ymm8,%ymm8,%ymm8 8199 vpalignr $12,%ymm12,%ymm12,%ymm12 8200 vmovdqa %ymm8,0+128(%rbp) 8201 vmovdqa .Lrol16(%rip),%ymm8 8202 vpaddd %ymm7,%ymm3,%ymm3 8203 vpaddd %ymm6,%ymm2,%ymm2 8204 vpaddd %ymm5,%ymm1,%ymm1 8205 vpaddd %ymm4,%ymm0,%ymm0 8206 vpxor %ymm3,%ymm15,%ymm15 8207 vpxor %ymm2,%ymm14,%ymm14 8208 vpxor %ymm1,%ymm13,%ymm13 8209 vpxor %ymm0,%ymm12,%ymm12 8210 vpshufb %ymm8,%ymm15,%ymm15 8211 vpshufb %ymm8,%ymm14,%ymm14 8212 vpshufb %ymm8,%ymm13,%ymm13 8213 vpshufb %ymm8,%ymm12,%ymm12 8214 vpaddd %ymm15,%ymm11,%ymm11 8215 vpaddd %ymm14,%ymm10,%ymm10 8216 vpaddd %ymm13,%ymm9,%ymm9 8217 vpaddd 0+128(%rbp),%ymm12,%ymm8 8218 movq %r13,%r10 8219 movq %r14,%r11 8220 movq %r15,%r12 8221 andq $3,%r12 8222 movq %r15,%r13 8223 andq $-4,%r13 8224 movq %r9,%r14 8225 shrdq $2,%r9,%r15 8226 shrq $2,%r9 8227 addq %r13,%r15 8228 adcq %r14,%r9 8229 addq %r15,%r10 8230 adcq %r9,%r11 8231 adcq $0,%r12 8232 vpxor %ymm11,%ymm7,%ymm7 8233 vpxor %ymm10,%ymm6,%ymm6 8234 vpxor %ymm9,%ymm5,%ymm5 8235 vpxor %ymm8,%ymm4,%ymm4 8236 vmovdqa 
%ymm8,0+128(%rbp) 8237 vpsrld $20,%ymm7,%ymm8 8238 vpslld $32-20,%ymm7,%ymm7 8239 vpxor %ymm8,%ymm7,%ymm7 8240 vpsrld $20,%ymm6,%ymm8 8241 vpslld $32-20,%ymm6,%ymm6 8242 vpxor %ymm8,%ymm6,%ymm6 8243 vpsrld $20,%ymm5,%ymm8 8244 vpslld $32-20,%ymm5,%ymm5 8245 vpxor %ymm8,%ymm5,%ymm5 8246 vpsrld $20,%ymm4,%ymm8 8247 vpslld $32-20,%ymm4,%ymm4 8248 vpxor %ymm8,%ymm4,%ymm4 8249 vmovdqa .Lrol8(%rip),%ymm8 8250 vpaddd %ymm7,%ymm3,%ymm3 8251 vpaddd %ymm6,%ymm2,%ymm2 8252 addq 0+16(%rdi),%r10 8253 adcq 8+16(%rdi),%r11 8254 adcq $1,%r12 8255 vpaddd %ymm5,%ymm1,%ymm1 8256 vpaddd %ymm4,%ymm0,%ymm0 8257 vpxor %ymm3,%ymm15,%ymm15 8258 vpxor %ymm2,%ymm14,%ymm14 8259 vpxor %ymm1,%ymm13,%ymm13 8260 vpxor %ymm0,%ymm12,%ymm12 8261 vpshufb %ymm8,%ymm15,%ymm15 8262 vpshufb %ymm8,%ymm14,%ymm14 8263 vpshufb %ymm8,%ymm13,%ymm13 8264 vpshufb %ymm8,%ymm12,%ymm12 8265 vpaddd %ymm15,%ymm11,%ymm11 8266 vpaddd %ymm14,%ymm10,%ymm10 8267 vpaddd %ymm13,%ymm9,%ymm9 8268 vpaddd 0+128(%rbp),%ymm12,%ymm8 8269 vpxor %ymm11,%ymm7,%ymm7 8270 vpxor %ymm10,%ymm6,%ymm6 8271 vpxor %ymm9,%ymm5,%ymm5 8272 vpxor %ymm8,%ymm4,%ymm4 8273 vmovdqa %ymm8,0+128(%rbp) 8274 vpsrld $25,%ymm7,%ymm8 8275 movq 0+0+0(%rbp),%rdx 8276 movq %rdx,%r15 8277 mulxq %r10,%r13,%r14 8278 mulxq %r11,%rax,%rdx 8279 imulq %r12,%r15 8280 addq %rax,%r14 8281 adcq %rdx,%r15 8282 vpslld $32-25,%ymm7,%ymm7 8283 vpxor %ymm8,%ymm7,%ymm7 8284 vpsrld $25,%ymm6,%ymm8 8285 vpslld $32-25,%ymm6,%ymm6 8286 vpxor %ymm8,%ymm6,%ymm6 8287 vpsrld $25,%ymm5,%ymm8 8288 vpslld $32-25,%ymm5,%ymm5 8289 vpxor %ymm8,%ymm5,%ymm5 8290 vpsrld $25,%ymm4,%ymm8 8291 vpslld $32-25,%ymm4,%ymm4 8292 vpxor %ymm8,%ymm4,%ymm4 8293 vmovdqa 0+128(%rbp),%ymm8 8294 vpalignr $12,%ymm7,%ymm7,%ymm7 8295 vpalignr $8,%ymm11,%ymm11,%ymm11 8296 vpalignr $4,%ymm15,%ymm15,%ymm15 8297 vpalignr $12,%ymm6,%ymm6,%ymm6 8298 vpalignr $8,%ymm10,%ymm10,%ymm10 8299 vpalignr $4,%ymm14,%ymm14,%ymm14 8300 vpalignr $12,%ymm5,%ymm5,%ymm5 8301 vpalignr $8,%ymm9,%ymm9,%ymm9 8302 movq 8+0+0(%rbp),%rdx 8303 mulxq %r10,%r10,%rax 8304 addq %r10,%r14 8305 mulxq %r11,%r11,%r9 8306 adcq %r11,%r15 8307 adcq $0,%r9 8308 imulq %r12,%rdx 8309 vpalignr $4,%ymm13,%ymm13,%ymm13 8310 vpalignr $12,%ymm4,%ymm4,%ymm4 8311 vpalignr $8,%ymm8,%ymm8,%ymm8 8312 vpalignr $4,%ymm12,%ymm12,%ymm12 8313 8314 8315 8316 8317 8318 8319 8320 8321 8322 8323 8324 8325 8326 8327 8328 8329 addq %rax,%r15 8330 adcq %rdx,%r9 8331 8332 8333 8334 8335 8336 8337 8338 8339 8340 8341 8342 8343 8344 8345 8346 8347 8348 8349 8350 8351 movq %r13,%r10 8352 movq %r14,%r11 8353 movq %r15,%r12 8354 andq $3,%r12 8355 movq %r15,%r13 8356 andq $-4,%r13 8357 movq %r9,%r14 8358 shrdq $2,%r9,%r15 8359 shrq $2,%r9 8360 addq %r13,%r15 8361 adcq %r14,%r9 8362 addq %r15,%r10 8363 adcq %r9,%r11 8364 adcq $0,%r12 8365 8366 leaq 32(%rdi),%rdi 8367 decq %rcx 8368 jg .Lseal_avx2_tail_512_rounds_and_3xhash 8369 decq %r8 8370 jge .Lseal_avx2_tail_512_rounds_and_2xhash 8371 vpaddd .Lchacha20_consts(%rip),%ymm3,%ymm3 8372 vpaddd 0+64(%rbp),%ymm7,%ymm7 8373 vpaddd 0+96(%rbp),%ymm11,%ymm11 8374 vpaddd 0+256(%rbp),%ymm15,%ymm15 8375 vpaddd .Lchacha20_consts(%rip),%ymm2,%ymm2 8376 vpaddd 0+64(%rbp),%ymm6,%ymm6 8377 vpaddd 0+96(%rbp),%ymm10,%ymm10 8378 vpaddd 0+224(%rbp),%ymm14,%ymm14 8379 vpaddd .Lchacha20_consts(%rip),%ymm1,%ymm1 8380 vpaddd 0+64(%rbp),%ymm5,%ymm5 8381 vpaddd 0+96(%rbp),%ymm9,%ymm9 8382 vpaddd 0+192(%rbp),%ymm13,%ymm13 8383 vpaddd .Lchacha20_consts(%rip),%ymm0,%ymm0 8384 vpaddd 0+64(%rbp),%ymm4,%ymm4 8385 vpaddd 0+96(%rbp),%ymm8,%ymm8 8386 vpaddd 0+160(%rbp),%ymm12,%ymm12 8387 8388 
vmovdqa %ymm0,0+128(%rbp) 8389 vperm2i128 $0x02,%ymm3,%ymm7,%ymm0 8390 vperm2i128 $0x13,%ymm3,%ymm7,%ymm7 8391 vperm2i128 $0x02,%ymm11,%ymm15,%ymm3 8392 vperm2i128 $0x13,%ymm11,%ymm15,%ymm11 8393 vpxor 0+0(%rsi),%ymm0,%ymm0 8394 vpxor 32+0(%rsi),%ymm3,%ymm3 8395 vpxor 64+0(%rsi),%ymm7,%ymm7 8396 vpxor 96+0(%rsi),%ymm11,%ymm11 8397 vmovdqu %ymm0,0+0(%rdi) 8398 vmovdqu %ymm3,32+0(%rdi) 8399 vmovdqu %ymm7,64+0(%rdi) 8400 vmovdqu %ymm11,96+0(%rdi) 8401 8402 vmovdqa 0+128(%rbp),%ymm0 8403 vperm2i128 $0x02,%ymm2,%ymm6,%ymm3 8404 vperm2i128 $0x13,%ymm2,%ymm6,%ymm6 8405 vperm2i128 $0x02,%ymm10,%ymm14,%ymm2 8406 vperm2i128 $0x13,%ymm10,%ymm14,%ymm10 8407 vpxor 0+128(%rsi),%ymm3,%ymm3 8408 vpxor 32+128(%rsi),%ymm2,%ymm2 8409 vpxor 64+128(%rsi),%ymm6,%ymm6 8410 vpxor 96+128(%rsi),%ymm10,%ymm10 8411 vmovdqu %ymm3,0+128(%rdi) 8412 vmovdqu %ymm2,32+128(%rdi) 8413 vmovdqu %ymm6,64+128(%rdi) 8414 vmovdqu %ymm10,96+128(%rdi) 8415 vperm2i128 $0x02,%ymm1,%ymm5,%ymm3 8416 vperm2i128 $0x13,%ymm1,%ymm5,%ymm5 8417 vperm2i128 $0x02,%ymm9,%ymm13,%ymm1 8418 vperm2i128 $0x13,%ymm9,%ymm13,%ymm9 8419 vpxor 0+256(%rsi),%ymm3,%ymm3 8420 vpxor 32+256(%rsi),%ymm1,%ymm1 8421 vpxor 64+256(%rsi),%ymm5,%ymm5 8422 vpxor 96+256(%rsi),%ymm9,%ymm9 8423 vmovdqu %ymm3,0+256(%rdi) 8424 vmovdqu %ymm1,32+256(%rdi) 8425 vmovdqu %ymm5,64+256(%rdi) 8426 vmovdqu %ymm9,96+256(%rdi) 8427 vperm2i128 $0x13,%ymm0,%ymm4,%ymm3 8428 vperm2i128 $0x02,%ymm0,%ymm4,%ymm0 8429 vperm2i128 $0x02,%ymm8,%ymm12,%ymm4 8430 vperm2i128 $0x13,%ymm8,%ymm12,%ymm12 8431 vmovdqa %ymm3,%ymm8 8432 8433 movq $384,%rcx 8434 leaq 384(%rsi),%rsi 8435 subq $384,%rbx 8436 jmp .Lseal_avx2_short_hash_remainder 8437 8438.Lseal_avx2_320: 8439 vmovdqa %ymm0,%ymm1 8440 vmovdqa %ymm0,%ymm2 8441 vmovdqa %ymm4,%ymm5 8442 vmovdqa %ymm4,%ymm6 8443 vmovdqa %ymm8,%ymm9 8444 vmovdqa %ymm8,%ymm10 8445 vpaddd .Lavx2_inc(%rip),%ymm12,%ymm13 8446 vpaddd .Lavx2_inc(%rip),%ymm13,%ymm14 8447 vmovdqa %ymm4,%ymm7 8448 vmovdqa %ymm8,%ymm11 8449 vmovdqa %ymm12,0+160(%rbp) 8450 vmovdqa %ymm13,0+192(%rbp) 8451 vmovdqa %ymm14,0+224(%rbp) 8452 movq $10,%r10 8453.Lseal_avx2_320_rounds: 8454 vpaddd %ymm4,%ymm0,%ymm0 8455 vpxor %ymm0,%ymm12,%ymm12 8456 vpshufb .Lrol16(%rip),%ymm12,%ymm12 8457 vpaddd %ymm12,%ymm8,%ymm8 8458 vpxor %ymm8,%ymm4,%ymm4 8459 vpsrld $20,%ymm4,%ymm3 8460 vpslld $12,%ymm4,%ymm4 8461 vpxor %ymm3,%ymm4,%ymm4 8462 vpaddd %ymm4,%ymm0,%ymm0 8463 vpxor %ymm0,%ymm12,%ymm12 8464 vpshufb .Lrol8(%rip),%ymm12,%ymm12 8465 vpaddd %ymm12,%ymm8,%ymm8 8466 vpxor %ymm8,%ymm4,%ymm4 8467 vpslld $7,%ymm4,%ymm3 8468 vpsrld $25,%ymm4,%ymm4 8469 vpxor %ymm3,%ymm4,%ymm4 8470 vpalignr $12,%ymm12,%ymm12,%ymm12 8471 vpalignr $8,%ymm8,%ymm8,%ymm8 8472 vpalignr $4,%ymm4,%ymm4,%ymm4 8473 vpaddd %ymm5,%ymm1,%ymm1 8474 vpxor %ymm1,%ymm13,%ymm13 8475 vpshufb .Lrol16(%rip),%ymm13,%ymm13 8476 vpaddd %ymm13,%ymm9,%ymm9 8477 vpxor %ymm9,%ymm5,%ymm5 8478 vpsrld $20,%ymm5,%ymm3 8479 vpslld $12,%ymm5,%ymm5 8480 vpxor %ymm3,%ymm5,%ymm5 8481 vpaddd %ymm5,%ymm1,%ymm1 8482 vpxor %ymm1,%ymm13,%ymm13 8483 vpshufb .Lrol8(%rip),%ymm13,%ymm13 8484 vpaddd %ymm13,%ymm9,%ymm9 8485 vpxor %ymm9,%ymm5,%ymm5 8486 vpslld $7,%ymm5,%ymm3 8487 vpsrld $25,%ymm5,%ymm5 8488 vpxor %ymm3,%ymm5,%ymm5 8489 vpalignr $12,%ymm13,%ymm13,%ymm13 8490 vpalignr $8,%ymm9,%ymm9,%ymm9 8491 vpalignr $4,%ymm5,%ymm5,%ymm5 8492 vpaddd %ymm6,%ymm2,%ymm2 8493 vpxor %ymm2,%ymm14,%ymm14 8494 vpshufb .Lrol16(%rip),%ymm14,%ymm14 8495 vpaddd %ymm14,%ymm10,%ymm10 8496 vpxor %ymm10,%ymm6,%ymm6 8497 vpsrld $20,%ymm6,%ymm3 8498 vpslld $12,%ymm6,%ymm6 8499 vpxor 
%ymm3,%ymm6,%ymm6 8500 vpaddd %ymm6,%ymm2,%ymm2 8501 vpxor %ymm2,%ymm14,%ymm14 8502 vpshufb .Lrol8(%rip),%ymm14,%ymm14 8503 vpaddd %ymm14,%ymm10,%ymm10 8504 vpxor %ymm10,%ymm6,%ymm6 8505 vpslld $7,%ymm6,%ymm3 8506 vpsrld $25,%ymm6,%ymm6 8507 vpxor %ymm3,%ymm6,%ymm6 8508 vpalignr $12,%ymm14,%ymm14,%ymm14 8509 vpalignr $8,%ymm10,%ymm10,%ymm10 8510 vpalignr $4,%ymm6,%ymm6,%ymm6 8511 vpaddd %ymm4,%ymm0,%ymm0 8512 vpxor %ymm0,%ymm12,%ymm12 8513 vpshufb .Lrol16(%rip),%ymm12,%ymm12 8514 vpaddd %ymm12,%ymm8,%ymm8 8515 vpxor %ymm8,%ymm4,%ymm4 8516 vpsrld $20,%ymm4,%ymm3 8517 vpslld $12,%ymm4,%ymm4 8518 vpxor %ymm3,%ymm4,%ymm4 8519 vpaddd %ymm4,%ymm0,%ymm0 8520 vpxor %ymm0,%ymm12,%ymm12 8521 vpshufb .Lrol8(%rip),%ymm12,%ymm12 8522 vpaddd %ymm12,%ymm8,%ymm8 8523 vpxor %ymm8,%ymm4,%ymm4 8524 vpslld $7,%ymm4,%ymm3 8525 vpsrld $25,%ymm4,%ymm4 8526 vpxor %ymm3,%ymm4,%ymm4 8527 vpalignr $4,%ymm12,%ymm12,%ymm12 8528 vpalignr $8,%ymm8,%ymm8,%ymm8 8529 vpalignr $12,%ymm4,%ymm4,%ymm4 8530 vpaddd %ymm5,%ymm1,%ymm1 8531 vpxor %ymm1,%ymm13,%ymm13 8532 vpshufb .Lrol16(%rip),%ymm13,%ymm13 8533 vpaddd %ymm13,%ymm9,%ymm9 8534 vpxor %ymm9,%ymm5,%ymm5 8535 vpsrld $20,%ymm5,%ymm3 8536 vpslld $12,%ymm5,%ymm5 8537 vpxor %ymm3,%ymm5,%ymm5 8538 vpaddd %ymm5,%ymm1,%ymm1 8539 vpxor %ymm1,%ymm13,%ymm13 8540 vpshufb .Lrol8(%rip),%ymm13,%ymm13 8541 vpaddd %ymm13,%ymm9,%ymm9 8542 vpxor %ymm9,%ymm5,%ymm5 8543 vpslld $7,%ymm5,%ymm3 8544 vpsrld $25,%ymm5,%ymm5 8545 vpxor %ymm3,%ymm5,%ymm5 8546 vpalignr $4,%ymm13,%ymm13,%ymm13 8547 vpalignr $8,%ymm9,%ymm9,%ymm9 8548 vpalignr $12,%ymm5,%ymm5,%ymm5 8549 vpaddd %ymm6,%ymm2,%ymm2 8550 vpxor %ymm2,%ymm14,%ymm14 8551 vpshufb .Lrol16(%rip),%ymm14,%ymm14 8552 vpaddd %ymm14,%ymm10,%ymm10 8553 vpxor %ymm10,%ymm6,%ymm6 8554 vpsrld $20,%ymm6,%ymm3 8555 vpslld $12,%ymm6,%ymm6 8556 vpxor %ymm3,%ymm6,%ymm6 8557 vpaddd %ymm6,%ymm2,%ymm2 8558 vpxor %ymm2,%ymm14,%ymm14 8559 vpshufb .Lrol8(%rip),%ymm14,%ymm14 8560 vpaddd %ymm14,%ymm10,%ymm10 8561 vpxor %ymm10,%ymm6,%ymm6 8562 vpslld $7,%ymm6,%ymm3 8563 vpsrld $25,%ymm6,%ymm6 8564 vpxor %ymm3,%ymm6,%ymm6 8565 vpalignr $4,%ymm14,%ymm14,%ymm14 8566 vpalignr $8,%ymm10,%ymm10,%ymm10 8567 vpalignr $12,%ymm6,%ymm6,%ymm6 8568 8569 decq %r10 8570 jne .Lseal_avx2_320_rounds 8571 vpaddd .Lchacha20_consts(%rip),%ymm0,%ymm0 8572 vpaddd .Lchacha20_consts(%rip),%ymm1,%ymm1 8573 vpaddd .Lchacha20_consts(%rip),%ymm2,%ymm2 8574 vpaddd %ymm7,%ymm4,%ymm4 8575 vpaddd %ymm7,%ymm5,%ymm5 8576 vpaddd %ymm7,%ymm6,%ymm6 8577 vpaddd %ymm11,%ymm8,%ymm8 8578 vpaddd %ymm11,%ymm9,%ymm9 8579 vpaddd %ymm11,%ymm10,%ymm10 8580 vpaddd 0+160(%rbp),%ymm12,%ymm12 8581 vpaddd 0+192(%rbp),%ymm13,%ymm13 8582 vpaddd 0+224(%rbp),%ymm14,%ymm14 8583 vperm2i128 $0x02,%ymm0,%ymm4,%ymm3 8584 8585 vpand .Lclamp(%rip),%ymm3,%ymm3 8586 vmovdqa %ymm3,0+0(%rbp) 8587 8588 vperm2i128 $0x13,%ymm0,%ymm4,%ymm0 8589 vperm2i128 $0x13,%ymm8,%ymm12,%ymm4 8590 vperm2i128 $0x02,%ymm1,%ymm5,%ymm8 8591 vperm2i128 $0x02,%ymm9,%ymm13,%ymm12 8592 vperm2i128 $0x13,%ymm1,%ymm5,%ymm1 8593 vperm2i128 $0x13,%ymm9,%ymm13,%ymm5 8594 vperm2i128 $0x02,%ymm2,%ymm6,%ymm9 8595 vperm2i128 $0x02,%ymm10,%ymm14,%ymm13 8596 vperm2i128 $0x13,%ymm2,%ymm6,%ymm2 8597 vperm2i128 $0x13,%ymm10,%ymm14,%ymm6 8598 jmp .Lseal_avx2_short 8599 8600.Lseal_avx2_192: 8601 vmovdqa %ymm0,%ymm1 8602 vmovdqa %ymm0,%ymm2 8603 vmovdqa %ymm4,%ymm5 8604 vmovdqa %ymm4,%ymm6 8605 vmovdqa %ymm8,%ymm9 8606 vmovdqa %ymm8,%ymm10 8607 vpaddd .Lavx2_inc(%rip),%ymm12,%ymm13 8608 vmovdqa %ymm12,%ymm11 8609 vmovdqa %ymm13,%ymm15 8610 movq $10,%r10 
8611.Lseal_avx2_192_rounds: 8612 vpaddd %ymm4,%ymm0,%ymm0 8613 vpxor %ymm0,%ymm12,%ymm12 8614 vpshufb .Lrol16(%rip),%ymm12,%ymm12 8615 vpaddd %ymm12,%ymm8,%ymm8 8616 vpxor %ymm8,%ymm4,%ymm4 8617 vpsrld $20,%ymm4,%ymm3 8618 vpslld $12,%ymm4,%ymm4 8619 vpxor %ymm3,%ymm4,%ymm4 8620 vpaddd %ymm4,%ymm0,%ymm0 8621 vpxor %ymm0,%ymm12,%ymm12 8622 vpshufb .Lrol8(%rip),%ymm12,%ymm12 8623 vpaddd %ymm12,%ymm8,%ymm8 8624 vpxor %ymm8,%ymm4,%ymm4 8625 vpslld $7,%ymm4,%ymm3 8626 vpsrld $25,%ymm4,%ymm4 8627 vpxor %ymm3,%ymm4,%ymm4 8628 vpalignr $12,%ymm12,%ymm12,%ymm12 8629 vpalignr $8,%ymm8,%ymm8,%ymm8 8630 vpalignr $4,%ymm4,%ymm4,%ymm4 8631 vpaddd %ymm5,%ymm1,%ymm1 8632 vpxor %ymm1,%ymm13,%ymm13 8633 vpshufb .Lrol16(%rip),%ymm13,%ymm13 8634 vpaddd %ymm13,%ymm9,%ymm9 8635 vpxor %ymm9,%ymm5,%ymm5 8636 vpsrld $20,%ymm5,%ymm3 8637 vpslld $12,%ymm5,%ymm5 8638 vpxor %ymm3,%ymm5,%ymm5 8639 vpaddd %ymm5,%ymm1,%ymm1 8640 vpxor %ymm1,%ymm13,%ymm13 8641 vpshufb .Lrol8(%rip),%ymm13,%ymm13 8642 vpaddd %ymm13,%ymm9,%ymm9 8643 vpxor %ymm9,%ymm5,%ymm5 8644 vpslld $7,%ymm5,%ymm3 8645 vpsrld $25,%ymm5,%ymm5 8646 vpxor %ymm3,%ymm5,%ymm5 8647 vpalignr $12,%ymm13,%ymm13,%ymm13 8648 vpalignr $8,%ymm9,%ymm9,%ymm9 8649 vpalignr $4,%ymm5,%ymm5,%ymm5 8650 vpaddd %ymm4,%ymm0,%ymm0 8651 vpxor %ymm0,%ymm12,%ymm12 8652 vpshufb .Lrol16(%rip),%ymm12,%ymm12 8653 vpaddd %ymm12,%ymm8,%ymm8 8654 vpxor %ymm8,%ymm4,%ymm4 8655 vpsrld $20,%ymm4,%ymm3 8656 vpslld $12,%ymm4,%ymm4 8657 vpxor %ymm3,%ymm4,%ymm4 8658 vpaddd %ymm4,%ymm0,%ymm0 8659 vpxor %ymm0,%ymm12,%ymm12 8660 vpshufb .Lrol8(%rip),%ymm12,%ymm12 8661 vpaddd %ymm12,%ymm8,%ymm8 8662 vpxor %ymm8,%ymm4,%ymm4 8663 vpslld $7,%ymm4,%ymm3 8664 vpsrld $25,%ymm4,%ymm4 8665 vpxor %ymm3,%ymm4,%ymm4 8666 vpalignr $4,%ymm12,%ymm12,%ymm12 8667 vpalignr $8,%ymm8,%ymm8,%ymm8 8668 vpalignr $12,%ymm4,%ymm4,%ymm4 8669 vpaddd %ymm5,%ymm1,%ymm1 8670 vpxor %ymm1,%ymm13,%ymm13 8671 vpshufb .Lrol16(%rip),%ymm13,%ymm13 8672 vpaddd %ymm13,%ymm9,%ymm9 8673 vpxor %ymm9,%ymm5,%ymm5 8674 vpsrld $20,%ymm5,%ymm3 8675 vpslld $12,%ymm5,%ymm5 8676 vpxor %ymm3,%ymm5,%ymm5 8677 vpaddd %ymm5,%ymm1,%ymm1 8678 vpxor %ymm1,%ymm13,%ymm13 8679 vpshufb .Lrol8(%rip),%ymm13,%ymm13 8680 vpaddd %ymm13,%ymm9,%ymm9 8681 vpxor %ymm9,%ymm5,%ymm5 8682 vpslld $7,%ymm5,%ymm3 8683 vpsrld $25,%ymm5,%ymm5 8684 vpxor %ymm3,%ymm5,%ymm5 8685 vpalignr $4,%ymm13,%ymm13,%ymm13 8686 vpalignr $8,%ymm9,%ymm9,%ymm9 8687 vpalignr $12,%ymm5,%ymm5,%ymm5 8688 8689 decq %r10 8690 jne .Lseal_avx2_192_rounds 8691 vpaddd %ymm2,%ymm0,%ymm0 8692 vpaddd %ymm2,%ymm1,%ymm1 8693 vpaddd %ymm6,%ymm4,%ymm4 8694 vpaddd %ymm6,%ymm5,%ymm5 8695 vpaddd %ymm10,%ymm8,%ymm8 8696 vpaddd %ymm10,%ymm9,%ymm9 8697 vpaddd %ymm11,%ymm12,%ymm12 8698 vpaddd %ymm15,%ymm13,%ymm13 8699 vperm2i128 $0x02,%ymm0,%ymm4,%ymm3 8700 8701 vpand .Lclamp(%rip),%ymm3,%ymm3 8702 vmovdqa %ymm3,0+0(%rbp) 8703 8704 vperm2i128 $0x13,%ymm0,%ymm4,%ymm0 8705 vperm2i128 $0x13,%ymm8,%ymm12,%ymm4 8706 vperm2i128 $0x02,%ymm1,%ymm5,%ymm8 8707 vperm2i128 $0x02,%ymm9,%ymm13,%ymm12 8708 vperm2i128 $0x13,%ymm1,%ymm5,%ymm1 8709 vperm2i128 $0x13,%ymm9,%ymm13,%ymm5 8710.Lseal_avx2_short: 8711 movq %r8,%r8 8712 call poly_hash_ad_internal 8713 xorq %rcx,%rcx 8714.Lseal_avx2_short_hash_remainder: 8715 cmpq $16,%rcx 8716 jb .Lseal_avx2_short_loop 8717 addq 0+0(%rdi),%r10 8718 adcq 8+0(%rdi),%r11 8719 adcq $1,%r12 8720 movq 0+0+0(%rbp),%rax 8721 movq %rax,%r15 8722 mulq %r10 8723 movq %rax,%r13 8724 movq %rdx,%r14 8725 movq 0+0+0(%rbp),%rax 8726 mulq %r11 8727 imulq %r12,%r15 8728 addq %rax,%r14 8729 adcq %rdx,%r15 8730 
movq 8+0+0(%rbp),%rax 8731 movq %rax,%r9 8732 mulq %r10 8733 addq %rax,%r14 8734 adcq $0,%rdx 8735 movq %rdx,%r10 8736 movq 8+0+0(%rbp),%rax 8737 mulq %r11 8738 addq %rax,%r15 8739 adcq $0,%rdx 8740 imulq %r12,%r9 8741 addq %r10,%r15 8742 adcq %rdx,%r9 8743 movq %r13,%r10 8744 movq %r14,%r11 8745 movq %r15,%r12 8746 andq $3,%r12 8747 movq %r15,%r13 8748 andq $-4,%r13 8749 movq %r9,%r14 8750 shrdq $2,%r9,%r15 8751 shrq $2,%r9 8752 addq %r13,%r15 8753 adcq %r14,%r9 8754 addq %r15,%r10 8755 adcq %r9,%r11 8756 adcq $0,%r12 8757 8758 subq $16,%rcx 8759 addq $16,%rdi 8760 jmp .Lseal_avx2_short_hash_remainder 8761.Lseal_avx2_short_loop: 8762 cmpq $32,%rbx 8763 jb .Lseal_avx2_short_tail 8764 subq $32,%rbx 8765 8766 vpxor (%rsi),%ymm0,%ymm0 8767 vmovdqu %ymm0,(%rdi) 8768 leaq 32(%rsi),%rsi 8769 8770 addq 0+0(%rdi),%r10 8771 adcq 8+0(%rdi),%r11 8772 adcq $1,%r12 8773 movq 0+0+0(%rbp),%rax 8774 movq %rax,%r15 8775 mulq %r10 8776 movq %rax,%r13 8777 movq %rdx,%r14 8778 movq 0+0+0(%rbp),%rax 8779 mulq %r11 8780 imulq %r12,%r15 8781 addq %rax,%r14 8782 adcq %rdx,%r15 8783 movq 8+0+0(%rbp),%rax 8784 movq %rax,%r9 8785 mulq %r10 8786 addq %rax,%r14 8787 adcq $0,%rdx 8788 movq %rdx,%r10 8789 movq 8+0+0(%rbp),%rax 8790 mulq %r11 8791 addq %rax,%r15 8792 adcq $0,%rdx 8793 imulq %r12,%r9 8794 addq %r10,%r15 8795 adcq %rdx,%r9 8796 movq %r13,%r10 8797 movq %r14,%r11 8798 movq %r15,%r12 8799 andq $3,%r12 8800 movq %r15,%r13 8801 andq $-4,%r13 8802 movq %r9,%r14 8803 shrdq $2,%r9,%r15 8804 shrq $2,%r9 8805 addq %r13,%r15 8806 adcq %r14,%r9 8807 addq %r15,%r10 8808 adcq %r9,%r11 8809 adcq $0,%r12 8810 addq 0+16(%rdi),%r10 8811 adcq 8+16(%rdi),%r11 8812 adcq $1,%r12 8813 movq 0+0+0(%rbp),%rax 8814 movq %rax,%r15 8815 mulq %r10 8816 movq %rax,%r13 8817 movq %rdx,%r14 8818 movq 0+0+0(%rbp),%rax 8819 mulq %r11 8820 imulq %r12,%r15 8821 addq %rax,%r14 8822 adcq %rdx,%r15 8823 movq 8+0+0(%rbp),%rax 8824 movq %rax,%r9 8825 mulq %r10 8826 addq %rax,%r14 8827 adcq $0,%rdx 8828 movq %rdx,%r10 8829 movq 8+0+0(%rbp),%rax 8830 mulq %r11 8831 addq %rax,%r15 8832 adcq $0,%rdx 8833 imulq %r12,%r9 8834 addq %r10,%r15 8835 adcq %rdx,%r9 8836 movq %r13,%r10 8837 movq %r14,%r11 8838 movq %r15,%r12 8839 andq $3,%r12 8840 movq %r15,%r13 8841 andq $-4,%r13 8842 movq %r9,%r14 8843 shrdq $2,%r9,%r15 8844 shrq $2,%r9 8845 addq %r13,%r15 8846 adcq %r14,%r9 8847 addq %r15,%r10 8848 adcq %r9,%r11 8849 adcq $0,%r12 8850 8851 leaq 32(%rdi),%rdi 8852 8853 vmovdqa %ymm4,%ymm0 8854 vmovdqa %ymm8,%ymm4 8855 vmovdqa %ymm12,%ymm8 8856 vmovdqa %ymm1,%ymm12 8857 vmovdqa %ymm5,%ymm1 8858 vmovdqa %ymm9,%ymm5 8859 vmovdqa %ymm13,%ymm9 8860 vmovdqa %ymm2,%ymm13 8861 vmovdqa %ymm6,%ymm2 8862 jmp .Lseal_avx2_short_loop 8863.Lseal_avx2_short_tail: 8864 cmpq $16,%rbx 8865 jb .Lseal_avx2_exit 8866 subq $16,%rbx 8867 vpxor (%rsi),%xmm0,%xmm3 8868 vmovdqu %xmm3,(%rdi) 8869 leaq 16(%rsi),%rsi 8870 addq 0+0(%rdi),%r10 8871 adcq 8+0(%rdi),%r11 8872 adcq $1,%r12 8873 movq 0+0+0(%rbp),%rax 8874 movq %rax,%r15 8875 mulq %r10 8876 movq %rax,%r13 8877 movq %rdx,%r14 8878 movq 0+0+0(%rbp),%rax 8879 mulq %r11 8880 imulq %r12,%r15 8881 addq %rax,%r14 8882 adcq %rdx,%r15 8883 movq 8+0+0(%rbp),%rax 8884 movq %rax,%r9 8885 mulq %r10 8886 addq %rax,%r14 8887 adcq $0,%rdx 8888 movq %rdx,%r10 8889 movq 8+0+0(%rbp),%rax 8890 mulq %r11 8891 addq %rax,%r15 8892 adcq $0,%rdx 8893 imulq %r12,%r9 8894 addq %r10,%r15 8895 adcq %rdx,%r9 8896 movq %r13,%r10 8897 movq %r14,%r11 8898 movq %r15,%r12 8899 andq $3,%r12 8900 movq %r15,%r13 8901 andq $-4,%r13 8902 movq %r9,%r14 8903 shrdq 
$2,%r9,%r15 8904 shrq $2,%r9 8905 addq %r13,%r15 8906 adcq %r14,%r9 8907 addq %r15,%r10 8908 adcq %r9,%r11 8909 adcq $0,%r12 8910 8911 leaq 16(%rdi),%rdi 8912 vextracti128 $1,%ymm0,%xmm0 8913.Lseal_avx2_exit: 8914 vzeroupper 8915 jmp .Lseal_sse_tail_16 8916.cfi_endproc 8917.size chacha20_poly1305_seal_avx2, .-chacha20_poly1305_seal_avx2 8918#endif 8919