xref: /aosp_15_r20/external/cronet/third_party/boringssl/src/gen/crypto/chacha20_poly1305_x86_64-linux.S (revision 6777b5387eb2ff775bb5750e3f5d96f37fb7352b)
1// This file is generated from a similarly-named Perl script in the BoringSSL
2// source tree. Do not edit by hand.
3
4#include <openssl/asm_base.h>
5
6#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86_64) && defined(__ELF__)
7.text
8.extern	OPENSSL_ia32cap_P
9.hidden OPENSSL_ia32cap_P
10
11chacha20_poly1305_constants:
12
13.section	.rodata
14.align	64
15.Lchacha20_consts:
16.byte	'e','x','p','a','n','d',' ','3','2','-','b','y','t','e',' ','k'
17.byte	'e','x','p','a','n','d',' ','3','2','-','b','y','t','e',' ','k'
18.Lrol8:
19.byte	3,0,1,2, 7,4,5,6, 11,8,9,10, 15,12,13,14
20.byte	3,0,1,2, 7,4,5,6, 11,8,9,10, 15,12,13,14
21.Lrol16:
22.byte	2,3,0,1, 6,7,4,5, 10,11,8,9, 14,15,12,13
23.byte	2,3,0,1, 6,7,4,5, 10,11,8,9, 14,15,12,13
24.Lavx2_init:
25.long	0,0,0,0
26.Lsse_inc:
27.long	1,0,0,0
28.Lavx2_inc:
29.long	2,0,0,0,2,0,0,0
30.Lclamp:
31.quad	0x0FFFFFFC0FFFFFFF, 0x0FFFFFFC0FFFFFFC
32.quad	0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF
33.align	16
34.Land_masks:
35.byte	0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
36.byte	0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
37.byte	0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
38.byte	0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
39.byte	0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
40.byte	0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
41.byte	0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
42.byte	0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
43.byte	0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00
44.byte	0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00
45.byte	0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00
46.byte	0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00
47.byte	0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00
48.byte	0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00
49.byte	0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00
50.byte	0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff
51.text
52
53.type	poly_hash_ad_internal,@function
54.align	64
55poly_hash_ad_internal:
56.cfi_startproc
57.cfi_def_cfa	rsp, 8
58	xorq	%r10,%r10
59	xorq	%r11,%r11
60	xorq	%r12,%r12
61	cmpq	$13,%r8
62	jne	.Lhash_ad_loop
63.Lpoly_fast_tls_ad:
64
65	movq	(%rcx),%r10
66	movq	5(%rcx),%r11
67	shrq	$24,%r11
68	movq	$1,%r12
69	movq	0+0+0(%rbp),%rax
70	movq	%rax,%r15
71	mulq	%r10
72	movq	%rax,%r13
73	movq	%rdx,%r14
74	movq	0+0+0(%rbp),%rax
75	mulq	%r11
76	imulq	%r12,%r15
77	addq	%rax,%r14
78	adcq	%rdx,%r15
79	movq	8+0+0(%rbp),%rax
80	movq	%rax,%r9
81	mulq	%r10
82	addq	%rax,%r14
83	adcq	$0,%rdx
84	movq	%rdx,%r10
85	movq	8+0+0(%rbp),%rax
86	mulq	%r11
87	addq	%rax,%r15
88	adcq	$0,%rdx
89	imulq	%r12,%r9
90	addq	%r10,%r15
91	adcq	%rdx,%r9
92	movq	%r13,%r10
93	movq	%r14,%r11
94	movq	%r15,%r12
95	andq	$3,%r12
96	movq	%r15,%r13
97	andq	$-4,%r13
98	movq	%r9,%r14
99	shrdq	$2,%r9,%r15
100	shrq	$2,%r9
101	addq	%r13,%r15
102	adcq	%r14,%r9
103	addq	%r15,%r10
104	adcq	%r9,%r11
105	adcq	$0,%r12
106
107	ret
108.Lhash_ad_loop:
109
110	cmpq	$16,%r8
111	jb	.Lhash_ad_tail
112	addq	0+0(%rcx),%r10
113	adcq	8+0(%rcx),%r11
114	adcq	$1,%r12
115	movq	0+0+0(%rbp),%rax
116	movq	%rax,%r15
117	mulq	%r10
118	movq	%rax,%r13
119	movq	%rdx,%r14
120	movq	0+0+0(%rbp),%rax
121	mulq	%r11
122	imulq	%r12,%r15
123	addq	%rax,%r14
124	adcq	%rdx,%r15
125	movq	8+0+0(%rbp),%rax
126	movq	%rax,%r9
127	mulq	%r10
128	addq	%rax,%r14
129	adcq	$0,%rdx
130	movq	%rdx,%r10
131	movq	8+0+0(%rbp),%rax
132	mulq	%r11
133	addq	%rax,%r15
134	adcq	$0,%rdx
135	imulq	%r12,%r9
136	addq	%r10,%r15
137	adcq	%rdx,%r9
138	movq	%r13,%r10
139	movq	%r14,%r11
140	movq	%r15,%r12
141	andq	$3,%r12
142	movq	%r15,%r13
143	andq	$-4,%r13
144	movq	%r9,%r14
145	shrdq	$2,%r9,%r15
146	shrq	$2,%r9
147	addq	%r13,%r15
148	adcq	%r14,%r9
149	addq	%r15,%r10
150	adcq	%r9,%r11
151	adcq	$0,%r12
152
153	leaq	16(%rcx),%rcx
154	subq	$16,%r8
155	jmp	.Lhash_ad_loop
156.Lhash_ad_tail:
157	cmpq	$0,%r8
158	je	.Lhash_ad_done
159
160	xorq	%r13,%r13
161	xorq	%r14,%r14
162	xorq	%r15,%r15
163	addq	%r8,%rcx
164.Lhash_ad_tail_loop:
165	shldq	$8,%r13,%r14
166	shlq	$8,%r13
167	movzbq	-1(%rcx),%r15
168	xorq	%r15,%r13
169	decq	%rcx
170	decq	%r8
171	jne	.Lhash_ad_tail_loop
172
173	addq	%r13,%r10
174	adcq	%r14,%r11
175	adcq	$1,%r12
176	movq	0+0+0(%rbp),%rax
177	movq	%rax,%r15
178	mulq	%r10
179	movq	%rax,%r13
180	movq	%rdx,%r14
181	movq	0+0+0(%rbp),%rax
182	mulq	%r11
183	imulq	%r12,%r15
184	addq	%rax,%r14
185	adcq	%rdx,%r15
186	movq	8+0+0(%rbp),%rax
187	movq	%rax,%r9
188	mulq	%r10
189	addq	%rax,%r14
190	adcq	$0,%rdx
191	movq	%rdx,%r10
192	movq	8+0+0(%rbp),%rax
193	mulq	%r11
194	addq	%rax,%r15
195	adcq	$0,%rdx
196	imulq	%r12,%r9
197	addq	%r10,%r15
198	adcq	%rdx,%r9
199	movq	%r13,%r10
200	movq	%r14,%r11
201	movq	%r15,%r12
202	andq	$3,%r12
203	movq	%r15,%r13
204	andq	$-4,%r13
205	movq	%r9,%r14
206	shrdq	$2,%r9,%r15
207	shrq	$2,%r9
208	addq	%r13,%r15
209	adcq	%r14,%r9
210	addq	%r15,%r10
211	adcq	%r9,%r11
212	adcq	$0,%r12
213
214
215.Lhash_ad_done:
216	ret
217.cfi_endproc
218.size	poly_hash_ad_internal, .-poly_hash_ad_internal
219
220.globl	chacha20_poly1305_open
221.hidden chacha20_poly1305_open
222.type	chacha20_poly1305_open,@function
223.align	64
224chacha20_poly1305_open:
225.cfi_startproc
226_CET_ENDBR
227	pushq	%rbp
228.cfi_adjust_cfa_offset	8
229.cfi_offset	%rbp,-16
230	pushq	%rbx
231.cfi_adjust_cfa_offset	8
232.cfi_offset	%rbx,-24
233	pushq	%r12
234.cfi_adjust_cfa_offset	8
235.cfi_offset	%r12,-32
236	pushq	%r13
237.cfi_adjust_cfa_offset	8
238.cfi_offset	%r13,-40
239	pushq	%r14
240.cfi_adjust_cfa_offset	8
241.cfi_offset	%r14,-48
242	pushq	%r15
243.cfi_adjust_cfa_offset	8
244.cfi_offset	%r15,-56
245
246
247	pushq	%r9
248.cfi_adjust_cfa_offset	8
249.cfi_offset	%r9,-64
250	subq	$288 + 0 + 32,%rsp
251.cfi_adjust_cfa_offset	288 + 32
252
253	leaq	32(%rsp),%rbp
254	andq	$-32,%rbp
255
256	movq	%rdx,%rbx
257	movq	%r8,0+0+32(%rbp)
258	movq	%rbx,8+0+32(%rbp)
259
260	movl	OPENSSL_ia32cap_P+8(%rip),%eax
261	andl	$288,%eax
262	xorl	$288,%eax
263	jz	chacha20_poly1305_open_avx2
264
265	cmpq	$128,%rbx
266	jbe	.Lopen_sse_128
267
268	movdqa	.Lchacha20_consts(%rip),%xmm0
269	movdqu	0(%r9),%xmm4
270	movdqu	16(%r9),%xmm8
271	movdqu	32(%r9),%xmm12
272
273	movdqa	%xmm12,%xmm7
274
275	movdqa	%xmm4,0+48(%rbp)
276	movdqa	%xmm8,0+64(%rbp)
277	movdqa	%xmm12,0+96(%rbp)
278	movq	$10,%r10
279.Lopen_sse_init_rounds:
280	paddd	%xmm4,%xmm0
281	pxor	%xmm0,%xmm12
282	pshufb	.Lrol16(%rip),%xmm12
283	paddd	%xmm12,%xmm8
284	pxor	%xmm8,%xmm4
285	movdqa	%xmm4,%xmm3
286	pslld	$12,%xmm3
287	psrld	$20,%xmm4
288	pxor	%xmm3,%xmm4
289	paddd	%xmm4,%xmm0
290	pxor	%xmm0,%xmm12
291	pshufb	.Lrol8(%rip),%xmm12
292	paddd	%xmm12,%xmm8
293	pxor	%xmm8,%xmm4
294	movdqa	%xmm4,%xmm3
295	pslld	$7,%xmm3
296	psrld	$25,%xmm4
297	pxor	%xmm3,%xmm4
298.byte	102,15,58,15,228,4
299.byte	102,69,15,58,15,192,8
300.byte	102,69,15,58,15,228,12
301	paddd	%xmm4,%xmm0
302	pxor	%xmm0,%xmm12
303	pshufb	.Lrol16(%rip),%xmm12
304	paddd	%xmm12,%xmm8
305	pxor	%xmm8,%xmm4
306	movdqa	%xmm4,%xmm3
307	pslld	$12,%xmm3
308	psrld	$20,%xmm4
309	pxor	%xmm3,%xmm4
310	paddd	%xmm4,%xmm0
311	pxor	%xmm0,%xmm12
312	pshufb	.Lrol8(%rip),%xmm12
313	paddd	%xmm12,%xmm8
314	pxor	%xmm8,%xmm4
315	movdqa	%xmm4,%xmm3
316	pslld	$7,%xmm3
317	psrld	$25,%xmm4
318	pxor	%xmm3,%xmm4
319.byte	102,15,58,15,228,12
320.byte	102,69,15,58,15,192,8
321.byte	102,69,15,58,15,228,4
322
323	decq	%r10
324	jne	.Lopen_sse_init_rounds
325
326	paddd	.Lchacha20_consts(%rip),%xmm0
327	paddd	0+48(%rbp),%xmm4
328
329	pand	.Lclamp(%rip),%xmm0
330	movdqa	%xmm0,0+0(%rbp)
331	movdqa	%xmm4,0+16(%rbp)
332
333	movq	%r8,%r8
334	call	poly_hash_ad_internal
335.Lopen_sse_main_loop:
336	cmpq	$256,%rbx
337	jb	.Lopen_sse_tail
338
339	movdqa	.Lchacha20_consts(%rip),%xmm0
340	movdqa	0+48(%rbp),%xmm4
341	movdqa	0+64(%rbp),%xmm8
342	movdqa	%xmm0,%xmm1
343	movdqa	%xmm4,%xmm5
344	movdqa	%xmm8,%xmm9
345	movdqa	%xmm0,%xmm2
346	movdqa	%xmm4,%xmm6
347	movdqa	%xmm8,%xmm10
348	movdqa	%xmm0,%xmm3
349	movdqa	%xmm4,%xmm7
350	movdqa	%xmm8,%xmm11
351	movdqa	0+96(%rbp),%xmm15
352	paddd	.Lsse_inc(%rip),%xmm15
353	movdqa	%xmm15,%xmm14
354	paddd	.Lsse_inc(%rip),%xmm14
355	movdqa	%xmm14,%xmm13
356	paddd	.Lsse_inc(%rip),%xmm13
357	movdqa	%xmm13,%xmm12
358	paddd	.Lsse_inc(%rip),%xmm12
359	movdqa	%xmm12,0+96(%rbp)
360	movdqa	%xmm13,0+112(%rbp)
361	movdqa	%xmm14,0+128(%rbp)
362	movdqa	%xmm15,0+144(%rbp)
363
364
365
366	movq	$4,%rcx
367	movq	%rsi,%r8
368.Lopen_sse_main_loop_rounds:
369	movdqa	%xmm8,0+80(%rbp)
370	movdqa	.Lrol16(%rip),%xmm8
371	paddd	%xmm7,%xmm3
372	paddd	%xmm6,%xmm2
373	paddd	%xmm5,%xmm1
374	paddd	%xmm4,%xmm0
375	pxor	%xmm3,%xmm15
376	pxor	%xmm2,%xmm14
377	pxor	%xmm1,%xmm13
378	pxor	%xmm0,%xmm12
379.byte	102,69,15,56,0,248
380.byte	102,69,15,56,0,240
381.byte	102,69,15,56,0,232
382.byte	102,69,15,56,0,224
383	movdqa	0+80(%rbp),%xmm8
384	paddd	%xmm15,%xmm11
385	paddd	%xmm14,%xmm10
386	paddd	%xmm13,%xmm9
387	paddd	%xmm12,%xmm8
388	pxor	%xmm11,%xmm7
389	addq	0+0(%r8),%r10
390	adcq	8+0(%r8),%r11
391	adcq	$1,%r12
392
393	leaq	16(%r8),%r8
394	pxor	%xmm10,%xmm6
395	pxor	%xmm9,%xmm5
396	pxor	%xmm8,%xmm4
397	movdqa	%xmm8,0+80(%rbp)
398	movdqa	%xmm7,%xmm8
399	psrld	$20,%xmm8
400	pslld	$32-20,%xmm7
401	pxor	%xmm8,%xmm7
402	movdqa	%xmm6,%xmm8
403	psrld	$20,%xmm8
404	pslld	$32-20,%xmm6
405	pxor	%xmm8,%xmm6
406	movdqa	%xmm5,%xmm8
407	psrld	$20,%xmm8
408	pslld	$32-20,%xmm5
409	pxor	%xmm8,%xmm5
410	movdqa	%xmm4,%xmm8
411	psrld	$20,%xmm8
412	pslld	$32-20,%xmm4
413	pxor	%xmm8,%xmm4
414	movq	0+0+0(%rbp),%rax
415	movq	%rax,%r15
416	mulq	%r10
417	movq	%rax,%r13
418	movq	%rdx,%r14
419	movq	0+0+0(%rbp),%rax
420	mulq	%r11
421	imulq	%r12,%r15
422	addq	%rax,%r14
423	adcq	%rdx,%r15
424	movdqa	.Lrol8(%rip),%xmm8
425	paddd	%xmm7,%xmm3
426	paddd	%xmm6,%xmm2
427	paddd	%xmm5,%xmm1
428	paddd	%xmm4,%xmm0
429	pxor	%xmm3,%xmm15
430	pxor	%xmm2,%xmm14
431	pxor	%xmm1,%xmm13
432	pxor	%xmm0,%xmm12
433.byte	102,69,15,56,0,248
434.byte	102,69,15,56,0,240
435.byte	102,69,15,56,0,232
436.byte	102,69,15,56,0,224
437	movdqa	0+80(%rbp),%xmm8
438	paddd	%xmm15,%xmm11
439	paddd	%xmm14,%xmm10
440	paddd	%xmm13,%xmm9
441	paddd	%xmm12,%xmm8
442	pxor	%xmm11,%xmm7
443	pxor	%xmm10,%xmm6
444	movq	8+0+0(%rbp),%rax
445	movq	%rax,%r9
446	mulq	%r10
447	addq	%rax,%r14
448	adcq	$0,%rdx
449	movq	%rdx,%r10
450	movq	8+0+0(%rbp),%rax
451	mulq	%r11
452	addq	%rax,%r15
453	adcq	$0,%rdx
454	pxor	%xmm9,%xmm5
455	pxor	%xmm8,%xmm4
456	movdqa	%xmm8,0+80(%rbp)
457	movdqa	%xmm7,%xmm8
458	psrld	$25,%xmm8
459	pslld	$32-25,%xmm7
460	pxor	%xmm8,%xmm7
461	movdqa	%xmm6,%xmm8
462	psrld	$25,%xmm8
463	pslld	$32-25,%xmm6
464	pxor	%xmm8,%xmm6
465	movdqa	%xmm5,%xmm8
466	psrld	$25,%xmm8
467	pslld	$32-25,%xmm5
468	pxor	%xmm8,%xmm5
469	movdqa	%xmm4,%xmm8
470	psrld	$25,%xmm8
471	pslld	$32-25,%xmm4
472	pxor	%xmm8,%xmm4
473	movdqa	0+80(%rbp),%xmm8
474	imulq	%r12,%r9
475	addq	%r10,%r15
476	adcq	%rdx,%r9
477.byte	102,15,58,15,255,4
478.byte	102,69,15,58,15,219,8
479.byte	102,69,15,58,15,255,12
480.byte	102,15,58,15,246,4
481.byte	102,69,15,58,15,210,8
482.byte	102,69,15,58,15,246,12
483.byte	102,15,58,15,237,4
484.byte	102,69,15,58,15,201,8
485.byte	102,69,15,58,15,237,12
486.byte	102,15,58,15,228,4
487.byte	102,69,15,58,15,192,8
488.byte	102,69,15,58,15,228,12
489	movdqa	%xmm8,0+80(%rbp)
490	movdqa	.Lrol16(%rip),%xmm8
491	paddd	%xmm7,%xmm3
492	paddd	%xmm6,%xmm2
493	paddd	%xmm5,%xmm1
494	paddd	%xmm4,%xmm0
495	pxor	%xmm3,%xmm15
496	pxor	%xmm2,%xmm14
497	movq	%r13,%r10
498	movq	%r14,%r11
499	movq	%r15,%r12
500	andq	$3,%r12
501	movq	%r15,%r13
502	andq	$-4,%r13
503	movq	%r9,%r14
504	shrdq	$2,%r9,%r15
505	shrq	$2,%r9
506	addq	%r13,%r15
507	adcq	%r14,%r9
508	addq	%r15,%r10
509	adcq	%r9,%r11
510	adcq	$0,%r12
511	pxor	%xmm1,%xmm13
512	pxor	%xmm0,%xmm12
513.byte	102,69,15,56,0,248
514.byte	102,69,15,56,0,240
515.byte	102,69,15,56,0,232
516.byte	102,69,15,56,0,224
517	movdqa	0+80(%rbp),%xmm8
518	paddd	%xmm15,%xmm11
519	paddd	%xmm14,%xmm10
520	paddd	%xmm13,%xmm9
521	paddd	%xmm12,%xmm8
522	pxor	%xmm11,%xmm7
523	pxor	%xmm10,%xmm6
524	pxor	%xmm9,%xmm5
525	pxor	%xmm8,%xmm4
526	movdqa	%xmm8,0+80(%rbp)
527	movdqa	%xmm7,%xmm8
528	psrld	$20,%xmm8
529	pslld	$32-20,%xmm7
530	pxor	%xmm8,%xmm7
531	movdqa	%xmm6,%xmm8
532	psrld	$20,%xmm8
533	pslld	$32-20,%xmm6
534	pxor	%xmm8,%xmm6
535	movdqa	%xmm5,%xmm8
536	psrld	$20,%xmm8
537	pslld	$32-20,%xmm5
538	pxor	%xmm8,%xmm5
539	movdqa	%xmm4,%xmm8
540	psrld	$20,%xmm8
541	pslld	$32-20,%xmm4
542	pxor	%xmm8,%xmm4
543	movdqa	.Lrol8(%rip),%xmm8
544	paddd	%xmm7,%xmm3
545	paddd	%xmm6,%xmm2
546	paddd	%xmm5,%xmm1
547	paddd	%xmm4,%xmm0
548	pxor	%xmm3,%xmm15
549	pxor	%xmm2,%xmm14
550	pxor	%xmm1,%xmm13
551	pxor	%xmm0,%xmm12
552.byte	102,69,15,56,0,248
553.byte	102,69,15,56,0,240
554.byte	102,69,15,56,0,232
555.byte	102,69,15,56,0,224
556	movdqa	0+80(%rbp),%xmm8
557	paddd	%xmm15,%xmm11
558	paddd	%xmm14,%xmm10
559	paddd	%xmm13,%xmm9
560	paddd	%xmm12,%xmm8
561	pxor	%xmm11,%xmm7
562	pxor	%xmm10,%xmm6
563	pxor	%xmm9,%xmm5
564	pxor	%xmm8,%xmm4
565	movdqa	%xmm8,0+80(%rbp)
566	movdqa	%xmm7,%xmm8
567	psrld	$25,%xmm8
568	pslld	$32-25,%xmm7
569	pxor	%xmm8,%xmm7
570	movdqa	%xmm6,%xmm8
571	psrld	$25,%xmm8
572	pslld	$32-25,%xmm6
573	pxor	%xmm8,%xmm6
574	movdqa	%xmm5,%xmm8
575	psrld	$25,%xmm8
576	pslld	$32-25,%xmm5
577	pxor	%xmm8,%xmm5
578	movdqa	%xmm4,%xmm8
579	psrld	$25,%xmm8
580	pslld	$32-25,%xmm4
581	pxor	%xmm8,%xmm4
582	movdqa	0+80(%rbp),%xmm8
583.byte	102,15,58,15,255,12
584.byte	102,69,15,58,15,219,8
585.byte	102,69,15,58,15,255,4
586.byte	102,15,58,15,246,12
587.byte	102,69,15,58,15,210,8
588.byte	102,69,15,58,15,246,4
589.byte	102,15,58,15,237,12
590.byte	102,69,15,58,15,201,8
591.byte	102,69,15,58,15,237,4
592.byte	102,15,58,15,228,12
593.byte	102,69,15,58,15,192,8
594.byte	102,69,15,58,15,228,4
595
596	decq	%rcx
597	jge	.Lopen_sse_main_loop_rounds
598	addq	0+0(%r8),%r10
599	adcq	8+0(%r8),%r11
600	adcq	$1,%r12
601	movq	0+0+0(%rbp),%rax
602	movq	%rax,%r15
603	mulq	%r10
604	movq	%rax,%r13
605	movq	%rdx,%r14
606	movq	0+0+0(%rbp),%rax
607	mulq	%r11
608	imulq	%r12,%r15
609	addq	%rax,%r14
610	adcq	%rdx,%r15
611	movq	8+0+0(%rbp),%rax
612	movq	%rax,%r9
613	mulq	%r10
614	addq	%rax,%r14
615	adcq	$0,%rdx
616	movq	%rdx,%r10
617	movq	8+0+0(%rbp),%rax
618	mulq	%r11
619	addq	%rax,%r15
620	adcq	$0,%rdx
621	imulq	%r12,%r9
622	addq	%r10,%r15
623	adcq	%rdx,%r9
624	movq	%r13,%r10
625	movq	%r14,%r11
626	movq	%r15,%r12
627	andq	$3,%r12
628	movq	%r15,%r13
629	andq	$-4,%r13
630	movq	%r9,%r14
631	shrdq	$2,%r9,%r15
632	shrq	$2,%r9
633	addq	%r13,%r15
634	adcq	%r14,%r9
635	addq	%r15,%r10
636	adcq	%r9,%r11
637	adcq	$0,%r12
638
639	leaq	16(%r8),%r8
640	cmpq	$-6,%rcx
641	jg	.Lopen_sse_main_loop_rounds
642	paddd	.Lchacha20_consts(%rip),%xmm3
643	paddd	0+48(%rbp),%xmm7
644	paddd	0+64(%rbp),%xmm11
645	paddd	0+144(%rbp),%xmm15
646	paddd	.Lchacha20_consts(%rip),%xmm2
647	paddd	0+48(%rbp),%xmm6
648	paddd	0+64(%rbp),%xmm10
649	paddd	0+128(%rbp),%xmm14
650	paddd	.Lchacha20_consts(%rip),%xmm1
651	paddd	0+48(%rbp),%xmm5
652	paddd	0+64(%rbp),%xmm9
653	paddd	0+112(%rbp),%xmm13
654	paddd	.Lchacha20_consts(%rip),%xmm0
655	paddd	0+48(%rbp),%xmm4
656	paddd	0+64(%rbp),%xmm8
657	paddd	0+96(%rbp),%xmm12
658	movdqa	%xmm12,0+80(%rbp)
659	movdqu	0 + 0(%rsi),%xmm12
660	pxor	%xmm3,%xmm12
661	movdqu	%xmm12,0 + 0(%rdi)
662	movdqu	16 + 0(%rsi),%xmm12
663	pxor	%xmm7,%xmm12
664	movdqu	%xmm12,16 + 0(%rdi)
665	movdqu	32 + 0(%rsi),%xmm12
666	pxor	%xmm11,%xmm12
667	movdqu	%xmm12,32 + 0(%rdi)
668	movdqu	48 + 0(%rsi),%xmm12
669	pxor	%xmm15,%xmm12
670	movdqu	%xmm12,48 + 0(%rdi)
671	movdqu	0 + 64(%rsi),%xmm3
672	movdqu	16 + 64(%rsi),%xmm7
673	movdqu	32 + 64(%rsi),%xmm11
674	movdqu	48 + 64(%rsi),%xmm15
675	pxor	%xmm3,%xmm2
676	pxor	%xmm7,%xmm6
677	pxor	%xmm11,%xmm10
678	pxor	%xmm14,%xmm15
679	movdqu	%xmm2,0 + 64(%rdi)
680	movdqu	%xmm6,16 + 64(%rdi)
681	movdqu	%xmm10,32 + 64(%rdi)
682	movdqu	%xmm15,48 + 64(%rdi)
683	movdqu	0 + 128(%rsi),%xmm3
684	movdqu	16 + 128(%rsi),%xmm7
685	movdqu	32 + 128(%rsi),%xmm11
686	movdqu	48 + 128(%rsi),%xmm15
687	pxor	%xmm3,%xmm1
688	pxor	%xmm7,%xmm5
689	pxor	%xmm11,%xmm9
690	pxor	%xmm13,%xmm15
691	movdqu	%xmm1,0 + 128(%rdi)
692	movdqu	%xmm5,16 + 128(%rdi)
693	movdqu	%xmm9,32 + 128(%rdi)
694	movdqu	%xmm15,48 + 128(%rdi)
695	movdqu	0 + 192(%rsi),%xmm3
696	movdqu	16 + 192(%rsi),%xmm7
697	movdqu	32 + 192(%rsi),%xmm11
698	movdqu	48 + 192(%rsi),%xmm15
699	pxor	%xmm3,%xmm0
700	pxor	%xmm7,%xmm4
701	pxor	%xmm11,%xmm8
702	pxor	0+80(%rbp),%xmm15
703	movdqu	%xmm0,0 + 192(%rdi)
704	movdqu	%xmm4,16 + 192(%rdi)
705	movdqu	%xmm8,32 + 192(%rdi)
706	movdqu	%xmm15,48 + 192(%rdi)
707
708	leaq	256(%rsi),%rsi
709	leaq	256(%rdi),%rdi
710	subq	$256,%rbx
711	jmp	.Lopen_sse_main_loop
712.Lopen_sse_tail:
713
714	testq	%rbx,%rbx
715	jz	.Lopen_sse_finalize
716	cmpq	$192,%rbx
717	ja	.Lopen_sse_tail_256
718	cmpq	$128,%rbx
719	ja	.Lopen_sse_tail_192
720	cmpq	$64,%rbx
721	ja	.Lopen_sse_tail_128
722	movdqa	.Lchacha20_consts(%rip),%xmm0
723	movdqa	0+48(%rbp),%xmm4
724	movdqa	0+64(%rbp),%xmm8
725	movdqa	0+96(%rbp),%xmm12
726	paddd	.Lsse_inc(%rip),%xmm12
727	movdqa	%xmm12,0+96(%rbp)
728
729	xorq	%r8,%r8
730	movq	%rbx,%rcx
731	cmpq	$16,%rcx
732	jb	.Lopen_sse_tail_64_rounds
733.Lopen_sse_tail_64_rounds_and_x1hash:
734	addq	0+0(%rsi,%r8,1),%r10
735	adcq	8+0(%rsi,%r8,1),%r11
736	adcq	$1,%r12
737	movq	0+0+0(%rbp),%rax
738	movq	%rax,%r15
739	mulq	%r10
740	movq	%rax,%r13
741	movq	%rdx,%r14
742	movq	0+0+0(%rbp),%rax
743	mulq	%r11
744	imulq	%r12,%r15
745	addq	%rax,%r14
746	adcq	%rdx,%r15
747	movq	8+0+0(%rbp),%rax
748	movq	%rax,%r9
749	mulq	%r10
750	addq	%rax,%r14
751	adcq	$0,%rdx
752	movq	%rdx,%r10
753	movq	8+0+0(%rbp),%rax
754	mulq	%r11
755	addq	%rax,%r15
756	adcq	$0,%rdx
757	imulq	%r12,%r9
758	addq	%r10,%r15
759	adcq	%rdx,%r9
760	movq	%r13,%r10
761	movq	%r14,%r11
762	movq	%r15,%r12
763	andq	$3,%r12
764	movq	%r15,%r13
765	andq	$-4,%r13
766	movq	%r9,%r14
767	shrdq	$2,%r9,%r15
768	shrq	$2,%r9
769	addq	%r13,%r15
770	adcq	%r14,%r9
771	addq	%r15,%r10
772	adcq	%r9,%r11
773	adcq	$0,%r12
774
775	subq	$16,%rcx
776.Lopen_sse_tail_64_rounds:
777	addq	$16,%r8
778	paddd	%xmm4,%xmm0
779	pxor	%xmm0,%xmm12
780	pshufb	.Lrol16(%rip),%xmm12
781	paddd	%xmm12,%xmm8
782	pxor	%xmm8,%xmm4
783	movdqa	%xmm4,%xmm3
784	pslld	$12,%xmm3
785	psrld	$20,%xmm4
786	pxor	%xmm3,%xmm4
787	paddd	%xmm4,%xmm0
788	pxor	%xmm0,%xmm12
789	pshufb	.Lrol8(%rip),%xmm12
790	paddd	%xmm12,%xmm8
791	pxor	%xmm8,%xmm4
792	movdqa	%xmm4,%xmm3
793	pslld	$7,%xmm3
794	psrld	$25,%xmm4
795	pxor	%xmm3,%xmm4
796.byte	102,15,58,15,228,4
797.byte	102,69,15,58,15,192,8
798.byte	102,69,15,58,15,228,12
799	paddd	%xmm4,%xmm0
800	pxor	%xmm0,%xmm12
801	pshufb	.Lrol16(%rip),%xmm12
802	paddd	%xmm12,%xmm8
803	pxor	%xmm8,%xmm4
804	movdqa	%xmm4,%xmm3
805	pslld	$12,%xmm3
806	psrld	$20,%xmm4
807	pxor	%xmm3,%xmm4
808	paddd	%xmm4,%xmm0
809	pxor	%xmm0,%xmm12
810	pshufb	.Lrol8(%rip),%xmm12
811	paddd	%xmm12,%xmm8
812	pxor	%xmm8,%xmm4
813	movdqa	%xmm4,%xmm3
814	pslld	$7,%xmm3
815	psrld	$25,%xmm4
816	pxor	%xmm3,%xmm4
817.byte	102,15,58,15,228,12
818.byte	102,69,15,58,15,192,8
819.byte	102,69,15,58,15,228,4
820
821	cmpq	$16,%rcx
822	jae	.Lopen_sse_tail_64_rounds_and_x1hash
823	cmpq	$160,%r8
824	jne	.Lopen_sse_tail_64_rounds
825	paddd	.Lchacha20_consts(%rip),%xmm0
826	paddd	0+48(%rbp),%xmm4
827	paddd	0+64(%rbp),%xmm8
828	paddd	0+96(%rbp),%xmm12
829
830	jmp	.Lopen_sse_tail_64_dec_loop
831
832.Lopen_sse_tail_128:
833	movdqa	.Lchacha20_consts(%rip),%xmm0
834	movdqa	0+48(%rbp),%xmm4
835	movdqa	0+64(%rbp),%xmm8
836	movdqa	%xmm0,%xmm1
837	movdqa	%xmm4,%xmm5
838	movdqa	%xmm8,%xmm9
839	movdqa	0+96(%rbp),%xmm13
840	paddd	.Lsse_inc(%rip),%xmm13
841	movdqa	%xmm13,%xmm12
842	paddd	.Lsse_inc(%rip),%xmm12
843	movdqa	%xmm12,0+96(%rbp)
844	movdqa	%xmm13,0+112(%rbp)
845
846	movq	%rbx,%rcx
847	andq	$-16,%rcx
848	xorq	%r8,%r8
849.Lopen_sse_tail_128_rounds_and_x1hash:
850	addq	0+0(%rsi,%r8,1),%r10
851	adcq	8+0(%rsi,%r8,1),%r11
852	adcq	$1,%r12
853	movq	0+0+0(%rbp),%rax
854	movq	%rax,%r15
855	mulq	%r10
856	movq	%rax,%r13
857	movq	%rdx,%r14
858	movq	0+0+0(%rbp),%rax
859	mulq	%r11
860	imulq	%r12,%r15
861	addq	%rax,%r14
862	adcq	%rdx,%r15
863	movq	8+0+0(%rbp),%rax
864	movq	%rax,%r9
865	mulq	%r10
866	addq	%rax,%r14
867	adcq	$0,%rdx
868	movq	%rdx,%r10
869	movq	8+0+0(%rbp),%rax
870	mulq	%r11
871	addq	%rax,%r15
872	adcq	$0,%rdx
873	imulq	%r12,%r9
874	addq	%r10,%r15
875	adcq	%rdx,%r9
876	movq	%r13,%r10
877	movq	%r14,%r11
878	movq	%r15,%r12
879	andq	$3,%r12
880	movq	%r15,%r13
881	andq	$-4,%r13
882	movq	%r9,%r14
883	shrdq	$2,%r9,%r15
884	shrq	$2,%r9
885	addq	%r13,%r15
886	adcq	%r14,%r9
887	addq	%r15,%r10
888	adcq	%r9,%r11
889	adcq	$0,%r12
890
891.Lopen_sse_tail_128_rounds:
892	addq	$16,%r8
893	paddd	%xmm4,%xmm0
894	pxor	%xmm0,%xmm12
895	pshufb	.Lrol16(%rip),%xmm12
896	paddd	%xmm12,%xmm8
897	pxor	%xmm8,%xmm4
898	movdqa	%xmm4,%xmm3
899	pslld	$12,%xmm3
900	psrld	$20,%xmm4
901	pxor	%xmm3,%xmm4
902	paddd	%xmm4,%xmm0
903	pxor	%xmm0,%xmm12
904	pshufb	.Lrol8(%rip),%xmm12
905	paddd	%xmm12,%xmm8
906	pxor	%xmm8,%xmm4
907	movdqa	%xmm4,%xmm3
908	pslld	$7,%xmm3
909	psrld	$25,%xmm4
910	pxor	%xmm3,%xmm4
911.byte	102,15,58,15,228,4
912.byte	102,69,15,58,15,192,8
913.byte	102,69,15,58,15,228,12
914	paddd	%xmm5,%xmm1
915	pxor	%xmm1,%xmm13
916	pshufb	.Lrol16(%rip),%xmm13
917	paddd	%xmm13,%xmm9
918	pxor	%xmm9,%xmm5
919	movdqa	%xmm5,%xmm3
920	pslld	$12,%xmm3
921	psrld	$20,%xmm5
922	pxor	%xmm3,%xmm5
923	paddd	%xmm5,%xmm1
924	pxor	%xmm1,%xmm13
925	pshufb	.Lrol8(%rip),%xmm13
926	paddd	%xmm13,%xmm9
927	pxor	%xmm9,%xmm5
928	movdqa	%xmm5,%xmm3
929	pslld	$7,%xmm3
930	psrld	$25,%xmm5
931	pxor	%xmm3,%xmm5
932.byte	102,15,58,15,237,4
933.byte	102,69,15,58,15,201,8
934.byte	102,69,15,58,15,237,12
935	paddd	%xmm4,%xmm0
936	pxor	%xmm0,%xmm12
937	pshufb	.Lrol16(%rip),%xmm12
938	paddd	%xmm12,%xmm8
939	pxor	%xmm8,%xmm4
940	movdqa	%xmm4,%xmm3
941	pslld	$12,%xmm3
942	psrld	$20,%xmm4
943	pxor	%xmm3,%xmm4
944	paddd	%xmm4,%xmm0
945	pxor	%xmm0,%xmm12
946	pshufb	.Lrol8(%rip),%xmm12
947	paddd	%xmm12,%xmm8
948	pxor	%xmm8,%xmm4
949	movdqa	%xmm4,%xmm3
950	pslld	$7,%xmm3
951	psrld	$25,%xmm4
952	pxor	%xmm3,%xmm4
953.byte	102,15,58,15,228,12
954.byte	102,69,15,58,15,192,8
955.byte	102,69,15,58,15,228,4
956	paddd	%xmm5,%xmm1
957	pxor	%xmm1,%xmm13
958	pshufb	.Lrol16(%rip),%xmm13
959	paddd	%xmm13,%xmm9
960	pxor	%xmm9,%xmm5
961	movdqa	%xmm5,%xmm3
962	pslld	$12,%xmm3
963	psrld	$20,%xmm5
964	pxor	%xmm3,%xmm5
965	paddd	%xmm5,%xmm1
966	pxor	%xmm1,%xmm13
967	pshufb	.Lrol8(%rip),%xmm13
968	paddd	%xmm13,%xmm9
969	pxor	%xmm9,%xmm5
970	movdqa	%xmm5,%xmm3
971	pslld	$7,%xmm3
972	psrld	$25,%xmm5
973	pxor	%xmm3,%xmm5
974.byte	102,15,58,15,237,12
975.byte	102,69,15,58,15,201,8
976.byte	102,69,15,58,15,237,4
977
978	cmpq	%rcx,%r8
979	jb	.Lopen_sse_tail_128_rounds_and_x1hash
980	cmpq	$160,%r8
981	jne	.Lopen_sse_tail_128_rounds
982	paddd	.Lchacha20_consts(%rip),%xmm1
983	paddd	0+48(%rbp),%xmm5
984	paddd	0+64(%rbp),%xmm9
985	paddd	0+112(%rbp),%xmm13
986	paddd	.Lchacha20_consts(%rip),%xmm0
987	paddd	0+48(%rbp),%xmm4
988	paddd	0+64(%rbp),%xmm8
989	paddd	0+96(%rbp),%xmm12
990	movdqu	0 + 0(%rsi),%xmm3
991	movdqu	16 + 0(%rsi),%xmm7
992	movdqu	32 + 0(%rsi),%xmm11
993	movdqu	48 + 0(%rsi),%xmm15
994	pxor	%xmm3,%xmm1
995	pxor	%xmm7,%xmm5
996	pxor	%xmm11,%xmm9
997	pxor	%xmm13,%xmm15
998	movdqu	%xmm1,0 + 0(%rdi)
999	movdqu	%xmm5,16 + 0(%rdi)
1000	movdqu	%xmm9,32 + 0(%rdi)
1001	movdqu	%xmm15,48 + 0(%rdi)
1002
1003	subq	$64,%rbx
1004	leaq	64(%rsi),%rsi
1005	leaq	64(%rdi),%rdi
1006	jmp	.Lopen_sse_tail_64_dec_loop
1007
1008.Lopen_sse_tail_192:
1009	movdqa	.Lchacha20_consts(%rip),%xmm0
1010	movdqa	0+48(%rbp),%xmm4
1011	movdqa	0+64(%rbp),%xmm8
1012	movdqa	%xmm0,%xmm1
1013	movdqa	%xmm4,%xmm5
1014	movdqa	%xmm8,%xmm9
1015	movdqa	%xmm0,%xmm2
1016	movdqa	%xmm4,%xmm6
1017	movdqa	%xmm8,%xmm10
1018	movdqa	0+96(%rbp),%xmm14
1019	paddd	.Lsse_inc(%rip),%xmm14
1020	movdqa	%xmm14,%xmm13
1021	paddd	.Lsse_inc(%rip),%xmm13
1022	movdqa	%xmm13,%xmm12
1023	paddd	.Lsse_inc(%rip),%xmm12
1024	movdqa	%xmm12,0+96(%rbp)
1025	movdqa	%xmm13,0+112(%rbp)
1026	movdqa	%xmm14,0+128(%rbp)
1027
1028	movq	%rbx,%rcx
1029	movq	$160,%r8
1030	cmpq	$160,%rcx
1031	cmovgq	%r8,%rcx
1032	andq	$-16,%rcx
1033	xorq	%r8,%r8
1034.Lopen_sse_tail_192_rounds_and_x1hash:
1035	addq	0+0(%rsi,%r8,1),%r10
1036	adcq	8+0(%rsi,%r8,1),%r11
1037	adcq	$1,%r12
1038	movq	0+0+0(%rbp),%rax
1039	movq	%rax,%r15
1040	mulq	%r10
1041	movq	%rax,%r13
1042	movq	%rdx,%r14
1043	movq	0+0+0(%rbp),%rax
1044	mulq	%r11
1045	imulq	%r12,%r15
1046	addq	%rax,%r14
1047	adcq	%rdx,%r15
1048	movq	8+0+0(%rbp),%rax
1049	movq	%rax,%r9
1050	mulq	%r10
1051	addq	%rax,%r14
1052	adcq	$0,%rdx
1053	movq	%rdx,%r10
1054	movq	8+0+0(%rbp),%rax
1055	mulq	%r11
1056	addq	%rax,%r15
1057	adcq	$0,%rdx
1058	imulq	%r12,%r9
1059	addq	%r10,%r15
1060	adcq	%rdx,%r9
1061	movq	%r13,%r10
1062	movq	%r14,%r11
1063	movq	%r15,%r12
1064	andq	$3,%r12
1065	movq	%r15,%r13
1066	andq	$-4,%r13
1067	movq	%r9,%r14
1068	shrdq	$2,%r9,%r15
1069	shrq	$2,%r9
1070	addq	%r13,%r15
1071	adcq	%r14,%r9
1072	addq	%r15,%r10
1073	adcq	%r9,%r11
1074	adcq	$0,%r12
1075
1076.Lopen_sse_tail_192_rounds:
1077	addq	$16,%r8
1078	paddd	%xmm4,%xmm0
1079	pxor	%xmm0,%xmm12
1080	pshufb	.Lrol16(%rip),%xmm12
1081	paddd	%xmm12,%xmm8
1082	pxor	%xmm8,%xmm4
1083	movdqa	%xmm4,%xmm3
1084	pslld	$12,%xmm3
1085	psrld	$20,%xmm4
1086	pxor	%xmm3,%xmm4
1087	paddd	%xmm4,%xmm0
1088	pxor	%xmm0,%xmm12
1089	pshufb	.Lrol8(%rip),%xmm12
1090	paddd	%xmm12,%xmm8
1091	pxor	%xmm8,%xmm4
1092	movdqa	%xmm4,%xmm3
1093	pslld	$7,%xmm3
1094	psrld	$25,%xmm4
1095	pxor	%xmm3,%xmm4
1096.byte	102,15,58,15,228,4
1097.byte	102,69,15,58,15,192,8
1098.byte	102,69,15,58,15,228,12
1099	paddd	%xmm5,%xmm1
1100	pxor	%xmm1,%xmm13
1101	pshufb	.Lrol16(%rip),%xmm13
1102	paddd	%xmm13,%xmm9
1103	pxor	%xmm9,%xmm5
1104	movdqa	%xmm5,%xmm3
1105	pslld	$12,%xmm3
1106	psrld	$20,%xmm5
1107	pxor	%xmm3,%xmm5
1108	paddd	%xmm5,%xmm1
1109	pxor	%xmm1,%xmm13
1110	pshufb	.Lrol8(%rip),%xmm13
1111	paddd	%xmm13,%xmm9
1112	pxor	%xmm9,%xmm5
1113	movdqa	%xmm5,%xmm3
1114	pslld	$7,%xmm3
1115	psrld	$25,%xmm5
1116	pxor	%xmm3,%xmm5
1117.byte	102,15,58,15,237,4
1118.byte	102,69,15,58,15,201,8
1119.byte	102,69,15,58,15,237,12
1120	paddd	%xmm6,%xmm2
1121	pxor	%xmm2,%xmm14
1122	pshufb	.Lrol16(%rip),%xmm14
1123	paddd	%xmm14,%xmm10
1124	pxor	%xmm10,%xmm6
1125	movdqa	%xmm6,%xmm3
1126	pslld	$12,%xmm3
1127	psrld	$20,%xmm6
1128	pxor	%xmm3,%xmm6
1129	paddd	%xmm6,%xmm2
1130	pxor	%xmm2,%xmm14
1131	pshufb	.Lrol8(%rip),%xmm14
1132	paddd	%xmm14,%xmm10
1133	pxor	%xmm10,%xmm6
1134	movdqa	%xmm6,%xmm3
1135	pslld	$7,%xmm3
1136	psrld	$25,%xmm6
1137	pxor	%xmm3,%xmm6
1138.byte	102,15,58,15,246,4
1139.byte	102,69,15,58,15,210,8
1140.byte	102,69,15,58,15,246,12
1141	paddd	%xmm4,%xmm0
1142	pxor	%xmm0,%xmm12
1143	pshufb	.Lrol16(%rip),%xmm12
1144	paddd	%xmm12,%xmm8
1145	pxor	%xmm8,%xmm4
1146	movdqa	%xmm4,%xmm3
1147	pslld	$12,%xmm3
1148	psrld	$20,%xmm4
1149	pxor	%xmm3,%xmm4
1150	paddd	%xmm4,%xmm0
1151	pxor	%xmm0,%xmm12
1152	pshufb	.Lrol8(%rip),%xmm12
1153	paddd	%xmm12,%xmm8
1154	pxor	%xmm8,%xmm4
1155	movdqa	%xmm4,%xmm3
1156	pslld	$7,%xmm3
1157	psrld	$25,%xmm4
1158	pxor	%xmm3,%xmm4
1159.byte	102,15,58,15,228,12
1160.byte	102,69,15,58,15,192,8
1161.byte	102,69,15,58,15,228,4
1162	paddd	%xmm5,%xmm1
1163	pxor	%xmm1,%xmm13
1164	pshufb	.Lrol16(%rip),%xmm13
1165	paddd	%xmm13,%xmm9
1166	pxor	%xmm9,%xmm5
1167	movdqa	%xmm5,%xmm3
1168	pslld	$12,%xmm3
1169	psrld	$20,%xmm5
1170	pxor	%xmm3,%xmm5
1171	paddd	%xmm5,%xmm1
1172	pxor	%xmm1,%xmm13
1173	pshufb	.Lrol8(%rip),%xmm13
1174	paddd	%xmm13,%xmm9
1175	pxor	%xmm9,%xmm5
1176	movdqa	%xmm5,%xmm3
1177	pslld	$7,%xmm3
1178	psrld	$25,%xmm5
1179	pxor	%xmm3,%xmm5
1180.byte	102,15,58,15,237,12
1181.byte	102,69,15,58,15,201,8
1182.byte	102,69,15,58,15,237,4
1183	paddd	%xmm6,%xmm2
1184	pxor	%xmm2,%xmm14
1185	pshufb	.Lrol16(%rip),%xmm14
1186	paddd	%xmm14,%xmm10
1187	pxor	%xmm10,%xmm6
1188	movdqa	%xmm6,%xmm3
1189	pslld	$12,%xmm3
1190	psrld	$20,%xmm6
1191	pxor	%xmm3,%xmm6
1192	paddd	%xmm6,%xmm2
1193	pxor	%xmm2,%xmm14
1194	pshufb	.Lrol8(%rip),%xmm14
1195	paddd	%xmm14,%xmm10
1196	pxor	%xmm10,%xmm6
1197	movdqa	%xmm6,%xmm3
1198	pslld	$7,%xmm3
1199	psrld	$25,%xmm6
1200	pxor	%xmm3,%xmm6
1201.byte	102,15,58,15,246,12
1202.byte	102,69,15,58,15,210,8
1203.byte	102,69,15,58,15,246,4
1204
1205	cmpq	%rcx,%r8
1206	jb	.Lopen_sse_tail_192_rounds_and_x1hash
1207	cmpq	$160,%r8
1208	jne	.Lopen_sse_tail_192_rounds
1209	cmpq	$176,%rbx
1210	jb	.Lopen_sse_tail_192_finish
1211	addq	0+160(%rsi),%r10
1212	adcq	8+160(%rsi),%r11
1213	adcq	$1,%r12
1214	movq	0+0+0(%rbp),%rax
1215	movq	%rax,%r15
1216	mulq	%r10
1217	movq	%rax,%r13
1218	movq	%rdx,%r14
1219	movq	0+0+0(%rbp),%rax
1220	mulq	%r11
1221	imulq	%r12,%r15
1222	addq	%rax,%r14
1223	adcq	%rdx,%r15
1224	movq	8+0+0(%rbp),%rax
1225	movq	%rax,%r9
1226	mulq	%r10
1227	addq	%rax,%r14
1228	adcq	$0,%rdx
1229	movq	%rdx,%r10
1230	movq	8+0+0(%rbp),%rax
1231	mulq	%r11
1232	addq	%rax,%r15
1233	adcq	$0,%rdx
1234	imulq	%r12,%r9
1235	addq	%r10,%r15
1236	adcq	%rdx,%r9
1237	movq	%r13,%r10
1238	movq	%r14,%r11
1239	movq	%r15,%r12
1240	andq	$3,%r12
1241	movq	%r15,%r13
1242	andq	$-4,%r13
1243	movq	%r9,%r14
1244	shrdq	$2,%r9,%r15
1245	shrq	$2,%r9
1246	addq	%r13,%r15
1247	adcq	%r14,%r9
1248	addq	%r15,%r10
1249	adcq	%r9,%r11
1250	adcq	$0,%r12
1251
1252	cmpq	$192,%rbx
1253	jb	.Lopen_sse_tail_192_finish
1254	addq	0+176(%rsi),%r10
1255	adcq	8+176(%rsi),%r11
1256	adcq	$1,%r12
1257	movq	0+0+0(%rbp),%rax
1258	movq	%rax,%r15
1259	mulq	%r10
1260	movq	%rax,%r13
1261	movq	%rdx,%r14
1262	movq	0+0+0(%rbp),%rax
1263	mulq	%r11
1264	imulq	%r12,%r15
1265	addq	%rax,%r14
1266	adcq	%rdx,%r15
1267	movq	8+0+0(%rbp),%rax
1268	movq	%rax,%r9
1269	mulq	%r10
1270	addq	%rax,%r14
1271	adcq	$0,%rdx
1272	movq	%rdx,%r10
1273	movq	8+0+0(%rbp),%rax
1274	mulq	%r11
1275	addq	%rax,%r15
1276	adcq	$0,%rdx
1277	imulq	%r12,%r9
1278	addq	%r10,%r15
1279	adcq	%rdx,%r9
1280	movq	%r13,%r10
1281	movq	%r14,%r11
1282	movq	%r15,%r12
1283	andq	$3,%r12
1284	movq	%r15,%r13
1285	andq	$-4,%r13
1286	movq	%r9,%r14
1287	shrdq	$2,%r9,%r15
1288	shrq	$2,%r9
1289	addq	%r13,%r15
1290	adcq	%r14,%r9
1291	addq	%r15,%r10
1292	adcq	%r9,%r11
1293	adcq	$0,%r12
1294
1295.Lopen_sse_tail_192_finish:
1296	paddd	.Lchacha20_consts(%rip),%xmm2
1297	paddd	0+48(%rbp),%xmm6
1298	paddd	0+64(%rbp),%xmm10
1299	paddd	0+128(%rbp),%xmm14
1300	paddd	.Lchacha20_consts(%rip),%xmm1
1301	paddd	0+48(%rbp),%xmm5
1302	paddd	0+64(%rbp),%xmm9
1303	paddd	0+112(%rbp),%xmm13
1304	paddd	.Lchacha20_consts(%rip),%xmm0
1305	paddd	0+48(%rbp),%xmm4
1306	paddd	0+64(%rbp),%xmm8
1307	paddd	0+96(%rbp),%xmm12
1308	movdqu	0 + 0(%rsi),%xmm3
1309	movdqu	16 + 0(%rsi),%xmm7
1310	movdqu	32 + 0(%rsi),%xmm11
1311	movdqu	48 + 0(%rsi),%xmm15
1312	pxor	%xmm3,%xmm2
1313	pxor	%xmm7,%xmm6
1314	pxor	%xmm11,%xmm10
1315	pxor	%xmm14,%xmm15
1316	movdqu	%xmm2,0 + 0(%rdi)
1317	movdqu	%xmm6,16 + 0(%rdi)
1318	movdqu	%xmm10,32 + 0(%rdi)
1319	movdqu	%xmm15,48 + 0(%rdi)
1320	movdqu	0 + 64(%rsi),%xmm3
1321	movdqu	16 + 64(%rsi),%xmm7
1322	movdqu	32 + 64(%rsi),%xmm11
1323	movdqu	48 + 64(%rsi),%xmm15
1324	pxor	%xmm3,%xmm1
1325	pxor	%xmm7,%xmm5
1326	pxor	%xmm11,%xmm9
1327	pxor	%xmm13,%xmm15
1328	movdqu	%xmm1,0 + 64(%rdi)
1329	movdqu	%xmm5,16 + 64(%rdi)
1330	movdqu	%xmm9,32 + 64(%rdi)
1331	movdqu	%xmm15,48 + 64(%rdi)
1332
1333	subq	$128,%rbx
1334	leaq	128(%rsi),%rsi
1335	leaq	128(%rdi),%rdi
1336	jmp	.Lopen_sse_tail_64_dec_loop
1337
1338.Lopen_sse_tail_256:
1339	movdqa	.Lchacha20_consts(%rip),%xmm0
1340	movdqa	0+48(%rbp),%xmm4
1341	movdqa	0+64(%rbp),%xmm8
1342	movdqa	%xmm0,%xmm1
1343	movdqa	%xmm4,%xmm5
1344	movdqa	%xmm8,%xmm9
1345	movdqa	%xmm0,%xmm2
1346	movdqa	%xmm4,%xmm6
1347	movdqa	%xmm8,%xmm10
1348	movdqa	%xmm0,%xmm3
1349	movdqa	%xmm4,%xmm7
1350	movdqa	%xmm8,%xmm11
1351	movdqa	0+96(%rbp),%xmm15
1352	paddd	.Lsse_inc(%rip),%xmm15
1353	movdqa	%xmm15,%xmm14
1354	paddd	.Lsse_inc(%rip),%xmm14
1355	movdqa	%xmm14,%xmm13
1356	paddd	.Lsse_inc(%rip),%xmm13
1357	movdqa	%xmm13,%xmm12
1358	paddd	.Lsse_inc(%rip),%xmm12
1359	movdqa	%xmm12,0+96(%rbp)
1360	movdqa	%xmm13,0+112(%rbp)
1361	movdqa	%xmm14,0+128(%rbp)
1362	movdqa	%xmm15,0+144(%rbp)
1363
1364	xorq	%r8,%r8
1365.Lopen_sse_tail_256_rounds_and_x1hash:
1366	addq	0+0(%rsi,%r8,1),%r10
1367	adcq	8+0(%rsi,%r8,1),%r11
1368	adcq	$1,%r12
1369	movdqa	%xmm11,0+80(%rbp)
1370	paddd	%xmm4,%xmm0
1371	pxor	%xmm0,%xmm12
1372	pshufb	.Lrol16(%rip),%xmm12
1373	paddd	%xmm12,%xmm8
1374	pxor	%xmm8,%xmm4
1375	movdqa	%xmm4,%xmm11
1376	pslld	$12,%xmm11
1377	psrld	$20,%xmm4
1378	pxor	%xmm11,%xmm4
1379	paddd	%xmm4,%xmm0
1380	pxor	%xmm0,%xmm12
1381	pshufb	.Lrol8(%rip),%xmm12
1382	paddd	%xmm12,%xmm8
1383	pxor	%xmm8,%xmm4
1384	movdqa	%xmm4,%xmm11
1385	pslld	$7,%xmm11
1386	psrld	$25,%xmm4
1387	pxor	%xmm11,%xmm4
1388.byte	102,15,58,15,228,4
1389.byte	102,69,15,58,15,192,8
1390.byte	102,69,15,58,15,228,12
1391	paddd	%xmm5,%xmm1
1392	pxor	%xmm1,%xmm13
1393	pshufb	.Lrol16(%rip),%xmm13
1394	paddd	%xmm13,%xmm9
1395	pxor	%xmm9,%xmm5
1396	movdqa	%xmm5,%xmm11
1397	pslld	$12,%xmm11
1398	psrld	$20,%xmm5
1399	pxor	%xmm11,%xmm5
1400	paddd	%xmm5,%xmm1
1401	pxor	%xmm1,%xmm13
1402	pshufb	.Lrol8(%rip),%xmm13
1403	paddd	%xmm13,%xmm9
1404	pxor	%xmm9,%xmm5
1405	movdqa	%xmm5,%xmm11
1406	pslld	$7,%xmm11
1407	psrld	$25,%xmm5
1408	pxor	%xmm11,%xmm5
1409.byte	102,15,58,15,237,4
1410.byte	102,69,15,58,15,201,8
1411.byte	102,69,15,58,15,237,12
1412	paddd	%xmm6,%xmm2
1413	pxor	%xmm2,%xmm14
1414	pshufb	.Lrol16(%rip),%xmm14
1415	paddd	%xmm14,%xmm10
1416	pxor	%xmm10,%xmm6
1417	movdqa	%xmm6,%xmm11
1418	pslld	$12,%xmm11
1419	psrld	$20,%xmm6
1420	pxor	%xmm11,%xmm6
1421	paddd	%xmm6,%xmm2
1422	pxor	%xmm2,%xmm14
1423	pshufb	.Lrol8(%rip),%xmm14
1424	paddd	%xmm14,%xmm10
1425	pxor	%xmm10,%xmm6
1426	movdqa	%xmm6,%xmm11
1427	pslld	$7,%xmm11
1428	psrld	$25,%xmm6
1429	pxor	%xmm11,%xmm6
1430.byte	102,15,58,15,246,4
1431.byte	102,69,15,58,15,210,8
1432.byte	102,69,15,58,15,246,12
1433	movdqa	0+80(%rbp),%xmm11
1434	movq	0+0+0(%rbp),%rax
1435	movq	%rax,%r15
1436	mulq	%r10
1437	movq	%rax,%r13
1438	movq	%rdx,%r14
1439	movq	0+0+0(%rbp),%rax
1440	mulq	%r11
1441	imulq	%r12,%r15
1442	addq	%rax,%r14
1443	adcq	%rdx,%r15
1444	movdqa	%xmm9,0+80(%rbp)
1445	paddd	%xmm7,%xmm3
1446	pxor	%xmm3,%xmm15
1447	pshufb	.Lrol16(%rip),%xmm15
1448	paddd	%xmm15,%xmm11
1449	pxor	%xmm11,%xmm7
1450	movdqa	%xmm7,%xmm9
1451	pslld	$12,%xmm9
1452	psrld	$20,%xmm7
1453	pxor	%xmm9,%xmm7
1454	paddd	%xmm7,%xmm3
1455	pxor	%xmm3,%xmm15
1456	pshufb	.Lrol8(%rip),%xmm15
1457	paddd	%xmm15,%xmm11
1458	pxor	%xmm11,%xmm7
1459	movdqa	%xmm7,%xmm9
1460	pslld	$7,%xmm9
1461	psrld	$25,%xmm7
1462	pxor	%xmm9,%xmm7
1463.byte	102,15,58,15,255,4
1464.byte	102,69,15,58,15,219,8
1465.byte	102,69,15,58,15,255,12
1466	movdqa	0+80(%rbp),%xmm9
1467	movq	8+0+0(%rbp),%rax
1468	movq	%rax,%r9
1469	mulq	%r10
1470	addq	%rax,%r14
1471	adcq	$0,%rdx
1472	movq	%rdx,%r10
1473	movq	8+0+0(%rbp),%rax
1474	mulq	%r11
1475	addq	%rax,%r15
1476	adcq	$0,%rdx
1477	movdqa	%xmm11,0+80(%rbp)
1478	paddd	%xmm4,%xmm0
1479	pxor	%xmm0,%xmm12
1480	pshufb	.Lrol16(%rip),%xmm12
1481	paddd	%xmm12,%xmm8
1482	pxor	%xmm8,%xmm4
1483	movdqa	%xmm4,%xmm11
1484	pslld	$12,%xmm11
1485	psrld	$20,%xmm4
1486	pxor	%xmm11,%xmm4
1487	paddd	%xmm4,%xmm0
1488	pxor	%xmm0,%xmm12
1489	pshufb	.Lrol8(%rip),%xmm12
1490	paddd	%xmm12,%xmm8
1491	pxor	%xmm8,%xmm4
1492	movdqa	%xmm4,%xmm11
1493	pslld	$7,%xmm11
1494	psrld	$25,%xmm4
1495	pxor	%xmm11,%xmm4
1496.byte	102,15,58,15,228,12
1497.byte	102,69,15,58,15,192,8
1498.byte	102,69,15,58,15,228,4
1499	paddd	%xmm5,%xmm1
1500	pxor	%xmm1,%xmm13
1501	pshufb	.Lrol16(%rip),%xmm13
1502	paddd	%xmm13,%xmm9
1503	pxor	%xmm9,%xmm5
1504	movdqa	%xmm5,%xmm11
1505	pslld	$12,%xmm11
1506	psrld	$20,%xmm5
1507	pxor	%xmm11,%xmm5
1508	paddd	%xmm5,%xmm1
1509	pxor	%xmm1,%xmm13
1510	pshufb	.Lrol8(%rip),%xmm13
1511	paddd	%xmm13,%xmm9
1512	pxor	%xmm9,%xmm5
1513	movdqa	%xmm5,%xmm11
1514	pslld	$7,%xmm11
1515	psrld	$25,%xmm5
1516	pxor	%xmm11,%xmm5
1517.byte	102,15,58,15,237,12
1518.byte	102,69,15,58,15,201,8
1519.byte	102,69,15,58,15,237,4
1520	imulq	%r12,%r9
1521	addq	%r10,%r15
1522	adcq	%rdx,%r9
1523	paddd	%xmm6,%xmm2
1524	pxor	%xmm2,%xmm14
1525	pshufb	.Lrol16(%rip),%xmm14
1526	paddd	%xmm14,%xmm10
1527	pxor	%xmm10,%xmm6
1528	movdqa	%xmm6,%xmm11
1529	pslld	$12,%xmm11
1530	psrld	$20,%xmm6
1531	pxor	%xmm11,%xmm6
1532	paddd	%xmm6,%xmm2
1533	pxor	%xmm2,%xmm14
1534	pshufb	.Lrol8(%rip),%xmm14
1535	paddd	%xmm14,%xmm10
1536	pxor	%xmm10,%xmm6
1537	movdqa	%xmm6,%xmm11
1538	pslld	$7,%xmm11
1539	psrld	$25,%xmm6
1540	pxor	%xmm11,%xmm6
1541.byte	102,15,58,15,246,12
1542.byte	102,69,15,58,15,210,8
1543.byte	102,69,15,58,15,246,4
1544	movdqa	0+80(%rbp),%xmm11
1545	movq	%r13,%r10
1546	movq	%r14,%r11
1547	movq	%r15,%r12
1548	andq	$3,%r12
1549	movq	%r15,%r13
1550	andq	$-4,%r13
1551	movq	%r9,%r14
1552	shrdq	$2,%r9,%r15
1553	shrq	$2,%r9
1554	addq	%r13,%r15
1555	adcq	%r14,%r9
1556	addq	%r15,%r10
1557	adcq	%r9,%r11
1558	adcq	$0,%r12
1559	movdqa	%xmm9,0+80(%rbp)
1560	paddd	%xmm7,%xmm3
1561	pxor	%xmm3,%xmm15
1562	pshufb	.Lrol16(%rip),%xmm15
1563	paddd	%xmm15,%xmm11
1564	pxor	%xmm11,%xmm7
1565	movdqa	%xmm7,%xmm9
1566	pslld	$12,%xmm9
1567	psrld	$20,%xmm7
1568	pxor	%xmm9,%xmm7
1569	paddd	%xmm7,%xmm3
1570	pxor	%xmm3,%xmm15
1571	pshufb	.Lrol8(%rip),%xmm15
1572	paddd	%xmm15,%xmm11
1573	pxor	%xmm11,%xmm7
1574	movdqa	%xmm7,%xmm9
1575	pslld	$7,%xmm9
1576	psrld	$25,%xmm7
1577	pxor	%xmm9,%xmm7
1578.byte	102,15,58,15,255,12
1579.byte	102,69,15,58,15,219,8
1580.byte	102,69,15,58,15,255,4
1581	movdqa	0+80(%rbp),%xmm9
1582
1583	addq	$16,%r8
1584	cmpq	$160,%r8
1585	jb	.Lopen_sse_tail_256_rounds_and_x1hash
1586
1587	movq	%rbx,%rcx
1588	andq	$-16,%rcx
1589.Lopen_sse_tail_256_hash:
1590	addq	0+0(%rsi,%r8,1),%r10
1591	adcq	8+0(%rsi,%r8,1),%r11
1592	adcq	$1,%r12
1593	movq	0+0+0(%rbp),%rax
1594	movq	%rax,%r15
1595	mulq	%r10
1596	movq	%rax,%r13
1597	movq	%rdx,%r14
1598	movq	0+0+0(%rbp),%rax
1599	mulq	%r11
1600	imulq	%r12,%r15
1601	addq	%rax,%r14
1602	adcq	%rdx,%r15
1603	movq	8+0+0(%rbp),%rax
1604	movq	%rax,%r9
1605	mulq	%r10
1606	addq	%rax,%r14
1607	adcq	$0,%rdx
1608	movq	%rdx,%r10
1609	movq	8+0+0(%rbp),%rax
1610	mulq	%r11
1611	addq	%rax,%r15
1612	adcq	$0,%rdx
1613	imulq	%r12,%r9
1614	addq	%r10,%r15
1615	adcq	%rdx,%r9
1616	movq	%r13,%r10
1617	movq	%r14,%r11
1618	movq	%r15,%r12
1619	andq	$3,%r12
1620	movq	%r15,%r13
1621	andq	$-4,%r13
1622	movq	%r9,%r14
1623	shrdq	$2,%r9,%r15
1624	shrq	$2,%r9
1625	addq	%r13,%r15
1626	adcq	%r14,%r9
1627	addq	%r15,%r10
1628	adcq	%r9,%r11
1629	adcq	$0,%r12
1630
1631	addq	$16,%r8
1632	cmpq	%rcx,%r8
1633	jb	.Lopen_sse_tail_256_hash
1634	paddd	.Lchacha20_consts(%rip),%xmm3
1635	paddd	0+48(%rbp),%xmm7
1636	paddd	0+64(%rbp),%xmm11
1637	paddd	0+144(%rbp),%xmm15
1638	paddd	.Lchacha20_consts(%rip),%xmm2
1639	paddd	0+48(%rbp),%xmm6
1640	paddd	0+64(%rbp),%xmm10
1641	paddd	0+128(%rbp),%xmm14
1642	paddd	.Lchacha20_consts(%rip),%xmm1
1643	paddd	0+48(%rbp),%xmm5
1644	paddd	0+64(%rbp),%xmm9
1645	paddd	0+112(%rbp),%xmm13
1646	paddd	.Lchacha20_consts(%rip),%xmm0
1647	paddd	0+48(%rbp),%xmm4
1648	paddd	0+64(%rbp),%xmm8
1649	paddd	0+96(%rbp),%xmm12
1650	movdqa	%xmm12,0+80(%rbp)
1651	movdqu	0 + 0(%rsi),%xmm12
1652	pxor	%xmm3,%xmm12
1653	movdqu	%xmm12,0 + 0(%rdi)
1654	movdqu	16 + 0(%rsi),%xmm12
1655	pxor	%xmm7,%xmm12
1656	movdqu	%xmm12,16 + 0(%rdi)
1657	movdqu	32 + 0(%rsi),%xmm12
1658	pxor	%xmm11,%xmm12
1659	movdqu	%xmm12,32 + 0(%rdi)
1660	movdqu	48 + 0(%rsi),%xmm12
1661	pxor	%xmm15,%xmm12
1662	movdqu	%xmm12,48 + 0(%rdi)
1663	movdqu	0 + 64(%rsi),%xmm3
1664	movdqu	16 + 64(%rsi),%xmm7
1665	movdqu	32 + 64(%rsi),%xmm11
1666	movdqu	48 + 64(%rsi),%xmm15
1667	pxor	%xmm3,%xmm2
1668	pxor	%xmm7,%xmm6
1669	pxor	%xmm11,%xmm10
1670	pxor	%xmm14,%xmm15
1671	movdqu	%xmm2,0 + 64(%rdi)
1672	movdqu	%xmm6,16 + 64(%rdi)
1673	movdqu	%xmm10,32 + 64(%rdi)
1674	movdqu	%xmm15,48 + 64(%rdi)
1675	movdqu	0 + 128(%rsi),%xmm3
1676	movdqu	16 + 128(%rsi),%xmm7
1677	movdqu	32 + 128(%rsi),%xmm11
1678	movdqu	48 + 128(%rsi),%xmm15
1679	pxor	%xmm3,%xmm1
1680	pxor	%xmm7,%xmm5
1681	pxor	%xmm11,%xmm9
1682	pxor	%xmm13,%xmm15
1683	movdqu	%xmm1,0 + 128(%rdi)
1684	movdqu	%xmm5,16 + 128(%rdi)
1685	movdqu	%xmm9,32 + 128(%rdi)
1686	movdqu	%xmm15,48 + 128(%rdi)
1687
1688	movdqa	0+80(%rbp),%xmm12
1689	subq	$192,%rbx
1690	leaq	192(%rsi),%rsi
1691	leaq	192(%rdi),%rdi
1692
1693
1694.Lopen_sse_tail_64_dec_loop:
1695	cmpq	$16,%rbx
1696	jb	.Lopen_sse_tail_16_init
1697	subq	$16,%rbx
1698	movdqu	(%rsi),%xmm3
1699	pxor	%xmm3,%xmm0
1700	movdqu	%xmm0,(%rdi)
1701	leaq	16(%rsi),%rsi
1702	leaq	16(%rdi),%rdi
1703	movdqa	%xmm4,%xmm0
1704	movdqa	%xmm8,%xmm4
1705	movdqa	%xmm12,%xmm8
1706	jmp	.Lopen_sse_tail_64_dec_loop
1707.Lopen_sse_tail_16_init:
1708	movdqa	%xmm0,%xmm1
1709
1710
1711.Lopen_sse_tail_16:
1712	testq	%rbx,%rbx
1713	jz	.Lopen_sse_finalize
1714
1715
1716
1717	pxor	%xmm3,%xmm3
1718	leaq	-1(%rsi,%rbx,1),%rsi
1719	movq	%rbx,%r8
1720.Lopen_sse_tail_16_compose:
1721	pslldq	$1,%xmm3
1722	pinsrb	$0,(%rsi),%xmm3
1723	subq	$1,%rsi
1724	subq	$1,%r8
1725	jnz	.Lopen_sse_tail_16_compose
1726
1727.byte	102,73,15,126,221
1728	pextrq	$1,%xmm3,%r14
1729
1730	pxor	%xmm1,%xmm3
1731
1732
1733.Lopen_sse_tail_16_extract:
1734	pextrb	$0,%xmm3,(%rdi)
1735	psrldq	$1,%xmm3
1736	addq	$1,%rdi
1737	subq	$1,%rbx
1738	jne	.Lopen_sse_tail_16_extract
1739
1740	addq	%r13,%r10
1741	adcq	%r14,%r11
1742	adcq	$1,%r12
1743	movq	0+0+0(%rbp),%rax
1744	movq	%rax,%r15
1745	mulq	%r10
1746	movq	%rax,%r13
1747	movq	%rdx,%r14
1748	movq	0+0+0(%rbp),%rax
1749	mulq	%r11
1750	imulq	%r12,%r15
1751	addq	%rax,%r14
1752	adcq	%rdx,%r15
1753	movq	8+0+0(%rbp),%rax
1754	movq	%rax,%r9
1755	mulq	%r10
1756	addq	%rax,%r14
1757	adcq	$0,%rdx
1758	movq	%rdx,%r10
1759	movq	8+0+0(%rbp),%rax
1760	mulq	%r11
1761	addq	%rax,%r15
1762	adcq	$0,%rdx
1763	imulq	%r12,%r9
1764	addq	%r10,%r15
1765	adcq	%rdx,%r9
1766	movq	%r13,%r10
1767	movq	%r14,%r11
1768	movq	%r15,%r12
1769	andq	$3,%r12
1770	movq	%r15,%r13
1771	andq	$-4,%r13
1772	movq	%r9,%r14
1773	shrdq	$2,%r9,%r15
1774	shrq	$2,%r9
1775	addq	%r13,%r15
1776	adcq	%r14,%r9
1777	addq	%r15,%r10
1778	adcq	%r9,%r11
1779	adcq	$0,%r12
1780
1781
1782.Lopen_sse_finalize:
1783	addq	0+0+32(%rbp),%r10
1784	adcq	8+0+32(%rbp),%r11
1785	adcq	$1,%r12
1786	movq	0+0+0(%rbp),%rax
1787	movq	%rax,%r15
1788	mulq	%r10
1789	movq	%rax,%r13
1790	movq	%rdx,%r14
1791	movq	0+0+0(%rbp),%rax
1792	mulq	%r11
1793	imulq	%r12,%r15
1794	addq	%rax,%r14
1795	adcq	%rdx,%r15
1796	movq	8+0+0(%rbp),%rax
1797	movq	%rax,%r9
1798	mulq	%r10
1799	addq	%rax,%r14
1800	adcq	$0,%rdx
1801	movq	%rdx,%r10
1802	movq	8+0+0(%rbp),%rax
1803	mulq	%r11
1804	addq	%rax,%r15
1805	adcq	$0,%rdx
1806	imulq	%r12,%r9
1807	addq	%r10,%r15
1808	adcq	%rdx,%r9
1809	movq	%r13,%r10
1810	movq	%r14,%r11
1811	movq	%r15,%r12
1812	andq	$3,%r12
1813	movq	%r15,%r13
1814	andq	$-4,%r13
1815	movq	%r9,%r14
1816	shrdq	$2,%r9,%r15
1817	shrq	$2,%r9
1818	addq	%r13,%r15
1819	adcq	%r14,%r9
1820	addq	%r15,%r10
1821	adcq	%r9,%r11
1822	adcq	$0,%r12
1823
1824
1825	movq	%r10,%r13
1826	movq	%r11,%r14
1827	movq	%r12,%r15
1828	subq	$-5,%r10
1829	sbbq	$-1,%r11
1830	sbbq	$3,%r12
1831	cmovcq	%r13,%r10
1832	cmovcq	%r14,%r11
1833	cmovcq	%r15,%r12
1834
1835	addq	0+0+16(%rbp),%r10
1836	adcq	8+0+16(%rbp),%r11
1837
1838.cfi_remember_state
1839	addq	$288 + 0 + 32,%rsp
1840.cfi_adjust_cfa_offset	-(288 + 32)
1841
1842	popq	%r9
1843.cfi_adjust_cfa_offset	-8
1844.cfi_restore	%r9
1845	movq	%r10,(%r9)
1846	movq	%r11,8(%r9)
1847	popq	%r15
1848.cfi_adjust_cfa_offset	-8
1849.cfi_restore	%r15
1850	popq	%r14
1851.cfi_adjust_cfa_offset	-8
1852.cfi_restore	%r14
1853	popq	%r13
1854.cfi_adjust_cfa_offset	-8
1855.cfi_restore	%r13
1856	popq	%r12
1857.cfi_adjust_cfa_offset	-8
1858.cfi_restore	%r12
1859	popq	%rbx
1860.cfi_adjust_cfa_offset	-8
1861.cfi_restore	%rbx
1862	popq	%rbp
1863.cfi_adjust_cfa_offset	-8
1864.cfi_restore	%rbp
1865	ret
1866
1867.Lopen_sse_128:
1868.cfi_restore_state
1869	movdqu	.Lchacha20_consts(%rip),%xmm0
1870	movdqa	%xmm0,%xmm1
1871	movdqa	%xmm0,%xmm2
1872	movdqu	0(%r9),%xmm4
1873	movdqa	%xmm4,%xmm5
1874	movdqa	%xmm4,%xmm6
1875	movdqu	16(%r9),%xmm8
1876	movdqa	%xmm8,%xmm9
1877	movdqa	%xmm8,%xmm10
1878	movdqu	32(%r9),%xmm12
1879	movdqa	%xmm12,%xmm13
1880	paddd	.Lsse_inc(%rip),%xmm13
1881	movdqa	%xmm13,%xmm14
1882	paddd	.Lsse_inc(%rip),%xmm14
1883	movdqa	%xmm4,%xmm7
1884	movdqa	%xmm8,%xmm11
1885	movdqa	%xmm13,%xmm15
1886	movq	$10,%r10
1887
1888.Lopen_sse_128_rounds:
1889	paddd	%xmm4,%xmm0
1890	pxor	%xmm0,%xmm12
1891	pshufb	.Lrol16(%rip),%xmm12
1892	paddd	%xmm12,%xmm8
1893	pxor	%xmm8,%xmm4
1894	movdqa	%xmm4,%xmm3
1895	pslld	$12,%xmm3
1896	psrld	$20,%xmm4
1897	pxor	%xmm3,%xmm4
1898	paddd	%xmm4,%xmm0
1899	pxor	%xmm0,%xmm12
1900	pshufb	.Lrol8(%rip),%xmm12
1901	paddd	%xmm12,%xmm8
1902	pxor	%xmm8,%xmm4
1903	movdqa	%xmm4,%xmm3
1904	pslld	$7,%xmm3
1905	psrld	$25,%xmm4
1906	pxor	%xmm3,%xmm4
1907.byte	102,15,58,15,228,4
1908.byte	102,69,15,58,15,192,8
1909.byte	102,69,15,58,15,228,12
1910	paddd	%xmm5,%xmm1
1911	pxor	%xmm1,%xmm13
1912	pshufb	.Lrol16(%rip),%xmm13
1913	paddd	%xmm13,%xmm9
1914	pxor	%xmm9,%xmm5
1915	movdqa	%xmm5,%xmm3
1916	pslld	$12,%xmm3
1917	psrld	$20,%xmm5
1918	pxor	%xmm3,%xmm5
1919	paddd	%xmm5,%xmm1
1920	pxor	%xmm1,%xmm13
1921	pshufb	.Lrol8(%rip),%xmm13
1922	paddd	%xmm13,%xmm9
1923	pxor	%xmm9,%xmm5
1924	movdqa	%xmm5,%xmm3
1925	pslld	$7,%xmm3
1926	psrld	$25,%xmm5
1927	pxor	%xmm3,%xmm5
1928.byte	102,15,58,15,237,4
1929.byte	102,69,15,58,15,201,8
1930.byte	102,69,15,58,15,237,12
1931	paddd	%xmm6,%xmm2
1932	pxor	%xmm2,%xmm14
1933	pshufb	.Lrol16(%rip),%xmm14
1934	paddd	%xmm14,%xmm10
1935	pxor	%xmm10,%xmm6
1936	movdqa	%xmm6,%xmm3
1937	pslld	$12,%xmm3
1938	psrld	$20,%xmm6
1939	pxor	%xmm3,%xmm6
1940	paddd	%xmm6,%xmm2
1941	pxor	%xmm2,%xmm14
1942	pshufb	.Lrol8(%rip),%xmm14
1943	paddd	%xmm14,%xmm10
1944	pxor	%xmm10,%xmm6
1945	movdqa	%xmm6,%xmm3
1946	pslld	$7,%xmm3
1947	psrld	$25,%xmm6
1948	pxor	%xmm3,%xmm6
1949.byte	102,15,58,15,246,4
1950.byte	102,69,15,58,15,210,8
1951.byte	102,69,15,58,15,246,12
1952	paddd	%xmm4,%xmm0
1953	pxor	%xmm0,%xmm12
1954	pshufb	.Lrol16(%rip),%xmm12
1955	paddd	%xmm12,%xmm8
1956	pxor	%xmm8,%xmm4
1957	movdqa	%xmm4,%xmm3
1958	pslld	$12,%xmm3
1959	psrld	$20,%xmm4
1960	pxor	%xmm3,%xmm4
1961	paddd	%xmm4,%xmm0
1962	pxor	%xmm0,%xmm12
1963	pshufb	.Lrol8(%rip),%xmm12
1964	paddd	%xmm12,%xmm8
1965	pxor	%xmm8,%xmm4
1966	movdqa	%xmm4,%xmm3
1967	pslld	$7,%xmm3
1968	psrld	$25,%xmm4
1969	pxor	%xmm3,%xmm4
1970.byte	102,15,58,15,228,12
1971.byte	102,69,15,58,15,192,8
1972.byte	102,69,15,58,15,228,4
1973	paddd	%xmm5,%xmm1
1974	pxor	%xmm1,%xmm13
1975	pshufb	.Lrol16(%rip),%xmm13
1976	paddd	%xmm13,%xmm9
1977	pxor	%xmm9,%xmm5
1978	movdqa	%xmm5,%xmm3
1979	pslld	$12,%xmm3
1980	psrld	$20,%xmm5
1981	pxor	%xmm3,%xmm5
1982	paddd	%xmm5,%xmm1
1983	pxor	%xmm1,%xmm13
1984	pshufb	.Lrol8(%rip),%xmm13
1985	paddd	%xmm13,%xmm9
1986	pxor	%xmm9,%xmm5
1987	movdqa	%xmm5,%xmm3
1988	pslld	$7,%xmm3
1989	psrld	$25,%xmm5
1990	pxor	%xmm3,%xmm5
1991.byte	102,15,58,15,237,12
1992.byte	102,69,15,58,15,201,8
1993.byte	102,69,15,58,15,237,4
1994	paddd	%xmm6,%xmm2
1995	pxor	%xmm2,%xmm14
1996	pshufb	.Lrol16(%rip),%xmm14
1997	paddd	%xmm14,%xmm10
1998	pxor	%xmm10,%xmm6
1999	movdqa	%xmm6,%xmm3
2000	pslld	$12,%xmm3
2001	psrld	$20,%xmm6
2002	pxor	%xmm3,%xmm6
2003	paddd	%xmm6,%xmm2
2004	pxor	%xmm2,%xmm14
2005	pshufb	.Lrol8(%rip),%xmm14
2006	paddd	%xmm14,%xmm10
2007	pxor	%xmm10,%xmm6
2008	movdqa	%xmm6,%xmm3
2009	pslld	$7,%xmm3
2010	psrld	$25,%xmm6
2011	pxor	%xmm3,%xmm6
2012.byte	102,15,58,15,246,12
2013.byte	102,69,15,58,15,210,8
2014.byte	102,69,15,58,15,246,4
2015
2016	decq	%r10
2017	jnz	.Lopen_sse_128_rounds
2018	paddd	.Lchacha20_consts(%rip),%xmm0
2019	paddd	.Lchacha20_consts(%rip),%xmm1
2020	paddd	.Lchacha20_consts(%rip),%xmm2
2021	paddd	%xmm7,%xmm4
2022	paddd	%xmm7,%xmm5
2023	paddd	%xmm7,%xmm6
2024	paddd	%xmm11,%xmm9
2025	paddd	%xmm11,%xmm10
2026	paddd	%xmm15,%xmm13
2027	paddd	.Lsse_inc(%rip),%xmm15
2028	paddd	%xmm15,%xmm14
2029
2030	pand	.Lclamp(%rip),%xmm0
2031	movdqa	%xmm0,0+0(%rbp)
2032	movdqa	%xmm4,0+16(%rbp)
2033
2034	movq	%r8,%r8
2035	call	poly_hash_ad_internal
2036.Lopen_sse_128_xor_hash:
2037	cmpq	$16,%rbx
2038	jb	.Lopen_sse_tail_16
2039	subq	$16,%rbx
2040	addq	0+0(%rsi),%r10
2041	adcq	8+0(%rsi),%r11
2042	adcq	$1,%r12
2043
2044
2045	movdqu	0(%rsi),%xmm3
2046	pxor	%xmm3,%xmm1
2047	movdqu	%xmm1,0(%rdi)
2048	leaq	16(%rsi),%rsi
2049	leaq	16(%rdi),%rdi
2050	movq	0+0+0(%rbp),%rax
2051	movq	%rax,%r15
2052	mulq	%r10
2053	movq	%rax,%r13
2054	movq	%rdx,%r14
2055	movq	0+0+0(%rbp),%rax
2056	mulq	%r11
2057	imulq	%r12,%r15
2058	addq	%rax,%r14
2059	adcq	%rdx,%r15
2060	movq	8+0+0(%rbp),%rax
2061	movq	%rax,%r9
2062	mulq	%r10
2063	addq	%rax,%r14
2064	adcq	$0,%rdx
2065	movq	%rdx,%r10
2066	movq	8+0+0(%rbp),%rax
2067	mulq	%r11
2068	addq	%rax,%r15
2069	adcq	$0,%rdx
2070	imulq	%r12,%r9
2071	addq	%r10,%r15
2072	adcq	%rdx,%r9
2073	movq	%r13,%r10
2074	movq	%r14,%r11
2075	movq	%r15,%r12
2076	andq	$3,%r12
2077	movq	%r15,%r13
2078	andq	$-4,%r13
2079	movq	%r9,%r14
2080	shrdq	$2,%r9,%r15
2081	shrq	$2,%r9
2082	addq	%r13,%r15
2083	adcq	%r14,%r9
2084	addq	%r15,%r10
2085	adcq	%r9,%r11
2086	adcq	$0,%r12
2087
2088
2089	movdqa	%xmm5,%xmm1
2090	movdqa	%xmm9,%xmm5
2091	movdqa	%xmm13,%xmm9
2092	movdqa	%xmm2,%xmm13
2093	movdqa	%xmm6,%xmm2
2094	movdqa	%xmm10,%xmm6
2095	movdqa	%xmm14,%xmm10
2096	jmp	.Lopen_sse_128_xor_hash
2097.size	chacha20_poly1305_open, .-chacha20_poly1305_open
2098.cfi_endproc
2099
2100
2101
2102
2103
2104
2105
2106.globl	chacha20_poly1305_seal
2107.hidden chacha20_poly1305_seal
2108.type	chacha20_poly1305_seal,@function
2109.align	64
2110chacha20_poly1305_seal:
2111.cfi_startproc
2112_CET_ENDBR
2113	pushq	%rbp
2114.cfi_adjust_cfa_offset	8
2115.cfi_offset	%rbp,-16
2116	pushq	%rbx
2117.cfi_adjust_cfa_offset	8
2118.cfi_offset	%rbx,-24
2119	pushq	%r12
2120.cfi_adjust_cfa_offset	8
2121.cfi_offset	%r12,-32
2122	pushq	%r13
2123.cfi_adjust_cfa_offset	8
2124.cfi_offset	%r13,-40
2125	pushq	%r14
2126.cfi_adjust_cfa_offset	8
2127.cfi_offset	%r14,-48
2128	pushq	%r15
2129.cfi_adjust_cfa_offset	8
2130.cfi_offset	%r15,-56
2131
2132
2133	pushq	%r9
2134.cfi_adjust_cfa_offset	8
2135.cfi_offset	%r9,-64
2136	subq	$288 + 0 + 32,%rsp
2137.cfi_adjust_cfa_offset	288 + 32
2138	leaq	32(%rsp),%rbp
2139	andq	$-32,%rbp
2140
2141	movq	56(%r9),%rbx
2142	addq	%rdx,%rbx
2143	movq	%r8,0+0+32(%rbp)
2144	movq	%rbx,8+0+32(%rbp)
2145	movq	%rdx,%rbx
2146
2147	movl	OPENSSL_ia32cap_P+8(%rip),%eax
2148	andl	$288,%eax
2149	xorl	$288,%eax
2150	jz	chacha20_poly1305_seal_avx2
2151
2152	cmpq	$128,%rbx
2153	jbe	.Lseal_sse_128
2154
2155	movdqa	.Lchacha20_consts(%rip),%xmm0
2156	movdqu	0(%r9),%xmm4
2157	movdqu	16(%r9),%xmm8
2158	movdqu	32(%r9),%xmm12
2159
2160	movdqa	%xmm0,%xmm1
2161	movdqa	%xmm0,%xmm2
2162	movdqa	%xmm0,%xmm3
2163	movdqa	%xmm4,%xmm5
2164	movdqa	%xmm4,%xmm6
2165	movdqa	%xmm4,%xmm7
2166	movdqa	%xmm8,%xmm9
2167	movdqa	%xmm8,%xmm10
2168	movdqa	%xmm8,%xmm11
2169	movdqa	%xmm12,%xmm15
2170	paddd	.Lsse_inc(%rip),%xmm12
2171	movdqa	%xmm12,%xmm14
2172	paddd	.Lsse_inc(%rip),%xmm12
2173	movdqa	%xmm12,%xmm13
2174	paddd	.Lsse_inc(%rip),%xmm12
2175
2176	movdqa	%xmm4,0+48(%rbp)
2177	movdqa	%xmm8,0+64(%rbp)
2178	movdqa	%xmm12,0+96(%rbp)
2179	movdqa	%xmm13,0+112(%rbp)
2180	movdqa	%xmm14,0+128(%rbp)
2181	movdqa	%xmm15,0+144(%rbp)
2182	movq	$10,%r10
2183.Lseal_sse_init_rounds:
2184	movdqa	%xmm8,0+80(%rbp)
2185	movdqa	.Lrol16(%rip),%xmm8
2186	paddd	%xmm7,%xmm3
2187	paddd	%xmm6,%xmm2
2188	paddd	%xmm5,%xmm1
2189	paddd	%xmm4,%xmm0
2190	pxor	%xmm3,%xmm15
2191	pxor	%xmm2,%xmm14
2192	pxor	%xmm1,%xmm13
2193	pxor	%xmm0,%xmm12
2194.byte	102,69,15,56,0,248
2195.byte	102,69,15,56,0,240
2196.byte	102,69,15,56,0,232
2197.byte	102,69,15,56,0,224
2198	movdqa	0+80(%rbp),%xmm8
2199	paddd	%xmm15,%xmm11
2200	paddd	%xmm14,%xmm10
2201	paddd	%xmm13,%xmm9
2202	paddd	%xmm12,%xmm8
2203	pxor	%xmm11,%xmm7
2204	pxor	%xmm10,%xmm6
2205	pxor	%xmm9,%xmm5
2206	pxor	%xmm8,%xmm4
2207	movdqa	%xmm8,0+80(%rbp)
2208	movdqa	%xmm7,%xmm8
2209	psrld	$20,%xmm8
2210	pslld	$32-20,%xmm7
2211	pxor	%xmm8,%xmm7
2212	movdqa	%xmm6,%xmm8
2213	psrld	$20,%xmm8
2214	pslld	$32-20,%xmm6
2215	pxor	%xmm8,%xmm6
2216	movdqa	%xmm5,%xmm8
2217	psrld	$20,%xmm8
2218	pslld	$32-20,%xmm5
2219	pxor	%xmm8,%xmm5
2220	movdqa	%xmm4,%xmm8
2221	psrld	$20,%xmm8
2222	pslld	$32-20,%xmm4
2223	pxor	%xmm8,%xmm4
2224	movdqa	.Lrol8(%rip),%xmm8
2225	paddd	%xmm7,%xmm3
2226	paddd	%xmm6,%xmm2
2227	paddd	%xmm5,%xmm1
2228	paddd	%xmm4,%xmm0
2229	pxor	%xmm3,%xmm15
2230	pxor	%xmm2,%xmm14
2231	pxor	%xmm1,%xmm13
2232	pxor	%xmm0,%xmm12
2233.byte	102,69,15,56,0,248
2234.byte	102,69,15,56,0,240
2235.byte	102,69,15,56,0,232
2236.byte	102,69,15,56,0,224
2237	movdqa	0+80(%rbp),%xmm8
2238	paddd	%xmm15,%xmm11
2239	paddd	%xmm14,%xmm10
2240	paddd	%xmm13,%xmm9
2241	paddd	%xmm12,%xmm8
2242	pxor	%xmm11,%xmm7
2243	pxor	%xmm10,%xmm6
2244	pxor	%xmm9,%xmm5
2245	pxor	%xmm8,%xmm4
2246	movdqa	%xmm8,0+80(%rbp)
2247	movdqa	%xmm7,%xmm8
2248	psrld	$25,%xmm8
2249	pslld	$32-25,%xmm7
2250	pxor	%xmm8,%xmm7
2251	movdqa	%xmm6,%xmm8
2252	psrld	$25,%xmm8
2253	pslld	$32-25,%xmm6
2254	pxor	%xmm8,%xmm6
2255	movdqa	%xmm5,%xmm8
2256	psrld	$25,%xmm8
2257	pslld	$32-25,%xmm5
2258	pxor	%xmm8,%xmm5
2259	movdqa	%xmm4,%xmm8
2260	psrld	$25,%xmm8
2261	pslld	$32-25,%xmm4
2262	pxor	%xmm8,%xmm4
2263	movdqa	0+80(%rbp),%xmm8
2264.byte	102,15,58,15,255,4
2265.byte	102,69,15,58,15,219,8
2266.byte	102,69,15,58,15,255,12
2267.byte	102,15,58,15,246,4
2268.byte	102,69,15,58,15,210,8
2269.byte	102,69,15,58,15,246,12
2270.byte	102,15,58,15,237,4
2271.byte	102,69,15,58,15,201,8
2272.byte	102,69,15,58,15,237,12
2273.byte	102,15,58,15,228,4
2274.byte	102,69,15,58,15,192,8
2275.byte	102,69,15,58,15,228,12
2276	movdqa	%xmm8,0+80(%rbp)
2277	movdqa	.Lrol16(%rip),%xmm8
2278	paddd	%xmm7,%xmm3
2279	paddd	%xmm6,%xmm2
2280	paddd	%xmm5,%xmm1
2281	paddd	%xmm4,%xmm0
2282	pxor	%xmm3,%xmm15
2283	pxor	%xmm2,%xmm14
2284	pxor	%xmm1,%xmm13
2285	pxor	%xmm0,%xmm12
2286.byte	102,69,15,56,0,248
2287.byte	102,69,15,56,0,240
2288.byte	102,69,15,56,0,232
2289.byte	102,69,15,56,0,224
2290	movdqa	0+80(%rbp),%xmm8
2291	paddd	%xmm15,%xmm11
2292	paddd	%xmm14,%xmm10
2293	paddd	%xmm13,%xmm9
2294	paddd	%xmm12,%xmm8
2295	pxor	%xmm11,%xmm7
2296	pxor	%xmm10,%xmm6
2297	pxor	%xmm9,%xmm5
2298	pxor	%xmm8,%xmm4
2299	movdqa	%xmm8,0+80(%rbp)
2300	movdqa	%xmm7,%xmm8
2301	psrld	$20,%xmm8
2302	pslld	$32-20,%xmm7
2303	pxor	%xmm8,%xmm7
2304	movdqa	%xmm6,%xmm8
2305	psrld	$20,%xmm8
2306	pslld	$32-20,%xmm6
2307	pxor	%xmm8,%xmm6
2308	movdqa	%xmm5,%xmm8
2309	psrld	$20,%xmm8
2310	pslld	$32-20,%xmm5
2311	pxor	%xmm8,%xmm5
2312	movdqa	%xmm4,%xmm8
2313	psrld	$20,%xmm8
2314	pslld	$32-20,%xmm4
2315	pxor	%xmm8,%xmm4
2316	movdqa	.Lrol8(%rip),%xmm8
2317	paddd	%xmm7,%xmm3
2318	paddd	%xmm6,%xmm2
2319	paddd	%xmm5,%xmm1
2320	paddd	%xmm4,%xmm0
2321	pxor	%xmm3,%xmm15
2322	pxor	%xmm2,%xmm14
2323	pxor	%xmm1,%xmm13
2324	pxor	%xmm0,%xmm12
2325.byte	102,69,15,56,0,248
2326.byte	102,69,15,56,0,240
2327.byte	102,69,15,56,0,232
2328.byte	102,69,15,56,0,224
2329	movdqa	0+80(%rbp),%xmm8
2330	paddd	%xmm15,%xmm11
2331	paddd	%xmm14,%xmm10
2332	paddd	%xmm13,%xmm9
2333	paddd	%xmm12,%xmm8
2334	pxor	%xmm11,%xmm7
2335	pxor	%xmm10,%xmm6
2336	pxor	%xmm9,%xmm5
2337	pxor	%xmm8,%xmm4
2338	movdqa	%xmm8,0+80(%rbp)
2339	movdqa	%xmm7,%xmm8
2340	psrld	$25,%xmm8
2341	pslld	$32-25,%xmm7
2342	pxor	%xmm8,%xmm7
2343	movdqa	%xmm6,%xmm8
2344	psrld	$25,%xmm8
2345	pslld	$32-25,%xmm6
2346	pxor	%xmm8,%xmm6
2347	movdqa	%xmm5,%xmm8
2348	psrld	$25,%xmm8
2349	pslld	$32-25,%xmm5
2350	pxor	%xmm8,%xmm5
2351	movdqa	%xmm4,%xmm8
2352	psrld	$25,%xmm8
2353	pslld	$32-25,%xmm4
2354	pxor	%xmm8,%xmm4
2355	movdqa	0+80(%rbp),%xmm8
2356.byte	102,15,58,15,255,12
2357.byte	102,69,15,58,15,219,8
2358.byte	102,69,15,58,15,255,4
2359.byte	102,15,58,15,246,12
2360.byte	102,69,15,58,15,210,8
2361.byte	102,69,15,58,15,246,4
2362.byte	102,15,58,15,237,12
2363.byte	102,69,15,58,15,201,8
2364.byte	102,69,15,58,15,237,4
2365.byte	102,15,58,15,228,12
2366.byte	102,69,15,58,15,192,8
2367.byte	102,69,15,58,15,228,4
2368
2369	decq	%r10
2370	jnz	.Lseal_sse_init_rounds
2371	paddd	.Lchacha20_consts(%rip),%xmm3
2372	paddd	0+48(%rbp),%xmm7
2373	paddd	0+64(%rbp),%xmm11
2374	paddd	0+144(%rbp),%xmm15
2375	paddd	.Lchacha20_consts(%rip),%xmm2
2376	paddd	0+48(%rbp),%xmm6
2377	paddd	0+64(%rbp),%xmm10
2378	paddd	0+128(%rbp),%xmm14
2379	paddd	.Lchacha20_consts(%rip),%xmm1
2380	paddd	0+48(%rbp),%xmm5
2381	paddd	0+64(%rbp),%xmm9
2382	paddd	0+112(%rbp),%xmm13
2383	paddd	.Lchacha20_consts(%rip),%xmm0
2384	paddd	0+48(%rbp),%xmm4
2385	paddd	0+64(%rbp),%xmm8
2386	paddd	0+96(%rbp),%xmm12
2387
2388
2389	pand	.Lclamp(%rip),%xmm3
2390	movdqa	%xmm3,0+0(%rbp)
2391	movdqa	%xmm7,0+16(%rbp)
2392
2393	movq	%r8,%r8
2394	call	poly_hash_ad_internal
2395	movdqu	0 + 0(%rsi),%xmm3
2396	movdqu	16 + 0(%rsi),%xmm7
2397	movdqu	32 + 0(%rsi),%xmm11
2398	movdqu	48 + 0(%rsi),%xmm15
2399	pxor	%xmm3,%xmm2
2400	pxor	%xmm7,%xmm6
2401	pxor	%xmm11,%xmm10
2402	pxor	%xmm14,%xmm15
2403	movdqu	%xmm2,0 + 0(%rdi)
2404	movdqu	%xmm6,16 + 0(%rdi)
2405	movdqu	%xmm10,32 + 0(%rdi)
2406	movdqu	%xmm15,48 + 0(%rdi)
2407	movdqu	0 + 64(%rsi),%xmm3
2408	movdqu	16 + 64(%rsi),%xmm7
2409	movdqu	32 + 64(%rsi),%xmm11
2410	movdqu	48 + 64(%rsi),%xmm15
2411	pxor	%xmm3,%xmm1
2412	pxor	%xmm7,%xmm5
2413	pxor	%xmm11,%xmm9
2414	pxor	%xmm13,%xmm15
2415	movdqu	%xmm1,0 + 64(%rdi)
2416	movdqu	%xmm5,16 + 64(%rdi)
2417	movdqu	%xmm9,32 + 64(%rdi)
2418	movdqu	%xmm15,48 + 64(%rdi)
2419
2420	cmpq	$192,%rbx
2421	ja	.Lseal_sse_main_init
2422	movq	$128,%rcx
2423	subq	$128,%rbx
2424	leaq	128(%rsi),%rsi
2425	jmp	.Lseal_sse_128_tail_hash
2426.Lseal_sse_main_init:
2427	movdqu	0 + 128(%rsi),%xmm3
2428	movdqu	16 + 128(%rsi),%xmm7
2429	movdqu	32 + 128(%rsi),%xmm11
2430	movdqu	48 + 128(%rsi),%xmm15
2431	pxor	%xmm3,%xmm0
2432	pxor	%xmm7,%xmm4
2433	pxor	%xmm11,%xmm8
2434	pxor	%xmm12,%xmm15
2435	movdqu	%xmm0,0 + 128(%rdi)
2436	movdqu	%xmm4,16 + 128(%rdi)
2437	movdqu	%xmm8,32 + 128(%rdi)
2438	movdqu	%xmm15,48 + 128(%rdi)
2439
2440	movq	$192,%rcx
2441	subq	$192,%rbx
2442	leaq	192(%rsi),%rsi
2443	movq	$2,%rcx
2444	movq	$8,%r8
2445	cmpq	$64,%rbx
2446	jbe	.Lseal_sse_tail_64
2447	cmpq	$128,%rbx
2448	jbe	.Lseal_sse_tail_128
2449	cmpq	$192,%rbx
2450	jbe	.Lseal_sse_tail_192
2451
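// Main SSE sealing loop: each pass generates four ChaCha20 blocks (256 bytes
// of keystream) in %xmm0-%xmm15, while the scalar Poly1305 multiply is
// interleaved with the vector rounds and absorbs ciphertext written on the
// previous pass, 16 bytes per hash step, read through %rdi.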
2452.Lseal_sse_main_loop:
2453	movdqa	.Lchacha20_consts(%rip),%xmm0
2454	movdqa	0+48(%rbp),%xmm4
2455	movdqa	0+64(%rbp),%xmm8
2456	movdqa	%xmm0,%xmm1
2457	movdqa	%xmm4,%xmm5
2458	movdqa	%xmm8,%xmm9
2459	movdqa	%xmm0,%xmm2
2460	movdqa	%xmm4,%xmm6
2461	movdqa	%xmm8,%xmm10
2462	movdqa	%xmm0,%xmm3
2463	movdqa	%xmm4,%xmm7
2464	movdqa	%xmm8,%xmm11
2465	movdqa	0+96(%rbp),%xmm15
2466	paddd	.Lsse_inc(%rip),%xmm15
2467	movdqa	%xmm15,%xmm14
2468	paddd	.Lsse_inc(%rip),%xmm14
2469	movdqa	%xmm14,%xmm13
2470	paddd	.Lsse_inc(%rip),%xmm13
2471	movdqa	%xmm13,%xmm12
2472	paddd	.Lsse_inc(%rip),%xmm12
2473	movdqa	%xmm12,0+96(%rbp)
2474	movdqa	%xmm13,0+112(%rbp)
2475	movdqa	%xmm14,0+128(%rbp)
2476	movdqa	%xmm15,0+144(%rbp)
2477
2478.align	32
2479.Lseal_sse_main_rounds:
2480	movdqa	%xmm8,0+80(%rbp)
2481	movdqa	.Lrol16(%rip),%xmm8
2482	paddd	%xmm7,%xmm3
2483	paddd	%xmm6,%xmm2
2484	paddd	%xmm5,%xmm1
2485	paddd	%xmm4,%xmm0
2486	pxor	%xmm3,%xmm15
2487	pxor	%xmm2,%xmm14
2488	pxor	%xmm1,%xmm13
2489	pxor	%xmm0,%xmm12
2490.byte	102,69,15,56,0,248
2491.byte	102,69,15,56,0,240
2492.byte	102,69,15,56,0,232
2493.byte	102,69,15,56,0,224
2494	movdqa	0+80(%rbp),%xmm8
2495	paddd	%xmm15,%xmm11
2496	paddd	%xmm14,%xmm10
2497	paddd	%xmm13,%xmm9
2498	paddd	%xmm12,%xmm8
2499	pxor	%xmm11,%xmm7
2500	addq	0+0(%rdi),%r10
2501	adcq	8+0(%rdi),%r11
2502	adcq	$1,%r12
2503	pxor	%xmm10,%xmm6
2504	pxor	%xmm9,%xmm5
2505	pxor	%xmm8,%xmm4
2506	movdqa	%xmm8,0+80(%rbp)
2507	movdqa	%xmm7,%xmm8
2508	psrld	$20,%xmm8
2509	pslld	$32-20,%xmm7
2510	pxor	%xmm8,%xmm7
2511	movdqa	%xmm6,%xmm8
2512	psrld	$20,%xmm8
2513	pslld	$32-20,%xmm6
2514	pxor	%xmm8,%xmm6
2515	movdqa	%xmm5,%xmm8
2516	psrld	$20,%xmm8
2517	pslld	$32-20,%xmm5
2518	pxor	%xmm8,%xmm5
2519	movdqa	%xmm4,%xmm8
2520	psrld	$20,%xmm8
2521	pslld	$32-20,%xmm4
2522	pxor	%xmm8,%xmm4
2523	movq	0+0+0(%rbp),%rax
2524	movq	%rax,%r15
2525	mulq	%r10
2526	movq	%rax,%r13
2527	movq	%rdx,%r14
2528	movq	0+0+0(%rbp),%rax
2529	mulq	%r11
2530	imulq	%r12,%r15
2531	addq	%rax,%r14
2532	adcq	%rdx,%r15
2533	movdqa	.Lrol8(%rip),%xmm8
2534	paddd	%xmm7,%xmm3
2535	paddd	%xmm6,%xmm2
2536	paddd	%xmm5,%xmm1
2537	paddd	%xmm4,%xmm0
2538	pxor	%xmm3,%xmm15
2539	pxor	%xmm2,%xmm14
2540	pxor	%xmm1,%xmm13
2541	pxor	%xmm0,%xmm12
2542.byte	102,69,15,56,0,248
2543.byte	102,69,15,56,0,240
2544.byte	102,69,15,56,0,232
2545.byte	102,69,15,56,0,224
2546	movdqa	0+80(%rbp),%xmm8
2547	paddd	%xmm15,%xmm11
2548	paddd	%xmm14,%xmm10
2549	paddd	%xmm13,%xmm9
2550	paddd	%xmm12,%xmm8
2551	pxor	%xmm11,%xmm7
2552	pxor	%xmm10,%xmm6
2553	movq	8+0+0(%rbp),%rax
2554	movq	%rax,%r9
2555	mulq	%r10
2556	addq	%rax,%r14
2557	adcq	$0,%rdx
2558	movq	%rdx,%r10
2559	movq	8+0+0(%rbp),%rax
2560	mulq	%r11
2561	addq	%rax,%r15
2562	adcq	$0,%rdx
2563	pxor	%xmm9,%xmm5
2564	pxor	%xmm8,%xmm4
2565	movdqa	%xmm8,0+80(%rbp)
2566	movdqa	%xmm7,%xmm8
2567	psrld	$25,%xmm8
2568	pslld	$32-25,%xmm7
2569	pxor	%xmm8,%xmm7
2570	movdqa	%xmm6,%xmm8
2571	psrld	$25,%xmm8
2572	pslld	$32-25,%xmm6
2573	pxor	%xmm8,%xmm6
2574	movdqa	%xmm5,%xmm8
2575	psrld	$25,%xmm8
2576	pslld	$32-25,%xmm5
2577	pxor	%xmm8,%xmm5
2578	movdqa	%xmm4,%xmm8
2579	psrld	$25,%xmm8
2580	pslld	$32-25,%xmm4
2581	pxor	%xmm8,%xmm4
2582	movdqa	0+80(%rbp),%xmm8
2583	imulq	%r12,%r9
2584	addq	%r10,%r15
2585	adcq	%rdx,%r9
2586.byte	102,15,58,15,255,4
2587.byte	102,69,15,58,15,219,8
2588.byte	102,69,15,58,15,255,12
2589.byte	102,15,58,15,246,4
2590.byte	102,69,15,58,15,210,8
2591.byte	102,69,15,58,15,246,12
2592.byte	102,15,58,15,237,4
2593.byte	102,69,15,58,15,201,8
2594.byte	102,69,15,58,15,237,12
2595.byte	102,15,58,15,228,4
2596.byte	102,69,15,58,15,192,8
2597.byte	102,69,15,58,15,228,12
2598	movdqa	%xmm8,0+80(%rbp)
2599	movdqa	.Lrol16(%rip),%xmm8
2600	paddd	%xmm7,%xmm3
2601	paddd	%xmm6,%xmm2
2602	paddd	%xmm5,%xmm1
2603	paddd	%xmm4,%xmm0
2604	pxor	%xmm3,%xmm15
2605	pxor	%xmm2,%xmm14
2606	movq	%r13,%r10
2607	movq	%r14,%r11
2608	movq	%r15,%r12
2609	andq	$3,%r12
2610	movq	%r15,%r13
2611	andq	$-4,%r13
2612	movq	%r9,%r14
2613	shrdq	$2,%r9,%r15
2614	shrq	$2,%r9
2615	addq	%r13,%r15
2616	adcq	%r14,%r9
2617	addq	%r15,%r10
2618	adcq	%r9,%r11
2619	adcq	$0,%r12
2620	pxor	%xmm1,%xmm13
2621	pxor	%xmm0,%xmm12
2622.byte	102,69,15,56,0,248
2623.byte	102,69,15,56,0,240
2624.byte	102,69,15,56,0,232
2625.byte	102,69,15,56,0,224
2626	movdqa	0+80(%rbp),%xmm8
2627	paddd	%xmm15,%xmm11
2628	paddd	%xmm14,%xmm10
2629	paddd	%xmm13,%xmm9
2630	paddd	%xmm12,%xmm8
2631	pxor	%xmm11,%xmm7
2632	pxor	%xmm10,%xmm6
2633	pxor	%xmm9,%xmm5
2634	pxor	%xmm8,%xmm4
2635	movdqa	%xmm8,0+80(%rbp)
2636	movdqa	%xmm7,%xmm8
2637	psrld	$20,%xmm8
2638	pslld	$32-20,%xmm7
2639	pxor	%xmm8,%xmm7
2640	movdqa	%xmm6,%xmm8
2641	psrld	$20,%xmm8
2642	pslld	$32-20,%xmm6
2643	pxor	%xmm8,%xmm6
2644	movdqa	%xmm5,%xmm8
2645	psrld	$20,%xmm8
2646	pslld	$32-20,%xmm5
2647	pxor	%xmm8,%xmm5
2648	movdqa	%xmm4,%xmm8
2649	psrld	$20,%xmm8
2650	pslld	$32-20,%xmm4
2651	pxor	%xmm8,%xmm4
2652	movdqa	.Lrol8(%rip),%xmm8
2653	paddd	%xmm7,%xmm3
2654	paddd	%xmm6,%xmm2
2655	paddd	%xmm5,%xmm1
2656	paddd	%xmm4,%xmm0
2657	pxor	%xmm3,%xmm15
2658	pxor	%xmm2,%xmm14
2659	pxor	%xmm1,%xmm13
2660	pxor	%xmm0,%xmm12
2661.byte	102,69,15,56,0,248
2662.byte	102,69,15,56,0,240
2663.byte	102,69,15,56,0,232
2664.byte	102,69,15,56,0,224
2665	movdqa	0+80(%rbp),%xmm8
2666	paddd	%xmm15,%xmm11
2667	paddd	%xmm14,%xmm10
2668	paddd	%xmm13,%xmm9
2669	paddd	%xmm12,%xmm8
2670	pxor	%xmm11,%xmm7
2671	pxor	%xmm10,%xmm6
2672	pxor	%xmm9,%xmm5
2673	pxor	%xmm8,%xmm4
2674	movdqa	%xmm8,0+80(%rbp)
2675	movdqa	%xmm7,%xmm8
2676	psrld	$25,%xmm8
2677	pslld	$32-25,%xmm7
2678	pxor	%xmm8,%xmm7
2679	movdqa	%xmm6,%xmm8
2680	psrld	$25,%xmm8
2681	pslld	$32-25,%xmm6
2682	pxor	%xmm8,%xmm6
2683	movdqa	%xmm5,%xmm8
2684	psrld	$25,%xmm8
2685	pslld	$32-25,%xmm5
2686	pxor	%xmm8,%xmm5
2687	movdqa	%xmm4,%xmm8
2688	psrld	$25,%xmm8
2689	pslld	$32-25,%xmm4
2690	pxor	%xmm8,%xmm4
2691	movdqa	0+80(%rbp),%xmm8
2692.byte	102,15,58,15,255,12
2693.byte	102,69,15,58,15,219,8
2694.byte	102,69,15,58,15,255,4
2695.byte	102,15,58,15,246,12
2696.byte	102,69,15,58,15,210,8
2697.byte	102,69,15,58,15,246,4
2698.byte	102,15,58,15,237,12
2699.byte	102,69,15,58,15,201,8
2700.byte	102,69,15,58,15,237,4
2701.byte	102,15,58,15,228,12
2702.byte	102,69,15,58,15,192,8
2703.byte	102,69,15,58,15,228,4
2704
2705	leaq	16(%rdi),%rdi
2706	decq	%r8
2707	jge	.Lseal_sse_main_rounds
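// One extra Poly1305 block per outer iteration.  Each hash step computes
// h = (h + m + 2^128) * r mod (2^130 - 5), with h in %r10:%r11:%r12 and the
// clamped r at 0(%rbp)/8(%rbp); the andq $-4 / shrdq $2 sequence folds the
// bits at and above 2^130 back in multiplied by 5 (5*c = 4*c + c).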
2708	addq	0+0(%rdi),%r10
2709	adcq	8+0(%rdi),%r11
2710	adcq	$1,%r12
2711	movq	0+0+0(%rbp),%rax
2712	movq	%rax,%r15
2713	mulq	%r10
2714	movq	%rax,%r13
2715	movq	%rdx,%r14
2716	movq	0+0+0(%rbp),%rax
2717	mulq	%r11
2718	imulq	%r12,%r15
2719	addq	%rax,%r14
2720	adcq	%rdx,%r15
2721	movq	8+0+0(%rbp),%rax
2722	movq	%rax,%r9
2723	mulq	%r10
2724	addq	%rax,%r14
2725	adcq	$0,%rdx
2726	movq	%rdx,%r10
2727	movq	8+0+0(%rbp),%rax
2728	mulq	%r11
2729	addq	%rax,%r15
2730	adcq	$0,%rdx
2731	imulq	%r12,%r9
2732	addq	%r10,%r15
2733	adcq	%rdx,%r9
2734	movq	%r13,%r10
2735	movq	%r14,%r11
2736	movq	%r15,%r12
2737	andq	$3,%r12
2738	movq	%r15,%r13
2739	andq	$-4,%r13
2740	movq	%r9,%r14
2741	shrdq	$2,%r9,%r15
2742	shrq	$2,%r9
2743	addq	%r13,%r15
2744	adcq	%r14,%r9
2745	addq	%r15,%r10
2746	adcq	%r9,%r11
2747	adcq	$0,%r12
2748
2749	leaq	16(%rdi),%rdi
2750	decq	%rcx
2751	jg	.Lseal_sse_main_rounds
2752	paddd	.Lchacha20_consts(%rip),%xmm3
2753	paddd	0+48(%rbp),%xmm7
2754	paddd	0+64(%rbp),%xmm11
2755	paddd	0+144(%rbp),%xmm15
2756	paddd	.Lchacha20_consts(%rip),%xmm2
2757	paddd	0+48(%rbp),%xmm6
2758	paddd	0+64(%rbp),%xmm10
2759	paddd	0+128(%rbp),%xmm14
2760	paddd	.Lchacha20_consts(%rip),%xmm1
2761	paddd	0+48(%rbp),%xmm5
2762	paddd	0+64(%rbp),%xmm9
2763	paddd	0+112(%rbp),%xmm13
2764	paddd	.Lchacha20_consts(%rip),%xmm0
2765	paddd	0+48(%rbp),%xmm4
2766	paddd	0+64(%rbp),%xmm8
2767	paddd	0+96(%rbp),%xmm12
2768
2769	movdqa	%xmm14,0+80(%rbp)
2770	movdqa	%xmm14,0+80(%rbp)
2771	movdqu	0 + 0(%rsi),%xmm14
2772	pxor	%xmm3,%xmm14
2773	movdqu	%xmm14,0 + 0(%rdi)
2774	movdqu	16 + 0(%rsi),%xmm14
2775	pxor	%xmm7,%xmm14
2776	movdqu	%xmm14,16 + 0(%rdi)
2777	movdqu	32 + 0(%rsi),%xmm14
2778	pxor	%xmm11,%xmm14
2779	movdqu	%xmm14,32 + 0(%rdi)
2780	movdqu	48 + 0(%rsi),%xmm14
2781	pxor	%xmm15,%xmm14
2782	movdqu	%xmm14,48 + 0(%rdi)
2783
2784	movdqa	0+80(%rbp),%xmm14
2785	movdqu	0 + 64(%rsi),%xmm3
2786	movdqu	16 + 64(%rsi),%xmm7
2787	movdqu	32 + 64(%rsi),%xmm11
2788	movdqu	48 + 64(%rsi),%xmm15
2789	pxor	%xmm3,%xmm2
2790	pxor	%xmm7,%xmm6
2791	pxor	%xmm11,%xmm10
2792	pxor	%xmm14,%xmm15
2793	movdqu	%xmm2,0 + 64(%rdi)
2794	movdqu	%xmm6,16 + 64(%rdi)
2795	movdqu	%xmm10,32 + 64(%rdi)
2796	movdqu	%xmm15,48 + 64(%rdi)
2797	movdqu	0 + 128(%rsi),%xmm3
2798	movdqu	16 + 128(%rsi),%xmm7
2799	movdqu	32 + 128(%rsi),%xmm11
2800	movdqu	48 + 128(%rsi),%xmm15
2801	pxor	%xmm3,%xmm1
2802	pxor	%xmm7,%xmm5
2803	pxor	%xmm11,%xmm9
2804	pxor	%xmm13,%xmm15
2805	movdqu	%xmm1,0 + 128(%rdi)
2806	movdqu	%xmm5,16 + 128(%rdi)
2807	movdqu	%xmm9,32 + 128(%rdi)
2808	movdqu	%xmm15,48 + 128(%rdi)
2809
2810	cmpq	$256,%rbx
2811	ja	.Lseal_sse_main_loop_xor
2812
2813	movq	$192,%rcx
2814	subq	$192,%rbx
2815	leaq	192(%rsi),%rsi
2816	jmp	.Lseal_sse_128_tail_hash
2817.Lseal_sse_main_loop_xor:
2818	movdqu	0 + 192(%rsi),%xmm3
2819	movdqu	16 + 192(%rsi),%xmm7
2820	movdqu	32 + 192(%rsi),%xmm11
2821	movdqu	48 + 192(%rsi),%xmm15
2822	pxor	%xmm3,%xmm0
2823	pxor	%xmm7,%xmm4
2824	pxor	%xmm11,%xmm8
2825	pxor	%xmm12,%xmm15
2826	movdqu	%xmm0,0 + 192(%rdi)
2827	movdqu	%xmm4,16 + 192(%rdi)
2828	movdqu	%xmm8,32 + 192(%rdi)
2829	movdqu	%xmm15,48 + 192(%rdi)
2830
2831	leaq	256(%rsi),%rsi
2832	subq	$256,%rbx
2833	movq	$6,%rcx
2834	movq	$4,%r8
2835	cmpq	$192,%rbx
2836	jg	.Lseal_sse_main_loop
2837	movq	%rbx,%rcx
2838	testq	%rbx,%rbx
2839	je	.Lseal_sse_128_tail_hash
2840	movq	$6,%rcx
2841	cmpq	$128,%rbx
2842	ja	.Lseal_sse_tail_192
2843	cmpq	$64,%rbx
2844	ja	.Lseal_sse_tail_128
2845
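// Seal tails: generate one, two or three final ChaCha20 blocks for the
// remaining <=64, <=128 or <=192 bytes, still hashing ciphertext that has
// already been written while the keystream is computed.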
2846.Lseal_sse_tail_64:
2847	movdqa	.Lchacha20_consts(%rip),%xmm0
2848	movdqa	0+48(%rbp),%xmm4
2849	movdqa	0+64(%rbp),%xmm8
2850	movdqa	0+96(%rbp),%xmm12
2851	paddd	.Lsse_inc(%rip),%xmm12
2852	movdqa	%xmm12,0+96(%rbp)
2853
2854.Lseal_sse_tail_64_rounds_and_x2hash:
2855	addq	0+0(%rdi),%r10
2856	adcq	8+0(%rdi),%r11
2857	adcq	$1,%r12
2858	movq	0+0+0(%rbp),%rax
2859	movq	%rax,%r15
2860	mulq	%r10
2861	movq	%rax,%r13
2862	movq	%rdx,%r14
2863	movq	0+0+0(%rbp),%rax
2864	mulq	%r11
2865	imulq	%r12,%r15
2866	addq	%rax,%r14
2867	adcq	%rdx,%r15
2868	movq	8+0+0(%rbp),%rax
2869	movq	%rax,%r9
2870	mulq	%r10
2871	addq	%rax,%r14
2872	adcq	$0,%rdx
2873	movq	%rdx,%r10
2874	movq	8+0+0(%rbp),%rax
2875	mulq	%r11
2876	addq	%rax,%r15
2877	adcq	$0,%rdx
2878	imulq	%r12,%r9
2879	addq	%r10,%r15
2880	adcq	%rdx,%r9
2881	movq	%r13,%r10
2882	movq	%r14,%r11
2883	movq	%r15,%r12
2884	andq	$3,%r12
2885	movq	%r15,%r13
2886	andq	$-4,%r13
2887	movq	%r9,%r14
2888	shrdq	$2,%r9,%r15
2889	shrq	$2,%r9
2890	addq	%r13,%r15
2891	adcq	%r14,%r9
2892	addq	%r15,%r10
2893	adcq	%r9,%r11
2894	adcq	$0,%r12
2895
2896	leaq	16(%rdi),%rdi
2897.Lseal_sse_tail_64_rounds_and_x1hash:
2898	paddd	%xmm4,%xmm0
2899	pxor	%xmm0,%xmm12
2900	pshufb	.Lrol16(%rip),%xmm12
2901	paddd	%xmm12,%xmm8
2902	pxor	%xmm8,%xmm4
2903	movdqa	%xmm4,%xmm3
2904	pslld	$12,%xmm3
2905	psrld	$20,%xmm4
2906	pxor	%xmm3,%xmm4
2907	paddd	%xmm4,%xmm0
2908	pxor	%xmm0,%xmm12
2909	pshufb	.Lrol8(%rip),%xmm12
2910	paddd	%xmm12,%xmm8
2911	pxor	%xmm8,%xmm4
2912	movdqa	%xmm4,%xmm3
2913	pslld	$7,%xmm3
2914	psrld	$25,%xmm4
2915	pxor	%xmm3,%xmm4
2916.byte	102,15,58,15,228,4
2917.byte	102,69,15,58,15,192,8
2918.byte	102,69,15,58,15,228,12
2919	paddd	%xmm4,%xmm0
2920	pxor	%xmm0,%xmm12
2921	pshufb	.Lrol16(%rip),%xmm12
2922	paddd	%xmm12,%xmm8
2923	pxor	%xmm8,%xmm4
2924	movdqa	%xmm4,%xmm3
2925	pslld	$12,%xmm3
2926	psrld	$20,%xmm4
2927	pxor	%xmm3,%xmm4
2928	paddd	%xmm4,%xmm0
2929	pxor	%xmm0,%xmm12
2930	pshufb	.Lrol8(%rip),%xmm12
2931	paddd	%xmm12,%xmm8
2932	pxor	%xmm8,%xmm4
2933	movdqa	%xmm4,%xmm3
2934	pslld	$7,%xmm3
2935	psrld	$25,%xmm4
2936	pxor	%xmm3,%xmm4
2937.byte	102,15,58,15,228,12
2938.byte	102,69,15,58,15,192,8
2939.byte	102,69,15,58,15,228,4
2940	addq	0+0(%rdi),%r10
2941	adcq	8+0(%rdi),%r11
2942	adcq	$1,%r12
2943	movq	0+0+0(%rbp),%rax
2944	movq	%rax,%r15
2945	mulq	%r10
2946	movq	%rax,%r13
2947	movq	%rdx,%r14
2948	movq	0+0+0(%rbp),%rax
2949	mulq	%r11
2950	imulq	%r12,%r15
2951	addq	%rax,%r14
2952	adcq	%rdx,%r15
2953	movq	8+0+0(%rbp),%rax
2954	movq	%rax,%r9
2955	mulq	%r10
2956	addq	%rax,%r14
2957	adcq	$0,%rdx
2958	movq	%rdx,%r10
2959	movq	8+0+0(%rbp),%rax
2960	mulq	%r11
2961	addq	%rax,%r15
2962	adcq	$0,%rdx
2963	imulq	%r12,%r9
2964	addq	%r10,%r15
2965	adcq	%rdx,%r9
2966	movq	%r13,%r10
2967	movq	%r14,%r11
2968	movq	%r15,%r12
2969	andq	$3,%r12
2970	movq	%r15,%r13
2971	andq	$-4,%r13
2972	movq	%r9,%r14
2973	shrdq	$2,%r9,%r15
2974	shrq	$2,%r9
2975	addq	%r13,%r15
2976	adcq	%r14,%r9
2977	addq	%r15,%r10
2978	adcq	%r9,%r11
2979	adcq	$0,%r12
2980
2981	leaq	16(%rdi),%rdi
2982	decq	%rcx
2983	jg	.Lseal_sse_tail_64_rounds_and_x2hash
2984	decq	%r8
2985	jge	.Lseal_sse_tail_64_rounds_and_x1hash
2986	paddd	.Lchacha20_consts(%rip),%xmm0
2987	paddd	0+48(%rbp),%xmm4
2988	paddd	0+64(%rbp),%xmm8
2989	paddd	0+96(%rbp),%xmm12
2990
2991	jmp	.Lseal_sse_128_tail_xor
2992
2993.Lseal_sse_tail_128:
2994	movdqa	.Lchacha20_consts(%rip),%xmm0
2995	movdqa	0+48(%rbp),%xmm4
2996	movdqa	0+64(%rbp),%xmm8
2997	movdqa	%xmm0,%xmm1
2998	movdqa	%xmm4,%xmm5
2999	movdqa	%xmm8,%xmm9
3000	movdqa	0+96(%rbp),%xmm13
3001	paddd	.Lsse_inc(%rip),%xmm13
3002	movdqa	%xmm13,%xmm12
3003	paddd	.Lsse_inc(%rip),%xmm12
3004	movdqa	%xmm12,0+96(%rbp)
3005	movdqa	%xmm13,0+112(%rbp)
3006
3007.Lseal_sse_tail_128_rounds_and_x2hash:
3008	addq	0+0(%rdi),%r10
3009	adcq	8+0(%rdi),%r11
3010	adcq	$1,%r12
3011	movq	0+0+0(%rbp),%rax
3012	movq	%rax,%r15
3013	mulq	%r10
3014	movq	%rax,%r13
3015	movq	%rdx,%r14
3016	movq	0+0+0(%rbp),%rax
3017	mulq	%r11
3018	imulq	%r12,%r15
3019	addq	%rax,%r14
3020	adcq	%rdx,%r15
3021	movq	8+0+0(%rbp),%rax
3022	movq	%rax,%r9
3023	mulq	%r10
3024	addq	%rax,%r14
3025	adcq	$0,%rdx
3026	movq	%rdx,%r10
3027	movq	8+0+0(%rbp),%rax
3028	mulq	%r11
3029	addq	%rax,%r15
3030	adcq	$0,%rdx
3031	imulq	%r12,%r9
3032	addq	%r10,%r15
3033	adcq	%rdx,%r9
3034	movq	%r13,%r10
3035	movq	%r14,%r11
3036	movq	%r15,%r12
3037	andq	$3,%r12
3038	movq	%r15,%r13
3039	andq	$-4,%r13
3040	movq	%r9,%r14
3041	shrdq	$2,%r9,%r15
3042	shrq	$2,%r9
3043	addq	%r13,%r15
3044	adcq	%r14,%r9
3045	addq	%r15,%r10
3046	adcq	%r9,%r11
3047	adcq	$0,%r12
3048
3049	leaq	16(%rdi),%rdi
3050.Lseal_sse_tail_128_rounds_and_x1hash:
3051	paddd	%xmm4,%xmm0
3052	pxor	%xmm0,%xmm12
3053	pshufb	.Lrol16(%rip),%xmm12
3054	paddd	%xmm12,%xmm8
3055	pxor	%xmm8,%xmm4
3056	movdqa	%xmm4,%xmm3
3057	pslld	$12,%xmm3
3058	psrld	$20,%xmm4
3059	pxor	%xmm3,%xmm4
3060	paddd	%xmm4,%xmm0
3061	pxor	%xmm0,%xmm12
3062	pshufb	.Lrol8(%rip),%xmm12
3063	paddd	%xmm12,%xmm8
3064	pxor	%xmm8,%xmm4
3065	movdqa	%xmm4,%xmm3
3066	pslld	$7,%xmm3
3067	psrld	$25,%xmm4
3068	pxor	%xmm3,%xmm4
3069.byte	102,15,58,15,228,4
3070.byte	102,69,15,58,15,192,8
3071.byte	102,69,15,58,15,228,12
3072	paddd	%xmm5,%xmm1
3073	pxor	%xmm1,%xmm13
3074	pshufb	.Lrol16(%rip),%xmm13
3075	paddd	%xmm13,%xmm9
3076	pxor	%xmm9,%xmm5
3077	movdqa	%xmm5,%xmm3
3078	pslld	$12,%xmm3
3079	psrld	$20,%xmm5
3080	pxor	%xmm3,%xmm5
3081	paddd	%xmm5,%xmm1
3082	pxor	%xmm1,%xmm13
3083	pshufb	.Lrol8(%rip),%xmm13
3084	paddd	%xmm13,%xmm9
3085	pxor	%xmm9,%xmm5
3086	movdqa	%xmm5,%xmm3
3087	pslld	$7,%xmm3
3088	psrld	$25,%xmm5
3089	pxor	%xmm3,%xmm5
3090.byte	102,15,58,15,237,4
3091.byte	102,69,15,58,15,201,8
3092.byte	102,69,15,58,15,237,12
3093	addq	0+0(%rdi),%r10
3094	adcq	8+0(%rdi),%r11
3095	adcq	$1,%r12
3096	movq	0+0+0(%rbp),%rax
3097	movq	%rax,%r15
3098	mulq	%r10
3099	movq	%rax,%r13
3100	movq	%rdx,%r14
3101	movq	0+0+0(%rbp),%rax
3102	mulq	%r11
3103	imulq	%r12,%r15
3104	addq	%rax,%r14
3105	adcq	%rdx,%r15
3106	movq	8+0+0(%rbp),%rax
3107	movq	%rax,%r9
3108	mulq	%r10
3109	addq	%rax,%r14
3110	adcq	$0,%rdx
3111	movq	%rdx,%r10
3112	movq	8+0+0(%rbp),%rax
3113	mulq	%r11
3114	addq	%rax,%r15
3115	adcq	$0,%rdx
3116	imulq	%r12,%r9
3117	addq	%r10,%r15
3118	adcq	%rdx,%r9
3119	movq	%r13,%r10
3120	movq	%r14,%r11
3121	movq	%r15,%r12
3122	andq	$3,%r12
3123	movq	%r15,%r13
3124	andq	$-4,%r13
3125	movq	%r9,%r14
3126	shrdq	$2,%r9,%r15
3127	shrq	$2,%r9
3128	addq	%r13,%r15
3129	adcq	%r14,%r9
3130	addq	%r15,%r10
3131	adcq	%r9,%r11
3132	adcq	$0,%r12
3133	paddd	%xmm4,%xmm0
3134	pxor	%xmm0,%xmm12
3135	pshufb	.Lrol16(%rip),%xmm12
3136	paddd	%xmm12,%xmm8
3137	pxor	%xmm8,%xmm4
3138	movdqa	%xmm4,%xmm3
3139	pslld	$12,%xmm3
3140	psrld	$20,%xmm4
3141	pxor	%xmm3,%xmm4
3142	paddd	%xmm4,%xmm0
3143	pxor	%xmm0,%xmm12
3144	pshufb	.Lrol8(%rip),%xmm12
3145	paddd	%xmm12,%xmm8
3146	pxor	%xmm8,%xmm4
3147	movdqa	%xmm4,%xmm3
3148	pslld	$7,%xmm3
3149	psrld	$25,%xmm4
3150	pxor	%xmm3,%xmm4
3151.byte	102,15,58,15,228,12
3152.byte	102,69,15,58,15,192,8
3153.byte	102,69,15,58,15,228,4
3154	paddd	%xmm5,%xmm1
3155	pxor	%xmm1,%xmm13
3156	pshufb	.Lrol16(%rip),%xmm13
3157	paddd	%xmm13,%xmm9
3158	pxor	%xmm9,%xmm5
3159	movdqa	%xmm5,%xmm3
3160	pslld	$12,%xmm3
3161	psrld	$20,%xmm5
3162	pxor	%xmm3,%xmm5
3163	paddd	%xmm5,%xmm1
3164	pxor	%xmm1,%xmm13
3165	pshufb	.Lrol8(%rip),%xmm13
3166	paddd	%xmm13,%xmm9
3167	pxor	%xmm9,%xmm5
3168	movdqa	%xmm5,%xmm3
3169	pslld	$7,%xmm3
3170	psrld	$25,%xmm5
3171	pxor	%xmm3,%xmm5
3172.byte	102,15,58,15,237,12
3173.byte	102,69,15,58,15,201,8
3174.byte	102,69,15,58,15,237,4
3175
3176	leaq	16(%rdi),%rdi
3177	decq	%rcx
3178	jg	.Lseal_sse_tail_128_rounds_and_x2hash
3179	decq	%r8
3180	jge	.Lseal_sse_tail_128_rounds_and_x1hash
3181	paddd	.Lchacha20_consts(%rip),%xmm1
3182	paddd	0+48(%rbp),%xmm5
3183	paddd	0+64(%rbp),%xmm9
3184	paddd	0+112(%rbp),%xmm13
3185	paddd	.Lchacha20_consts(%rip),%xmm0
3186	paddd	0+48(%rbp),%xmm4
3187	paddd	0+64(%rbp),%xmm8
3188	paddd	0+96(%rbp),%xmm12
3189	movdqu	0 + 0(%rsi),%xmm3
3190	movdqu	16 + 0(%rsi),%xmm7
3191	movdqu	32 + 0(%rsi),%xmm11
3192	movdqu	48 + 0(%rsi),%xmm15
3193	pxor	%xmm3,%xmm1
3194	pxor	%xmm7,%xmm5
3195	pxor	%xmm11,%xmm9
3196	pxor	%xmm13,%xmm15
3197	movdqu	%xmm1,0 + 0(%rdi)
3198	movdqu	%xmm5,16 + 0(%rdi)
3199	movdqu	%xmm9,32 + 0(%rdi)
3200	movdqu	%xmm15,48 + 0(%rdi)
3201
3202	movq	$64,%rcx
3203	subq	$64,%rbx
3204	leaq	64(%rsi),%rsi
3205	jmp	.Lseal_sse_128_tail_hash
3206
3207.Lseal_sse_tail_192:
3208	movdqa	.Lchacha20_consts(%rip),%xmm0
3209	movdqa	0+48(%rbp),%xmm4
3210	movdqa	0+64(%rbp),%xmm8
3211	movdqa	%xmm0,%xmm1
3212	movdqa	%xmm4,%xmm5
3213	movdqa	%xmm8,%xmm9
3214	movdqa	%xmm0,%xmm2
3215	movdqa	%xmm4,%xmm6
3216	movdqa	%xmm8,%xmm10
3217	movdqa	0+96(%rbp),%xmm14
3218	paddd	.Lsse_inc(%rip),%xmm14
3219	movdqa	%xmm14,%xmm13
3220	paddd	.Lsse_inc(%rip),%xmm13
3221	movdqa	%xmm13,%xmm12
3222	paddd	.Lsse_inc(%rip),%xmm12
3223	movdqa	%xmm12,0+96(%rbp)
3224	movdqa	%xmm13,0+112(%rbp)
3225	movdqa	%xmm14,0+128(%rbp)
3226
3227.Lseal_sse_tail_192_rounds_and_x2hash:
3228	addq	0+0(%rdi),%r10
3229	adcq	8+0(%rdi),%r11
3230	adcq	$1,%r12
3231	movq	0+0+0(%rbp),%rax
3232	movq	%rax,%r15
3233	mulq	%r10
3234	movq	%rax,%r13
3235	movq	%rdx,%r14
3236	movq	0+0+0(%rbp),%rax
3237	mulq	%r11
3238	imulq	%r12,%r15
3239	addq	%rax,%r14
3240	adcq	%rdx,%r15
3241	movq	8+0+0(%rbp),%rax
3242	movq	%rax,%r9
3243	mulq	%r10
3244	addq	%rax,%r14
3245	adcq	$0,%rdx
3246	movq	%rdx,%r10
3247	movq	8+0+0(%rbp),%rax
3248	mulq	%r11
3249	addq	%rax,%r15
3250	adcq	$0,%rdx
3251	imulq	%r12,%r9
3252	addq	%r10,%r15
3253	adcq	%rdx,%r9
3254	movq	%r13,%r10
3255	movq	%r14,%r11
3256	movq	%r15,%r12
3257	andq	$3,%r12
3258	movq	%r15,%r13
3259	andq	$-4,%r13
3260	movq	%r9,%r14
3261	shrdq	$2,%r9,%r15
3262	shrq	$2,%r9
3263	addq	%r13,%r15
3264	adcq	%r14,%r9
3265	addq	%r15,%r10
3266	adcq	%r9,%r11
3267	adcq	$0,%r12
3268
3269	leaq	16(%rdi),%rdi
3270.Lseal_sse_tail_192_rounds_and_x1hash:
3271	paddd	%xmm4,%xmm0
3272	pxor	%xmm0,%xmm12
3273	pshufb	.Lrol16(%rip),%xmm12
3274	paddd	%xmm12,%xmm8
3275	pxor	%xmm8,%xmm4
3276	movdqa	%xmm4,%xmm3
3277	pslld	$12,%xmm3
3278	psrld	$20,%xmm4
3279	pxor	%xmm3,%xmm4
3280	paddd	%xmm4,%xmm0
3281	pxor	%xmm0,%xmm12
3282	pshufb	.Lrol8(%rip),%xmm12
3283	paddd	%xmm12,%xmm8
3284	pxor	%xmm8,%xmm4
3285	movdqa	%xmm4,%xmm3
3286	pslld	$7,%xmm3
3287	psrld	$25,%xmm4
3288	pxor	%xmm3,%xmm4
3289.byte	102,15,58,15,228,4
3290.byte	102,69,15,58,15,192,8
3291.byte	102,69,15,58,15,228,12
3292	paddd	%xmm5,%xmm1
3293	pxor	%xmm1,%xmm13
3294	pshufb	.Lrol16(%rip),%xmm13
3295	paddd	%xmm13,%xmm9
3296	pxor	%xmm9,%xmm5
3297	movdqa	%xmm5,%xmm3
3298	pslld	$12,%xmm3
3299	psrld	$20,%xmm5
3300	pxor	%xmm3,%xmm5
3301	paddd	%xmm5,%xmm1
3302	pxor	%xmm1,%xmm13
3303	pshufb	.Lrol8(%rip),%xmm13
3304	paddd	%xmm13,%xmm9
3305	pxor	%xmm9,%xmm5
3306	movdqa	%xmm5,%xmm3
3307	pslld	$7,%xmm3
3308	psrld	$25,%xmm5
3309	pxor	%xmm3,%xmm5
3310.byte	102,15,58,15,237,4
3311.byte	102,69,15,58,15,201,8
3312.byte	102,69,15,58,15,237,12
3313	paddd	%xmm6,%xmm2
3314	pxor	%xmm2,%xmm14
3315	pshufb	.Lrol16(%rip),%xmm14
3316	paddd	%xmm14,%xmm10
3317	pxor	%xmm10,%xmm6
3318	movdqa	%xmm6,%xmm3
3319	pslld	$12,%xmm3
3320	psrld	$20,%xmm6
3321	pxor	%xmm3,%xmm6
3322	paddd	%xmm6,%xmm2
3323	pxor	%xmm2,%xmm14
3324	pshufb	.Lrol8(%rip),%xmm14
3325	paddd	%xmm14,%xmm10
3326	pxor	%xmm10,%xmm6
3327	movdqa	%xmm6,%xmm3
3328	pslld	$7,%xmm3
3329	psrld	$25,%xmm6
3330	pxor	%xmm3,%xmm6
3331.byte	102,15,58,15,246,4
3332.byte	102,69,15,58,15,210,8
3333.byte	102,69,15,58,15,246,12
3334	addq	0+0(%rdi),%r10
3335	adcq	8+0(%rdi),%r11
3336	adcq	$1,%r12
3337	movq	0+0+0(%rbp),%rax
3338	movq	%rax,%r15
3339	mulq	%r10
3340	movq	%rax,%r13
3341	movq	%rdx,%r14
3342	movq	0+0+0(%rbp),%rax
3343	mulq	%r11
3344	imulq	%r12,%r15
3345	addq	%rax,%r14
3346	adcq	%rdx,%r15
3347	movq	8+0+0(%rbp),%rax
3348	movq	%rax,%r9
3349	mulq	%r10
3350	addq	%rax,%r14
3351	adcq	$0,%rdx
3352	movq	%rdx,%r10
3353	movq	8+0+0(%rbp),%rax
3354	mulq	%r11
3355	addq	%rax,%r15
3356	adcq	$0,%rdx
3357	imulq	%r12,%r9
3358	addq	%r10,%r15
3359	adcq	%rdx,%r9
3360	movq	%r13,%r10
3361	movq	%r14,%r11
3362	movq	%r15,%r12
3363	andq	$3,%r12
3364	movq	%r15,%r13
3365	andq	$-4,%r13
3366	movq	%r9,%r14
3367	shrdq	$2,%r9,%r15
3368	shrq	$2,%r9
3369	addq	%r13,%r15
3370	adcq	%r14,%r9
3371	addq	%r15,%r10
3372	adcq	%r9,%r11
3373	adcq	$0,%r12
3374	paddd	%xmm4,%xmm0
3375	pxor	%xmm0,%xmm12
3376	pshufb	.Lrol16(%rip),%xmm12
3377	paddd	%xmm12,%xmm8
3378	pxor	%xmm8,%xmm4
3379	movdqa	%xmm4,%xmm3
3380	pslld	$12,%xmm3
3381	psrld	$20,%xmm4
3382	pxor	%xmm3,%xmm4
3383	paddd	%xmm4,%xmm0
3384	pxor	%xmm0,%xmm12
3385	pshufb	.Lrol8(%rip),%xmm12
3386	paddd	%xmm12,%xmm8
3387	pxor	%xmm8,%xmm4
3388	movdqa	%xmm4,%xmm3
3389	pslld	$7,%xmm3
3390	psrld	$25,%xmm4
3391	pxor	%xmm3,%xmm4
3392.byte	102,15,58,15,228,12
3393.byte	102,69,15,58,15,192,8
3394.byte	102,69,15,58,15,228,4
3395	paddd	%xmm5,%xmm1
3396	pxor	%xmm1,%xmm13
3397	pshufb	.Lrol16(%rip),%xmm13
3398	paddd	%xmm13,%xmm9
3399	pxor	%xmm9,%xmm5
3400	movdqa	%xmm5,%xmm3
3401	pslld	$12,%xmm3
3402	psrld	$20,%xmm5
3403	pxor	%xmm3,%xmm5
3404	paddd	%xmm5,%xmm1
3405	pxor	%xmm1,%xmm13
3406	pshufb	.Lrol8(%rip),%xmm13
3407	paddd	%xmm13,%xmm9
3408	pxor	%xmm9,%xmm5
3409	movdqa	%xmm5,%xmm3
3410	pslld	$7,%xmm3
3411	psrld	$25,%xmm5
3412	pxor	%xmm3,%xmm5
3413.byte	102,15,58,15,237,12
3414.byte	102,69,15,58,15,201,8
3415.byte	102,69,15,58,15,237,4
3416	paddd	%xmm6,%xmm2
3417	pxor	%xmm2,%xmm14
3418	pshufb	.Lrol16(%rip),%xmm14
3419	paddd	%xmm14,%xmm10
3420	pxor	%xmm10,%xmm6
3421	movdqa	%xmm6,%xmm3
3422	pslld	$12,%xmm3
3423	psrld	$20,%xmm6
3424	pxor	%xmm3,%xmm6
3425	paddd	%xmm6,%xmm2
3426	pxor	%xmm2,%xmm14
3427	pshufb	.Lrol8(%rip),%xmm14
3428	paddd	%xmm14,%xmm10
3429	pxor	%xmm10,%xmm6
3430	movdqa	%xmm6,%xmm3
3431	pslld	$7,%xmm3
3432	psrld	$25,%xmm6
3433	pxor	%xmm3,%xmm6
3434.byte	102,15,58,15,246,12
3435.byte	102,69,15,58,15,210,8
3436.byte	102,69,15,58,15,246,4
3437
3438	leaq	16(%rdi),%rdi
3439	decq	%rcx
3440	jg	.Lseal_sse_tail_192_rounds_and_x2hash
3441	decq	%r8
3442	jge	.Lseal_sse_tail_192_rounds_and_x1hash
3443	paddd	.Lchacha20_consts(%rip),%xmm2
3444	paddd	0+48(%rbp),%xmm6
3445	paddd	0+64(%rbp),%xmm10
3446	paddd	0+128(%rbp),%xmm14
3447	paddd	.Lchacha20_consts(%rip),%xmm1
3448	paddd	0+48(%rbp),%xmm5
3449	paddd	0+64(%rbp),%xmm9
3450	paddd	0+112(%rbp),%xmm13
3451	paddd	.Lchacha20_consts(%rip),%xmm0
3452	paddd	0+48(%rbp),%xmm4
3453	paddd	0+64(%rbp),%xmm8
3454	paddd	0+96(%rbp),%xmm12
3455	movdqu	0 + 0(%rsi),%xmm3
3456	movdqu	16 + 0(%rsi),%xmm7
3457	movdqu	32 + 0(%rsi),%xmm11
3458	movdqu	48 + 0(%rsi),%xmm15
3459	pxor	%xmm3,%xmm2
3460	pxor	%xmm7,%xmm6
3461	pxor	%xmm11,%xmm10
3462	pxor	%xmm14,%xmm15
3463	movdqu	%xmm2,0 + 0(%rdi)
3464	movdqu	%xmm6,16 + 0(%rdi)
3465	movdqu	%xmm10,32 + 0(%rdi)
3466	movdqu	%xmm15,48 + 0(%rdi)
3467	movdqu	0 + 64(%rsi),%xmm3
3468	movdqu	16 + 64(%rsi),%xmm7
3469	movdqu	32 + 64(%rsi),%xmm11
3470	movdqu	48 + 64(%rsi),%xmm15
3471	pxor	%xmm3,%xmm1
3472	pxor	%xmm7,%xmm5
3473	pxor	%xmm11,%xmm9
3474	pxor	%xmm13,%xmm15
3475	movdqu	%xmm1,0 + 64(%rdi)
3476	movdqu	%xmm5,16 + 64(%rdi)
3477	movdqu	%xmm9,32 + 64(%rdi)
3478	movdqu	%xmm15,48 + 64(%rdi)
3479
3480	movq	$128,%rcx
3481	subq	$128,%rbx
3482	leaq	128(%rsi),%rsi
3483
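// Absorb any ciphertext that has been written but not yet hashed (%rcx bytes,
// 16 per iteration).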
3484.Lseal_sse_128_tail_hash:
3485	cmpq	$16,%rcx
3486	jb	.Lseal_sse_128_tail_xor
3487	addq	0+0(%rdi),%r10
3488	adcq	8+0(%rdi),%r11
3489	adcq	$1,%r12
3490	movq	0+0+0(%rbp),%rax
3491	movq	%rax,%r15
3492	mulq	%r10
3493	movq	%rax,%r13
3494	movq	%rdx,%r14
3495	movq	0+0+0(%rbp),%rax
3496	mulq	%r11
3497	imulq	%r12,%r15
3498	addq	%rax,%r14
3499	adcq	%rdx,%r15
3500	movq	8+0+0(%rbp),%rax
3501	movq	%rax,%r9
3502	mulq	%r10
3503	addq	%rax,%r14
3504	adcq	$0,%rdx
3505	movq	%rdx,%r10
3506	movq	8+0+0(%rbp),%rax
3507	mulq	%r11
3508	addq	%rax,%r15
3509	adcq	$0,%rdx
3510	imulq	%r12,%r9
3511	addq	%r10,%r15
3512	adcq	%rdx,%r9
3513	movq	%r13,%r10
3514	movq	%r14,%r11
3515	movq	%r15,%r12
3516	andq	$3,%r12
3517	movq	%r15,%r13
3518	andq	$-4,%r13
3519	movq	%r9,%r14
3520	shrdq	$2,%r9,%r15
3521	shrq	$2,%r9
3522	addq	%r13,%r15
3523	adcq	%r14,%r9
3524	addq	%r15,%r10
3525	adcq	%r9,%r11
3526	adcq	$0,%r12
3527
3528	subq	$16,%rcx
3529	leaq	16(%rdi),%rdi
3530	jmp	.Lseal_sse_128_tail_hash
3531
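// Encrypt the remainder 16 bytes at a time with the keystream block held in
// %xmm0/%xmm4/%xmm8/%xmm12, hashing each ciphertext block as it is written;
// once a 64-byte block is consumed, the next one is rotated in by the movdqa
// shuffle at the bottom of the loop.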
3532.Lseal_sse_128_tail_xor:
3533	cmpq	$16,%rbx
3534	jb	.Lseal_sse_tail_16
3535	subq	$16,%rbx
3536
3537	movdqu	0(%rsi),%xmm3
3538	pxor	%xmm3,%xmm0
3539	movdqu	%xmm0,0(%rdi)
3540
3541	addq	0(%rdi),%r10
3542	adcq	8(%rdi),%r11
3543	adcq	$1,%r12
3544	leaq	16(%rsi),%rsi
3545	leaq	16(%rdi),%rdi
3546	movq	0+0+0(%rbp),%rax
3547	movq	%rax,%r15
3548	mulq	%r10
3549	movq	%rax,%r13
3550	movq	%rdx,%r14
3551	movq	0+0+0(%rbp),%rax
3552	mulq	%r11
3553	imulq	%r12,%r15
3554	addq	%rax,%r14
3555	adcq	%rdx,%r15
3556	movq	8+0+0(%rbp),%rax
3557	movq	%rax,%r9
3558	mulq	%r10
3559	addq	%rax,%r14
3560	adcq	$0,%rdx
3561	movq	%rdx,%r10
3562	movq	8+0+0(%rbp),%rax
3563	mulq	%r11
3564	addq	%rax,%r15
3565	adcq	$0,%rdx
3566	imulq	%r12,%r9
3567	addq	%r10,%r15
3568	adcq	%rdx,%r9
3569	movq	%r13,%r10
3570	movq	%r14,%r11
3571	movq	%r15,%r12
3572	andq	$3,%r12
3573	movq	%r15,%r13
3574	andq	$-4,%r13
3575	movq	%r9,%r14
3576	shrdq	$2,%r9,%r15
3577	shrq	$2,%r9
3578	addq	%r13,%r15
3579	adcq	%r14,%r9
3580	addq	%r15,%r10
3581	adcq	%r9,%r11
3582	adcq	$0,%r12
3583
3584
3585	movdqa	%xmm4,%xmm0
3586	movdqa	%xmm8,%xmm4
3587	movdqa	%xmm12,%xmm8
3588	movdqa	%xmm1,%xmm12
3589	movdqa	%xmm5,%xmm1
3590	movdqa	%xmm9,%xmm5
3591	movdqa	%xmm13,%xmm9
3592	jmp	.Lseal_sse_128_tail_xor
3593
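// Final partial block (<16 bytes): gather the remaining plaintext bytes into
// %xmm15 back to front with pslldq/pinsrb, XOR with the keystream in %xmm0,
// and write the ciphertext back out one byte at a time.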
3594.Lseal_sse_tail_16:
3595	testq	%rbx,%rbx
3596	jz	.Lprocess_blocks_of_extra_in
3597
3598	movq	%rbx,%r8
3599	movq	%rbx,%rcx
3600	leaq	-1(%rsi,%rbx,1),%rsi
3601	pxor	%xmm15,%xmm15
3602.Lseal_sse_tail_16_compose:
3603	pslldq	$1,%xmm15
3604	pinsrb	$0,(%rsi),%xmm15
3605	leaq	-1(%rsi),%rsi
3606	decq	%rcx
3607	jne	.Lseal_sse_tail_16_compose
3608
3609
3610	pxor	%xmm0,%xmm15
3611
3612
3613	movq	%rbx,%rcx
3614	movdqu	%xmm15,%xmm0
3615.Lseal_sse_tail_16_extract:
3616	pextrb	$0,%xmm0,(%rdi)
3617	psrldq	$1,%xmm0
3618	addq	$1,%rdi
3619	subq	$1,%rcx
3620	jnz	.Lseal_sse_tail_16_extract
3621
3622
3623
3624
3625
3626
3627
3628
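// The data pointer saved on the stack may carry extra input that is folded
// into the tag but never encrypted or written out: its pointer appears to be
// read from 48(%r9) and its remaining length from 56(%r9).  Any such bytes are
// merged with the partial block above before the final Poly1305 updates.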
3629	movq	288 + 0 + 32(%rsp),%r9
3630	movq	56(%r9),%r14
3631	movq	48(%r9),%r13
3632	testq	%r14,%r14
3633	jz	.Lprocess_partial_block
3634
3635	movq	$16,%r15
3636	subq	%rbx,%r15
3637	cmpq	%r15,%r14
3638
3639	jge	.Lload_extra_in
3640	movq	%r14,%r15
3641
3642.Lload_extra_in:
3643
3644
3645	leaq	-1(%r13,%r15,1),%rsi
3646
3647
3648	addq	%r15,%r13
3649	subq	%r15,%r14
3650	movq	%r13,48(%r9)
3651	movq	%r14,56(%r9)
3652
3653
3654
3655	addq	%r15,%r8
3656
3657
3658	pxor	%xmm11,%xmm11
3659.Lload_extra_load_loop:
3660	pslldq	$1,%xmm11
3661	pinsrb	$0,(%rsi),%xmm11
3662	leaq	-1(%rsi),%rsi
3663	subq	$1,%r15
3664	jnz	.Lload_extra_load_loop
3665
3666
3667
3668
3669	movq	%rbx,%r15
3670
3671.Lload_extra_shift_loop:
3672	pslldq	$1,%xmm11
3673	subq	$1,%r15
3674	jnz	.Lload_extra_shift_loop
3675
3676
3677
3678
3679	leaq	.Land_masks(%rip),%r15
3680	shlq	$4,%rbx
3681	pand	-16(%r15,%rbx,1),%xmm15
3682
3683
3684	por	%xmm11,%xmm15
3685
3686
3687
3688.byte	102,77,15,126,253
3689	pextrq	$1,%xmm15,%r14
3690	addq	%r13,%r10
3691	adcq	%r14,%r11
3692	adcq	$1,%r12
3693	movq	0+0+0(%rbp),%rax
3694	movq	%rax,%r15
3695	mulq	%r10
3696	movq	%rax,%r13
3697	movq	%rdx,%r14
3698	movq	0+0+0(%rbp),%rax
3699	mulq	%r11
3700	imulq	%r12,%r15
3701	addq	%rax,%r14
3702	adcq	%rdx,%r15
3703	movq	8+0+0(%rbp),%rax
3704	movq	%rax,%r9
3705	mulq	%r10
3706	addq	%rax,%r14
3707	adcq	$0,%rdx
3708	movq	%rdx,%r10
3709	movq	8+0+0(%rbp),%rax
3710	mulq	%r11
3711	addq	%rax,%r15
3712	adcq	$0,%rdx
3713	imulq	%r12,%r9
3714	addq	%r10,%r15
3715	adcq	%rdx,%r9
3716	movq	%r13,%r10
3717	movq	%r14,%r11
3718	movq	%r15,%r12
3719	andq	$3,%r12
3720	movq	%r15,%r13
3721	andq	$-4,%r13
3722	movq	%r9,%r14
3723	shrdq	$2,%r9,%r15
3724	shrq	$2,%r9
3725	addq	%r13,%r15
3726	adcq	%r14,%r9
3727	addq	%r15,%r10
3728	adcq	%r9,%r11
3729	adcq	$0,%r12
3730
3731
3732.Lprocess_blocks_of_extra_in:
3733
3734	movq	288+32+0(%rsp),%r9
3735	movq	48(%r9),%rsi
3736	movq	56(%r9),%r8
3737	movq	%r8,%rcx
3738	shrq	$4,%r8
3739
3740.Lprocess_extra_hash_loop:
3741	jz	process_extra_in_trailer
3742	addq	0+0(%rsi),%r10
3743	adcq	8+0(%rsi),%r11
3744	adcq	$1,%r12
3745	movq	0+0+0(%rbp),%rax
3746	movq	%rax,%r15
3747	mulq	%r10
3748	movq	%rax,%r13
3749	movq	%rdx,%r14
3750	movq	0+0+0(%rbp),%rax
3751	mulq	%r11
3752	imulq	%r12,%r15
3753	addq	%rax,%r14
3754	adcq	%rdx,%r15
3755	movq	8+0+0(%rbp),%rax
3756	movq	%rax,%r9
3757	mulq	%r10
3758	addq	%rax,%r14
3759	adcq	$0,%rdx
3760	movq	%rdx,%r10
3761	movq	8+0+0(%rbp),%rax
3762	mulq	%r11
3763	addq	%rax,%r15
3764	adcq	$0,%rdx
3765	imulq	%r12,%r9
3766	addq	%r10,%r15
3767	adcq	%rdx,%r9
3768	movq	%r13,%r10
3769	movq	%r14,%r11
3770	movq	%r15,%r12
3771	andq	$3,%r12
3772	movq	%r15,%r13
3773	andq	$-4,%r13
3774	movq	%r9,%r14
3775	shrdq	$2,%r9,%r15
3776	shrq	$2,%r9
3777	addq	%r13,%r15
3778	adcq	%r14,%r9
3779	addq	%r15,%r10
3780	adcq	%r9,%r11
3781	adcq	$0,%r12
3782
3783	leaq	16(%rsi),%rsi
3784	subq	$1,%r8
3785	jmp	.Lprocess_extra_hash_loop
3786process_extra_in_trailer:
3787	andq	$15,%rcx
3788	movq	%rcx,%rbx
3789	jz	.Ldo_length_block
3790	leaq	-1(%rsi,%rcx,1),%rsi
3791
3792.Lprocess_extra_in_trailer_load:
3793	pslldq	$1,%xmm15
3794	pinsrb	$0,(%rsi),%xmm15
3795	leaq	-1(%rsi),%rsi
3796	subq	$1,%rcx
3797	jnz	.Lprocess_extra_in_trailer_load
3798
3799.Lprocess_partial_block:
3800
3801	leaq	.Land_masks(%rip),%r15
3802	shlq	$4,%rbx
3803	pand	-16(%r15,%rbx,1),%xmm15
3804.byte	102,77,15,126,253
3805	pextrq	$1,%xmm15,%r14
3806	addq	%r13,%r10
3807	adcq	%r14,%r11
3808	adcq	$1,%r12
3809	movq	0+0+0(%rbp),%rax
3810	movq	%rax,%r15
3811	mulq	%r10
3812	movq	%rax,%r13
3813	movq	%rdx,%r14
3814	movq	0+0+0(%rbp),%rax
3815	mulq	%r11
3816	imulq	%r12,%r15
3817	addq	%rax,%r14
3818	adcq	%rdx,%r15
3819	movq	8+0+0(%rbp),%rax
3820	movq	%rax,%r9
3821	mulq	%r10
3822	addq	%rax,%r14
3823	adcq	$0,%rdx
3824	movq	%rdx,%r10
3825	movq	8+0+0(%rbp),%rax
3826	mulq	%r11
3827	addq	%rax,%r15
3828	adcq	$0,%rdx
3829	imulq	%r12,%r9
3830	addq	%r10,%r15
3831	adcq	%rdx,%r9
3832	movq	%r13,%r10
3833	movq	%r14,%r11
3834	movq	%r15,%r12
3835	andq	$3,%r12
3836	movq	%r15,%r13
3837	andq	$-4,%r13
3838	movq	%r9,%r14
3839	shrdq	$2,%r9,%r15
3840	shrq	$2,%r9
3841	addq	%r13,%r15
3842	adcq	%r14,%r9
3843	addq	%r15,%r10
3844	adcq	%r9,%r11
3845	adcq	$0,%r12
3846
3847
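// Length block: absorb the AD and ciphertext lengths stored at 0+32(%rbp) as
// the last Poly1305 block before producing the tag.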
3848.Ldo_length_block:
3849	addq	0+0+32(%rbp),%r10
3850	adcq	8+0+32(%rbp),%r11
3851	adcq	$1,%r12
3852	movq	0+0+0(%rbp),%rax
3853	movq	%rax,%r15
3854	mulq	%r10
3855	movq	%rax,%r13
3856	movq	%rdx,%r14
3857	movq	0+0+0(%rbp),%rax
3858	mulq	%r11
3859	imulq	%r12,%r15
3860	addq	%rax,%r14
3861	adcq	%rdx,%r15
3862	movq	8+0+0(%rbp),%rax
3863	movq	%rax,%r9
3864	mulq	%r10
3865	addq	%rax,%r14
3866	adcq	$0,%rdx
3867	movq	%rdx,%r10
3868	movq	8+0+0(%rbp),%rax
3869	mulq	%r11
3870	addq	%rax,%r15
3871	adcq	$0,%rdx
3872	imulq	%r12,%r9
3873	addq	%r10,%r15
3874	adcq	%rdx,%r9
3875	movq	%r13,%r10
3876	movq	%r14,%r11
3877	movq	%r15,%r12
3878	andq	$3,%r12
3879	movq	%r15,%r13
3880	andq	$-4,%r13
3881	movq	%r9,%r14
3882	shrdq	$2,%r9,%r15
3883	shrq	$2,%r9
3884	addq	%r13,%r15
3885	adcq	%r14,%r9
3886	addq	%r15,%r10
3887	adcq	%r9,%r11
3888	adcq	$0,%r12
3889
3890
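// Final reduction and tag: %r13-%r15 keep a copy of h, the subq/sbbq chain
// computes h - (2^130 - 5), and cmovcq restores the original h if that
// subtraction borrowed.  The 's' half of the key at 16(%rbp) is then added and
// the 16-byte tag is stored through the pointer popped back into %r9.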
3891	movq	%r10,%r13
3892	movq	%r11,%r14
3893	movq	%r12,%r15
3894	subq	$-5,%r10
3895	sbbq	$-1,%r11
3896	sbbq	$3,%r12
3897	cmovcq	%r13,%r10
3898	cmovcq	%r14,%r11
3899	cmovcq	%r15,%r12
3900
3901	addq	0+0+16(%rbp),%r10
3902	adcq	8+0+16(%rbp),%r11
3903
3904.cfi_remember_state
3905	addq	$288 + 0 + 32,%rsp
3906.cfi_adjust_cfa_offset	-(288 + 32)
3907
3908	popq	%r9
3909.cfi_adjust_cfa_offset	-8
3910.cfi_restore	%r9
3911	movq	%r10,(%r9)
3912	movq	%r11,8(%r9)
3913	popq	%r15
3914.cfi_adjust_cfa_offset	-8
3915.cfi_restore	%r15
3916	popq	%r14
3917.cfi_adjust_cfa_offset	-8
3918.cfi_restore	%r14
3919	popq	%r13
3920.cfi_adjust_cfa_offset	-8
3921.cfi_restore	%r13
3922	popq	%r12
3923.cfi_adjust_cfa_offset	-8
3924.cfi_restore	%r12
3925	popq	%rbx
3926.cfi_adjust_cfa_offset	-8
3927.cfi_restore	%rbx
3928	popq	%rbp
3929.cfi_adjust_cfa_offset	-8
3930.cfi_restore	%rbp
3931	ret
3932
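// Fast path for messages of at most 128 bytes: up to three ChaCha20 blocks are
// produced entirely in registers, the block with the initial counter supplies
// the Poly1305 key, and the remaining blocks are consumed as keystream by
// .Lseal_sse_128_tail_xor above.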
3933.Lseal_sse_128:
3934.cfi_restore_state
3935	movdqu	.Lchacha20_consts(%rip),%xmm0
3936	movdqa	%xmm0,%xmm1
3937	movdqa	%xmm0,%xmm2
3938	movdqu	0(%r9),%xmm4
3939	movdqa	%xmm4,%xmm5
3940	movdqa	%xmm4,%xmm6
3941	movdqu	16(%r9),%xmm8
3942	movdqa	%xmm8,%xmm9
3943	movdqa	%xmm8,%xmm10
3944	movdqu	32(%r9),%xmm14
3945	movdqa	%xmm14,%xmm12
3946	paddd	.Lsse_inc(%rip),%xmm12
3947	movdqa	%xmm12,%xmm13
3948	paddd	.Lsse_inc(%rip),%xmm13
3949	movdqa	%xmm4,%xmm7
3950	movdqa	%xmm8,%xmm11
3951	movdqa	%xmm12,%xmm15
3952	movq	$10,%r10
3953
3954.Lseal_sse_128_rounds:
3955	paddd	%xmm4,%xmm0
3956	pxor	%xmm0,%xmm12
3957	pshufb	.Lrol16(%rip),%xmm12
3958	paddd	%xmm12,%xmm8
3959	pxor	%xmm8,%xmm4
3960	movdqa	%xmm4,%xmm3
3961	pslld	$12,%xmm3
3962	psrld	$20,%xmm4
3963	pxor	%xmm3,%xmm4
3964	paddd	%xmm4,%xmm0
3965	pxor	%xmm0,%xmm12
3966	pshufb	.Lrol8(%rip),%xmm12
3967	paddd	%xmm12,%xmm8
3968	pxor	%xmm8,%xmm4
3969	movdqa	%xmm4,%xmm3
3970	pslld	$7,%xmm3
3971	psrld	$25,%xmm4
3972	pxor	%xmm3,%xmm4
3973.byte	102,15,58,15,228,4
3974.byte	102,69,15,58,15,192,8
3975.byte	102,69,15,58,15,228,12
3976	paddd	%xmm5,%xmm1
3977	pxor	%xmm1,%xmm13
3978	pshufb	.Lrol16(%rip),%xmm13
3979	paddd	%xmm13,%xmm9
3980	pxor	%xmm9,%xmm5
3981	movdqa	%xmm5,%xmm3
3982	pslld	$12,%xmm3
3983	psrld	$20,%xmm5
3984	pxor	%xmm3,%xmm5
3985	paddd	%xmm5,%xmm1
3986	pxor	%xmm1,%xmm13
3987	pshufb	.Lrol8(%rip),%xmm13
3988	paddd	%xmm13,%xmm9
3989	pxor	%xmm9,%xmm5
3990	movdqa	%xmm5,%xmm3
3991	pslld	$7,%xmm3
3992	psrld	$25,%xmm5
3993	pxor	%xmm3,%xmm5
3994.byte	102,15,58,15,237,4
3995.byte	102,69,15,58,15,201,8
3996.byte	102,69,15,58,15,237,12
3997	paddd	%xmm6,%xmm2
3998	pxor	%xmm2,%xmm14
3999	pshufb	.Lrol16(%rip),%xmm14
4000	paddd	%xmm14,%xmm10
4001	pxor	%xmm10,%xmm6
4002	movdqa	%xmm6,%xmm3
4003	pslld	$12,%xmm3
4004	psrld	$20,%xmm6
4005	pxor	%xmm3,%xmm6
4006	paddd	%xmm6,%xmm2
4007	pxor	%xmm2,%xmm14
4008	pshufb	.Lrol8(%rip),%xmm14
4009	paddd	%xmm14,%xmm10
4010	pxor	%xmm10,%xmm6
4011	movdqa	%xmm6,%xmm3
4012	pslld	$7,%xmm3
4013	psrld	$25,%xmm6
4014	pxor	%xmm3,%xmm6
4015.byte	102,15,58,15,246,4
4016.byte	102,69,15,58,15,210,8
4017.byte	102,69,15,58,15,246,12
4018	paddd	%xmm4,%xmm0
4019	pxor	%xmm0,%xmm12
4020	pshufb	.Lrol16(%rip),%xmm12
4021	paddd	%xmm12,%xmm8
4022	pxor	%xmm8,%xmm4
4023	movdqa	%xmm4,%xmm3
4024	pslld	$12,%xmm3
4025	psrld	$20,%xmm4
4026	pxor	%xmm3,%xmm4
4027	paddd	%xmm4,%xmm0
4028	pxor	%xmm0,%xmm12
4029	pshufb	.Lrol8(%rip),%xmm12
4030	paddd	%xmm12,%xmm8
4031	pxor	%xmm8,%xmm4
4032	movdqa	%xmm4,%xmm3
4033	pslld	$7,%xmm3
4034	psrld	$25,%xmm4
4035	pxor	%xmm3,%xmm4
4036.byte	102,15,58,15,228,12
4037.byte	102,69,15,58,15,192,8
4038.byte	102,69,15,58,15,228,4
4039	paddd	%xmm5,%xmm1
4040	pxor	%xmm1,%xmm13
4041	pshufb	.Lrol16(%rip),%xmm13
4042	paddd	%xmm13,%xmm9
4043	pxor	%xmm9,%xmm5
4044	movdqa	%xmm5,%xmm3
4045	pslld	$12,%xmm3
4046	psrld	$20,%xmm5
4047	pxor	%xmm3,%xmm5
4048	paddd	%xmm5,%xmm1
4049	pxor	%xmm1,%xmm13
4050	pshufb	.Lrol8(%rip),%xmm13
4051	paddd	%xmm13,%xmm9
4052	pxor	%xmm9,%xmm5
4053	movdqa	%xmm5,%xmm3
4054	pslld	$7,%xmm3
4055	psrld	$25,%xmm5
4056	pxor	%xmm3,%xmm5
4057.byte	102,15,58,15,237,12
4058.byte	102,69,15,58,15,201,8
4059.byte	102,69,15,58,15,237,4
4060	paddd	%xmm6,%xmm2
4061	pxor	%xmm2,%xmm14
4062	pshufb	.Lrol16(%rip),%xmm14
4063	paddd	%xmm14,%xmm10
4064	pxor	%xmm10,%xmm6
4065	movdqa	%xmm6,%xmm3
4066	pslld	$12,%xmm3
4067	psrld	$20,%xmm6
4068	pxor	%xmm3,%xmm6
4069	paddd	%xmm6,%xmm2
4070	pxor	%xmm2,%xmm14
4071	pshufb	.Lrol8(%rip),%xmm14
4072	paddd	%xmm14,%xmm10
4073	pxor	%xmm10,%xmm6
4074	movdqa	%xmm6,%xmm3
4075	pslld	$7,%xmm3
4076	psrld	$25,%xmm6
4077	pxor	%xmm3,%xmm6
4078.byte	102,15,58,15,246,12
4079.byte	102,69,15,58,15,210,8
4080.byte	102,69,15,58,15,246,4
4081
4082	decq	%r10
4083	jnz	.Lseal_sse_128_rounds
4084	paddd	.Lchacha20_consts(%rip),%xmm0
4085	paddd	.Lchacha20_consts(%rip),%xmm1
4086	paddd	.Lchacha20_consts(%rip),%xmm2
4087	paddd	%xmm7,%xmm4
4088	paddd	%xmm7,%xmm5
4089	paddd	%xmm7,%xmm6
4090	paddd	%xmm11,%xmm8
4091	paddd	%xmm11,%xmm9
4092	paddd	%xmm15,%xmm12
4093	paddd	.Lsse_inc(%rip),%xmm15
4094	paddd	%xmm15,%xmm13
4095
4096	pand	.Lclamp(%rip),%xmm2
4097	movdqa	%xmm2,0+0(%rbp)
4098	movdqa	%xmm6,0+16(%rbp)
4099
4100	movq	%r8,%r8
4101	call	poly_hash_ad_internal
4102	jmp	.Lseal_sse_128_tail_xor
4103.size	chacha20_poly1305_seal, .-chacha20_poly1305_seal
4104.cfi_endproc
4105
4106
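// AVX2 'open' (decrypt) path: the same ChaCha20/Poly1305 interleaving as the
// SSE code, but with two blocks per %ymm register; the Poly1305 multiplies
// here use mulx.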
4107.type	chacha20_poly1305_open_avx2,@function
4108.align	64
4109chacha20_poly1305_open_avx2:
4110.cfi_startproc
4111
4112
4113.cfi_adjust_cfa_offset	8
4114.cfi_offset	%rbp,-16
4115.cfi_adjust_cfa_offset	8
4116.cfi_offset	%rbx,-24
4117.cfi_adjust_cfa_offset	8
4118.cfi_offset	%r12,-32
4119.cfi_adjust_cfa_offset	8
4120.cfi_offset	%r13,-40
4121.cfi_adjust_cfa_offset	8
4122.cfi_offset	%r14,-48
4123.cfi_adjust_cfa_offset	8
4124.cfi_offset	%r15,-56
4125.cfi_adjust_cfa_offset	8
4126.cfi_offset	%r9,-64
4127.cfi_adjust_cfa_offset	288 + 32
4128
4129	vzeroupper
4130	vmovdqa	.Lchacha20_consts(%rip),%ymm0
4131	vbroadcasti128	0(%r9),%ymm4
4132	vbroadcasti128	16(%r9),%ymm8
4133	vbroadcasti128	32(%r9),%ymm12
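// .Lavx2_init is immediately followed in .rodata by .Lsse_inc, so the 32-byte
// add below leaves the counter in the low lane unchanged and increments the
// counter in the high lane by one, giving counters n and n+1 in one register.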
4134	vpaddd	.Lavx2_init(%rip),%ymm12,%ymm12
4135	cmpq	$192,%rbx
4136	jbe	.Lopen_avx2_192
4137	cmpq	$320,%rbx
4138	jbe	.Lopen_avx2_320
4139
4140	vmovdqa	%ymm4,0+64(%rbp)
4141	vmovdqa	%ymm8,0+96(%rbp)
4142	vmovdqa	%ymm12,0+160(%rbp)
4143	movq	$10,%r10
4144.Lopen_avx2_init_rounds:
4145	vpaddd	%ymm4,%ymm0,%ymm0
4146	vpxor	%ymm0,%ymm12,%ymm12
4147	vpshufb	.Lrol16(%rip),%ymm12,%ymm12
4148	vpaddd	%ymm12,%ymm8,%ymm8
4149	vpxor	%ymm8,%ymm4,%ymm4
4150	vpsrld	$20,%ymm4,%ymm3
4151	vpslld	$12,%ymm4,%ymm4
4152	vpxor	%ymm3,%ymm4,%ymm4
4153	vpaddd	%ymm4,%ymm0,%ymm0
4154	vpxor	%ymm0,%ymm12,%ymm12
4155	vpshufb	.Lrol8(%rip),%ymm12,%ymm12
4156	vpaddd	%ymm12,%ymm8,%ymm8
4157	vpxor	%ymm8,%ymm4,%ymm4
4158	vpslld	$7,%ymm4,%ymm3
4159	vpsrld	$25,%ymm4,%ymm4
4160	vpxor	%ymm3,%ymm4,%ymm4
4161	vpalignr	$12,%ymm12,%ymm12,%ymm12
4162	vpalignr	$8,%ymm8,%ymm8,%ymm8
4163	vpalignr	$4,%ymm4,%ymm4,%ymm4
4164	vpaddd	%ymm4,%ymm0,%ymm0
4165	vpxor	%ymm0,%ymm12,%ymm12
4166	vpshufb	.Lrol16(%rip),%ymm12,%ymm12
4167	vpaddd	%ymm12,%ymm8,%ymm8
4168	vpxor	%ymm8,%ymm4,%ymm4
4169	vpsrld	$20,%ymm4,%ymm3
4170	vpslld	$12,%ymm4,%ymm4
4171	vpxor	%ymm3,%ymm4,%ymm4
4172	vpaddd	%ymm4,%ymm0,%ymm0
4173	vpxor	%ymm0,%ymm12,%ymm12
4174	vpshufb	.Lrol8(%rip),%ymm12,%ymm12
4175	vpaddd	%ymm12,%ymm8,%ymm8
4176	vpxor	%ymm8,%ymm4,%ymm4
4177	vpslld	$7,%ymm4,%ymm3
4178	vpsrld	$25,%ymm4,%ymm4
4179	vpxor	%ymm3,%ymm4,%ymm4
4180	vpalignr	$4,%ymm12,%ymm12,%ymm12
4181	vpalignr	$8,%ymm8,%ymm8,%ymm8
4182	vpalignr	$12,%ymm4,%ymm4,%ymm4
4183
4184	decq	%r10
4185	jne	.Lopen_avx2_init_rounds
4186	vpaddd	.Lchacha20_consts(%rip),%ymm0,%ymm0
4187	vpaddd	0+64(%rbp),%ymm4,%ymm4
4188	vpaddd	0+96(%rbp),%ymm8,%ymm8
4189	vpaddd	0+160(%rbp),%ymm12,%ymm12
4190
4191	vperm2i128	$0x02,%ymm0,%ymm4,%ymm3
4192
4193	vpand	.Lclamp(%rip),%ymm3,%ymm3
4194	vmovdqa	%ymm3,0+0(%rbp)
4195
4196	vperm2i128	$0x13,%ymm0,%ymm4,%ymm0
4197	vperm2i128	$0x13,%ymm8,%ymm12,%ymm4
4198
4199	movq	%r8,%r8
4200	call	poly_hash_ad_internal
4201
4202	xorq	%rcx,%rcx
4203.Lopen_avx2_init_hash:
4204	addq	0+0(%rsi,%rcx,1),%r10
4205	adcq	8+0(%rsi,%rcx,1),%r11
4206	adcq	$1,%r12
4207	movq	0+0+0(%rbp),%rax
4208	movq	%rax,%r15
4209	mulq	%r10
4210	movq	%rax,%r13
4211	movq	%rdx,%r14
4212	movq	0+0+0(%rbp),%rax
4213	mulq	%r11
4214	imulq	%r12,%r15
4215	addq	%rax,%r14
4216	adcq	%rdx,%r15
4217	movq	8+0+0(%rbp),%rax
4218	movq	%rax,%r9
4219	mulq	%r10
4220	addq	%rax,%r14
4221	adcq	$0,%rdx
4222	movq	%rdx,%r10
4223	movq	8+0+0(%rbp),%rax
4224	mulq	%r11
4225	addq	%rax,%r15
4226	adcq	$0,%rdx
4227	imulq	%r12,%r9
4228	addq	%r10,%r15
4229	adcq	%rdx,%r9
4230	movq	%r13,%r10
4231	movq	%r14,%r11
4232	movq	%r15,%r12
4233	andq	$3,%r12
4234	movq	%r15,%r13
4235	andq	$-4,%r13
4236	movq	%r9,%r14
4237	shrdq	$2,%r9,%r15
4238	shrq	$2,%r9
4239	addq	%r13,%r15
4240	adcq	%r14,%r9
4241	addq	%r15,%r10
4242	adcq	%r9,%r11
4243	adcq	$0,%r12
4244
4245	addq	$16,%rcx
4246	cmpq	$64,%rcx
4247	jne	.Lopen_avx2_init_hash
4248
4249	vpxor	0(%rsi),%ymm0,%ymm0
4250	vpxor	32(%rsi),%ymm4,%ymm4
4251
4252	vmovdqu	%ymm0,0(%rdi)
4253	vmovdqu	%ymm4,32(%rdi)
4254	leaq	64(%rsi),%rsi
4255	leaq	64(%rdi),%rdi
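// Main AVX2 open loop: 512 bytes (eight ChaCha20 blocks) per iteration.  Each
// pass of the rounds loop below performs one double round and hashes 48 bytes
// of ciphertext, and the finished blocks are re-interleaved with vperm2i128
// before the XOR.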
4256	subq	$64,%rbx
4257.Lopen_avx2_main_loop:
4258
4259	cmpq	$512,%rbx
4260	jb	.Lopen_avx2_main_loop_done
4261	vmovdqa	.Lchacha20_consts(%rip),%ymm0
4262	vmovdqa	0+64(%rbp),%ymm4
4263	vmovdqa	0+96(%rbp),%ymm8
4264	vmovdqa	%ymm0,%ymm1
4265	vmovdqa	%ymm4,%ymm5
4266	vmovdqa	%ymm8,%ymm9
4267	vmovdqa	%ymm0,%ymm2
4268	vmovdqa	%ymm4,%ymm6
4269	vmovdqa	%ymm8,%ymm10
4270	vmovdqa	%ymm0,%ymm3
4271	vmovdqa	%ymm4,%ymm7
4272	vmovdqa	%ymm8,%ymm11
4273	vmovdqa	.Lavx2_inc(%rip),%ymm12
4274	vpaddd	0+160(%rbp),%ymm12,%ymm15
4275	vpaddd	%ymm15,%ymm12,%ymm14
4276	vpaddd	%ymm14,%ymm12,%ymm13
4277	vpaddd	%ymm13,%ymm12,%ymm12
4278	vmovdqa	%ymm15,0+256(%rbp)
4279	vmovdqa	%ymm14,0+224(%rbp)
4280	vmovdqa	%ymm13,0+192(%rbp)
4281	vmovdqa	%ymm12,0+160(%rbp)
4282
4283	xorq	%rcx,%rcx
4284.Lopen_avx2_main_loop_rounds:
4285	addq	0+0(%rsi,%rcx,1),%r10
4286	adcq	8+0(%rsi,%rcx,1),%r11
4287	adcq	$1,%r12
4288	vmovdqa	%ymm8,0+128(%rbp)
4289	vmovdqa	.Lrol16(%rip),%ymm8
4290	vpaddd	%ymm7,%ymm3,%ymm3
4291	vpaddd	%ymm6,%ymm2,%ymm2
4292	vpaddd	%ymm5,%ymm1,%ymm1
4293	vpaddd	%ymm4,%ymm0,%ymm0
4294	vpxor	%ymm3,%ymm15,%ymm15
4295	vpxor	%ymm2,%ymm14,%ymm14
4296	vpxor	%ymm1,%ymm13,%ymm13
4297	vpxor	%ymm0,%ymm12,%ymm12
4298	movq	0+0+0(%rbp),%rdx
4299	movq	%rdx,%r15
4300	mulxq	%r10,%r13,%r14
4301	mulxq	%r11,%rax,%rdx
4302	imulq	%r12,%r15
4303	addq	%rax,%r14
4304	adcq	%rdx,%r15
4305	vpshufb	%ymm8,%ymm15,%ymm15
4306	vpshufb	%ymm8,%ymm14,%ymm14
4307	vpshufb	%ymm8,%ymm13,%ymm13
4308	vpshufb	%ymm8,%ymm12,%ymm12
4309	vpaddd	%ymm15,%ymm11,%ymm11
4310	vpaddd	%ymm14,%ymm10,%ymm10
4311	vpaddd	%ymm13,%ymm9,%ymm9
4312	vpaddd	0+128(%rbp),%ymm12,%ymm8
4313	vpxor	%ymm11,%ymm7,%ymm7
4314	movq	8+0+0(%rbp),%rdx
4315	mulxq	%r10,%r10,%rax
4316	addq	%r10,%r14
4317	mulxq	%r11,%r11,%r9
4318	adcq	%r11,%r15
4319	adcq	$0,%r9
4320	imulq	%r12,%rdx
4321	vpxor	%ymm10,%ymm6,%ymm6
4322	vpxor	%ymm9,%ymm5,%ymm5
4323	vpxor	%ymm8,%ymm4,%ymm4
4324	vmovdqa	%ymm8,0+128(%rbp)
4325	vpsrld	$20,%ymm7,%ymm8
4326	vpslld	$32-20,%ymm7,%ymm7
4327	vpxor	%ymm8,%ymm7,%ymm7
4328	vpsrld	$20,%ymm6,%ymm8
4329	vpslld	$32-20,%ymm6,%ymm6
4330	vpxor	%ymm8,%ymm6,%ymm6
4331	vpsrld	$20,%ymm5,%ymm8
4332	vpslld	$32-20,%ymm5,%ymm5
4333	addq	%rax,%r15
4334	adcq	%rdx,%r9
4335	vpxor	%ymm8,%ymm5,%ymm5
4336	vpsrld	$20,%ymm4,%ymm8
4337	vpslld	$32-20,%ymm4,%ymm4
4338	vpxor	%ymm8,%ymm4,%ymm4
4339	vmovdqa	.Lrol8(%rip),%ymm8
4340	vpaddd	%ymm7,%ymm3,%ymm3
4341	vpaddd	%ymm6,%ymm2,%ymm2
4342	vpaddd	%ymm5,%ymm1,%ymm1
4343	vpaddd	%ymm4,%ymm0,%ymm0
4344	vpxor	%ymm3,%ymm15,%ymm15
4345	movq	%r13,%r10
4346	movq	%r14,%r11
4347	movq	%r15,%r12
4348	andq	$3,%r12
4349	movq	%r15,%r13
4350	andq	$-4,%r13
4351	movq	%r9,%r14
4352	shrdq	$2,%r9,%r15
4353	shrq	$2,%r9
4354	addq	%r13,%r15
4355	adcq	%r14,%r9
4356	addq	%r15,%r10
4357	adcq	%r9,%r11
4358	adcq	$0,%r12
4359	vpxor	%ymm2,%ymm14,%ymm14
4360	vpxor	%ymm1,%ymm13,%ymm13
4361	vpxor	%ymm0,%ymm12,%ymm12
4362	vpshufb	%ymm8,%ymm15,%ymm15
4363	vpshufb	%ymm8,%ymm14,%ymm14
4364	vpshufb	%ymm8,%ymm13,%ymm13
4365	vpshufb	%ymm8,%ymm12,%ymm12
4366	vpaddd	%ymm15,%ymm11,%ymm11
4367	vpaddd	%ymm14,%ymm10,%ymm10
4368	addq	0+16(%rsi,%rcx,1),%r10
4369	adcq	8+16(%rsi,%rcx,1),%r11
4370	adcq	$1,%r12
4371	vpaddd	%ymm13,%ymm9,%ymm9
4372	vpaddd	0+128(%rbp),%ymm12,%ymm8
4373	vpxor	%ymm11,%ymm7,%ymm7
4374	vpxor	%ymm10,%ymm6,%ymm6
4375	vpxor	%ymm9,%ymm5,%ymm5
4376	vpxor	%ymm8,%ymm4,%ymm4
4377	vmovdqa	%ymm8,0+128(%rbp)
4378	vpsrld	$25,%ymm7,%ymm8
4379	movq	0+0+0(%rbp),%rdx
4380	movq	%rdx,%r15
4381	mulxq	%r10,%r13,%r14
4382	mulxq	%r11,%rax,%rdx
4383	imulq	%r12,%r15
4384	addq	%rax,%r14
4385	adcq	%rdx,%r15
4386	vpslld	$32-25,%ymm7,%ymm7
4387	vpxor	%ymm8,%ymm7,%ymm7
4388	vpsrld	$25,%ymm6,%ymm8
4389	vpslld	$32-25,%ymm6,%ymm6
4390	vpxor	%ymm8,%ymm6,%ymm6
4391	vpsrld	$25,%ymm5,%ymm8
4392	vpslld	$32-25,%ymm5,%ymm5
4393	vpxor	%ymm8,%ymm5,%ymm5
4394	vpsrld	$25,%ymm4,%ymm8
4395	vpslld	$32-25,%ymm4,%ymm4
4396	vpxor	%ymm8,%ymm4,%ymm4
4397	vmovdqa	0+128(%rbp),%ymm8
4398	vpalignr	$4,%ymm7,%ymm7,%ymm7
4399	vpalignr	$8,%ymm11,%ymm11,%ymm11
4400	vpalignr	$12,%ymm15,%ymm15,%ymm15
4401	vpalignr	$4,%ymm6,%ymm6,%ymm6
4402	vpalignr	$8,%ymm10,%ymm10,%ymm10
4403	vpalignr	$12,%ymm14,%ymm14,%ymm14
4404	movq	8+0+0(%rbp),%rdx
4405	mulxq	%r10,%r10,%rax
4406	addq	%r10,%r14
4407	mulxq	%r11,%r11,%r9
4408	adcq	%r11,%r15
4409	adcq	$0,%r9
4410	imulq	%r12,%rdx
4411	vpalignr	$4,%ymm5,%ymm5,%ymm5
4412	vpalignr	$8,%ymm9,%ymm9,%ymm9
4413	vpalignr	$12,%ymm13,%ymm13,%ymm13
4414	vpalignr	$4,%ymm4,%ymm4,%ymm4
4415	vpalignr	$8,%ymm8,%ymm8,%ymm8
4416	vpalignr	$12,%ymm12,%ymm12,%ymm12
4417	vmovdqa	%ymm8,0+128(%rbp)
4418	vmovdqa	.Lrol16(%rip),%ymm8
4419	vpaddd	%ymm7,%ymm3,%ymm3
4420	vpaddd	%ymm6,%ymm2,%ymm2
4421	vpaddd	%ymm5,%ymm1,%ymm1
4422	vpaddd	%ymm4,%ymm0,%ymm0
4423	vpxor	%ymm3,%ymm15,%ymm15
4424	vpxor	%ymm2,%ymm14,%ymm14
4425	vpxor	%ymm1,%ymm13,%ymm13
4426	vpxor	%ymm0,%ymm12,%ymm12
4427	vpshufb	%ymm8,%ymm15,%ymm15
4428	vpshufb	%ymm8,%ymm14,%ymm14
4429	addq	%rax,%r15
4430	adcq	%rdx,%r9
4431	vpshufb	%ymm8,%ymm13,%ymm13
4432	vpshufb	%ymm8,%ymm12,%ymm12
4433	vpaddd	%ymm15,%ymm11,%ymm11
4434	vpaddd	%ymm14,%ymm10,%ymm10
4435	vpaddd	%ymm13,%ymm9,%ymm9
4436	vpaddd	0+128(%rbp),%ymm12,%ymm8
4437	vpxor	%ymm11,%ymm7,%ymm7
4438	vpxor	%ymm10,%ymm6,%ymm6
4439	vpxor	%ymm9,%ymm5,%ymm5
4440	movq	%r13,%r10
4441	movq	%r14,%r11
4442	movq	%r15,%r12
4443	andq	$3,%r12
4444	movq	%r15,%r13
4445	andq	$-4,%r13
4446	movq	%r9,%r14
4447	shrdq	$2,%r9,%r15
4448	shrq	$2,%r9
4449	addq	%r13,%r15
4450	adcq	%r14,%r9
4451	addq	%r15,%r10
4452	adcq	%r9,%r11
4453	adcq	$0,%r12
4454	vpxor	%ymm8,%ymm4,%ymm4
4455	vmovdqa	%ymm8,0+128(%rbp)
4456	vpsrld	$20,%ymm7,%ymm8
4457	vpslld	$32-20,%ymm7,%ymm7
4458	vpxor	%ymm8,%ymm7,%ymm7
4459	vpsrld	$20,%ymm6,%ymm8
4460	vpslld	$32-20,%ymm6,%ymm6
4461	vpxor	%ymm8,%ymm6,%ymm6
4462	addq	0+32(%rsi,%rcx,1),%r10
4463	adcq	8+32(%rsi,%rcx,1),%r11
4464	adcq	$1,%r12
4465
4466	leaq	48(%rcx),%rcx
4467	vpsrld	$20,%ymm5,%ymm8
4468	vpslld	$32-20,%ymm5,%ymm5
4469	vpxor	%ymm8,%ymm5,%ymm5
4470	vpsrld	$20,%ymm4,%ymm8
4471	vpslld	$32-20,%ymm4,%ymm4
4472	vpxor	%ymm8,%ymm4,%ymm4
4473	vmovdqa	.Lrol8(%rip),%ymm8
4474	vpaddd	%ymm7,%ymm3,%ymm3
4475	vpaddd	%ymm6,%ymm2,%ymm2
4476	vpaddd	%ymm5,%ymm1,%ymm1
4477	vpaddd	%ymm4,%ymm0,%ymm0
4478	vpxor	%ymm3,%ymm15,%ymm15
4479	vpxor	%ymm2,%ymm14,%ymm14
4480	vpxor	%ymm1,%ymm13,%ymm13
4481	vpxor	%ymm0,%ymm12,%ymm12
4482	vpshufb	%ymm8,%ymm15,%ymm15
4483	vpshufb	%ymm8,%ymm14,%ymm14
4484	vpshufb	%ymm8,%ymm13,%ymm13
4485	movq	0+0+0(%rbp),%rdx
4486	movq	%rdx,%r15
4487	mulxq	%r10,%r13,%r14
4488	mulxq	%r11,%rax,%rdx
4489	imulq	%r12,%r15
4490	addq	%rax,%r14
4491	adcq	%rdx,%r15
4492	vpshufb	%ymm8,%ymm12,%ymm12
4493	vpaddd	%ymm15,%ymm11,%ymm11
4494	vpaddd	%ymm14,%ymm10,%ymm10
4495	vpaddd	%ymm13,%ymm9,%ymm9
4496	vpaddd	0+128(%rbp),%ymm12,%ymm8
4497	vpxor	%ymm11,%ymm7,%ymm7
4498	vpxor	%ymm10,%ymm6,%ymm6
4499	vpxor	%ymm9,%ymm5,%ymm5
4500	movq	8+0+0(%rbp),%rdx
4501	mulxq	%r10,%r10,%rax
4502	addq	%r10,%r14
4503	mulxq	%r11,%r11,%r9
4504	adcq	%r11,%r15
4505	adcq	$0,%r9
4506	imulq	%r12,%rdx
4507	vpxor	%ymm8,%ymm4,%ymm4
4508	vmovdqa	%ymm8,0+128(%rbp)
4509	vpsrld	$25,%ymm7,%ymm8
4510	vpslld	$32-25,%ymm7,%ymm7
4511	vpxor	%ymm8,%ymm7,%ymm7
4512	vpsrld	$25,%ymm6,%ymm8
4513	vpslld	$32-25,%ymm6,%ymm6
4514	vpxor	%ymm8,%ymm6,%ymm6
4515	addq	%rax,%r15
4516	adcq	%rdx,%r9
4517	vpsrld	$25,%ymm5,%ymm8
4518	vpslld	$32-25,%ymm5,%ymm5
4519	vpxor	%ymm8,%ymm5,%ymm5
4520	vpsrld	$25,%ymm4,%ymm8
4521	vpslld	$32-25,%ymm4,%ymm4
4522	vpxor	%ymm8,%ymm4,%ymm4
4523	vmovdqa	0+128(%rbp),%ymm8
4524	vpalignr	$12,%ymm7,%ymm7,%ymm7
4525	vpalignr	$8,%ymm11,%ymm11,%ymm11
4526	vpalignr	$4,%ymm15,%ymm15,%ymm15
4527	vpalignr	$12,%ymm6,%ymm6,%ymm6
4528	vpalignr	$8,%ymm10,%ymm10,%ymm10
4529	vpalignr	$4,%ymm14,%ymm14,%ymm14
4530	vpalignr	$12,%ymm5,%ymm5,%ymm5
4531	vpalignr	$8,%ymm9,%ymm9,%ymm9
4532	vpalignr	$4,%ymm13,%ymm13,%ymm13
4533	vpalignr	$12,%ymm4,%ymm4,%ymm4
4534	vpalignr	$8,%ymm8,%ymm8,%ymm8
4535	movq	%r13,%r10
4536	movq	%r14,%r11
4537	movq	%r15,%r12
4538	andq	$3,%r12
4539	movq	%r15,%r13
4540	andq	$-4,%r13
4541	movq	%r9,%r14
4542	shrdq	$2,%r9,%r15
4543	shrq	$2,%r9
4544	addq	%r13,%r15
4545	adcq	%r14,%r9
4546	addq	%r15,%r10
4547	adcq	%r9,%r11
4548	adcq	$0,%r12
4549	vpalignr	$4,%ymm12,%ymm12,%ymm12
4550
4551	cmpq	$60*8,%rcx
4552	jne	.Lopen_avx2_main_loop_rounds
4553	vpaddd	.Lchacha20_consts(%rip),%ymm3,%ymm3
4554	vpaddd	0+64(%rbp),%ymm7,%ymm7
4555	vpaddd	0+96(%rbp),%ymm11,%ymm11
4556	vpaddd	0+256(%rbp),%ymm15,%ymm15
4557	vpaddd	.Lchacha20_consts(%rip),%ymm2,%ymm2
4558	vpaddd	0+64(%rbp),%ymm6,%ymm6
4559	vpaddd	0+96(%rbp),%ymm10,%ymm10
4560	vpaddd	0+224(%rbp),%ymm14,%ymm14
4561	vpaddd	.Lchacha20_consts(%rip),%ymm1,%ymm1
4562	vpaddd	0+64(%rbp),%ymm5,%ymm5
4563	vpaddd	0+96(%rbp),%ymm9,%ymm9
4564	vpaddd	0+192(%rbp),%ymm13,%ymm13
4565	vpaddd	.Lchacha20_consts(%rip),%ymm0,%ymm0
4566	vpaddd	0+64(%rbp),%ymm4,%ymm4
4567	vpaddd	0+96(%rbp),%ymm8,%ymm8
4568	vpaddd	0+160(%rbp),%ymm12,%ymm12
4569
4570	vmovdqa	%ymm0,0+128(%rbp)
4571	addq	0+60*8(%rsi),%r10
4572	adcq	8+60*8(%rsi),%r11
4573	adcq	$1,%r12
4574	vperm2i128	$0x02,%ymm3,%ymm7,%ymm0
4575	vperm2i128	$0x13,%ymm3,%ymm7,%ymm7
4576	vperm2i128	$0x02,%ymm11,%ymm15,%ymm3
4577	vperm2i128	$0x13,%ymm11,%ymm15,%ymm11
4578	vpxor	0+0(%rsi),%ymm0,%ymm0
4579	vpxor	32+0(%rsi),%ymm3,%ymm3
4580	vpxor	64+0(%rsi),%ymm7,%ymm7
4581	vpxor	96+0(%rsi),%ymm11,%ymm11
4582	vmovdqu	%ymm0,0+0(%rdi)
4583	vmovdqu	%ymm3,32+0(%rdi)
4584	vmovdqu	%ymm7,64+0(%rdi)
4585	vmovdqu	%ymm11,96+0(%rdi)
4586
4587	vmovdqa	0+128(%rbp),%ymm0
4588	movq	0+0+0(%rbp),%rax
4589	movq	%rax,%r15
4590	mulq	%r10
4591	movq	%rax,%r13
4592	movq	%rdx,%r14
4593	movq	0+0+0(%rbp),%rax
4594	mulq	%r11
4595	imulq	%r12,%r15
4596	addq	%rax,%r14
4597	adcq	%rdx,%r15
4598	movq	8+0+0(%rbp),%rax
4599	movq	%rax,%r9
4600	mulq	%r10
4601	addq	%rax,%r14
4602	adcq	$0,%rdx
4603	movq	%rdx,%r10
4604	movq	8+0+0(%rbp),%rax
4605	mulq	%r11
4606	addq	%rax,%r15
4607	adcq	$0,%rdx
4608	imulq	%r12,%r9
4609	addq	%r10,%r15
4610	adcq	%rdx,%r9
4611	movq	%r13,%r10
4612	movq	%r14,%r11
4613	movq	%r15,%r12
4614	andq	$3,%r12
4615	movq	%r15,%r13
4616	andq	$-4,%r13
4617	movq	%r9,%r14
4618	shrdq	$2,%r9,%r15
4619	shrq	$2,%r9
4620	addq	%r13,%r15
4621	adcq	%r14,%r9
4622	addq	%r15,%r10
4623	adcq	%r9,%r11
4624	adcq	$0,%r12
4625	vperm2i128	$0x02,%ymm2,%ymm6,%ymm3
4626	vperm2i128	$0x13,%ymm2,%ymm6,%ymm6
4627	vperm2i128	$0x02,%ymm10,%ymm14,%ymm2
4628	vperm2i128	$0x13,%ymm10,%ymm14,%ymm10
4629	vpxor	0+128(%rsi),%ymm3,%ymm3
4630	vpxor	32+128(%rsi),%ymm2,%ymm2
4631	vpxor	64+128(%rsi),%ymm6,%ymm6
4632	vpxor	96+128(%rsi),%ymm10,%ymm10
4633	vmovdqu	%ymm3,0+128(%rdi)
4634	vmovdqu	%ymm2,32+128(%rdi)
4635	vmovdqu	%ymm6,64+128(%rdi)
4636	vmovdqu	%ymm10,96+128(%rdi)
4637	addq	0+60*8+16(%rsi),%r10
4638	adcq	8+60*8+16(%rsi),%r11
4639	adcq	$1,%r12
4640	vperm2i128	$0x02,%ymm1,%ymm5,%ymm3
4641	vperm2i128	$0x13,%ymm1,%ymm5,%ymm5
4642	vperm2i128	$0x02,%ymm9,%ymm13,%ymm1
4643	vperm2i128	$0x13,%ymm9,%ymm13,%ymm9
4644	vpxor	0+256(%rsi),%ymm3,%ymm3
4645	vpxor	32+256(%rsi),%ymm1,%ymm1
4646	vpxor	64+256(%rsi),%ymm5,%ymm5
4647	vpxor	96+256(%rsi),%ymm9,%ymm9
4648	vmovdqu	%ymm3,0+256(%rdi)
4649	vmovdqu	%ymm1,32+256(%rdi)
4650	vmovdqu	%ymm5,64+256(%rdi)
4651	vmovdqu	%ymm9,96+256(%rdi)
4652	movq	0+0+0(%rbp),%rax
4653	movq	%rax,%r15
4654	mulq	%r10
4655	movq	%rax,%r13
4656	movq	%rdx,%r14
4657	movq	0+0+0(%rbp),%rax
4658	mulq	%r11
4659	imulq	%r12,%r15
4660	addq	%rax,%r14
4661	adcq	%rdx,%r15
4662	movq	8+0+0(%rbp),%rax
4663	movq	%rax,%r9
4664	mulq	%r10
4665	addq	%rax,%r14
4666	adcq	$0,%rdx
4667	movq	%rdx,%r10
4668	movq	8+0+0(%rbp),%rax
4669	mulq	%r11
4670	addq	%rax,%r15
4671	adcq	$0,%rdx
4672	imulq	%r12,%r9
4673	addq	%r10,%r15
4674	adcq	%rdx,%r9
4675	movq	%r13,%r10
4676	movq	%r14,%r11
4677	movq	%r15,%r12
4678	andq	$3,%r12
4679	movq	%r15,%r13
4680	andq	$-4,%r13
4681	movq	%r9,%r14
4682	shrdq	$2,%r9,%r15
4683	shrq	$2,%r9
4684	addq	%r13,%r15
4685	adcq	%r14,%r9
4686	addq	%r15,%r10
4687	adcq	%r9,%r11
4688	adcq	$0,%r12
4689	vperm2i128	$0x02,%ymm0,%ymm4,%ymm3
4690	vperm2i128	$0x13,%ymm0,%ymm4,%ymm4
4691	vperm2i128	$0x02,%ymm8,%ymm12,%ymm0
4692	vperm2i128	$0x13,%ymm8,%ymm12,%ymm8
4693	vpxor	0+384(%rsi),%ymm3,%ymm3
4694	vpxor	32+384(%rsi),%ymm0,%ymm0
4695	vpxor	64+384(%rsi),%ymm4,%ymm4
4696	vpxor	96+384(%rsi),%ymm8,%ymm8
4697	vmovdqu	%ymm3,0+384(%rdi)
4698	vmovdqu	%ymm0,32+384(%rdi)
4699	vmovdqu	%ymm4,64+384(%rdi)
4700	vmovdqu	%ymm8,96+384(%rdi)
4701
4702	leaq	512(%rsi),%rsi
4703	leaq	512(%rdi),%rdi
4704	subq	$512,%rbx
4705	jmp	.Lopen_avx2_main_loop
4706.Lopen_avx2_main_loop_done:
4707	testq	%rbx,%rbx
4708	vzeroupper
4709	je	.Lopen_sse_finalize
4710
4711	cmpq	$384,%rbx
4712	ja	.Lopen_avx2_tail_512
4713	cmpq	$256,%rbx
4714	ja	.Lopen_avx2_tail_384
4715	cmpq	$128,%rbx
4716	ja	.Lopen_avx2_tail_256
4717	vmovdqa	.Lchacha20_consts(%rip),%ymm0
4718	vmovdqa	0+64(%rbp),%ymm4
4719	vmovdqa	0+96(%rbp),%ymm8
4720	vmovdqa	.Lavx2_inc(%rip),%ymm12
4721	vpaddd	0+160(%rbp),%ymm12,%ymm12
4722	vmovdqa	%ymm12,0+160(%rbp)
4723
4724	xorq	%r8,%r8
4725	movq	%rbx,%rcx
4726	andq	$-16,%rcx
4727	testq	%rcx,%rcx
4728	je	.Lopen_avx2_tail_128_rounds
4729.Lopen_avx2_tail_128_rounds_and_x1hash:
4730	addq	0+0(%rsi,%r8,1),%r10
4731	adcq	8+0(%rsi,%r8,1),%r11
4732	adcq	$1,%r12
4733	movq	0+0+0(%rbp),%rax
4734	movq	%rax,%r15
4735	mulq	%r10
4736	movq	%rax,%r13
4737	movq	%rdx,%r14
4738	movq	0+0+0(%rbp),%rax
4739	mulq	%r11
4740	imulq	%r12,%r15
4741	addq	%rax,%r14
4742	adcq	%rdx,%r15
4743	movq	8+0+0(%rbp),%rax
4744	movq	%rax,%r9
4745	mulq	%r10
4746	addq	%rax,%r14
4747	adcq	$0,%rdx
4748	movq	%rdx,%r10
4749	movq	8+0+0(%rbp),%rax
4750	mulq	%r11
4751	addq	%rax,%r15
4752	adcq	$0,%rdx
4753	imulq	%r12,%r9
4754	addq	%r10,%r15
4755	adcq	%rdx,%r9
4756	movq	%r13,%r10
4757	movq	%r14,%r11
4758	movq	%r15,%r12
4759	andq	$3,%r12
4760	movq	%r15,%r13
4761	andq	$-4,%r13
4762	movq	%r9,%r14
4763	shrdq	$2,%r9,%r15
4764	shrq	$2,%r9
4765	addq	%r13,%r15
4766	adcq	%r14,%r9
4767	addq	%r15,%r10
4768	adcq	%r9,%r11
4769	adcq	$0,%r12
4770
4771.Lopen_avx2_tail_128_rounds:
4772	addq	$16,%r8
4773	vpaddd	%ymm4,%ymm0,%ymm0
4774	vpxor	%ymm0,%ymm12,%ymm12
4775	vpshufb	.Lrol16(%rip),%ymm12,%ymm12
4776	vpaddd	%ymm12,%ymm8,%ymm8
4777	vpxor	%ymm8,%ymm4,%ymm4
4778	vpsrld	$20,%ymm4,%ymm3
4779	vpslld	$12,%ymm4,%ymm4
4780	vpxor	%ymm3,%ymm4,%ymm4
4781	vpaddd	%ymm4,%ymm0,%ymm0
4782	vpxor	%ymm0,%ymm12,%ymm12
4783	vpshufb	.Lrol8(%rip),%ymm12,%ymm12
4784	vpaddd	%ymm12,%ymm8,%ymm8
4785	vpxor	%ymm8,%ymm4,%ymm4
4786	vpslld	$7,%ymm4,%ymm3
4787	vpsrld	$25,%ymm4,%ymm4
4788	vpxor	%ymm3,%ymm4,%ymm4
4789	vpalignr	$12,%ymm12,%ymm12,%ymm12
4790	vpalignr	$8,%ymm8,%ymm8,%ymm8
4791	vpalignr	$4,%ymm4,%ymm4,%ymm4
4792	vpaddd	%ymm4,%ymm0,%ymm0
4793	vpxor	%ymm0,%ymm12,%ymm12
4794	vpshufb	.Lrol16(%rip),%ymm12,%ymm12
4795	vpaddd	%ymm12,%ymm8,%ymm8
4796	vpxor	%ymm8,%ymm4,%ymm4
4797	vpsrld	$20,%ymm4,%ymm3
4798	vpslld	$12,%ymm4,%ymm4
4799	vpxor	%ymm3,%ymm4,%ymm4
4800	vpaddd	%ymm4,%ymm0,%ymm0
4801	vpxor	%ymm0,%ymm12,%ymm12
4802	vpshufb	.Lrol8(%rip),%ymm12,%ymm12
4803	vpaddd	%ymm12,%ymm8,%ymm8
4804	vpxor	%ymm8,%ymm4,%ymm4
4805	vpslld	$7,%ymm4,%ymm3
4806	vpsrld	$25,%ymm4,%ymm4
4807	vpxor	%ymm3,%ymm4,%ymm4
4808	vpalignr	$4,%ymm12,%ymm12,%ymm12
4809	vpalignr	$8,%ymm8,%ymm8,%ymm8
4810	vpalignr	$12,%ymm4,%ymm4,%ymm4
4811
4812	cmpq	%rcx,%r8
4813	jb	.Lopen_avx2_tail_128_rounds_and_x1hash
4814	cmpq	$160,%r8
4815	jne	.Lopen_avx2_tail_128_rounds
4816	vpaddd	.Lchacha20_consts(%rip),%ymm0,%ymm0
4817	vpaddd	0+64(%rbp),%ymm4,%ymm4
4818	vpaddd	0+96(%rbp),%ymm8,%ymm8
4819	vpaddd	0+160(%rbp),%ymm12,%ymm12
4820	vperm2i128	$0x13,%ymm0,%ymm4,%ymm3
4821	vperm2i128	$0x02,%ymm0,%ymm4,%ymm0
4822	vperm2i128	$0x02,%ymm8,%ymm12,%ymm4
4823	vperm2i128	$0x13,%ymm8,%ymm12,%ymm12
4824	vmovdqa	%ymm3,%ymm8
4825
4826	jmp	.Lopen_avx2_tail_128_xor
4827
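// 129..256 bytes remain: two AVX2 ChaCha20 states. Up to ten Poly1305 blocks
// are hashed during the rounds; the rest are hashed in the fix-up loop below.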
4828.Lopen_avx2_tail_256:
4829	vmovdqa	.Lchacha20_consts(%rip),%ymm0
4830	vmovdqa	0+64(%rbp),%ymm4
4831	vmovdqa	0+96(%rbp),%ymm8
4832	vmovdqa	%ymm0,%ymm1
4833	vmovdqa	%ymm4,%ymm5
4834	vmovdqa	%ymm8,%ymm9
4835	vmovdqa	.Lavx2_inc(%rip),%ymm12
4836	vpaddd	0+160(%rbp),%ymm12,%ymm13
4837	vpaddd	%ymm13,%ymm12,%ymm12
4838	vmovdqa	%ymm12,0+160(%rbp)
4839	vmovdqa	%ymm13,0+192(%rbp)
4840
4841	movq	%rbx,0+128(%rbp)
4842	movq	%rbx,%rcx
4843	subq	$128,%rcx
4844	shrq	$4,%rcx
4845	movq	$10,%r8
4846	cmpq	$10,%rcx
4847	cmovgq	%r8,%rcx
4848	movq	%rsi,%rbx
4849	xorq	%r8,%r8
4850.Lopen_avx2_tail_256_rounds_and_x1hash:
4851	addq	0+0(%rbx),%r10
4852	adcq	8+0(%rbx),%r11
4853	adcq	$1,%r12
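// Same Poly1305 multiply-and-reduce as above, written with BMI2 mulxq.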
4854	movq	0+0+0(%rbp),%rdx
4855	movq	%rdx,%r15
4856	mulxq	%r10,%r13,%r14
4857	mulxq	%r11,%rax,%rdx
4858	imulq	%r12,%r15
4859	addq	%rax,%r14
4860	adcq	%rdx,%r15
4861	movq	8+0+0(%rbp),%rdx
4862	mulxq	%r10,%r10,%rax
4863	addq	%r10,%r14
4864	mulxq	%r11,%r11,%r9
4865	adcq	%r11,%r15
4866	adcq	$0,%r9
4867	imulq	%r12,%rdx
4868	addq	%rax,%r15
4869	adcq	%rdx,%r9
4870	movq	%r13,%r10
4871	movq	%r14,%r11
4872	movq	%r15,%r12
4873	andq	$3,%r12
4874	movq	%r15,%r13
4875	andq	$-4,%r13
4876	movq	%r9,%r14
4877	shrdq	$2,%r9,%r15
4878	shrq	$2,%r9
4879	addq	%r13,%r15
4880	adcq	%r14,%r9
4881	addq	%r15,%r10
4882	adcq	%r9,%r11
4883	adcq	$0,%r12
4884
4885	leaq	16(%rbx),%rbx
4886.Lopen_avx2_tail_256_rounds:
4887	vpaddd	%ymm4,%ymm0,%ymm0
4888	vpxor	%ymm0,%ymm12,%ymm12
4889	vpshufb	.Lrol16(%rip),%ymm12,%ymm12
4890	vpaddd	%ymm12,%ymm8,%ymm8
4891	vpxor	%ymm8,%ymm4,%ymm4
4892	vpsrld	$20,%ymm4,%ymm3
4893	vpslld	$12,%ymm4,%ymm4
4894	vpxor	%ymm3,%ymm4,%ymm4
4895	vpaddd	%ymm4,%ymm0,%ymm0
4896	vpxor	%ymm0,%ymm12,%ymm12
4897	vpshufb	.Lrol8(%rip),%ymm12,%ymm12
4898	vpaddd	%ymm12,%ymm8,%ymm8
4899	vpxor	%ymm8,%ymm4,%ymm4
4900	vpslld	$7,%ymm4,%ymm3
4901	vpsrld	$25,%ymm4,%ymm4
4902	vpxor	%ymm3,%ymm4,%ymm4
4903	vpalignr	$12,%ymm12,%ymm12,%ymm12
4904	vpalignr	$8,%ymm8,%ymm8,%ymm8
4905	vpalignr	$4,%ymm4,%ymm4,%ymm4
4906	vpaddd	%ymm5,%ymm1,%ymm1
4907	vpxor	%ymm1,%ymm13,%ymm13
4908	vpshufb	.Lrol16(%rip),%ymm13,%ymm13
4909	vpaddd	%ymm13,%ymm9,%ymm9
4910	vpxor	%ymm9,%ymm5,%ymm5
4911	vpsrld	$20,%ymm5,%ymm3
4912	vpslld	$12,%ymm5,%ymm5
4913	vpxor	%ymm3,%ymm5,%ymm5
4914	vpaddd	%ymm5,%ymm1,%ymm1
4915	vpxor	%ymm1,%ymm13,%ymm13
4916	vpshufb	.Lrol8(%rip),%ymm13,%ymm13
4917	vpaddd	%ymm13,%ymm9,%ymm9
4918	vpxor	%ymm9,%ymm5,%ymm5
4919	vpslld	$7,%ymm5,%ymm3
4920	vpsrld	$25,%ymm5,%ymm5
4921	vpxor	%ymm3,%ymm5,%ymm5
4922	vpalignr	$12,%ymm13,%ymm13,%ymm13
4923	vpalignr	$8,%ymm9,%ymm9,%ymm9
4924	vpalignr	$4,%ymm5,%ymm5,%ymm5
4925
4926	incq	%r8
4927	vpaddd	%ymm4,%ymm0,%ymm0
4928	vpxor	%ymm0,%ymm12,%ymm12
4929	vpshufb	.Lrol16(%rip),%ymm12,%ymm12
4930	vpaddd	%ymm12,%ymm8,%ymm8
4931	vpxor	%ymm8,%ymm4,%ymm4
4932	vpsrld	$20,%ymm4,%ymm3
4933	vpslld	$12,%ymm4,%ymm4
4934	vpxor	%ymm3,%ymm4,%ymm4
4935	vpaddd	%ymm4,%ymm0,%ymm0
4936	vpxor	%ymm0,%ymm12,%ymm12
4937	vpshufb	.Lrol8(%rip),%ymm12,%ymm12
4938	vpaddd	%ymm12,%ymm8,%ymm8
4939	vpxor	%ymm8,%ymm4,%ymm4
4940	vpslld	$7,%ymm4,%ymm3
4941	vpsrld	$25,%ymm4,%ymm4
4942	vpxor	%ymm3,%ymm4,%ymm4
4943	vpalignr	$4,%ymm12,%ymm12,%ymm12
4944	vpalignr	$8,%ymm8,%ymm8,%ymm8
4945	vpalignr	$12,%ymm4,%ymm4,%ymm4
4946	vpaddd	%ymm5,%ymm1,%ymm1
4947	vpxor	%ymm1,%ymm13,%ymm13
4948	vpshufb	.Lrol16(%rip),%ymm13,%ymm13
4949	vpaddd	%ymm13,%ymm9,%ymm9
4950	vpxor	%ymm9,%ymm5,%ymm5
4951	vpsrld	$20,%ymm5,%ymm3
4952	vpslld	$12,%ymm5,%ymm5
4953	vpxor	%ymm3,%ymm5,%ymm5
4954	vpaddd	%ymm5,%ymm1,%ymm1
4955	vpxor	%ymm1,%ymm13,%ymm13
4956	vpshufb	.Lrol8(%rip),%ymm13,%ymm13
4957	vpaddd	%ymm13,%ymm9,%ymm9
4958	vpxor	%ymm9,%ymm5,%ymm5
4959	vpslld	$7,%ymm5,%ymm3
4960	vpsrld	$25,%ymm5,%ymm5
4961	vpxor	%ymm3,%ymm5,%ymm5
4962	vpalignr	$4,%ymm13,%ymm13,%ymm13
4963	vpalignr	$8,%ymm9,%ymm9,%ymm9
4964	vpalignr	$12,%ymm5,%ymm5,%ymm5
4965	vpaddd	%ymm6,%ymm2,%ymm2
4966	vpxor	%ymm2,%ymm14,%ymm14
4967	vpshufb	.Lrol16(%rip),%ymm14,%ymm14
4968	vpaddd	%ymm14,%ymm10,%ymm10
4969	vpxor	%ymm10,%ymm6,%ymm6
4970	vpsrld	$20,%ymm6,%ymm3
4971	vpslld	$12,%ymm6,%ymm6
4972	vpxor	%ymm3,%ymm6,%ymm6
4973	vpaddd	%ymm6,%ymm2,%ymm2
4974	vpxor	%ymm2,%ymm14,%ymm14
4975	vpshufb	.Lrol8(%rip),%ymm14,%ymm14
4976	vpaddd	%ymm14,%ymm10,%ymm10
4977	vpxor	%ymm10,%ymm6,%ymm6
4978	vpslld	$7,%ymm6,%ymm3
4979	vpsrld	$25,%ymm6,%ymm6
4980	vpxor	%ymm3,%ymm6,%ymm6
4981	vpalignr	$4,%ymm14,%ymm14,%ymm14
4982	vpalignr	$8,%ymm10,%ymm10,%ymm10
4983	vpalignr	$12,%ymm6,%ymm6,%ymm6
4984
4985	cmpq	%rcx,%r8
4986	jb	.Lopen_avx2_tail_256_rounds_and_x1hash
4987	cmpq	$10,%r8
4988	jne	.Lopen_avx2_tail_256_rounds
4989	movq	%rbx,%r8
4990	subq	%rsi,%rbx
4991	movq	%rbx,%rcx
4992	movq	0+128(%rbp),%rbx
4993.Lopen_avx2_tail_256_hash:
4994	addq	$16,%rcx
4995	cmpq	%rbx,%rcx
4996	jg	.Lopen_avx2_tail_256_done
4997	addq	0+0(%r8),%r10
4998	adcq	8+0(%r8),%r11
4999	adcq	$1,%r12
5000	movq	0+0+0(%rbp),%rdx
5001	movq	%rdx,%r15
5002	mulxq	%r10,%r13,%r14
5003	mulxq	%r11,%rax,%rdx
5004	imulq	%r12,%r15
5005	addq	%rax,%r14
5006	adcq	%rdx,%r15
5007	movq	8+0+0(%rbp),%rdx
5008	mulxq	%r10,%r10,%rax
5009	addq	%r10,%r14
5010	mulxq	%r11,%r11,%r9
5011	adcq	%r11,%r15
5012	adcq	$0,%r9
5013	imulq	%r12,%rdx
5014	addq	%rax,%r15
5015	adcq	%rdx,%r9
5016	movq	%r13,%r10
5017	movq	%r14,%r11
5018	movq	%r15,%r12
5019	andq	$3,%r12
5020	movq	%r15,%r13
5021	andq	$-4,%r13
5022	movq	%r9,%r14
5023	shrdq	$2,%r9,%r15
5024	shrq	$2,%r9
5025	addq	%r13,%r15
5026	adcq	%r14,%r9
5027	addq	%r15,%r10
5028	adcq	%r9,%r11
5029	adcq	$0,%r12
5030
5031	leaq	16(%r8),%r8
5032	jmp	.Lopen_avx2_tail_256_hash
5033.Lopen_avx2_tail_256_done:
5034	vpaddd	.Lchacha20_consts(%rip),%ymm1,%ymm1
5035	vpaddd	0+64(%rbp),%ymm5,%ymm5
5036	vpaddd	0+96(%rbp),%ymm9,%ymm9
5037	vpaddd	0+192(%rbp),%ymm13,%ymm13
5038	vpaddd	.Lchacha20_consts(%rip),%ymm0,%ymm0
5039	vpaddd	0+64(%rbp),%ymm4,%ymm4
5040	vpaddd	0+96(%rbp),%ymm8,%ymm8
5041	vpaddd	0+160(%rbp),%ymm12,%ymm12
5042	vperm2i128	$0x02,%ymm1,%ymm5,%ymm3
5043	vperm2i128	$0x13,%ymm1,%ymm5,%ymm5
5044	vperm2i128	$0x02,%ymm9,%ymm13,%ymm1
5045	vperm2i128	$0x13,%ymm9,%ymm13,%ymm9
5046	vpxor	0+0(%rsi),%ymm3,%ymm3
5047	vpxor	32+0(%rsi),%ymm1,%ymm1
5048	vpxor	64+0(%rsi),%ymm5,%ymm5
5049	vpxor	96+0(%rsi),%ymm9,%ymm9
5050	vmovdqu	%ymm3,0+0(%rdi)
5051	vmovdqu	%ymm1,32+0(%rdi)
5052	vmovdqu	%ymm5,64+0(%rdi)
5053	vmovdqu	%ymm9,96+0(%rdi)
5054	vperm2i128	$0x13,%ymm0,%ymm4,%ymm3
5055	vperm2i128	$0x02,%ymm0,%ymm4,%ymm0
5056	vperm2i128	$0x02,%ymm8,%ymm12,%ymm4
5057	vperm2i128	$0x13,%ymm8,%ymm12,%ymm12
5058	vmovdqa	%ymm3,%ymm8
5059
5060	leaq	128(%rsi),%rsi
5061	leaq	128(%rdi),%rdi
5062	subq	$128,%rbx
5063	jmp	.Lopen_avx2_tail_128_xor
5064
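// 257..384 bytes remain: three AVX2 ChaCha20 states, hashing Poly1305 blocks
// during the rounds and finishing any remainder in the fix-up loop below.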
5065.Lopen_avx2_tail_384:
5066	vmovdqa	.Lchacha20_consts(%rip),%ymm0
5067	vmovdqa	0+64(%rbp),%ymm4
5068	vmovdqa	0+96(%rbp),%ymm8
5069	vmovdqa	%ymm0,%ymm1
5070	vmovdqa	%ymm4,%ymm5
5071	vmovdqa	%ymm8,%ymm9
5072	vmovdqa	%ymm0,%ymm2
5073	vmovdqa	%ymm4,%ymm6
5074	vmovdqa	%ymm8,%ymm10
5075	vmovdqa	.Lavx2_inc(%rip),%ymm12
5076	vpaddd	0+160(%rbp),%ymm12,%ymm14
5077	vpaddd	%ymm14,%ymm12,%ymm13
5078	vpaddd	%ymm13,%ymm12,%ymm12
5079	vmovdqa	%ymm12,0+160(%rbp)
5080	vmovdqa	%ymm13,0+192(%rbp)
5081	vmovdqa	%ymm14,0+224(%rbp)
5082
5083	movq	%rbx,0+128(%rbp)
5084	movq	%rbx,%rcx
5085	subq	$256,%rcx
5086	shrq	$4,%rcx
5087	addq	$6,%rcx
5088	movq	$10,%r8
5089	cmpq	$10,%rcx
5090	cmovgq	%r8,%rcx
5091	movq	%rsi,%rbx
5092	xorq	%r8,%r8
5093.Lopen_avx2_tail_384_rounds_and_x2hash:
5094	addq	0+0(%rbx),%r10
5095	adcq	8+0(%rbx),%r11
5096	adcq	$1,%r12
5097	movq	0+0+0(%rbp),%rdx
5098	movq	%rdx,%r15
5099	mulxq	%r10,%r13,%r14
5100	mulxq	%r11,%rax,%rdx
5101	imulq	%r12,%r15
5102	addq	%rax,%r14
5103	adcq	%rdx,%r15
5104	movq	8+0+0(%rbp),%rdx
5105	mulxq	%r10,%r10,%rax
5106	addq	%r10,%r14
5107	mulxq	%r11,%r11,%r9
5108	adcq	%r11,%r15
5109	adcq	$0,%r9
5110	imulq	%r12,%rdx
5111	addq	%rax,%r15
5112	adcq	%rdx,%r9
5113	movq	%r13,%r10
5114	movq	%r14,%r11
5115	movq	%r15,%r12
5116	andq	$3,%r12
5117	movq	%r15,%r13
5118	andq	$-4,%r13
5119	movq	%r9,%r14
5120	shrdq	$2,%r9,%r15
5121	shrq	$2,%r9
5122	addq	%r13,%r15
5123	adcq	%r14,%r9
5124	addq	%r15,%r10
5125	adcq	%r9,%r11
5126	adcq	$0,%r12
5127
5128	leaq	16(%rbx),%rbx
5129.Lopen_avx2_tail_384_rounds_and_x1hash:
5130	vpaddd	%ymm6,%ymm2,%ymm2
5131	vpxor	%ymm2,%ymm14,%ymm14
5132	vpshufb	.Lrol16(%rip),%ymm14,%ymm14
5133	vpaddd	%ymm14,%ymm10,%ymm10
5134	vpxor	%ymm10,%ymm6,%ymm6
5135	vpsrld	$20,%ymm6,%ymm3
5136	vpslld	$12,%ymm6,%ymm6
5137	vpxor	%ymm3,%ymm6,%ymm6
5138	vpaddd	%ymm6,%ymm2,%ymm2
5139	vpxor	%ymm2,%ymm14,%ymm14
5140	vpshufb	.Lrol8(%rip),%ymm14,%ymm14
5141	vpaddd	%ymm14,%ymm10,%ymm10
5142	vpxor	%ymm10,%ymm6,%ymm6
5143	vpslld	$7,%ymm6,%ymm3
5144	vpsrld	$25,%ymm6,%ymm6
5145	vpxor	%ymm3,%ymm6,%ymm6
5146	vpalignr	$12,%ymm14,%ymm14,%ymm14
5147	vpalignr	$8,%ymm10,%ymm10,%ymm10
5148	vpalignr	$4,%ymm6,%ymm6,%ymm6
5149	vpaddd	%ymm5,%ymm1,%ymm1
5150	vpxor	%ymm1,%ymm13,%ymm13
5151	vpshufb	.Lrol16(%rip),%ymm13,%ymm13
5152	vpaddd	%ymm13,%ymm9,%ymm9
5153	vpxor	%ymm9,%ymm5,%ymm5
5154	vpsrld	$20,%ymm5,%ymm3
5155	vpslld	$12,%ymm5,%ymm5
5156	vpxor	%ymm3,%ymm5,%ymm5
5157	vpaddd	%ymm5,%ymm1,%ymm1
5158	vpxor	%ymm1,%ymm13,%ymm13
5159	vpshufb	.Lrol8(%rip),%ymm13,%ymm13
5160	vpaddd	%ymm13,%ymm9,%ymm9
5161	vpxor	%ymm9,%ymm5,%ymm5
5162	vpslld	$7,%ymm5,%ymm3
5163	vpsrld	$25,%ymm5,%ymm5
5164	vpxor	%ymm3,%ymm5,%ymm5
5165	vpalignr	$12,%ymm13,%ymm13,%ymm13
5166	vpalignr	$8,%ymm9,%ymm9,%ymm9
5167	vpalignr	$4,%ymm5,%ymm5,%ymm5
5168	vpaddd	%ymm4,%ymm0,%ymm0
5169	vpxor	%ymm0,%ymm12,%ymm12
5170	vpshufb	.Lrol16(%rip),%ymm12,%ymm12
5171	vpaddd	%ymm12,%ymm8,%ymm8
5172	vpxor	%ymm8,%ymm4,%ymm4
5173	vpsrld	$20,%ymm4,%ymm3
5174	vpslld	$12,%ymm4,%ymm4
5175	vpxor	%ymm3,%ymm4,%ymm4
5176	vpaddd	%ymm4,%ymm0,%ymm0
5177	vpxor	%ymm0,%ymm12,%ymm12
5178	vpshufb	.Lrol8(%rip),%ymm12,%ymm12
5179	vpaddd	%ymm12,%ymm8,%ymm8
5180	vpxor	%ymm8,%ymm4,%ymm4
5181	vpslld	$7,%ymm4,%ymm3
5182	vpsrld	$25,%ymm4,%ymm4
5183	vpxor	%ymm3,%ymm4,%ymm4
5184	vpalignr	$12,%ymm12,%ymm12,%ymm12
5185	vpalignr	$8,%ymm8,%ymm8,%ymm8
5186	vpalignr	$4,%ymm4,%ymm4,%ymm4
5187	addq	0+0(%rbx),%r10
5188	adcq	8+0(%rbx),%r11
5189	adcq	$1,%r12
5190	movq	0+0+0(%rbp),%rax
5191	movq	%rax,%r15
5192	mulq	%r10
5193	movq	%rax,%r13
5194	movq	%rdx,%r14
5195	movq	0+0+0(%rbp),%rax
5196	mulq	%r11
5197	imulq	%r12,%r15
5198	addq	%rax,%r14
5199	adcq	%rdx,%r15
5200	movq	8+0+0(%rbp),%rax
5201	movq	%rax,%r9
5202	mulq	%r10
5203	addq	%rax,%r14
5204	adcq	$0,%rdx
5205	movq	%rdx,%r10
5206	movq	8+0+0(%rbp),%rax
5207	mulq	%r11
5208	addq	%rax,%r15
5209	adcq	$0,%rdx
5210	imulq	%r12,%r9
5211	addq	%r10,%r15
5212	adcq	%rdx,%r9
5213	movq	%r13,%r10
5214	movq	%r14,%r11
5215	movq	%r15,%r12
5216	andq	$3,%r12
5217	movq	%r15,%r13
5218	andq	$-4,%r13
5219	movq	%r9,%r14
5220	shrdq	$2,%r9,%r15
5221	shrq	$2,%r9
5222	addq	%r13,%r15
5223	adcq	%r14,%r9
5224	addq	%r15,%r10
5225	adcq	%r9,%r11
5226	adcq	$0,%r12
5227
5228	leaq	16(%rbx),%rbx
5229	incq	%r8
5230	vpaddd	%ymm6,%ymm2,%ymm2
5231	vpxor	%ymm2,%ymm14,%ymm14
5232	vpshufb	.Lrol16(%rip),%ymm14,%ymm14
5233	vpaddd	%ymm14,%ymm10,%ymm10
5234	vpxor	%ymm10,%ymm6,%ymm6
5235	vpsrld	$20,%ymm6,%ymm3
5236	vpslld	$12,%ymm6,%ymm6
5237	vpxor	%ymm3,%ymm6,%ymm6
5238	vpaddd	%ymm6,%ymm2,%ymm2
5239	vpxor	%ymm2,%ymm14,%ymm14
5240	vpshufb	.Lrol8(%rip),%ymm14,%ymm14
5241	vpaddd	%ymm14,%ymm10,%ymm10
5242	vpxor	%ymm10,%ymm6,%ymm6
5243	vpslld	$7,%ymm6,%ymm3
5244	vpsrld	$25,%ymm6,%ymm6
5245	vpxor	%ymm3,%ymm6,%ymm6
5246	vpalignr	$4,%ymm14,%ymm14,%ymm14
5247	vpalignr	$8,%ymm10,%ymm10,%ymm10
5248	vpalignr	$12,%ymm6,%ymm6,%ymm6
5249	vpaddd	%ymm5,%ymm1,%ymm1
5250	vpxor	%ymm1,%ymm13,%ymm13
5251	vpshufb	.Lrol16(%rip),%ymm13,%ymm13
5252	vpaddd	%ymm13,%ymm9,%ymm9
5253	vpxor	%ymm9,%ymm5,%ymm5
5254	vpsrld	$20,%ymm5,%ymm3
5255	vpslld	$12,%ymm5,%ymm5
5256	vpxor	%ymm3,%ymm5,%ymm5
5257	vpaddd	%ymm5,%ymm1,%ymm1
5258	vpxor	%ymm1,%ymm13,%ymm13
5259	vpshufb	.Lrol8(%rip),%ymm13,%ymm13
5260	vpaddd	%ymm13,%ymm9,%ymm9
5261	vpxor	%ymm9,%ymm5,%ymm5
5262	vpslld	$7,%ymm5,%ymm3
5263	vpsrld	$25,%ymm5,%ymm5
5264	vpxor	%ymm3,%ymm5,%ymm5
5265	vpalignr	$4,%ymm13,%ymm13,%ymm13
5266	vpalignr	$8,%ymm9,%ymm9,%ymm9
5267	vpalignr	$12,%ymm5,%ymm5,%ymm5
5268	vpaddd	%ymm4,%ymm0,%ymm0
5269	vpxor	%ymm0,%ymm12,%ymm12
5270	vpshufb	.Lrol16(%rip),%ymm12,%ymm12
5271	vpaddd	%ymm12,%ymm8,%ymm8
5272	vpxor	%ymm8,%ymm4,%ymm4
5273	vpsrld	$20,%ymm4,%ymm3
5274	vpslld	$12,%ymm4,%ymm4
5275	vpxor	%ymm3,%ymm4,%ymm4
5276	vpaddd	%ymm4,%ymm0,%ymm0
5277	vpxor	%ymm0,%ymm12,%ymm12
5278	vpshufb	.Lrol8(%rip),%ymm12,%ymm12
5279	vpaddd	%ymm12,%ymm8,%ymm8
5280	vpxor	%ymm8,%ymm4,%ymm4
5281	vpslld	$7,%ymm4,%ymm3
5282	vpsrld	$25,%ymm4,%ymm4
5283	vpxor	%ymm3,%ymm4,%ymm4
5284	vpalignr	$4,%ymm12,%ymm12,%ymm12
5285	vpalignr	$8,%ymm8,%ymm8,%ymm8
5286	vpalignr	$12,%ymm4,%ymm4,%ymm4
5287
5288	cmpq	%rcx,%r8
5289	jb	.Lopen_avx2_tail_384_rounds_and_x2hash
5290	cmpq	$10,%r8
5291	jne	.Lopen_avx2_tail_384_rounds_and_x1hash
5292	movq	%rbx,%r8
5293	subq	%rsi,%rbx
5294	movq	%rbx,%rcx
5295	movq	0+128(%rbp),%rbx
5296.Lopen_avx2_384_tail_hash:
5297	addq	$16,%rcx
5298	cmpq	%rbx,%rcx
5299	jg	.Lopen_avx2_384_tail_done
5300	addq	0+0(%r8),%r10
5301	adcq	8+0(%r8),%r11
5302	adcq	$1,%r12
5303	movq	0+0+0(%rbp),%rdx
5304	movq	%rdx,%r15
5305	mulxq	%r10,%r13,%r14
5306	mulxq	%r11,%rax,%rdx
5307	imulq	%r12,%r15
5308	addq	%rax,%r14
5309	adcq	%rdx,%r15
5310	movq	8+0+0(%rbp),%rdx
5311	mulxq	%r10,%r10,%rax
5312	addq	%r10,%r14
5313	mulxq	%r11,%r11,%r9
5314	adcq	%r11,%r15
5315	adcq	$0,%r9
5316	imulq	%r12,%rdx
5317	addq	%rax,%r15
5318	adcq	%rdx,%r9
5319	movq	%r13,%r10
5320	movq	%r14,%r11
5321	movq	%r15,%r12
5322	andq	$3,%r12
5323	movq	%r15,%r13
5324	andq	$-4,%r13
5325	movq	%r9,%r14
5326	shrdq	$2,%r9,%r15
5327	shrq	$2,%r9
5328	addq	%r13,%r15
5329	adcq	%r14,%r9
5330	addq	%r15,%r10
5331	adcq	%r9,%r11
5332	adcq	$0,%r12
5333
5334	leaq	16(%r8),%r8
5335	jmp	.Lopen_avx2_384_tail_hash
5336.Lopen_avx2_384_tail_done:
5337	vpaddd	.Lchacha20_consts(%rip),%ymm2,%ymm2
5338	vpaddd	0+64(%rbp),%ymm6,%ymm6
5339	vpaddd	0+96(%rbp),%ymm10,%ymm10
5340	vpaddd	0+224(%rbp),%ymm14,%ymm14
5341	vpaddd	.Lchacha20_consts(%rip),%ymm1,%ymm1
5342	vpaddd	0+64(%rbp),%ymm5,%ymm5
5343	vpaddd	0+96(%rbp),%ymm9,%ymm9
5344	vpaddd	0+192(%rbp),%ymm13,%ymm13
5345	vpaddd	.Lchacha20_consts(%rip),%ymm0,%ymm0
5346	vpaddd	0+64(%rbp),%ymm4,%ymm4
5347	vpaddd	0+96(%rbp),%ymm8,%ymm8
5348	vpaddd	0+160(%rbp),%ymm12,%ymm12
5349	vperm2i128	$0x02,%ymm2,%ymm6,%ymm3
5350	vperm2i128	$0x13,%ymm2,%ymm6,%ymm6
5351	vperm2i128	$0x02,%ymm10,%ymm14,%ymm2
5352	vperm2i128	$0x13,%ymm10,%ymm14,%ymm10
5353	vpxor	0+0(%rsi),%ymm3,%ymm3
5354	vpxor	32+0(%rsi),%ymm2,%ymm2
5355	vpxor	64+0(%rsi),%ymm6,%ymm6
5356	vpxor	96+0(%rsi),%ymm10,%ymm10
5357	vmovdqu	%ymm3,0+0(%rdi)
5358	vmovdqu	%ymm2,32+0(%rdi)
5359	vmovdqu	%ymm6,64+0(%rdi)
5360	vmovdqu	%ymm10,96+0(%rdi)
5361	vperm2i128	$0x02,%ymm1,%ymm5,%ymm3
5362	vperm2i128	$0x13,%ymm1,%ymm5,%ymm5
5363	vperm2i128	$0x02,%ymm9,%ymm13,%ymm1
5364	vperm2i128	$0x13,%ymm9,%ymm13,%ymm9
5365	vpxor	0+128(%rsi),%ymm3,%ymm3
5366	vpxor	32+128(%rsi),%ymm1,%ymm1
5367	vpxor	64+128(%rsi),%ymm5,%ymm5
5368	vpxor	96+128(%rsi),%ymm9,%ymm9
5369	vmovdqu	%ymm3,0+128(%rdi)
5370	vmovdqu	%ymm1,32+128(%rdi)
5371	vmovdqu	%ymm5,64+128(%rdi)
5372	vmovdqu	%ymm9,96+128(%rdi)
5373	vperm2i128	$0x13,%ymm0,%ymm4,%ymm3
5374	vperm2i128	$0x02,%ymm0,%ymm4,%ymm0
5375	vperm2i128	$0x02,%ymm8,%ymm12,%ymm4
5376	vperm2i128	$0x13,%ymm8,%ymm12,%ymm12
5377	vmovdqa	%ymm3,%ymm8
5378
5379	leaq	256(%rsi),%rsi
5380	leaq	256(%rdi),%rdi
5381	subq	$256,%rbx
5382	jmp	.Lopen_avx2_tail_128_xor
5383
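// 385..512 bytes remain: four AVX2 ChaCha20 states (a full 512 bytes of
// keystream), fully interleaved with Poly1305 hashing of the ciphertext.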
5384.Lopen_avx2_tail_512:
5385	vmovdqa	.Lchacha20_consts(%rip),%ymm0
5386	vmovdqa	0+64(%rbp),%ymm4
5387	vmovdqa	0+96(%rbp),%ymm8
5388	vmovdqa	%ymm0,%ymm1
5389	vmovdqa	%ymm4,%ymm5
5390	vmovdqa	%ymm8,%ymm9
5391	vmovdqa	%ymm0,%ymm2
5392	vmovdqa	%ymm4,%ymm6
5393	vmovdqa	%ymm8,%ymm10
5394	vmovdqa	%ymm0,%ymm3
5395	vmovdqa	%ymm4,%ymm7
5396	vmovdqa	%ymm8,%ymm11
5397	vmovdqa	.Lavx2_inc(%rip),%ymm12
5398	vpaddd	0+160(%rbp),%ymm12,%ymm15
5399	vpaddd	%ymm15,%ymm12,%ymm14
5400	vpaddd	%ymm14,%ymm12,%ymm13
5401	vpaddd	%ymm13,%ymm12,%ymm12
5402	vmovdqa	%ymm15,0+256(%rbp)
5403	vmovdqa	%ymm14,0+224(%rbp)
5404	vmovdqa	%ymm13,0+192(%rbp)
5405	vmovdqa	%ymm12,0+160(%rbp)
5406
5407	xorq	%rcx,%rcx
5408	movq	%rsi,%r8
5409.Lopen_avx2_tail_512_rounds_and_x2hash:
5410	addq	0+0(%r8),%r10
5411	adcq	8+0(%r8),%r11
5412	adcq	$1,%r12
5413	movq	0+0+0(%rbp),%rax
5414	movq	%rax,%r15
5415	mulq	%r10
5416	movq	%rax,%r13
5417	movq	%rdx,%r14
5418	movq	0+0+0(%rbp),%rax
5419	mulq	%r11
5420	imulq	%r12,%r15
5421	addq	%rax,%r14
5422	adcq	%rdx,%r15
5423	movq	8+0+0(%rbp),%rax
5424	movq	%rax,%r9
5425	mulq	%r10
5426	addq	%rax,%r14
5427	adcq	$0,%rdx
5428	movq	%rdx,%r10
5429	movq	8+0+0(%rbp),%rax
5430	mulq	%r11
5431	addq	%rax,%r15
5432	adcq	$0,%rdx
5433	imulq	%r12,%r9
5434	addq	%r10,%r15
5435	adcq	%rdx,%r9
5436	movq	%r13,%r10
5437	movq	%r14,%r11
5438	movq	%r15,%r12
5439	andq	$3,%r12
5440	movq	%r15,%r13
5441	andq	$-4,%r13
5442	movq	%r9,%r14
5443	shrdq	$2,%r9,%r15
5444	shrq	$2,%r9
5445	addq	%r13,%r15
5446	adcq	%r14,%r9
5447	addq	%r15,%r10
5448	adcq	%r9,%r11
5449	adcq	$0,%r12
5450
5451	leaq	16(%r8),%r8
5452.Lopen_avx2_tail_512_rounds_and_x1hash:
5453	vmovdqa	%ymm8,0+128(%rbp)
5454	vmovdqa	.Lrol16(%rip),%ymm8
5455	vpaddd	%ymm7,%ymm3,%ymm3
5456	vpaddd	%ymm6,%ymm2,%ymm2
5457	vpaddd	%ymm5,%ymm1,%ymm1
5458	vpaddd	%ymm4,%ymm0,%ymm0
5459	vpxor	%ymm3,%ymm15,%ymm15
5460	vpxor	%ymm2,%ymm14,%ymm14
5461	vpxor	%ymm1,%ymm13,%ymm13
5462	vpxor	%ymm0,%ymm12,%ymm12
5463	vpshufb	%ymm8,%ymm15,%ymm15
5464	vpshufb	%ymm8,%ymm14,%ymm14
5465	vpshufb	%ymm8,%ymm13,%ymm13
5466	vpshufb	%ymm8,%ymm12,%ymm12
5467	vpaddd	%ymm15,%ymm11,%ymm11
5468	vpaddd	%ymm14,%ymm10,%ymm10
5469	vpaddd	%ymm13,%ymm9,%ymm9
5470	vpaddd	0+128(%rbp),%ymm12,%ymm8
5471	vpxor	%ymm11,%ymm7,%ymm7
5472	vpxor	%ymm10,%ymm6,%ymm6
5473	vpxor	%ymm9,%ymm5,%ymm5
5474	vpxor	%ymm8,%ymm4,%ymm4
5475	vmovdqa	%ymm8,0+128(%rbp)
5476	vpsrld	$20,%ymm7,%ymm8
5477	vpslld	$32-20,%ymm7,%ymm7
5478	vpxor	%ymm8,%ymm7,%ymm7
5479	vpsrld	$20,%ymm6,%ymm8
5480	vpslld	$32-20,%ymm6,%ymm6
5481	vpxor	%ymm8,%ymm6,%ymm6
5482	vpsrld	$20,%ymm5,%ymm8
5483	vpslld	$32-20,%ymm5,%ymm5
5484	vpxor	%ymm8,%ymm5,%ymm5
5485	vpsrld	$20,%ymm4,%ymm8
5486	vpslld	$32-20,%ymm4,%ymm4
5487	vpxor	%ymm8,%ymm4,%ymm4
5488	vmovdqa	.Lrol8(%rip),%ymm8
5489	vpaddd	%ymm7,%ymm3,%ymm3
5490	addq	0+0(%r8),%r10
5491	adcq	8+0(%r8),%r11
5492	adcq	$1,%r12
5493	movq	0+0+0(%rbp),%rdx
5494	movq	%rdx,%r15
5495	mulxq	%r10,%r13,%r14
5496	mulxq	%r11,%rax,%rdx
5497	imulq	%r12,%r15
5498	addq	%rax,%r14
5499	adcq	%rdx,%r15
5500	movq	8+0+0(%rbp),%rdx
5501	mulxq	%r10,%r10,%rax
5502	addq	%r10,%r14
5503	mulxq	%r11,%r11,%r9
5504	adcq	%r11,%r15
5505	adcq	$0,%r9
5506	imulq	%r12,%rdx
5507	addq	%rax,%r15
5508	adcq	%rdx,%r9
5509	movq	%r13,%r10
5510	movq	%r14,%r11
5511	movq	%r15,%r12
5512	andq	$3,%r12
5513	movq	%r15,%r13
5514	andq	$-4,%r13
5515	movq	%r9,%r14
5516	shrdq	$2,%r9,%r15
5517	shrq	$2,%r9
5518	addq	%r13,%r15
5519	adcq	%r14,%r9
5520	addq	%r15,%r10
5521	adcq	%r9,%r11
5522	adcq	$0,%r12
5523	vpaddd	%ymm6,%ymm2,%ymm2
5524	vpaddd	%ymm5,%ymm1,%ymm1
5525	vpaddd	%ymm4,%ymm0,%ymm0
5526	vpxor	%ymm3,%ymm15,%ymm15
5527	vpxor	%ymm2,%ymm14,%ymm14
5528	vpxor	%ymm1,%ymm13,%ymm13
5529	vpxor	%ymm0,%ymm12,%ymm12
5530	vpshufb	%ymm8,%ymm15,%ymm15
5531	vpshufb	%ymm8,%ymm14,%ymm14
5532	vpshufb	%ymm8,%ymm13,%ymm13
5533	vpshufb	%ymm8,%ymm12,%ymm12
5534	vpaddd	%ymm15,%ymm11,%ymm11
5535	vpaddd	%ymm14,%ymm10,%ymm10
5536	vpaddd	%ymm13,%ymm9,%ymm9
5537	vpaddd	0+128(%rbp),%ymm12,%ymm8
5538	vpxor	%ymm11,%ymm7,%ymm7
5539	vpxor	%ymm10,%ymm6,%ymm6
5540	vpxor	%ymm9,%ymm5,%ymm5
5541	vpxor	%ymm8,%ymm4,%ymm4
5542	vmovdqa	%ymm8,0+128(%rbp)
5543	vpsrld	$25,%ymm7,%ymm8
5544	vpslld	$32-25,%ymm7,%ymm7
5545	vpxor	%ymm8,%ymm7,%ymm7
5546	vpsrld	$25,%ymm6,%ymm8
5547	vpslld	$32-25,%ymm6,%ymm6
5548	vpxor	%ymm8,%ymm6,%ymm6
5549	vpsrld	$25,%ymm5,%ymm8
5550	vpslld	$32-25,%ymm5,%ymm5
5551	vpxor	%ymm8,%ymm5,%ymm5
5552	vpsrld	$25,%ymm4,%ymm8
5553	vpslld	$32-25,%ymm4,%ymm4
5554	vpxor	%ymm8,%ymm4,%ymm4
5555	vmovdqa	0+128(%rbp),%ymm8
5556	vpalignr	$4,%ymm7,%ymm7,%ymm7
5557	vpalignr	$8,%ymm11,%ymm11,%ymm11
5558	vpalignr	$12,%ymm15,%ymm15,%ymm15
5559	vpalignr	$4,%ymm6,%ymm6,%ymm6
5560	vpalignr	$8,%ymm10,%ymm10,%ymm10
5561	vpalignr	$12,%ymm14,%ymm14,%ymm14
5562	vpalignr	$4,%ymm5,%ymm5,%ymm5
5563	vpalignr	$8,%ymm9,%ymm9,%ymm9
5564	vpalignr	$12,%ymm13,%ymm13,%ymm13
5565	vpalignr	$4,%ymm4,%ymm4,%ymm4
5566	vpalignr	$8,%ymm8,%ymm8,%ymm8
5567	vpalignr	$12,%ymm12,%ymm12,%ymm12
5568	vmovdqa	%ymm8,0+128(%rbp)
5569	vmovdqa	.Lrol16(%rip),%ymm8
5570	vpaddd	%ymm7,%ymm3,%ymm3
5571	addq	0+16(%r8),%r10
5572	adcq	8+16(%r8),%r11
5573	adcq	$1,%r12
5574	movq	0+0+0(%rbp),%rdx
5575	movq	%rdx,%r15
5576	mulxq	%r10,%r13,%r14
5577	mulxq	%r11,%rax,%rdx
5578	imulq	%r12,%r15
5579	addq	%rax,%r14
5580	adcq	%rdx,%r15
5581	movq	8+0+0(%rbp),%rdx
5582	mulxq	%r10,%r10,%rax
5583	addq	%r10,%r14
5584	mulxq	%r11,%r11,%r9
5585	adcq	%r11,%r15
5586	adcq	$0,%r9
5587	imulq	%r12,%rdx
5588	addq	%rax,%r15
5589	adcq	%rdx,%r9
5590	movq	%r13,%r10
5591	movq	%r14,%r11
5592	movq	%r15,%r12
5593	andq	$3,%r12
5594	movq	%r15,%r13
5595	andq	$-4,%r13
5596	movq	%r9,%r14
5597	shrdq	$2,%r9,%r15
5598	shrq	$2,%r9
5599	addq	%r13,%r15
5600	adcq	%r14,%r9
5601	addq	%r15,%r10
5602	adcq	%r9,%r11
5603	adcq	$0,%r12
5604
5605	leaq	32(%r8),%r8
5606	vpaddd	%ymm6,%ymm2,%ymm2
5607	vpaddd	%ymm5,%ymm1,%ymm1
5608	vpaddd	%ymm4,%ymm0,%ymm0
5609	vpxor	%ymm3,%ymm15,%ymm15
5610	vpxor	%ymm2,%ymm14,%ymm14
5611	vpxor	%ymm1,%ymm13,%ymm13
5612	vpxor	%ymm0,%ymm12,%ymm12
5613	vpshufb	%ymm8,%ymm15,%ymm15
5614	vpshufb	%ymm8,%ymm14,%ymm14
5615	vpshufb	%ymm8,%ymm13,%ymm13
5616	vpshufb	%ymm8,%ymm12,%ymm12
5617	vpaddd	%ymm15,%ymm11,%ymm11
5618	vpaddd	%ymm14,%ymm10,%ymm10
5619	vpaddd	%ymm13,%ymm9,%ymm9
5620	vpaddd	0+128(%rbp),%ymm12,%ymm8
5621	vpxor	%ymm11,%ymm7,%ymm7
5622	vpxor	%ymm10,%ymm6,%ymm6
5623	vpxor	%ymm9,%ymm5,%ymm5
5624	vpxor	%ymm8,%ymm4,%ymm4
5625	vmovdqa	%ymm8,0+128(%rbp)
5626	vpsrld	$20,%ymm7,%ymm8
5627	vpslld	$32-20,%ymm7,%ymm7
5628	vpxor	%ymm8,%ymm7,%ymm7
5629	vpsrld	$20,%ymm6,%ymm8
5630	vpslld	$32-20,%ymm6,%ymm6
5631	vpxor	%ymm8,%ymm6,%ymm6
5632	vpsrld	$20,%ymm5,%ymm8
5633	vpslld	$32-20,%ymm5,%ymm5
5634	vpxor	%ymm8,%ymm5,%ymm5
5635	vpsrld	$20,%ymm4,%ymm8
5636	vpslld	$32-20,%ymm4,%ymm4
5637	vpxor	%ymm8,%ymm4,%ymm4
5638	vmovdqa	.Lrol8(%rip),%ymm8
5639	vpaddd	%ymm7,%ymm3,%ymm3
5640	vpaddd	%ymm6,%ymm2,%ymm2
5641	vpaddd	%ymm5,%ymm1,%ymm1
5642	vpaddd	%ymm4,%ymm0,%ymm0
5643	vpxor	%ymm3,%ymm15,%ymm15
5644	vpxor	%ymm2,%ymm14,%ymm14
5645	vpxor	%ymm1,%ymm13,%ymm13
5646	vpxor	%ymm0,%ymm12,%ymm12
5647	vpshufb	%ymm8,%ymm15,%ymm15
5648	vpshufb	%ymm8,%ymm14,%ymm14
5649	vpshufb	%ymm8,%ymm13,%ymm13
5650	vpshufb	%ymm8,%ymm12,%ymm12
5651	vpaddd	%ymm15,%ymm11,%ymm11
5652	vpaddd	%ymm14,%ymm10,%ymm10
5653	vpaddd	%ymm13,%ymm9,%ymm9
5654	vpaddd	0+128(%rbp),%ymm12,%ymm8
5655	vpxor	%ymm11,%ymm7,%ymm7
5656	vpxor	%ymm10,%ymm6,%ymm6
5657	vpxor	%ymm9,%ymm5,%ymm5
5658	vpxor	%ymm8,%ymm4,%ymm4
5659	vmovdqa	%ymm8,0+128(%rbp)
5660	vpsrld	$25,%ymm7,%ymm8
5661	vpslld	$32-25,%ymm7,%ymm7
5662	vpxor	%ymm8,%ymm7,%ymm7
5663	vpsrld	$25,%ymm6,%ymm8
5664	vpslld	$32-25,%ymm6,%ymm6
5665	vpxor	%ymm8,%ymm6,%ymm6
5666	vpsrld	$25,%ymm5,%ymm8
5667	vpslld	$32-25,%ymm5,%ymm5
5668	vpxor	%ymm8,%ymm5,%ymm5
5669	vpsrld	$25,%ymm4,%ymm8
5670	vpslld	$32-25,%ymm4,%ymm4
5671	vpxor	%ymm8,%ymm4,%ymm4
5672	vmovdqa	0+128(%rbp),%ymm8
5673	vpalignr	$12,%ymm7,%ymm7,%ymm7
5674	vpalignr	$8,%ymm11,%ymm11,%ymm11
5675	vpalignr	$4,%ymm15,%ymm15,%ymm15
5676	vpalignr	$12,%ymm6,%ymm6,%ymm6
5677	vpalignr	$8,%ymm10,%ymm10,%ymm10
5678	vpalignr	$4,%ymm14,%ymm14,%ymm14
5679	vpalignr	$12,%ymm5,%ymm5,%ymm5
5680	vpalignr	$8,%ymm9,%ymm9,%ymm9
5681	vpalignr	$4,%ymm13,%ymm13,%ymm13
5682	vpalignr	$12,%ymm4,%ymm4,%ymm4
5683	vpalignr	$8,%ymm8,%ymm8,%ymm8
5684	vpalignr	$4,%ymm12,%ymm12,%ymm12
5685
5686	incq	%rcx
5687	cmpq	$4,%rcx
5688	jl	.Lopen_avx2_tail_512_rounds_and_x2hash
5689	cmpq	$10,%rcx
5690	jne	.Lopen_avx2_tail_512_rounds_and_x1hash
5691	movq	%rbx,%rcx
5692	subq	$384,%rcx
5693	andq	$-16,%rcx
5694.Lopen_avx2_tail_512_hash:
5695	testq	%rcx,%rcx
5696	je	.Lopen_avx2_tail_512_done
5697	addq	0+0(%r8),%r10
5698	adcq	8+0(%r8),%r11
5699	adcq	$1,%r12
5700	movq	0+0+0(%rbp),%rdx
5701	movq	%rdx,%r15
5702	mulxq	%r10,%r13,%r14
5703	mulxq	%r11,%rax,%rdx
5704	imulq	%r12,%r15
5705	addq	%rax,%r14
5706	adcq	%rdx,%r15
5707	movq	8+0+0(%rbp),%rdx
5708	mulxq	%r10,%r10,%rax
5709	addq	%r10,%r14
5710	mulxq	%r11,%r11,%r9
5711	adcq	%r11,%r15
5712	adcq	$0,%r9
5713	imulq	%r12,%rdx
5714	addq	%rax,%r15
5715	adcq	%rdx,%r9
5716	movq	%r13,%r10
5717	movq	%r14,%r11
5718	movq	%r15,%r12
5719	andq	$3,%r12
5720	movq	%r15,%r13
5721	andq	$-4,%r13
5722	movq	%r9,%r14
5723	shrdq	$2,%r9,%r15
5724	shrq	$2,%r9
5725	addq	%r13,%r15
5726	adcq	%r14,%r9
5727	addq	%r15,%r10
5728	adcq	%r9,%r11
5729	adcq	$0,%r12
5730
5731	leaq	16(%r8),%r8
5732	subq	$16,%rcx
5733	jmp	.Lopen_avx2_tail_512_hash
5734.Lopen_avx2_tail_512_done:
5735	vpaddd	.Lchacha20_consts(%rip),%ymm3,%ymm3
5736	vpaddd	0+64(%rbp),%ymm7,%ymm7
5737	vpaddd	0+96(%rbp),%ymm11,%ymm11
5738	vpaddd	0+256(%rbp),%ymm15,%ymm15
5739	vpaddd	.Lchacha20_consts(%rip),%ymm2,%ymm2
5740	vpaddd	0+64(%rbp),%ymm6,%ymm6
5741	vpaddd	0+96(%rbp),%ymm10,%ymm10
5742	vpaddd	0+224(%rbp),%ymm14,%ymm14
5743	vpaddd	.Lchacha20_consts(%rip),%ymm1,%ymm1
5744	vpaddd	0+64(%rbp),%ymm5,%ymm5
5745	vpaddd	0+96(%rbp),%ymm9,%ymm9
5746	vpaddd	0+192(%rbp),%ymm13,%ymm13
5747	vpaddd	.Lchacha20_consts(%rip),%ymm0,%ymm0
5748	vpaddd	0+64(%rbp),%ymm4,%ymm4
5749	vpaddd	0+96(%rbp),%ymm8,%ymm8
5750	vpaddd	0+160(%rbp),%ymm12,%ymm12
5751
5752	vmovdqa	%ymm0,0+128(%rbp)
5753	vperm2i128	$0x02,%ymm3,%ymm7,%ymm0
5754	vperm2i128	$0x13,%ymm3,%ymm7,%ymm7
5755	vperm2i128	$0x02,%ymm11,%ymm15,%ymm3
5756	vperm2i128	$0x13,%ymm11,%ymm15,%ymm11
5757	vpxor	0+0(%rsi),%ymm0,%ymm0
5758	vpxor	32+0(%rsi),%ymm3,%ymm3
5759	vpxor	64+0(%rsi),%ymm7,%ymm7
5760	vpxor	96+0(%rsi),%ymm11,%ymm11
5761	vmovdqu	%ymm0,0+0(%rdi)
5762	vmovdqu	%ymm3,32+0(%rdi)
5763	vmovdqu	%ymm7,64+0(%rdi)
5764	vmovdqu	%ymm11,96+0(%rdi)
5765
5766	vmovdqa	0+128(%rbp),%ymm0
5767	vperm2i128	$0x02,%ymm2,%ymm6,%ymm3
5768	vperm2i128	$0x13,%ymm2,%ymm6,%ymm6
5769	vperm2i128	$0x02,%ymm10,%ymm14,%ymm2
5770	vperm2i128	$0x13,%ymm10,%ymm14,%ymm10
5771	vpxor	0+128(%rsi),%ymm3,%ymm3
5772	vpxor	32+128(%rsi),%ymm2,%ymm2
5773	vpxor	64+128(%rsi),%ymm6,%ymm6
5774	vpxor	96+128(%rsi),%ymm10,%ymm10
5775	vmovdqu	%ymm3,0+128(%rdi)
5776	vmovdqu	%ymm2,32+128(%rdi)
5777	vmovdqu	%ymm6,64+128(%rdi)
5778	vmovdqu	%ymm10,96+128(%rdi)
5779	vperm2i128	$0x02,%ymm1,%ymm5,%ymm3
5780	vperm2i128	$0x13,%ymm1,%ymm5,%ymm5
5781	vperm2i128	$0x02,%ymm9,%ymm13,%ymm1
5782	vperm2i128	$0x13,%ymm9,%ymm13,%ymm9
5783	vpxor	0+256(%rsi),%ymm3,%ymm3
5784	vpxor	32+256(%rsi),%ymm1,%ymm1
5785	vpxor	64+256(%rsi),%ymm5,%ymm5
5786	vpxor	96+256(%rsi),%ymm9,%ymm9
5787	vmovdqu	%ymm3,0+256(%rdi)
5788	vmovdqu	%ymm1,32+256(%rdi)
5789	vmovdqu	%ymm5,64+256(%rdi)
5790	vmovdqu	%ymm9,96+256(%rdi)
5791	vperm2i128	$0x13,%ymm0,%ymm4,%ymm3
5792	vperm2i128	$0x02,%ymm0,%ymm4,%ymm0
5793	vperm2i128	$0x02,%ymm8,%ymm12,%ymm4
5794	vperm2i128	$0x13,%ymm8,%ymm12,%ymm12
5795	vmovdqa	%ymm3,%ymm8
5796
5797	leaq	384(%rsi),%rsi
5798	leaq	384(%rdi),%rdi
5799	subq	$384,%rbx
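// Common tail: XOR the remaining keystream against the input 32 bytes at a
// time, rotating ymm4/ymm8/ymm12 into ymm0; a final 16-byte chunk is handled
// below, and anything shorter falls through to the SSE 16-byte tail.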
5800.Lopen_avx2_tail_128_xor:
5801	cmpq	$32,%rbx
5802	jb	.Lopen_avx2_tail_32_xor
5803	subq	$32,%rbx
5804	vpxor	(%rsi),%ymm0,%ymm0
5805	vmovdqu	%ymm0,(%rdi)
5806	leaq	32(%rsi),%rsi
5807	leaq	32(%rdi),%rdi
5808	vmovdqa	%ymm4,%ymm0
5809	vmovdqa	%ymm8,%ymm4
5810	vmovdqa	%ymm12,%ymm8
5811	jmp	.Lopen_avx2_tail_128_xor
5812.Lopen_avx2_tail_32_xor:
5813	cmpq	$16,%rbx
5814	vmovdqa	%xmm0,%xmm1
5815	jb	.Lopen_avx2_exit
5816	subq	$16,%rbx
5817
5818	vpxor	(%rsi),%xmm0,%xmm1
5819	vmovdqu	%xmm1,(%rdi)
5820	leaq	16(%rsi),%rsi
5821	leaq	16(%rdi),%rdi
5822	vperm2i128	$0x11,%ymm0,%ymm0,%ymm0
5823	vmovdqa	%xmm0,%xmm1
5824.Lopen_avx2_exit:
5825	vzeroupper
5826	jmp	.Lopen_sse_tail_16
5827
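// Whole inputs of at most 192 bytes: two AVX2 ChaCha20 states; the first
// 32 bytes of keystream, clamped, become the Poly1305 key (r,s).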
5828.Lopen_avx2_192:
5829	vmovdqa	%ymm0,%ymm1
5830	vmovdqa	%ymm0,%ymm2
5831	vmovdqa	%ymm4,%ymm5
5832	vmovdqa	%ymm4,%ymm6
5833	vmovdqa	%ymm8,%ymm9
5834	vmovdqa	%ymm8,%ymm10
5835	vpaddd	.Lavx2_inc(%rip),%ymm12,%ymm13
5836	vmovdqa	%ymm12,%ymm11
5837	vmovdqa	%ymm13,%ymm15
5838	movq	$10,%r10
5839.Lopen_avx2_192_rounds:
5840	vpaddd	%ymm4,%ymm0,%ymm0
5841	vpxor	%ymm0,%ymm12,%ymm12
5842	vpshufb	.Lrol16(%rip),%ymm12,%ymm12
5843	vpaddd	%ymm12,%ymm8,%ymm8
5844	vpxor	%ymm8,%ymm4,%ymm4
5845	vpsrld	$20,%ymm4,%ymm3
5846	vpslld	$12,%ymm4,%ymm4
5847	vpxor	%ymm3,%ymm4,%ymm4
5848	vpaddd	%ymm4,%ymm0,%ymm0
5849	vpxor	%ymm0,%ymm12,%ymm12
5850	vpshufb	.Lrol8(%rip),%ymm12,%ymm12
5851	vpaddd	%ymm12,%ymm8,%ymm8
5852	vpxor	%ymm8,%ymm4,%ymm4
5853	vpslld	$7,%ymm4,%ymm3
5854	vpsrld	$25,%ymm4,%ymm4
5855	vpxor	%ymm3,%ymm4,%ymm4
5856	vpalignr	$12,%ymm12,%ymm12,%ymm12
5857	vpalignr	$8,%ymm8,%ymm8,%ymm8
5858	vpalignr	$4,%ymm4,%ymm4,%ymm4
5859	vpaddd	%ymm5,%ymm1,%ymm1
5860	vpxor	%ymm1,%ymm13,%ymm13
5861	vpshufb	.Lrol16(%rip),%ymm13,%ymm13
5862	vpaddd	%ymm13,%ymm9,%ymm9
5863	vpxor	%ymm9,%ymm5,%ymm5
5864	vpsrld	$20,%ymm5,%ymm3
5865	vpslld	$12,%ymm5,%ymm5
5866	vpxor	%ymm3,%ymm5,%ymm5
5867	vpaddd	%ymm5,%ymm1,%ymm1
5868	vpxor	%ymm1,%ymm13,%ymm13
5869	vpshufb	.Lrol8(%rip),%ymm13,%ymm13
5870	vpaddd	%ymm13,%ymm9,%ymm9
5871	vpxor	%ymm9,%ymm5,%ymm5
5872	vpslld	$7,%ymm5,%ymm3
5873	vpsrld	$25,%ymm5,%ymm5
5874	vpxor	%ymm3,%ymm5,%ymm5
5875	vpalignr	$12,%ymm13,%ymm13,%ymm13
5876	vpalignr	$8,%ymm9,%ymm9,%ymm9
5877	vpalignr	$4,%ymm5,%ymm5,%ymm5
5878	vpaddd	%ymm4,%ymm0,%ymm0
5879	vpxor	%ymm0,%ymm12,%ymm12
5880	vpshufb	.Lrol16(%rip),%ymm12,%ymm12
5881	vpaddd	%ymm12,%ymm8,%ymm8
5882	vpxor	%ymm8,%ymm4,%ymm4
5883	vpsrld	$20,%ymm4,%ymm3
5884	vpslld	$12,%ymm4,%ymm4
5885	vpxor	%ymm3,%ymm4,%ymm4
5886	vpaddd	%ymm4,%ymm0,%ymm0
5887	vpxor	%ymm0,%ymm12,%ymm12
5888	vpshufb	.Lrol8(%rip),%ymm12,%ymm12
5889	vpaddd	%ymm12,%ymm8,%ymm8
5890	vpxor	%ymm8,%ymm4,%ymm4
5891	vpslld	$7,%ymm4,%ymm3
5892	vpsrld	$25,%ymm4,%ymm4
5893	vpxor	%ymm3,%ymm4,%ymm4
5894	vpalignr	$4,%ymm12,%ymm12,%ymm12
5895	vpalignr	$8,%ymm8,%ymm8,%ymm8
5896	vpalignr	$12,%ymm4,%ymm4,%ymm4
5897	vpaddd	%ymm5,%ymm1,%ymm1
5898	vpxor	%ymm1,%ymm13,%ymm13
5899	vpshufb	.Lrol16(%rip),%ymm13,%ymm13
5900	vpaddd	%ymm13,%ymm9,%ymm9
5901	vpxor	%ymm9,%ymm5,%ymm5
5902	vpsrld	$20,%ymm5,%ymm3
5903	vpslld	$12,%ymm5,%ymm5
5904	vpxor	%ymm3,%ymm5,%ymm5
5905	vpaddd	%ymm5,%ymm1,%ymm1
5906	vpxor	%ymm1,%ymm13,%ymm13
5907	vpshufb	.Lrol8(%rip),%ymm13,%ymm13
5908	vpaddd	%ymm13,%ymm9,%ymm9
5909	vpxor	%ymm9,%ymm5,%ymm5
5910	vpslld	$7,%ymm5,%ymm3
5911	vpsrld	$25,%ymm5,%ymm5
5912	vpxor	%ymm3,%ymm5,%ymm5
5913	vpalignr	$4,%ymm13,%ymm13,%ymm13
5914	vpalignr	$8,%ymm9,%ymm9,%ymm9
5915	vpalignr	$12,%ymm5,%ymm5,%ymm5
5916
5917	decq	%r10
5918	jne	.Lopen_avx2_192_rounds
5919	vpaddd	%ymm2,%ymm0,%ymm0
5920	vpaddd	%ymm2,%ymm1,%ymm1
5921	vpaddd	%ymm6,%ymm4,%ymm4
5922	vpaddd	%ymm6,%ymm5,%ymm5
5923	vpaddd	%ymm10,%ymm8,%ymm8
5924	vpaddd	%ymm10,%ymm9,%ymm9
5925	vpaddd	%ymm11,%ymm12,%ymm12
5926	vpaddd	%ymm15,%ymm13,%ymm13
5927	vperm2i128	$0x02,%ymm0,%ymm4,%ymm3
5928
5929	vpand	.Lclamp(%rip),%ymm3,%ymm3
5930	vmovdqa	%ymm3,0+0(%rbp)
5931
5932	vperm2i128	$0x13,%ymm0,%ymm4,%ymm0
5933	vperm2i128	$0x13,%ymm8,%ymm12,%ymm4
5934	vperm2i128	$0x02,%ymm1,%ymm5,%ymm8
5935	vperm2i128	$0x02,%ymm9,%ymm13,%ymm12
5936	vperm2i128	$0x13,%ymm1,%ymm5,%ymm1
5937	vperm2i128	$0x13,%ymm9,%ymm13,%ymm5
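// Short-input finish: hash the AD, then hash and decrypt 32 bytes per
// iteration, cycling the precomputed keystream registers into ymm0.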
5938.Lopen_avx2_short:
5939	movq	%r8,%r8
5940	call	poly_hash_ad_internal
5941.Lopen_avx2_short_hash_and_xor_loop:
5942	cmpq	$32,%rbx
5943	jb	.Lopen_avx2_short_tail_32
5944	subq	$32,%rbx
5945	addq	0+0(%rsi),%r10
5946	adcq	8+0(%rsi),%r11
5947	adcq	$1,%r12
5948	movq	0+0+0(%rbp),%rax
5949	movq	%rax,%r15
5950	mulq	%r10
5951	movq	%rax,%r13
5952	movq	%rdx,%r14
5953	movq	0+0+0(%rbp),%rax
5954	mulq	%r11
5955	imulq	%r12,%r15
5956	addq	%rax,%r14
5957	adcq	%rdx,%r15
5958	movq	8+0+0(%rbp),%rax
5959	movq	%rax,%r9
5960	mulq	%r10
5961	addq	%rax,%r14
5962	adcq	$0,%rdx
5963	movq	%rdx,%r10
5964	movq	8+0+0(%rbp),%rax
5965	mulq	%r11
5966	addq	%rax,%r15
5967	adcq	$0,%rdx
5968	imulq	%r12,%r9
5969	addq	%r10,%r15
5970	adcq	%rdx,%r9
5971	movq	%r13,%r10
5972	movq	%r14,%r11
5973	movq	%r15,%r12
5974	andq	$3,%r12
5975	movq	%r15,%r13
5976	andq	$-4,%r13
5977	movq	%r9,%r14
5978	shrdq	$2,%r9,%r15
5979	shrq	$2,%r9
5980	addq	%r13,%r15
5981	adcq	%r14,%r9
5982	addq	%r15,%r10
5983	adcq	%r9,%r11
5984	adcq	$0,%r12
5985	addq	0+16(%rsi),%r10
5986	adcq	8+16(%rsi),%r11
5987	adcq	$1,%r12
5988	movq	0+0+0(%rbp),%rax
5989	movq	%rax,%r15
5990	mulq	%r10
5991	movq	%rax,%r13
5992	movq	%rdx,%r14
5993	movq	0+0+0(%rbp),%rax
5994	mulq	%r11
5995	imulq	%r12,%r15
5996	addq	%rax,%r14
5997	adcq	%rdx,%r15
5998	movq	8+0+0(%rbp),%rax
5999	movq	%rax,%r9
6000	mulq	%r10
6001	addq	%rax,%r14
6002	adcq	$0,%rdx
6003	movq	%rdx,%r10
6004	movq	8+0+0(%rbp),%rax
6005	mulq	%r11
6006	addq	%rax,%r15
6007	adcq	$0,%rdx
6008	imulq	%r12,%r9
6009	addq	%r10,%r15
6010	adcq	%rdx,%r9
6011	movq	%r13,%r10
6012	movq	%r14,%r11
6013	movq	%r15,%r12
6014	andq	$3,%r12
6015	movq	%r15,%r13
6016	andq	$-4,%r13
6017	movq	%r9,%r14
6018	shrdq	$2,%r9,%r15
6019	shrq	$2,%r9
6020	addq	%r13,%r15
6021	adcq	%r14,%r9
6022	addq	%r15,%r10
6023	adcq	%r9,%r11
6024	adcq	$0,%r12
6025
6026
6027	vpxor	(%rsi),%ymm0,%ymm0
6028	vmovdqu	%ymm0,(%rdi)
6029	leaq	32(%rsi),%rsi
6030	leaq	32(%rdi),%rdi
6031
6032	vmovdqa	%ymm4,%ymm0
6033	vmovdqa	%ymm8,%ymm4
6034	vmovdqa	%ymm12,%ymm8
6035	vmovdqa	%ymm1,%ymm12
6036	vmovdqa	%ymm5,%ymm1
6037	vmovdqa	%ymm9,%ymm5
6038	vmovdqa	%ymm13,%ymm9
6039	vmovdqa	%ymm2,%ymm13
6040	vmovdqa	%ymm6,%ymm2
6041	jmp	.Lopen_avx2_short_hash_and_xor_loop
6042.Lopen_avx2_short_tail_32:
6043	cmpq	$16,%rbx
6044	vmovdqa	%xmm0,%xmm1
6045	jb	.Lopen_avx2_short_tail_32_exit
6046	subq	$16,%rbx
6047	addq	0+0(%rsi),%r10
6048	adcq	8+0(%rsi),%r11
6049	adcq	$1,%r12
6050	movq	0+0+0(%rbp),%rax
6051	movq	%rax,%r15
6052	mulq	%r10
6053	movq	%rax,%r13
6054	movq	%rdx,%r14
6055	movq	0+0+0(%rbp),%rax
6056	mulq	%r11
6057	imulq	%r12,%r15
6058	addq	%rax,%r14
6059	adcq	%rdx,%r15
6060	movq	8+0+0(%rbp),%rax
6061	movq	%rax,%r9
6062	mulq	%r10
6063	addq	%rax,%r14
6064	adcq	$0,%rdx
6065	movq	%rdx,%r10
6066	movq	8+0+0(%rbp),%rax
6067	mulq	%r11
6068	addq	%rax,%r15
6069	adcq	$0,%rdx
6070	imulq	%r12,%r9
6071	addq	%r10,%r15
6072	adcq	%rdx,%r9
6073	movq	%r13,%r10
6074	movq	%r14,%r11
6075	movq	%r15,%r12
6076	andq	$3,%r12
6077	movq	%r15,%r13
6078	andq	$-4,%r13
6079	movq	%r9,%r14
6080	shrdq	$2,%r9,%r15
6081	shrq	$2,%r9
6082	addq	%r13,%r15
6083	adcq	%r14,%r9
6084	addq	%r15,%r10
6085	adcq	%r9,%r11
6086	adcq	$0,%r12
6087
6088	vpxor	(%rsi),%xmm0,%xmm3
6089	vmovdqu	%xmm3,(%rdi)
6090	leaq	16(%rsi),%rsi
6091	leaq	16(%rdi),%rdi
6092	vextracti128	$1,%ymm0,%xmm1
6093.Lopen_avx2_short_tail_32_exit:
6094	vzeroupper
6095	jmp	.Lopen_sse_tail_16
6096
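// Whole inputs of at most 320 bytes: three AVX2 ChaCha20 states; as above,
// the first clamped 32 bytes of keystream become the Poly1305 key.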
6097.Lopen_avx2_320:
6098	vmovdqa	%ymm0,%ymm1
6099	vmovdqa	%ymm0,%ymm2
6100	vmovdqa	%ymm4,%ymm5
6101	vmovdqa	%ymm4,%ymm6
6102	vmovdqa	%ymm8,%ymm9
6103	vmovdqa	%ymm8,%ymm10
6104	vpaddd	.Lavx2_inc(%rip),%ymm12,%ymm13
6105	vpaddd	.Lavx2_inc(%rip),%ymm13,%ymm14
6106	vmovdqa	%ymm4,%ymm7
6107	vmovdqa	%ymm8,%ymm11
6108	vmovdqa	%ymm12,0+160(%rbp)
6109	vmovdqa	%ymm13,0+192(%rbp)
6110	vmovdqa	%ymm14,0+224(%rbp)
6111	movq	$10,%r10
6112.Lopen_avx2_320_rounds:
6113	vpaddd	%ymm4,%ymm0,%ymm0
6114	vpxor	%ymm0,%ymm12,%ymm12
6115	vpshufb	.Lrol16(%rip),%ymm12,%ymm12
6116	vpaddd	%ymm12,%ymm8,%ymm8
6117	vpxor	%ymm8,%ymm4,%ymm4
6118	vpsrld	$20,%ymm4,%ymm3
6119	vpslld	$12,%ymm4,%ymm4
6120	vpxor	%ymm3,%ymm4,%ymm4
6121	vpaddd	%ymm4,%ymm0,%ymm0
6122	vpxor	%ymm0,%ymm12,%ymm12
6123	vpshufb	.Lrol8(%rip),%ymm12,%ymm12
6124	vpaddd	%ymm12,%ymm8,%ymm8
6125	vpxor	%ymm8,%ymm4,%ymm4
6126	vpslld	$7,%ymm4,%ymm3
6127	vpsrld	$25,%ymm4,%ymm4
6128	vpxor	%ymm3,%ymm4,%ymm4
6129	vpalignr	$12,%ymm12,%ymm12,%ymm12
6130	vpalignr	$8,%ymm8,%ymm8,%ymm8
6131	vpalignr	$4,%ymm4,%ymm4,%ymm4
6132	vpaddd	%ymm5,%ymm1,%ymm1
6133	vpxor	%ymm1,%ymm13,%ymm13
6134	vpshufb	.Lrol16(%rip),%ymm13,%ymm13
6135	vpaddd	%ymm13,%ymm9,%ymm9
6136	vpxor	%ymm9,%ymm5,%ymm5
6137	vpsrld	$20,%ymm5,%ymm3
6138	vpslld	$12,%ymm5,%ymm5
6139	vpxor	%ymm3,%ymm5,%ymm5
6140	vpaddd	%ymm5,%ymm1,%ymm1
6141	vpxor	%ymm1,%ymm13,%ymm13
6142	vpshufb	.Lrol8(%rip),%ymm13,%ymm13
6143	vpaddd	%ymm13,%ymm9,%ymm9
6144	vpxor	%ymm9,%ymm5,%ymm5
6145	vpslld	$7,%ymm5,%ymm3
6146	vpsrld	$25,%ymm5,%ymm5
6147	vpxor	%ymm3,%ymm5,%ymm5
6148	vpalignr	$12,%ymm13,%ymm13,%ymm13
6149	vpalignr	$8,%ymm9,%ymm9,%ymm9
6150	vpalignr	$4,%ymm5,%ymm5,%ymm5
6151	vpaddd	%ymm6,%ymm2,%ymm2
6152	vpxor	%ymm2,%ymm14,%ymm14
6153	vpshufb	.Lrol16(%rip),%ymm14,%ymm14
6154	vpaddd	%ymm14,%ymm10,%ymm10
6155	vpxor	%ymm10,%ymm6,%ymm6
6156	vpsrld	$20,%ymm6,%ymm3
6157	vpslld	$12,%ymm6,%ymm6
6158	vpxor	%ymm3,%ymm6,%ymm6
6159	vpaddd	%ymm6,%ymm2,%ymm2
6160	vpxor	%ymm2,%ymm14,%ymm14
6161	vpshufb	.Lrol8(%rip),%ymm14,%ymm14
6162	vpaddd	%ymm14,%ymm10,%ymm10
6163	vpxor	%ymm10,%ymm6,%ymm6
6164	vpslld	$7,%ymm6,%ymm3
6165	vpsrld	$25,%ymm6,%ymm6
6166	vpxor	%ymm3,%ymm6,%ymm6
6167	vpalignr	$12,%ymm14,%ymm14,%ymm14
6168	vpalignr	$8,%ymm10,%ymm10,%ymm10
6169	vpalignr	$4,%ymm6,%ymm6,%ymm6
6170	vpaddd	%ymm4,%ymm0,%ymm0
6171	vpxor	%ymm0,%ymm12,%ymm12
6172	vpshufb	.Lrol16(%rip),%ymm12,%ymm12
6173	vpaddd	%ymm12,%ymm8,%ymm8
6174	vpxor	%ymm8,%ymm4,%ymm4
6175	vpsrld	$20,%ymm4,%ymm3
6176	vpslld	$12,%ymm4,%ymm4
6177	vpxor	%ymm3,%ymm4,%ymm4
6178	vpaddd	%ymm4,%ymm0,%ymm0
6179	vpxor	%ymm0,%ymm12,%ymm12
6180	vpshufb	.Lrol8(%rip),%ymm12,%ymm12
6181	vpaddd	%ymm12,%ymm8,%ymm8
6182	vpxor	%ymm8,%ymm4,%ymm4
6183	vpslld	$7,%ymm4,%ymm3
6184	vpsrld	$25,%ymm4,%ymm4
6185	vpxor	%ymm3,%ymm4,%ymm4
6186	vpalignr	$4,%ymm12,%ymm12,%ymm12
6187	vpalignr	$8,%ymm8,%ymm8,%ymm8
6188	vpalignr	$12,%ymm4,%ymm4,%ymm4
6189	vpaddd	%ymm5,%ymm1,%ymm1
6190	vpxor	%ymm1,%ymm13,%ymm13
6191	vpshufb	.Lrol16(%rip),%ymm13,%ymm13
6192	vpaddd	%ymm13,%ymm9,%ymm9
6193	vpxor	%ymm9,%ymm5,%ymm5
6194	vpsrld	$20,%ymm5,%ymm3
6195	vpslld	$12,%ymm5,%ymm5
6196	vpxor	%ymm3,%ymm5,%ymm5
6197	vpaddd	%ymm5,%ymm1,%ymm1
6198	vpxor	%ymm1,%ymm13,%ymm13
6199	vpshufb	.Lrol8(%rip),%ymm13,%ymm13
6200	vpaddd	%ymm13,%ymm9,%ymm9
6201	vpxor	%ymm9,%ymm5,%ymm5
6202	vpslld	$7,%ymm5,%ymm3
6203	vpsrld	$25,%ymm5,%ymm5
6204	vpxor	%ymm3,%ymm5,%ymm5
6205	vpalignr	$4,%ymm13,%ymm13,%ymm13
6206	vpalignr	$8,%ymm9,%ymm9,%ymm9
6207	vpalignr	$12,%ymm5,%ymm5,%ymm5
6208	vpaddd	%ymm6,%ymm2,%ymm2
6209	vpxor	%ymm2,%ymm14,%ymm14
6210	vpshufb	.Lrol16(%rip),%ymm14,%ymm14
6211	vpaddd	%ymm14,%ymm10,%ymm10
6212	vpxor	%ymm10,%ymm6,%ymm6
6213	vpsrld	$20,%ymm6,%ymm3
6214	vpslld	$12,%ymm6,%ymm6
6215	vpxor	%ymm3,%ymm6,%ymm6
6216	vpaddd	%ymm6,%ymm2,%ymm2
6217	vpxor	%ymm2,%ymm14,%ymm14
6218	vpshufb	.Lrol8(%rip),%ymm14,%ymm14
6219	vpaddd	%ymm14,%ymm10,%ymm10
6220	vpxor	%ymm10,%ymm6,%ymm6
6221	vpslld	$7,%ymm6,%ymm3
6222	vpsrld	$25,%ymm6,%ymm6
6223	vpxor	%ymm3,%ymm6,%ymm6
6224	vpalignr	$4,%ymm14,%ymm14,%ymm14
6225	vpalignr	$8,%ymm10,%ymm10,%ymm10
6226	vpalignr	$12,%ymm6,%ymm6,%ymm6
6227
6228	decq	%r10
6229	jne	.Lopen_avx2_320_rounds
6230	vpaddd	.Lchacha20_consts(%rip),%ymm0,%ymm0
6231	vpaddd	.Lchacha20_consts(%rip),%ymm1,%ymm1
6232	vpaddd	.Lchacha20_consts(%rip),%ymm2,%ymm2
6233	vpaddd	%ymm7,%ymm4,%ymm4
6234	vpaddd	%ymm7,%ymm5,%ymm5
6235	vpaddd	%ymm7,%ymm6,%ymm6
6236	vpaddd	%ymm11,%ymm8,%ymm8
6237	vpaddd	%ymm11,%ymm9,%ymm9
6238	vpaddd	%ymm11,%ymm10,%ymm10
6239	vpaddd	0+160(%rbp),%ymm12,%ymm12
6240	vpaddd	0+192(%rbp),%ymm13,%ymm13
6241	vpaddd	0+224(%rbp),%ymm14,%ymm14
6242	vperm2i128	$0x02,%ymm0,%ymm4,%ymm3
6243
6244	vpand	.Lclamp(%rip),%ymm3,%ymm3
6245	vmovdqa	%ymm3,0+0(%rbp)
6246
6247	vperm2i128	$0x13,%ymm0,%ymm4,%ymm0
6248	vperm2i128	$0x13,%ymm8,%ymm12,%ymm4
6249	vperm2i128	$0x02,%ymm1,%ymm5,%ymm8
6250	vperm2i128	$0x02,%ymm9,%ymm13,%ymm12
6251	vperm2i128	$0x13,%ymm1,%ymm5,%ymm1
6252	vperm2i128	$0x13,%ymm9,%ymm13,%ymm5
6253	vperm2i128	$0x02,%ymm2,%ymm6,%ymm9
6254	vperm2i128	$0x02,%ymm10,%ymm14,%ymm13
6255	vperm2i128	$0x13,%ymm2,%ymm6,%ymm2
6256	vperm2i128	$0x13,%ymm10,%ymm14,%ymm6
6257	jmp	.Lopen_avx2_short
6258.size	chacha20_poly1305_open_avx2, .-chacha20_poly1305_open_avx2
6259.cfi_endproc
6260
6261
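// AVX2 sealing (encryption) path. The key and counter/nonce at %r9 are
// broadcast into the ChaCha20 state; messages of at most 192 or 320 bytes
// take dedicated short paths, otherwise four states are generated up front,
// the first clamped 32 bytes seed the Poly1305 key, the AD is hashed, and the
// initial keystream is consumed before entering the main loop.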
6262.type	chacha20_poly1305_seal_avx2,@function
6263.align	64
6264chacha20_poly1305_seal_avx2:
6265.cfi_startproc
6266
6267
6268.cfi_adjust_cfa_offset	8
6269.cfi_offset	%rbp,-16
6270.cfi_adjust_cfa_offset	8
6271.cfi_offset	%rbx,-24
6272.cfi_adjust_cfa_offset	8
6273.cfi_offset	%r12,-32
6274.cfi_adjust_cfa_offset	8
6275.cfi_offset	%r13,-40
6276.cfi_adjust_cfa_offset	8
6277.cfi_offset	%r14,-48
6278.cfi_adjust_cfa_offset	8
6279.cfi_offset	%r15,-56
6280.cfi_adjust_cfa_offset	8
6281.cfi_offset	%r9,-64
6282.cfi_adjust_cfa_offset	288 + 32
6283
6284	vzeroupper
6285	vmovdqa	.Lchacha20_consts(%rip),%ymm0
6286	vbroadcasti128	0(%r9),%ymm4
6287	vbroadcasti128	16(%r9),%ymm8
6288	vbroadcasti128	32(%r9),%ymm12
6289	vpaddd	.Lavx2_init(%rip),%ymm12,%ymm12
6290	cmpq	$192,%rbx
6291	jbe	.Lseal_avx2_192
6292	cmpq	$320,%rbx
6293	jbe	.Lseal_avx2_320
6294	vmovdqa	%ymm0,%ymm1
6295	vmovdqa	%ymm0,%ymm2
6296	vmovdqa	%ymm0,%ymm3
6297	vmovdqa	%ymm4,%ymm5
6298	vmovdqa	%ymm4,%ymm6
6299	vmovdqa	%ymm4,%ymm7
6300	vmovdqa	%ymm4,0+64(%rbp)
6301	vmovdqa	%ymm8,%ymm9
6302	vmovdqa	%ymm8,%ymm10
6303	vmovdqa	%ymm8,%ymm11
6304	vmovdqa	%ymm8,0+96(%rbp)
6305	vmovdqa	%ymm12,%ymm15
6306	vpaddd	.Lavx2_inc(%rip),%ymm15,%ymm14
6307	vpaddd	.Lavx2_inc(%rip),%ymm14,%ymm13
6308	vpaddd	.Lavx2_inc(%rip),%ymm13,%ymm12
6309	vmovdqa	%ymm12,0+160(%rbp)
6310	vmovdqa	%ymm13,0+192(%rbp)
6311	vmovdqa	%ymm14,0+224(%rbp)
6312	vmovdqa	%ymm15,0+256(%rbp)
6313	movq	$10,%r10
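// Ten ChaCha20 double rounds over four interleaved states; ymm8 is spilled to
// 128(%rbp) so the register can hold the rotate masks and shift temporaries.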
6314.Lseal_avx2_init_rounds:
6315	vmovdqa	%ymm8,0+128(%rbp)
6316	vmovdqa	.Lrol16(%rip),%ymm8
6317	vpaddd	%ymm7,%ymm3,%ymm3
6318	vpaddd	%ymm6,%ymm2,%ymm2
6319	vpaddd	%ymm5,%ymm1,%ymm1
6320	vpaddd	%ymm4,%ymm0,%ymm0
6321	vpxor	%ymm3,%ymm15,%ymm15
6322	vpxor	%ymm2,%ymm14,%ymm14
6323	vpxor	%ymm1,%ymm13,%ymm13
6324	vpxor	%ymm0,%ymm12,%ymm12
6325	vpshufb	%ymm8,%ymm15,%ymm15
6326	vpshufb	%ymm8,%ymm14,%ymm14
6327	vpshufb	%ymm8,%ymm13,%ymm13
6328	vpshufb	%ymm8,%ymm12,%ymm12
6329	vpaddd	%ymm15,%ymm11,%ymm11
6330	vpaddd	%ymm14,%ymm10,%ymm10
6331	vpaddd	%ymm13,%ymm9,%ymm9
6332	vpaddd	0+128(%rbp),%ymm12,%ymm8
6333	vpxor	%ymm11,%ymm7,%ymm7
6334	vpxor	%ymm10,%ymm6,%ymm6
6335	vpxor	%ymm9,%ymm5,%ymm5
6336	vpxor	%ymm8,%ymm4,%ymm4
6337	vmovdqa	%ymm8,0+128(%rbp)
6338	vpsrld	$20,%ymm7,%ymm8
6339	vpslld	$32-20,%ymm7,%ymm7
6340	vpxor	%ymm8,%ymm7,%ymm7
6341	vpsrld	$20,%ymm6,%ymm8
6342	vpslld	$32-20,%ymm6,%ymm6
6343	vpxor	%ymm8,%ymm6,%ymm6
6344	vpsrld	$20,%ymm5,%ymm8
6345	vpslld	$32-20,%ymm5,%ymm5
6346	vpxor	%ymm8,%ymm5,%ymm5
6347	vpsrld	$20,%ymm4,%ymm8
6348	vpslld	$32-20,%ymm4,%ymm4
6349	vpxor	%ymm8,%ymm4,%ymm4
6350	vmovdqa	.Lrol8(%rip),%ymm8
6351	vpaddd	%ymm7,%ymm3,%ymm3
6352	vpaddd	%ymm6,%ymm2,%ymm2
6353	vpaddd	%ymm5,%ymm1,%ymm1
6354	vpaddd	%ymm4,%ymm0,%ymm0
6355	vpxor	%ymm3,%ymm15,%ymm15
6356	vpxor	%ymm2,%ymm14,%ymm14
6357	vpxor	%ymm1,%ymm13,%ymm13
6358	vpxor	%ymm0,%ymm12,%ymm12
6359	vpshufb	%ymm8,%ymm15,%ymm15
6360	vpshufb	%ymm8,%ymm14,%ymm14
6361	vpshufb	%ymm8,%ymm13,%ymm13
6362	vpshufb	%ymm8,%ymm12,%ymm12
6363	vpaddd	%ymm15,%ymm11,%ymm11
6364	vpaddd	%ymm14,%ymm10,%ymm10
6365	vpaddd	%ymm13,%ymm9,%ymm9
6366	vpaddd	0+128(%rbp),%ymm12,%ymm8
6367	vpxor	%ymm11,%ymm7,%ymm7
6368	vpxor	%ymm10,%ymm6,%ymm6
6369	vpxor	%ymm9,%ymm5,%ymm5
6370	vpxor	%ymm8,%ymm4,%ymm4
6371	vmovdqa	%ymm8,0+128(%rbp)
6372	vpsrld	$25,%ymm7,%ymm8
6373	vpslld	$32-25,%ymm7,%ymm7
6374	vpxor	%ymm8,%ymm7,%ymm7
6375	vpsrld	$25,%ymm6,%ymm8
6376	vpslld	$32-25,%ymm6,%ymm6
6377	vpxor	%ymm8,%ymm6,%ymm6
6378	vpsrld	$25,%ymm5,%ymm8
6379	vpslld	$32-25,%ymm5,%ymm5
6380	vpxor	%ymm8,%ymm5,%ymm5
6381	vpsrld	$25,%ymm4,%ymm8
6382	vpslld	$32-25,%ymm4,%ymm4
6383	vpxor	%ymm8,%ymm4,%ymm4
6384	vmovdqa	0+128(%rbp),%ymm8
6385	vpalignr	$4,%ymm7,%ymm7,%ymm7
6386	vpalignr	$8,%ymm11,%ymm11,%ymm11
6387	vpalignr	$12,%ymm15,%ymm15,%ymm15
6388	vpalignr	$4,%ymm6,%ymm6,%ymm6
6389	vpalignr	$8,%ymm10,%ymm10,%ymm10
6390	vpalignr	$12,%ymm14,%ymm14,%ymm14
6391	vpalignr	$4,%ymm5,%ymm5,%ymm5
6392	vpalignr	$8,%ymm9,%ymm9,%ymm9
6393	vpalignr	$12,%ymm13,%ymm13,%ymm13
6394	vpalignr	$4,%ymm4,%ymm4,%ymm4
6395	vpalignr	$8,%ymm8,%ymm8,%ymm8
6396	vpalignr	$12,%ymm12,%ymm12,%ymm12
6397	vmovdqa	%ymm8,0+128(%rbp)
6398	vmovdqa	.Lrol16(%rip),%ymm8
6399	vpaddd	%ymm7,%ymm3,%ymm3
6400	vpaddd	%ymm6,%ymm2,%ymm2
6401	vpaddd	%ymm5,%ymm1,%ymm1
6402	vpaddd	%ymm4,%ymm0,%ymm0
6403	vpxor	%ymm3,%ymm15,%ymm15
6404	vpxor	%ymm2,%ymm14,%ymm14
6405	vpxor	%ymm1,%ymm13,%ymm13
6406	vpxor	%ymm0,%ymm12,%ymm12
6407	vpshufb	%ymm8,%ymm15,%ymm15
6408	vpshufb	%ymm8,%ymm14,%ymm14
6409	vpshufb	%ymm8,%ymm13,%ymm13
6410	vpshufb	%ymm8,%ymm12,%ymm12
6411	vpaddd	%ymm15,%ymm11,%ymm11
6412	vpaddd	%ymm14,%ymm10,%ymm10
6413	vpaddd	%ymm13,%ymm9,%ymm9
6414	vpaddd	0+128(%rbp),%ymm12,%ymm8
6415	vpxor	%ymm11,%ymm7,%ymm7
6416	vpxor	%ymm10,%ymm6,%ymm6
6417	vpxor	%ymm9,%ymm5,%ymm5
6418	vpxor	%ymm8,%ymm4,%ymm4
6419	vmovdqa	%ymm8,0+128(%rbp)
6420	vpsrld	$20,%ymm7,%ymm8
6421	vpslld	$32-20,%ymm7,%ymm7
6422	vpxor	%ymm8,%ymm7,%ymm7
6423	vpsrld	$20,%ymm6,%ymm8
6424	vpslld	$32-20,%ymm6,%ymm6
6425	vpxor	%ymm8,%ymm6,%ymm6
6426	vpsrld	$20,%ymm5,%ymm8
6427	vpslld	$32-20,%ymm5,%ymm5
6428	vpxor	%ymm8,%ymm5,%ymm5
6429	vpsrld	$20,%ymm4,%ymm8
6430	vpslld	$32-20,%ymm4,%ymm4
6431	vpxor	%ymm8,%ymm4,%ymm4
6432	vmovdqa	.Lrol8(%rip),%ymm8
6433	vpaddd	%ymm7,%ymm3,%ymm3
6434	vpaddd	%ymm6,%ymm2,%ymm2
6435	vpaddd	%ymm5,%ymm1,%ymm1
6436	vpaddd	%ymm4,%ymm0,%ymm0
6437	vpxor	%ymm3,%ymm15,%ymm15
6438	vpxor	%ymm2,%ymm14,%ymm14
6439	vpxor	%ymm1,%ymm13,%ymm13
6440	vpxor	%ymm0,%ymm12,%ymm12
6441	vpshufb	%ymm8,%ymm15,%ymm15
6442	vpshufb	%ymm8,%ymm14,%ymm14
6443	vpshufb	%ymm8,%ymm13,%ymm13
6444	vpshufb	%ymm8,%ymm12,%ymm12
6445	vpaddd	%ymm15,%ymm11,%ymm11
6446	vpaddd	%ymm14,%ymm10,%ymm10
6447	vpaddd	%ymm13,%ymm9,%ymm9
6448	vpaddd	0+128(%rbp),%ymm12,%ymm8
6449	vpxor	%ymm11,%ymm7,%ymm7
6450	vpxor	%ymm10,%ymm6,%ymm6
6451	vpxor	%ymm9,%ymm5,%ymm5
6452	vpxor	%ymm8,%ymm4,%ymm4
6453	vmovdqa	%ymm8,0+128(%rbp)
6454	vpsrld	$25,%ymm7,%ymm8
6455	vpslld	$32-25,%ymm7,%ymm7
6456	vpxor	%ymm8,%ymm7,%ymm7
6457	vpsrld	$25,%ymm6,%ymm8
6458	vpslld	$32-25,%ymm6,%ymm6
6459	vpxor	%ymm8,%ymm6,%ymm6
6460	vpsrld	$25,%ymm5,%ymm8
6461	vpslld	$32-25,%ymm5,%ymm5
6462	vpxor	%ymm8,%ymm5,%ymm5
6463	vpsrld	$25,%ymm4,%ymm8
6464	vpslld	$32-25,%ymm4,%ymm4
6465	vpxor	%ymm8,%ymm4,%ymm4
6466	vmovdqa	0+128(%rbp),%ymm8
6467	vpalignr	$12,%ymm7,%ymm7,%ymm7
6468	vpalignr	$8,%ymm11,%ymm11,%ymm11
6469	vpalignr	$4,%ymm15,%ymm15,%ymm15
6470	vpalignr	$12,%ymm6,%ymm6,%ymm6
6471	vpalignr	$8,%ymm10,%ymm10,%ymm10
6472	vpalignr	$4,%ymm14,%ymm14,%ymm14
6473	vpalignr	$12,%ymm5,%ymm5,%ymm5
6474	vpalignr	$8,%ymm9,%ymm9,%ymm9
6475	vpalignr	$4,%ymm13,%ymm13,%ymm13
6476	vpalignr	$12,%ymm4,%ymm4,%ymm4
6477	vpalignr	$8,%ymm8,%ymm8,%ymm8
6478	vpalignr	$4,%ymm12,%ymm12,%ymm12
6479
6480	decq	%r10
6481	jnz	.Lseal_avx2_init_rounds
6482	vpaddd	.Lchacha20_consts(%rip),%ymm3,%ymm3
6483	vpaddd	0+64(%rbp),%ymm7,%ymm7
6484	vpaddd	0+96(%rbp),%ymm11,%ymm11
6485	vpaddd	0+256(%rbp),%ymm15,%ymm15
6486	vpaddd	.Lchacha20_consts(%rip),%ymm2,%ymm2
6487	vpaddd	0+64(%rbp),%ymm6,%ymm6
6488	vpaddd	0+96(%rbp),%ymm10,%ymm10
6489	vpaddd	0+224(%rbp),%ymm14,%ymm14
6490	vpaddd	.Lchacha20_consts(%rip),%ymm1,%ymm1
6491	vpaddd	0+64(%rbp),%ymm5,%ymm5
6492	vpaddd	0+96(%rbp),%ymm9,%ymm9
6493	vpaddd	0+192(%rbp),%ymm13,%ymm13
6494	vpaddd	.Lchacha20_consts(%rip),%ymm0,%ymm0
6495	vpaddd	0+64(%rbp),%ymm4,%ymm4
6496	vpaddd	0+96(%rbp),%ymm8,%ymm8
6497	vpaddd	0+160(%rbp),%ymm12,%ymm12
6498
6499	vperm2i128	$0x13,%ymm11,%ymm15,%ymm11
6500	vperm2i128	$0x02,%ymm3,%ymm7,%ymm15
6501	vperm2i128	$0x13,%ymm3,%ymm7,%ymm3
6502	vpand	.Lclamp(%rip),%ymm15,%ymm15
6503	vmovdqa	%ymm15,0+0(%rbp)
6504	movq	%r8,%r8
6505	call	poly_hash_ad_internal
6506
6507	vpxor	0(%rsi),%ymm3,%ymm3
6508	vpxor	32(%rsi),%ymm11,%ymm11
6509	vmovdqu	%ymm3,0(%rdi)
6510	vmovdqu	%ymm11,32(%rdi)
6511	vperm2i128	$0x02,%ymm2,%ymm6,%ymm15
6512	vperm2i128	$0x13,%ymm2,%ymm6,%ymm6
6513	vperm2i128	$0x02,%ymm10,%ymm14,%ymm2
6514	vperm2i128	$0x13,%ymm10,%ymm14,%ymm10
6515	vpxor	0+64(%rsi),%ymm15,%ymm15
6516	vpxor	32+64(%rsi),%ymm2,%ymm2
6517	vpxor	64+64(%rsi),%ymm6,%ymm6
6518	vpxor	96+64(%rsi),%ymm10,%ymm10
6519	vmovdqu	%ymm15,0+64(%rdi)
6520	vmovdqu	%ymm2,32+64(%rdi)
6521	vmovdqu	%ymm6,64+64(%rdi)
6522	vmovdqu	%ymm10,96+64(%rdi)
6523	vperm2i128	$0x02,%ymm1,%ymm5,%ymm15
6524	vperm2i128	$0x13,%ymm1,%ymm5,%ymm5
6525	vperm2i128	$0x02,%ymm9,%ymm13,%ymm1
6526	vperm2i128	$0x13,%ymm9,%ymm13,%ymm9
6527	vpxor	0+192(%rsi),%ymm15,%ymm15
6528	vpxor	32+192(%rsi),%ymm1,%ymm1
6529	vpxor	64+192(%rsi),%ymm5,%ymm5
6530	vpxor	96+192(%rsi),%ymm9,%ymm9
6531	vmovdqu	%ymm15,0+192(%rdi)
6532	vmovdqu	%ymm1,32+192(%rdi)
6533	vmovdqu	%ymm5,64+192(%rdi)
6534	vmovdqu	%ymm9,96+192(%rdi)
6535	vperm2i128	$0x13,%ymm0,%ymm4,%ymm15
6536	vperm2i128	$0x02,%ymm0,%ymm4,%ymm0
6537	vperm2i128	$0x02,%ymm8,%ymm12,%ymm4
6538	vperm2i128	$0x13,%ymm8,%ymm12,%ymm12
6539	vmovdqa	%ymm15,%ymm8
6540
6541	leaq	320(%rsi),%rsi
6542	subq	$320,%rbx
6543	movq	$320,%rcx
6544	cmpq	$128,%rbx
6545	jbe	.Lseal_avx2_short_hash_remainder
6546	vpxor	0(%rsi),%ymm0,%ymm0
6547	vpxor	32(%rsi),%ymm4,%ymm4
6548	vpxor	64(%rsi),%ymm8,%ymm8
6549	vpxor	96(%rsi),%ymm12,%ymm12
6550	vmovdqu	%ymm0,320(%rdi)
6551	vmovdqu	%ymm4,352(%rdi)
6552	vmovdqu	%ymm8,384(%rdi)
6553	vmovdqu	%ymm12,416(%rdi)
6554	leaq	128(%rsi),%rsi
6555	subq	$128,%rbx
6556	movq	$8,%rcx
6557	movq	$2,%r8
6558	cmpq	$128,%rbx
6559	jbe	.Lseal_avx2_tail_128
6560	cmpq	$256,%rbx
6561	jbe	.Lseal_avx2_tail_256
6562	cmpq	$384,%rbx
6563	jbe	.Lseal_avx2_tail_384
6564	cmpq	$512,%rbx
6565	jbe	.Lseal_avx2_tail_512
6566	vmovdqa	.Lchacha20_consts(%rip),%ymm0
6567	vmovdqa	0+64(%rbp),%ymm4
6568	vmovdqa	0+96(%rbp),%ymm8
6569	vmovdqa	%ymm0,%ymm1
6570	vmovdqa	%ymm4,%ymm5
6571	vmovdqa	%ymm8,%ymm9
6572	vmovdqa	%ymm0,%ymm2
6573	vmovdqa	%ymm4,%ymm6
6574	vmovdqa	%ymm8,%ymm10
6575	vmovdqa	%ymm0,%ymm3
6576	vmovdqa	%ymm4,%ymm7
6577	vmovdqa	%ymm8,%ymm11
6578	vmovdqa	.Lavx2_inc(%rip),%ymm12
6579	vpaddd	0+160(%rbp),%ymm12,%ymm15
6580	vpaddd	%ymm15,%ymm12,%ymm14
6581	vpaddd	%ymm14,%ymm12,%ymm13
6582	vpaddd	%ymm13,%ymm12,%ymm12
6583	vmovdqa	%ymm15,0+256(%rbp)
6584	vmovdqa	%ymm14,0+224(%rbp)
6585	vmovdqa	%ymm13,0+192(%rbp)
6586	vmovdqa	%ymm12,0+160(%rbp)
6587	vmovdqa	%ymm8,0+128(%rbp)
6588	vmovdqa	.Lrol16(%rip),%ymm8
6589	vpaddd	%ymm7,%ymm3,%ymm3
6590	vpaddd	%ymm6,%ymm2,%ymm2
6591	vpaddd	%ymm5,%ymm1,%ymm1
6592	vpaddd	%ymm4,%ymm0,%ymm0
6593	vpxor	%ymm3,%ymm15,%ymm15
6594	vpxor	%ymm2,%ymm14,%ymm14
6595	vpxor	%ymm1,%ymm13,%ymm13
6596	vpxor	%ymm0,%ymm12,%ymm12
6597	vpshufb	%ymm8,%ymm15,%ymm15
6598	vpshufb	%ymm8,%ymm14,%ymm14
6599	vpshufb	%ymm8,%ymm13,%ymm13
6600	vpshufb	%ymm8,%ymm12,%ymm12
6601	vpaddd	%ymm15,%ymm11,%ymm11
6602	vpaddd	%ymm14,%ymm10,%ymm10
6603	vpaddd	%ymm13,%ymm9,%ymm9
6604	vpaddd	0+128(%rbp),%ymm12,%ymm8
6605	vpxor	%ymm11,%ymm7,%ymm7
6606	vpxor	%ymm10,%ymm6,%ymm6
6607	vpxor	%ymm9,%ymm5,%ymm5
6608	vpxor	%ymm8,%ymm4,%ymm4
6609	vmovdqa	%ymm8,0+128(%rbp)
6610	vpsrld	$20,%ymm7,%ymm8
6611	vpslld	$32-20,%ymm7,%ymm7
6612	vpxor	%ymm8,%ymm7,%ymm7
6613	vpsrld	$20,%ymm6,%ymm8
6614	vpslld	$32-20,%ymm6,%ymm6
6615	vpxor	%ymm8,%ymm6,%ymm6
6616	vpsrld	$20,%ymm5,%ymm8
6617	vpslld	$32-20,%ymm5,%ymm5
6618	vpxor	%ymm8,%ymm5,%ymm5
6619	vpsrld	$20,%ymm4,%ymm8
6620	vpslld	$32-20,%ymm4,%ymm4
6621	vpxor	%ymm8,%ymm4,%ymm4
6622	vmovdqa	.Lrol8(%rip),%ymm8
6623	vpaddd	%ymm7,%ymm3,%ymm3
6624	vpaddd	%ymm6,%ymm2,%ymm2
6625	vpaddd	%ymm5,%ymm1,%ymm1
6626	vpaddd	%ymm4,%ymm0,%ymm0
6627	vpxor	%ymm3,%ymm15,%ymm15
6628	vpxor	%ymm2,%ymm14,%ymm14
6629	vpxor	%ymm1,%ymm13,%ymm13
6630	vpxor	%ymm0,%ymm12,%ymm12
6631	vpshufb	%ymm8,%ymm15,%ymm15
6632	vpshufb	%ymm8,%ymm14,%ymm14
6633	vpshufb	%ymm8,%ymm13,%ymm13
6634	vpshufb	%ymm8,%ymm12,%ymm12
6635	vpaddd	%ymm15,%ymm11,%ymm11
6636	vpaddd	%ymm14,%ymm10,%ymm10
6637	vpaddd	%ymm13,%ymm9,%ymm9
6638	vpaddd	0+128(%rbp),%ymm12,%ymm8
6639	vpxor	%ymm11,%ymm7,%ymm7
6640	vpxor	%ymm10,%ymm6,%ymm6
6641	vpxor	%ymm9,%ymm5,%ymm5
6642	vpxor	%ymm8,%ymm4,%ymm4
6643	vmovdqa	%ymm8,0+128(%rbp)
6644	vpsrld	$25,%ymm7,%ymm8
6645	vpslld	$32-25,%ymm7,%ymm7
6646	vpxor	%ymm8,%ymm7,%ymm7
6647	vpsrld	$25,%ymm6,%ymm8
6648	vpslld	$32-25,%ymm6,%ymm6
6649	vpxor	%ymm8,%ymm6,%ymm6
6650	vpsrld	$25,%ymm5,%ymm8
6651	vpslld	$32-25,%ymm5,%ymm5
6652	vpxor	%ymm8,%ymm5,%ymm5
6653	vpsrld	$25,%ymm4,%ymm8
6654	vpslld	$32-25,%ymm4,%ymm4
6655	vpxor	%ymm8,%ymm4,%ymm4
6656	vmovdqa	0+128(%rbp),%ymm8
6657	vpalignr	$4,%ymm7,%ymm7,%ymm7
6658	vpalignr	$8,%ymm11,%ymm11,%ymm11
6659	vpalignr	$12,%ymm15,%ymm15,%ymm15
6660	vpalignr	$4,%ymm6,%ymm6,%ymm6
6661	vpalignr	$8,%ymm10,%ymm10,%ymm10
6662	vpalignr	$12,%ymm14,%ymm14,%ymm14
6663	vpalignr	$4,%ymm5,%ymm5,%ymm5
6664	vpalignr	$8,%ymm9,%ymm9,%ymm9
6665	vpalignr	$12,%ymm13,%ymm13,%ymm13
6666	vpalignr	$4,%ymm4,%ymm4,%ymm4
6667	vpalignr	$8,%ymm8,%ymm8,%ymm8
6668	vpalignr	$12,%ymm12,%ymm12,%ymm12
6669	vmovdqa	%ymm8,0+128(%rbp)
6670	vmovdqa	.Lrol16(%rip),%ymm8
6671	vpaddd	%ymm7,%ymm3,%ymm3
6672	vpaddd	%ymm6,%ymm2,%ymm2
6673	vpaddd	%ymm5,%ymm1,%ymm1
6674	vpaddd	%ymm4,%ymm0,%ymm0
6675	vpxor	%ymm3,%ymm15,%ymm15
6676	vpxor	%ymm2,%ymm14,%ymm14
6677	vpxor	%ymm1,%ymm13,%ymm13
6678	vpxor	%ymm0,%ymm12,%ymm12
6679	vpshufb	%ymm8,%ymm15,%ymm15
6680	vpshufb	%ymm8,%ymm14,%ymm14
6681	vpshufb	%ymm8,%ymm13,%ymm13
6682	vpshufb	%ymm8,%ymm12,%ymm12
6683	vpaddd	%ymm15,%ymm11,%ymm11
6684	vpaddd	%ymm14,%ymm10,%ymm10
6685	vpaddd	%ymm13,%ymm9,%ymm9
6686	vpaddd	0+128(%rbp),%ymm12,%ymm8
6687	vpxor	%ymm11,%ymm7,%ymm7
6688	vpxor	%ymm10,%ymm6,%ymm6
6689	vpxor	%ymm9,%ymm5,%ymm5
6690	vpxor	%ymm8,%ymm4,%ymm4
6691	vmovdqa	%ymm8,0+128(%rbp)
6692	vpsrld	$20,%ymm7,%ymm8
6693	vpslld	$32-20,%ymm7,%ymm7
6694	vpxor	%ymm8,%ymm7,%ymm7
6695	vpsrld	$20,%ymm6,%ymm8
6696	vpslld	$32-20,%ymm6,%ymm6
6697	vpxor	%ymm8,%ymm6,%ymm6
6698	vpsrld	$20,%ymm5,%ymm8
6699	vpslld	$32-20,%ymm5,%ymm5
6700	vpxor	%ymm8,%ymm5,%ymm5
6701	vpsrld	$20,%ymm4,%ymm8
6702	vpslld	$32-20,%ymm4,%ymm4
6703	vpxor	%ymm8,%ymm4,%ymm4
6704	vmovdqa	.Lrol8(%rip),%ymm8
6705	vpaddd	%ymm7,%ymm3,%ymm3
6706	vpaddd	%ymm6,%ymm2,%ymm2
6707	vpaddd	%ymm5,%ymm1,%ymm1
6708	vpaddd	%ymm4,%ymm0,%ymm0
6709	vpxor	%ymm3,%ymm15,%ymm15
6710	vpxor	%ymm2,%ymm14,%ymm14
6711	vpxor	%ymm1,%ymm13,%ymm13
6712	vpxor	%ymm0,%ymm12,%ymm12
6713	vpshufb	%ymm8,%ymm15,%ymm15
6714	vpshufb	%ymm8,%ymm14,%ymm14
6715	vpshufb	%ymm8,%ymm13,%ymm13
6716	vpshufb	%ymm8,%ymm12,%ymm12
6717	vpaddd	%ymm15,%ymm11,%ymm11
6718	vpaddd	%ymm14,%ymm10,%ymm10
6719	vpaddd	%ymm13,%ymm9,%ymm9
6720	vpaddd	0+128(%rbp),%ymm12,%ymm8
6721	vpxor	%ymm11,%ymm7,%ymm7
6722	vpxor	%ymm10,%ymm6,%ymm6
6723	vpxor	%ymm9,%ymm5,%ymm5
6724	vpxor	%ymm8,%ymm4,%ymm4
6725	vmovdqa	%ymm8,0+128(%rbp)
6726	vpsrld	$25,%ymm7,%ymm8
6727	vpslld	$32-25,%ymm7,%ymm7
6728	vpxor	%ymm8,%ymm7,%ymm7
6729	vpsrld	$25,%ymm6,%ymm8
6730	vpslld	$32-25,%ymm6,%ymm6
6731	vpxor	%ymm8,%ymm6,%ymm6
6732	vpsrld	$25,%ymm5,%ymm8
6733	vpslld	$32-25,%ymm5,%ymm5
6734	vpxor	%ymm8,%ymm5,%ymm5
6735	vpsrld	$25,%ymm4,%ymm8
6736	vpslld	$32-25,%ymm4,%ymm4
6737	vpxor	%ymm8,%ymm4,%ymm4
6738	vmovdqa	0+128(%rbp),%ymm8
6739	vpalignr	$12,%ymm7,%ymm7,%ymm7
6740	vpalignr	$8,%ymm11,%ymm11,%ymm11
6741	vpalignr	$4,%ymm15,%ymm15,%ymm15
6742	vpalignr	$12,%ymm6,%ymm6,%ymm6
6743	vpalignr	$8,%ymm10,%ymm10,%ymm10
6744	vpalignr	$4,%ymm14,%ymm14,%ymm14
6745	vpalignr	$12,%ymm5,%ymm5,%ymm5
6746	vpalignr	$8,%ymm9,%ymm9,%ymm9
6747	vpalignr	$4,%ymm13,%ymm13,%ymm13
6748	vpalignr	$12,%ymm4,%ymm4,%ymm4
6749	vpalignr	$8,%ymm8,%ymm8,%ymm8
6750	vpalignr	$4,%ymm12,%ymm12,%ymm12
6751	vmovdqa	%ymm8,0+128(%rbp)
6752	vmovdqa	.Lrol16(%rip),%ymm8
6753	vpaddd	%ymm7,%ymm3,%ymm3
6754	vpaddd	%ymm6,%ymm2,%ymm2
6755	vpaddd	%ymm5,%ymm1,%ymm1
6756	vpaddd	%ymm4,%ymm0,%ymm0
6757	vpxor	%ymm3,%ymm15,%ymm15
6758	vpxor	%ymm2,%ymm14,%ymm14
6759	vpxor	%ymm1,%ymm13,%ymm13
6760	vpxor	%ymm0,%ymm12,%ymm12
6761	vpshufb	%ymm8,%ymm15,%ymm15
6762	vpshufb	%ymm8,%ymm14,%ymm14
6763	vpshufb	%ymm8,%ymm13,%ymm13
6764	vpshufb	%ymm8,%ymm12,%ymm12
6765	vpaddd	%ymm15,%ymm11,%ymm11
6766	vpaddd	%ymm14,%ymm10,%ymm10
6767	vpaddd	%ymm13,%ymm9,%ymm9
6768	vpaddd	0+128(%rbp),%ymm12,%ymm8
6769	vpxor	%ymm11,%ymm7,%ymm7
6770	vpxor	%ymm10,%ymm6,%ymm6
6771	vpxor	%ymm9,%ymm5,%ymm5
6772	vpxor	%ymm8,%ymm4,%ymm4
6773	vmovdqa	%ymm8,0+128(%rbp)
6774	vpsrld	$20,%ymm7,%ymm8
6775	vpslld	$32-20,%ymm7,%ymm7
6776	vpxor	%ymm8,%ymm7,%ymm7
6777	vpsrld	$20,%ymm6,%ymm8
6778	vpslld	$32-20,%ymm6,%ymm6
6779	vpxor	%ymm8,%ymm6,%ymm6
6780	vpsrld	$20,%ymm5,%ymm8
6781	vpslld	$32-20,%ymm5,%ymm5
6782	vpxor	%ymm8,%ymm5,%ymm5
6783	vpsrld	$20,%ymm4,%ymm8
6784	vpslld	$32-20,%ymm4,%ymm4
6785	vpxor	%ymm8,%ymm4,%ymm4
6786	vmovdqa	.Lrol8(%rip),%ymm8
6787	vpaddd	%ymm7,%ymm3,%ymm3
6788	vpaddd	%ymm6,%ymm2,%ymm2
6789	vpaddd	%ymm5,%ymm1,%ymm1
6790	vpaddd	%ymm4,%ymm0,%ymm0
6791	vpxor	%ymm3,%ymm15,%ymm15
6792
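// Note: the first 512-byte chunk enters the interleaved round loop below partway
// through its first iteration (%rcx = 9 iterations remain); %rdi is backed up
// 16 bytes, apparently so the in-loop Poly1305 block offsets line up on this pass.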
6793	subq	$16,%rdi
6794	movq	$9,%rcx
6795	jmp	.Lseal_avx2_main_loop_rounds_entry
6796.align	32
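// Seal main loop: each iteration runs four two-block ChaCha20 states (rows held
// in %ymm0-%ymm15, block counters kept at 160-256(%rbp)) to produce 512 bytes of
// keystream while hashing the previous 512 bytes of ciphertext with Poly1305.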
6797.Lseal_avx2_main_loop:
6798	vmovdqa	.Lchacha20_consts(%rip),%ymm0
6799	vmovdqa	0+64(%rbp),%ymm4
6800	vmovdqa	0+96(%rbp),%ymm8
6801	vmovdqa	%ymm0,%ymm1
6802	vmovdqa	%ymm4,%ymm5
6803	vmovdqa	%ymm8,%ymm9
6804	vmovdqa	%ymm0,%ymm2
6805	vmovdqa	%ymm4,%ymm6
6806	vmovdqa	%ymm8,%ymm10
6807	vmovdqa	%ymm0,%ymm3
6808	vmovdqa	%ymm4,%ymm7
6809	vmovdqa	%ymm8,%ymm11
6810	vmovdqa	.Lavx2_inc(%rip),%ymm12
6811	vpaddd	0+160(%rbp),%ymm12,%ymm15
6812	vpaddd	%ymm15,%ymm12,%ymm14
6813	vpaddd	%ymm14,%ymm12,%ymm13
6814	vpaddd	%ymm13,%ymm12,%ymm12
6815	vmovdqa	%ymm15,0+256(%rbp)
6816	vmovdqa	%ymm14,0+224(%rbp)
6817	vmovdqa	%ymm13,0+192(%rbp)
6818	vmovdqa	%ymm12,0+160(%rbp)
6819
6820	movq	$10,%rcx
6821.align	32
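// One pass = one ChaCha20 double round (column round then diagonal round;
// rotations by 16/8 via vpshufb, by 12/7 via shift pairs), interleaved with
// three 16-byte Poly1305 blocks read from the ciphertext at (%rdi).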
6822.Lseal_avx2_main_loop_rounds:
6823	addq	0+0(%rdi),%r10
6824	adcq	8+0(%rdi),%r11
6825	adcq	$1,%r12
6826	vmovdqa	%ymm8,0+128(%rbp)
6827	vmovdqa	.Lrol16(%rip),%ymm8
6828	vpaddd	%ymm7,%ymm3,%ymm3
6829	vpaddd	%ymm6,%ymm2,%ymm2
6830	vpaddd	%ymm5,%ymm1,%ymm1
6831	vpaddd	%ymm4,%ymm0,%ymm0
6832	vpxor	%ymm3,%ymm15,%ymm15
6833	vpxor	%ymm2,%ymm14,%ymm14
6834	vpxor	%ymm1,%ymm13,%ymm13
6835	vpxor	%ymm0,%ymm12,%ymm12
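// Poly1305: having added the next 16-byte block (plus the 2^128 padding bit)
// into the accumulator %r10:%r11:%r12 above, multiply it by r (kept at 0(%rbp))
// using mulx.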
6836	movq	0+0+0(%rbp),%rdx
6837	movq	%rdx,%r15
6838	mulxq	%r10,%r13,%r14
6839	mulxq	%r11,%rax,%rdx
6840	imulq	%r12,%r15
6841	addq	%rax,%r14
6842	adcq	%rdx,%r15
6843	vpshufb	%ymm8,%ymm15,%ymm15
6844	vpshufb	%ymm8,%ymm14,%ymm14
6845	vpshufb	%ymm8,%ymm13,%ymm13
6846	vpshufb	%ymm8,%ymm12,%ymm12
6847	vpaddd	%ymm15,%ymm11,%ymm11
6848	vpaddd	%ymm14,%ymm10,%ymm10
6849	vpaddd	%ymm13,%ymm9,%ymm9
6850	vpaddd	0+128(%rbp),%ymm12,%ymm8
6851	vpxor	%ymm11,%ymm7,%ymm7
6852	movq	8+0+0(%rbp),%rdx
6853	mulxq	%r10,%r10,%rax
6854	addq	%r10,%r14
6855	mulxq	%r11,%r11,%r9
6856	adcq	%r11,%r15
6857	adcq	$0,%r9
6858	imulq	%r12,%rdx
6859	vpxor	%ymm10,%ymm6,%ymm6
6860	vpxor	%ymm9,%ymm5,%ymm5
6861	vpxor	%ymm8,%ymm4,%ymm4
6862	vmovdqa	%ymm8,0+128(%rbp)
6863	vpsrld	$20,%ymm7,%ymm8
6864	vpslld	$32-20,%ymm7,%ymm7
6865	vpxor	%ymm8,%ymm7,%ymm7
6866	vpsrld	$20,%ymm6,%ymm8
6867	vpslld	$32-20,%ymm6,%ymm6
6868	vpxor	%ymm8,%ymm6,%ymm6
6869	vpsrld	$20,%ymm5,%ymm8
6870	vpslld	$32-20,%ymm5,%ymm5
6871	addq	%rax,%r15
6872	adcq	%rdx,%r9
6873	vpxor	%ymm8,%ymm5,%ymm5
6874	vpsrld	$20,%ymm4,%ymm8
6875	vpslld	$32-20,%ymm4,%ymm4
6876	vpxor	%ymm8,%ymm4,%ymm4
6877	vmovdqa	.Lrol8(%rip),%ymm8
6878	vpaddd	%ymm7,%ymm3,%ymm3
6879	vpaddd	%ymm6,%ymm2,%ymm2
6880	vpaddd	%ymm5,%ymm1,%ymm1
6881	vpaddd	%ymm4,%ymm0,%ymm0
6882	vpxor	%ymm3,%ymm15,%ymm15
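// Reduction: h = (h mod 2^130) + 5*(h >> 130), folded back into %r10:%r11:%r12.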
6883	movq	%r13,%r10
6884	movq	%r14,%r11
6885	movq	%r15,%r12
6886	andq	$3,%r12
6887	movq	%r15,%r13
6888	andq	$-4,%r13
6889	movq	%r9,%r14
6890	shrdq	$2,%r9,%r15
6891	shrq	$2,%r9
6892	addq	%r13,%r15
6893	adcq	%r14,%r9
6894	addq	%r15,%r10
6895	adcq	%r9,%r11
6896	adcq	$0,%r12
6897
6898.Lseal_avx2_main_loop_rounds_entry:
6899	vpxor	%ymm2,%ymm14,%ymm14
6900	vpxor	%ymm1,%ymm13,%ymm13
6901	vpxor	%ymm0,%ymm12,%ymm12
6902	vpshufb	%ymm8,%ymm15,%ymm15
6903	vpshufb	%ymm8,%ymm14,%ymm14
6904	vpshufb	%ymm8,%ymm13,%ymm13
6905	vpshufb	%ymm8,%ymm12,%ymm12
6906	vpaddd	%ymm15,%ymm11,%ymm11
6907	vpaddd	%ymm14,%ymm10,%ymm10
6908	addq	0+16(%rdi),%r10
6909	adcq	8+16(%rdi),%r11
6910	adcq	$1,%r12
6911	vpaddd	%ymm13,%ymm9,%ymm9
6912	vpaddd	0+128(%rbp),%ymm12,%ymm8
6913	vpxor	%ymm11,%ymm7,%ymm7
6914	vpxor	%ymm10,%ymm6,%ymm6
6915	vpxor	%ymm9,%ymm5,%ymm5
6916	vpxor	%ymm8,%ymm4,%ymm4
6917	vmovdqa	%ymm8,0+128(%rbp)
6918	vpsrld	$25,%ymm7,%ymm8
6919	movq	0+0+0(%rbp),%rdx
6920	movq	%rdx,%r15
6921	mulxq	%r10,%r13,%r14
6922	mulxq	%r11,%rax,%rdx
6923	imulq	%r12,%r15
6924	addq	%rax,%r14
6925	adcq	%rdx,%r15
6926	vpslld	$32-25,%ymm7,%ymm7
6927	vpxor	%ymm8,%ymm7,%ymm7
6928	vpsrld	$25,%ymm6,%ymm8
6929	vpslld	$32-25,%ymm6,%ymm6
6930	vpxor	%ymm8,%ymm6,%ymm6
6931	vpsrld	$25,%ymm5,%ymm8
6932	vpslld	$32-25,%ymm5,%ymm5
6933	vpxor	%ymm8,%ymm5,%ymm5
6934	vpsrld	$25,%ymm4,%ymm8
6935	vpslld	$32-25,%ymm4,%ymm4
6936	vpxor	%ymm8,%ymm4,%ymm4
6937	vmovdqa	0+128(%rbp),%ymm8
6938	vpalignr	$4,%ymm7,%ymm7,%ymm7
6939	vpalignr	$8,%ymm11,%ymm11,%ymm11
6940	vpalignr	$12,%ymm15,%ymm15,%ymm15
6941	vpalignr	$4,%ymm6,%ymm6,%ymm6
6942	vpalignr	$8,%ymm10,%ymm10,%ymm10
6943	vpalignr	$12,%ymm14,%ymm14,%ymm14
6944	movq	8+0+0(%rbp),%rdx
6945	mulxq	%r10,%r10,%rax
6946	addq	%r10,%r14
6947	mulxq	%r11,%r11,%r9
6948	adcq	%r11,%r15
6949	adcq	$0,%r9
6950	imulq	%r12,%rdx
6951	vpalignr	$4,%ymm5,%ymm5,%ymm5
6952	vpalignr	$8,%ymm9,%ymm9,%ymm9
6953	vpalignr	$12,%ymm13,%ymm13,%ymm13
6954	vpalignr	$4,%ymm4,%ymm4,%ymm4
6955	vpalignr	$8,%ymm8,%ymm8,%ymm8
6956	vpalignr	$12,%ymm12,%ymm12,%ymm12
6957	vmovdqa	%ymm8,0+128(%rbp)
6958	vmovdqa	.Lrol16(%rip),%ymm8
6959	vpaddd	%ymm7,%ymm3,%ymm3
6960	vpaddd	%ymm6,%ymm2,%ymm2
6961	vpaddd	%ymm5,%ymm1,%ymm1
6962	vpaddd	%ymm4,%ymm0,%ymm0
6963	vpxor	%ymm3,%ymm15,%ymm15
6964	vpxor	%ymm2,%ymm14,%ymm14
6965	vpxor	%ymm1,%ymm13,%ymm13
6966	vpxor	%ymm0,%ymm12,%ymm12
6967	vpshufb	%ymm8,%ymm15,%ymm15
6968	vpshufb	%ymm8,%ymm14,%ymm14
6969	addq	%rax,%r15
6970	adcq	%rdx,%r9
6971	vpshufb	%ymm8,%ymm13,%ymm13
6972	vpshufb	%ymm8,%ymm12,%ymm12
6973	vpaddd	%ymm15,%ymm11,%ymm11
6974	vpaddd	%ymm14,%ymm10,%ymm10
6975	vpaddd	%ymm13,%ymm9,%ymm9
6976	vpaddd	0+128(%rbp),%ymm12,%ymm8
6977	vpxor	%ymm11,%ymm7,%ymm7
6978	vpxor	%ymm10,%ymm6,%ymm6
6979	vpxor	%ymm9,%ymm5,%ymm5
6980	movq	%r13,%r10
6981	movq	%r14,%r11
6982	movq	%r15,%r12
6983	andq	$3,%r12
6984	movq	%r15,%r13
6985	andq	$-4,%r13
6986	movq	%r9,%r14
6987	shrdq	$2,%r9,%r15
6988	shrq	$2,%r9
6989	addq	%r13,%r15
6990	adcq	%r14,%r9
6991	addq	%r15,%r10
6992	adcq	%r9,%r11
6993	adcq	$0,%r12
6994	vpxor	%ymm8,%ymm4,%ymm4
6995	vmovdqa	%ymm8,0+128(%rbp)
6996	vpsrld	$20,%ymm7,%ymm8
6997	vpslld	$32-20,%ymm7,%ymm7
6998	vpxor	%ymm8,%ymm7,%ymm7
6999	vpsrld	$20,%ymm6,%ymm8
7000	vpslld	$32-20,%ymm6,%ymm6
7001	vpxor	%ymm8,%ymm6,%ymm6
7002	addq	0+32(%rdi),%r10
7003	adcq	8+32(%rdi),%r11
7004	adcq	$1,%r12
7005
7006	leaq	48(%rdi),%rdi
7007	vpsrld	$20,%ymm5,%ymm8
7008	vpslld	$32-20,%ymm5,%ymm5
7009	vpxor	%ymm8,%ymm5,%ymm5
7010	vpsrld	$20,%ymm4,%ymm8
7011	vpslld	$32-20,%ymm4,%ymm4
7012	vpxor	%ymm8,%ymm4,%ymm4
7013	vmovdqa	.Lrol8(%rip),%ymm8
7014	vpaddd	%ymm7,%ymm3,%ymm3
7015	vpaddd	%ymm6,%ymm2,%ymm2
7016	vpaddd	%ymm5,%ymm1,%ymm1
7017	vpaddd	%ymm4,%ymm0,%ymm0
7018	vpxor	%ymm3,%ymm15,%ymm15
7019	vpxor	%ymm2,%ymm14,%ymm14
7020	vpxor	%ymm1,%ymm13,%ymm13
7021	vpxor	%ymm0,%ymm12,%ymm12
7022	vpshufb	%ymm8,%ymm15,%ymm15
7023	vpshufb	%ymm8,%ymm14,%ymm14
7024	vpshufb	%ymm8,%ymm13,%ymm13
7025	movq	0+0+0(%rbp),%rdx
7026	movq	%rdx,%r15
7027	mulxq	%r10,%r13,%r14
7028	mulxq	%r11,%rax,%rdx
7029	imulq	%r12,%r15
7030	addq	%rax,%r14
7031	adcq	%rdx,%r15
7032	vpshufb	%ymm8,%ymm12,%ymm12
7033	vpaddd	%ymm15,%ymm11,%ymm11
7034	vpaddd	%ymm14,%ymm10,%ymm10
7035	vpaddd	%ymm13,%ymm9,%ymm9
7036	vpaddd	0+128(%rbp),%ymm12,%ymm8
7037	vpxor	%ymm11,%ymm7,%ymm7
7038	vpxor	%ymm10,%ymm6,%ymm6
7039	vpxor	%ymm9,%ymm5,%ymm5
7040	movq	8+0+0(%rbp),%rdx
7041	mulxq	%r10,%r10,%rax
7042	addq	%r10,%r14
7043	mulxq	%r11,%r11,%r9
7044	adcq	%r11,%r15
7045	adcq	$0,%r9
7046	imulq	%r12,%rdx
7047	vpxor	%ymm8,%ymm4,%ymm4
7048	vmovdqa	%ymm8,0+128(%rbp)
7049	vpsrld	$25,%ymm7,%ymm8
7050	vpslld	$32-25,%ymm7,%ymm7
7051	vpxor	%ymm8,%ymm7,%ymm7
7052	vpsrld	$25,%ymm6,%ymm8
7053	vpslld	$32-25,%ymm6,%ymm6
7054	vpxor	%ymm8,%ymm6,%ymm6
7055	addq	%rax,%r15
7056	adcq	%rdx,%r9
7057	vpsrld	$25,%ymm5,%ymm8
7058	vpslld	$32-25,%ymm5,%ymm5
7059	vpxor	%ymm8,%ymm5,%ymm5
7060	vpsrld	$25,%ymm4,%ymm8
7061	vpslld	$32-25,%ymm4,%ymm4
7062	vpxor	%ymm8,%ymm4,%ymm4
7063	vmovdqa	0+128(%rbp),%ymm8
7064	vpalignr	$12,%ymm7,%ymm7,%ymm7
7065	vpalignr	$8,%ymm11,%ymm11,%ymm11
7066	vpalignr	$4,%ymm15,%ymm15,%ymm15
7067	vpalignr	$12,%ymm6,%ymm6,%ymm6
7068	vpalignr	$8,%ymm10,%ymm10,%ymm10
7069	vpalignr	$4,%ymm14,%ymm14,%ymm14
7070	vpalignr	$12,%ymm5,%ymm5,%ymm5
7071	vpalignr	$8,%ymm9,%ymm9,%ymm9
7072	vpalignr	$4,%ymm13,%ymm13,%ymm13
7073	vpalignr	$12,%ymm4,%ymm4,%ymm4
7074	vpalignr	$8,%ymm8,%ymm8,%ymm8
7075	movq	%r13,%r10
7076	movq	%r14,%r11
7077	movq	%r15,%r12
7078	andq	$3,%r12
7079	movq	%r15,%r13
7080	andq	$-4,%r13
7081	movq	%r9,%r14
7082	shrdq	$2,%r9,%r15
7083	shrq	$2,%r9
7084	addq	%r13,%r15
7085	adcq	%r14,%r9
7086	addq	%r15,%r10
7087	adcq	%r9,%r11
7088	adcq	$0,%r12
7089	vpalignr	$4,%ymm12,%ymm12,%ymm12
7090
7091	decq	%rcx
7092	jne	.Lseal_avx2_main_loop_rounds
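// Rounds done: add the saved initial state (constants, key rows, counters) back
// into the working registers to obtain the keystream.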
7093	vpaddd	.Lchacha20_consts(%rip),%ymm3,%ymm3
7094	vpaddd	0+64(%rbp),%ymm7,%ymm7
7095	vpaddd	0+96(%rbp),%ymm11,%ymm11
7096	vpaddd	0+256(%rbp),%ymm15,%ymm15
7097	vpaddd	.Lchacha20_consts(%rip),%ymm2,%ymm2
7098	vpaddd	0+64(%rbp),%ymm6,%ymm6
7099	vpaddd	0+96(%rbp),%ymm10,%ymm10
7100	vpaddd	0+224(%rbp),%ymm14,%ymm14
7101	vpaddd	.Lchacha20_consts(%rip),%ymm1,%ymm1
7102	vpaddd	0+64(%rbp),%ymm5,%ymm5
7103	vpaddd	0+96(%rbp),%ymm9,%ymm9
7104	vpaddd	0+192(%rbp),%ymm13,%ymm13
7105	vpaddd	.Lchacha20_consts(%rip),%ymm0,%ymm0
7106	vpaddd	0+64(%rbp),%ymm4,%ymm4
7107	vpaddd	0+96(%rbp),%ymm8,%ymm8
7108	vpaddd	0+160(%rbp),%ymm12,%ymm12
7109
7110	vmovdqa	%ymm0,0+128(%rbp)
7111	addq	0+0(%rdi),%r10
7112	adcq	8+0(%rdi),%r11
7113	adcq	$1,%r12
7114	movq	0+0+0(%rbp),%rdx
7115	movq	%rdx,%r15
7116	mulxq	%r10,%r13,%r14
7117	mulxq	%r11,%rax,%rdx
7118	imulq	%r12,%r15
7119	addq	%rax,%r14
7120	adcq	%rdx,%r15
7121	movq	8+0+0(%rbp),%rdx
7122	mulxq	%r10,%r10,%rax
7123	addq	%r10,%r14
7124	mulxq	%r11,%r11,%r9
7125	adcq	%r11,%r15
7126	adcq	$0,%r9
7127	imulq	%r12,%rdx
7128	addq	%rax,%r15
7129	adcq	%rdx,%r9
7130	movq	%r13,%r10
7131	movq	%r14,%r11
7132	movq	%r15,%r12
7133	andq	$3,%r12
7134	movq	%r15,%r13
7135	andq	$-4,%r13
7136	movq	%r9,%r14
7137	shrdq	$2,%r9,%r15
7138	shrq	$2,%r9
7139	addq	%r13,%r15
7140	adcq	%r14,%r9
7141	addq	%r15,%r10
7142	adcq	%r9,%r11
7143	adcq	$0,%r12
7144	addq	0+16(%rdi),%r10
7145	adcq	8+16(%rdi),%r11
7146	adcq	$1,%r12
7147	movq	0+0+0(%rbp),%rdx
7148	movq	%rdx,%r15
7149	mulxq	%r10,%r13,%r14
7150	mulxq	%r11,%rax,%rdx
7151	imulq	%r12,%r15
7152	addq	%rax,%r14
7153	adcq	%rdx,%r15
7154	movq	8+0+0(%rbp),%rdx
7155	mulxq	%r10,%r10,%rax
7156	addq	%r10,%r14
7157	mulxq	%r11,%r11,%r9
7158	adcq	%r11,%r15
7159	adcq	$0,%r9
7160	imulq	%r12,%rdx
7161	addq	%rax,%r15
7162	adcq	%rdx,%r9
7163	movq	%r13,%r10
7164	movq	%r14,%r11
7165	movq	%r15,%r12
7166	andq	$3,%r12
7167	movq	%r15,%r13
7168	andq	$-4,%r13
7169	movq	%r9,%r14
7170	shrdq	$2,%r9,%r15
7171	shrq	$2,%r9
7172	addq	%r13,%r15
7173	adcq	%r14,%r9
7174	addq	%r15,%r10
7175	adcq	%r9,%r11
7176	adcq	$0,%r12
7177
7178	leaq	32(%rdi),%rdi
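// Recombine the 128-bit lanes of each state pair and XOR 512 bytes of plaintext
// at (%rsi) into ciphertext at (%rdi).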
7179	vperm2i128	$0x02,%ymm3,%ymm7,%ymm0
7180	vperm2i128	$0x13,%ymm3,%ymm7,%ymm7
7181	vperm2i128	$0x02,%ymm11,%ymm15,%ymm3
7182	vperm2i128	$0x13,%ymm11,%ymm15,%ymm11
7183	vpxor	0+0(%rsi),%ymm0,%ymm0
7184	vpxor	32+0(%rsi),%ymm3,%ymm3
7185	vpxor	64+0(%rsi),%ymm7,%ymm7
7186	vpxor	96+0(%rsi),%ymm11,%ymm11
7187	vmovdqu	%ymm0,0+0(%rdi)
7188	vmovdqu	%ymm3,32+0(%rdi)
7189	vmovdqu	%ymm7,64+0(%rdi)
7190	vmovdqu	%ymm11,96+0(%rdi)
7191
7192	vmovdqa	0+128(%rbp),%ymm0
7193	vperm2i128	$0x02,%ymm2,%ymm6,%ymm3
7194	vperm2i128	$0x13,%ymm2,%ymm6,%ymm6
7195	vperm2i128	$0x02,%ymm10,%ymm14,%ymm2
7196	vperm2i128	$0x13,%ymm10,%ymm14,%ymm10
7197	vpxor	0+128(%rsi),%ymm3,%ymm3
7198	vpxor	32+128(%rsi),%ymm2,%ymm2
7199	vpxor	64+128(%rsi),%ymm6,%ymm6
7200	vpxor	96+128(%rsi),%ymm10,%ymm10
7201	vmovdqu	%ymm3,0+128(%rdi)
7202	vmovdqu	%ymm2,32+128(%rdi)
7203	vmovdqu	%ymm6,64+128(%rdi)
7204	vmovdqu	%ymm10,96+128(%rdi)
7205	vperm2i128	$0x02,%ymm1,%ymm5,%ymm3
7206	vperm2i128	$0x13,%ymm1,%ymm5,%ymm5
7207	vperm2i128	$0x02,%ymm9,%ymm13,%ymm1
7208	vperm2i128	$0x13,%ymm9,%ymm13,%ymm9
7209	vpxor	0+256(%rsi),%ymm3,%ymm3
7210	vpxor	32+256(%rsi),%ymm1,%ymm1
7211	vpxor	64+256(%rsi),%ymm5,%ymm5
7212	vpxor	96+256(%rsi),%ymm9,%ymm9
7213	vmovdqu	%ymm3,0+256(%rdi)
7214	vmovdqu	%ymm1,32+256(%rdi)
7215	vmovdqu	%ymm5,64+256(%rdi)
7216	vmovdqu	%ymm9,96+256(%rdi)
7217	vperm2i128	$0x02,%ymm0,%ymm4,%ymm3
7218	vperm2i128	$0x13,%ymm0,%ymm4,%ymm4
7219	vperm2i128	$0x02,%ymm8,%ymm12,%ymm0
7220	vperm2i128	$0x13,%ymm8,%ymm12,%ymm8
7221	vpxor	0+384(%rsi),%ymm3,%ymm3
7222	vpxor	32+384(%rsi),%ymm0,%ymm0
7223	vpxor	64+384(%rsi),%ymm4,%ymm4
7224	vpxor	96+384(%rsi),%ymm8,%ymm8
7225	vmovdqu	%ymm3,0+384(%rdi)
7226	vmovdqu	%ymm0,32+384(%rdi)
7227	vmovdqu	%ymm4,64+384(%rdi)
7228	vmovdqu	%ymm8,96+384(%rdi)
7229
7230	leaq	512(%rsi),%rsi
7231	subq	$512,%rbx
7232	cmpq	$512,%rbx
7233	jg	.Lseal_avx2_main_loop
7234
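// Main loop exhausted: hash the last two 16-byte blocks of the previous chunk's
// ciphertext, then pick a tail routine sized to what remains.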
7235	addq	0+0(%rdi),%r10
7236	adcq	8+0(%rdi),%r11
7237	adcq	$1,%r12
7238	movq	0+0+0(%rbp),%rdx
7239	movq	%rdx,%r15
7240	mulxq	%r10,%r13,%r14
7241	mulxq	%r11,%rax,%rdx
7242	imulq	%r12,%r15
7243	addq	%rax,%r14
7244	adcq	%rdx,%r15
7245	movq	8+0+0(%rbp),%rdx
7246	mulxq	%r10,%r10,%rax
7247	addq	%r10,%r14
7248	mulxq	%r11,%r11,%r9
7249	adcq	%r11,%r15
7250	adcq	$0,%r9
7251	imulq	%r12,%rdx
7252	addq	%rax,%r15
7253	adcq	%rdx,%r9
7254	movq	%r13,%r10
7255	movq	%r14,%r11
7256	movq	%r15,%r12
7257	andq	$3,%r12
7258	movq	%r15,%r13
7259	andq	$-4,%r13
7260	movq	%r9,%r14
7261	shrdq	$2,%r9,%r15
7262	shrq	$2,%r9
7263	addq	%r13,%r15
7264	adcq	%r14,%r9
7265	addq	%r15,%r10
7266	adcq	%r9,%r11
7267	adcq	$0,%r12
7268	addq	0+16(%rdi),%r10
7269	adcq	8+16(%rdi),%r11
7270	adcq	$1,%r12
7271	movq	0+0+0(%rbp),%rdx
7272	movq	%rdx,%r15
7273	mulxq	%r10,%r13,%r14
7274	mulxq	%r11,%rax,%rdx
7275	imulq	%r12,%r15
7276	addq	%rax,%r14
7277	adcq	%rdx,%r15
7278	movq	8+0+0(%rbp),%rdx
7279	mulxq	%r10,%r10,%rax
7280	addq	%r10,%r14
7281	mulxq	%r11,%r11,%r9
7282	adcq	%r11,%r15
7283	adcq	$0,%r9
7284	imulq	%r12,%rdx
7285	addq	%rax,%r15
7286	adcq	%rdx,%r9
7287	movq	%r13,%r10
7288	movq	%r14,%r11
7289	movq	%r15,%r12
7290	andq	$3,%r12
7291	movq	%r15,%r13
7292	andq	$-4,%r13
7293	movq	%r9,%r14
7294	shrdq	$2,%r9,%r15
7295	shrq	$2,%r9
7296	addq	%r13,%r15
7297	adcq	%r14,%r9
7298	addq	%r15,%r10
7299	adcq	%r9,%r11
7300	adcq	$0,%r12
7301
7302	leaq	32(%rdi),%rdi
7303	movq	$10,%rcx
7304	xorq	%r8,%r8
7305
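// Dispatch on the remaining plaintext length (%rbx); %rcx counts the remaining
// double rounds and %r8 any extra hash-only passes in the tails below.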
7306	cmpq	$384,%rbx
7307	ja	.Lseal_avx2_tail_512
7308	cmpq	$256,%rbx
7309	ja	.Lseal_avx2_tail_384
7310	cmpq	$128,%rbx
7311	ja	.Lseal_avx2_tail_256
7312
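// At most 128 bytes left: a single two-block ChaCha20 state, still hashing the
// previously written ciphertext as the rounds proceed.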
7313.Lseal_avx2_tail_128:
7314	vmovdqa	.Lchacha20_consts(%rip),%ymm0
7315	vmovdqa	0+64(%rbp),%ymm4
7316	vmovdqa	0+96(%rbp),%ymm8
7317	vmovdqa	.Lavx2_inc(%rip),%ymm12
7318	vpaddd	0+160(%rbp),%ymm12,%ymm12
7319	vmovdqa	%ymm12,0+160(%rbp)
7320
7321.Lseal_avx2_tail_128_rounds_and_3xhash:
7322	addq	0+0(%rdi),%r10
7323	adcq	8+0(%rdi),%r11
7324	adcq	$1,%r12
7325	movq	0+0+0(%rbp),%rdx
7326	movq	%rdx,%r15
7327	mulxq	%r10,%r13,%r14
7328	mulxq	%r11,%rax,%rdx
7329	imulq	%r12,%r15
7330	addq	%rax,%r14
7331	adcq	%rdx,%r15
7332	movq	8+0+0(%rbp),%rdx
7333	mulxq	%r10,%r10,%rax
7334	addq	%r10,%r14
7335	mulxq	%r11,%r11,%r9
7336	adcq	%r11,%r15
7337	adcq	$0,%r9
7338	imulq	%r12,%rdx
7339	addq	%rax,%r15
7340	adcq	%rdx,%r9
7341	movq	%r13,%r10
7342	movq	%r14,%r11
7343	movq	%r15,%r12
7344	andq	$3,%r12
7345	movq	%r15,%r13
7346	andq	$-4,%r13
7347	movq	%r9,%r14
7348	shrdq	$2,%r9,%r15
7349	shrq	$2,%r9
7350	addq	%r13,%r15
7351	adcq	%r14,%r9
7352	addq	%r15,%r10
7353	adcq	%r9,%r11
7354	adcq	$0,%r12
7355
7356	leaq	16(%rdi),%rdi
7357.Lseal_avx2_tail_128_rounds_and_2xhash:
7358	vpaddd	%ymm4,%ymm0,%ymm0
7359	vpxor	%ymm0,%ymm12,%ymm12
7360	vpshufb	.Lrol16(%rip),%ymm12,%ymm12
7361	vpaddd	%ymm12,%ymm8,%ymm8
7362	vpxor	%ymm8,%ymm4,%ymm4
7363	vpsrld	$20,%ymm4,%ymm3
7364	vpslld	$12,%ymm4,%ymm4
7365	vpxor	%ymm3,%ymm4,%ymm4
7366	vpaddd	%ymm4,%ymm0,%ymm0
7367	vpxor	%ymm0,%ymm12,%ymm12
7368	vpshufb	.Lrol8(%rip),%ymm12,%ymm12
7369	vpaddd	%ymm12,%ymm8,%ymm8
7370	vpxor	%ymm8,%ymm4,%ymm4
7371	vpslld	$7,%ymm4,%ymm3
7372	vpsrld	$25,%ymm4,%ymm4
7373	vpxor	%ymm3,%ymm4,%ymm4
7374	vpalignr	$12,%ymm12,%ymm12,%ymm12
7375	vpalignr	$8,%ymm8,%ymm8,%ymm8
7376	vpalignr	$4,%ymm4,%ymm4,%ymm4
7377	addq	0+0(%rdi),%r10
7378	adcq	8+0(%rdi),%r11
7379	adcq	$1,%r12
7380	movq	0+0+0(%rbp),%rdx
7381	movq	%rdx,%r15
7382	mulxq	%r10,%r13,%r14
7383	mulxq	%r11,%rax,%rdx
7384	imulq	%r12,%r15
7385	addq	%rax,%r14
7386	adcq	%rdx,%r15
7387	movq	8+0+0(%rbp),%rdx
7388	mulxq	%r10,%r10,%rax
7389	addq	%r10,%r14
7390	mulxq	%r11,%r11,%r9
7391	adcq	%r11,%r15
7392	adcq	$0,%r9
7393	imulq	%r12,%rdx
7394	addq	%rax,%r15
7395	adcq	%rdx,%r9
7396	movq	%r13,%r10
7397	movq	%r14,%r11
7398	movq	%r15,%r12
7399	andq	$3,%r12
7400	movq	%r15,%r13
7401	andq	$-4,%r13
7402	movq	%r9,%r14
7403	shrdq	$2,%r9,%r15
7404	shrq	$2,%r9
7405	addq	%r13,%r15
7406	adcq	%r14,%r9
7407	addq	%r15,%r10
7408	adcq	%r9,%r11
7409	adcq	$0,%r12
7410	vpaddd	%ymm4,%ymm0,%ymm0
7411	vpxor	%ymm0,%ymm12,%ymm12
7412	vpshufb	.Lrol16(%rip),%ymm12,%ymm12
7413	vpaddd	%ymm12,%ymm8,%ymm8
7414	vpxor	%ymm8,%ymm4,%ymm4
7415	vpsrld	$20,%ymm4,%ymm3
7416	vpslld	$12,%ymm4,%ymm4
7417	vpxor	%ymm3,%ymm4,%ymm4
7418	vpaddd	%ymm4,%ymm0,%ymm0
7419	vpxor	%ymm0,%ymm12,%ymm12
7420	vpshufb	.Lrol8(%rip),%ymm12,%ymm12
7421	vpaddd	%ymm12,%ymm8,%ymm8
7422	vpxor	%ymm8,%ymm4,%ymm4
7423	vpslld	$7,%ymm4,%ymm3
7424	vpsrld	$25,%ymm4,%ymm4
7425	vpxor	%ymm3,%ymm4,%ymm4
7426	vpalignr	$4,%ymm12,%ymm12,%ymm12
7427	vpalignr	$8,%ymm8,%ymm8,%ymm8
7428	vpalignr	$12,%ymm4,%ymm4,%ymm4
7429	addq	0+16(%rdi),%r10
7430	adcq	8+16(%rdi),%r11
7431	adcq	$1,%r12
7432	movq	0+0+0(%rbp),%rdx
7433	movq	%rdx,%r15
7434	mulxq	%r10,%r13,%r14
7435	mulxq	%r11,%rax,%rdx
7436	imulq	%r12,%r15
7437	addq	%rax,%r14
7438	adcq	%rdx,%r15
7439	movq	8+0+0(%rbp),%rdx
7440	mulxq	%r10,%r10,%rax
7441	addq	%r10,%r14
7442	mulxq	%r11,%r11,%r9
7443	adcq	%r11,%r15
7444	adcq	$0,%r9
7445	imulq	%r12,%rdx
7446	addq	%rax,%r15
7447	adcq	%rdx,%r9
7448	movq	%r13,%r10
7449	movq	%r14,%r11
7450	movq	%r15,%r12
7451	andq	$3,%r12
7452	movq	%r15,%r13
7453	andq	$-4,%r13
7454	movq	%r9,%r14
7455	shrdq	$2,%r9,%r15
7456	shrq	$2,%r9
7457	addq	%r13,%r15
7458	adcq	%r14,%r9
7459	addq	%r15,%r10
7460	adcq	%r9,%r11
7461	adcq	$0,%r12
7462
7463	leaq	32(%rdi),%rdi
7464	decq	%rcx
7465	jg	.Lseal_avx2_tail_128_rounds_and_3xhash
7466	decq	%r8
7467	jge	.Lseal_avx2_tail_128_rounds_and_2xhash
7468	vpaddd	.Lchacha20_consts(%rip),%ymm0,%ymm0
7469	vpaddd	0+64(%rbp),%ymm4,%ymm4
7470	vpaddd	0+96(%rbp),%ymm8,%ymm8
7471	vpaddd	0+160(%rbp),%ymm12,%ymm12
7472	vperm2i128	$0x13,%ymm0,%ymm4,%ymm3
7473	vperm2i128	$0x02,%ymm0,%ymm4,%ymm0
7474	vperm2i128	$0x02,%ymm8,%ymm12,%ymm4
7475	vperm2i128	$0x13,%ymm8,%ymm12,%ymm12
7476	vmovdqa	%ymm3,%ymm8
7477
7478	jmp	.Lseal_avx2_short_loop
7479
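// More than 128 and at most 256 bytes left: two two-block ChaCha20 states.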
7480.Lseal_avx2_tail_256:
7481	vmovdqa	.Lchacha20_consts(%rip),%ymm0
7482	vmovdqa	0+64(%rbp),%ymm4
7483	vmovdqa	0+96(%rbp),%ymm8
7484	vmovdqa	%ymm0,%ymm1
7485	vmovdqa	%ymm4,%ymm5
7486	vmovdqa	%ymm8,%ymm9
7487	vmovdqa	.Lavx2_inc(%rip),%ymm12
7488	vpaddd	0+160(%rbp),%ymm12,%ymm13
7489	vpaddd	%ymm13,%ymm12,%ymm12
7490	vmovdqa	%ymm12,0+160(%rbp)
7491	vmovdqa	%ymm13,0+192(%rbp)
7492
7493.Lseal_avx2_tail_256_rounds_and_3xhash:
7494	addq	0+0(%rdi),%r10
7495	adcq	8+0(%rdi),%r11
7496	adcq	$1,%r12
7497	movq	0+0+0(%rbp),%rax
7498	movq	%rax,%r15
7499	mulq	%r10
7500	movq	%rax,%r13
7501	movq	%rdx,%r14
7502	movq	0+0+0(%rbp),%rax
7503	mulq	%r11
7504	imulq	%r12,%r15
7505	addq	%rax,%r14
7506	adcq	%rdx,%r15
7507	movq	8+0+0(%rbp),%rax
7508	movq	%rax,%r9
7509	mulq	%r10
7510	addq	%rax,%r14
7511	adcq	$0,%rdx
7512	movq	%rdx,%r10
7513	movq	8+0+0(%rbp),%rax
7514	mulq	%r11
7515	addq	%rax,%r15
7516	adcq	$0,%rdx
7517	imulq	%r12,%r9
7518	addq	%r10,%r15
7519	adcq	%rdx,%r9
7520	movq	%r13,%r10
7521	movq	%r14,%r11
7522	movq	%r15,%r12
7523	andq	$3,%r12
7524	movq	%r15,%r13
7525	andq	$-4,%r13
7526	movq	%r9,%r14
7527	shrdq	$2,%r9,%r15
7528	shrq	$2,%r9
7529	addq	%r13,%r15
7530	adcq	%r14,%r9
7531	addq	%r15,%r10
7532	adcq	%r9,%r11
7533	adcq	$0,%r12
7534
7535	leaq	16(%rdi),%rdi
7536.Lseal_avx2_tail_256_rounds_and_2xhash:
7537	vpaddd	%ymm4,%ymm0,%ymm0
7538	vpxor	%ymm0,%ymm12,%ymm12
7539	vpshufb	.Lrol16(%rip),%ymm12,%ymm12
7540	vpaddd	%ymm12,%ymm8,%ymm8
7541	vpxor	%ymm8,%ymm4,%ymm4
7542	vpsrld	$20,%ymm4,%ymm3
7543	vpslld	$12,%ymm4,%ymm4
7544	vpxor	%ymm3,%ymm4,%ymm4
7545	vpaddd	%ymm4,%ymm0,%ymm0
7546	vpxor	%ymm0,%ymm12,%ymm12
7547	vpshufb	.Lrol8(%rip),%ymm12,%ymm12
7548	vpaddd	%ymm12,%ymm8,%ymm8
7549	vpxor	%ymm8,%ymm4,%ymm4
7550	vpslld	$7,%ymm4,%ymm3
7551	vpsrld	$25,%ymm4,%ymm4
7552	vpxor	%ymm3,%ymm4,%ymm4
7553	vpalignr	$12,%ymm12,%ymm12,%ymm12
7554	vpalignr	$8,%ymm8,%ymm8,%ymm8
7555	vpalignr	$4,%ymm4,%ymm4,%ymm4
7556	vpaddd	%ymm5,%ymm1,%ymm1
7557	vpxor	%ymm1,%ymm13,%ymm13
7558	vpshufb	.Lrol16(%rip),%ymm13,%ymm13
7559	vpaddd	%ymm13,%ymm9,%ymm9
7560	vpxor	%ymm9,%ymm5,%ymm5
7561	vpsrld	$20,%ymm5,%ymm3
7562	vpslld	$12,%ymm5,%ymm5
7563	vpxor	%ymm3,%ymm5,%ymm5
7564	vpaddd	%ymm5,%ymm1,%ymm1
7565	vpxor	%ymm1,%ymm13,%ymm13
7566	vpshufb	.Lrol8(%rip),%ymm13,%ymm13
7567	vpaddd	%ymm13,%ymm9,%ymm9
7568	vpxor	%ymm9,%ymm5,%ymm5
7569	vpslld	$7,%ymm5,%ymm3
7570	vpsrld	$25,%ymm5,%ymm5
7571	vpxor	%ymm3,%ymm5,%ymm5
7572	vpalignr	$12,%ymm13,%ymm13,%ymm13
7573	vpalignr	$8,%ymm9,%ymm9,%ymm9
7574	vpalignr	$4,%ymm5,%ymm5,%ymm5
7575	addq	0+0(%rdi),%r10
7576	adcq	8+0(%rdi),%r11
7577	adcq	$1,%r12
7578	movq	0+0+0(%rbp),%rax
7579	movq	%rax,%r15
7580	mulq	%r10
7581	movq	%rax,%r13
7582	movq	%rdx,%r14
7583	movq	0+0+0(%rbp),%rax
7584	mulq	%r11
7585	imulq	%r12,%r15
7586	addq	%rax,%r14
7587	adcq	%rdx,%r15
7588	movq	8+0+0(%rbp),%rax
7589	movq	%rax,%r9
7590	mulq	%r10
7591	addq	%rax,%r14
7592	adcq	$0,%rdx
7593	movq	%rdx,%r10
7594	movq	8+0+0(%rbp),%rax
7595	mulq	%r11
7596	addq	%rax,%r15
7597	adcq	$0,%rdx
7598	imulq	%r12,%r9
7599	addq	%r10,%r15
7600	adcq	%rdx,%r9
7601	movq	%r13,%r10
7602	movq	%r14,%r11
7603	movq	%r15,%r12
7604	andq	$3,%r12
7605	movq	%r15,%r13
7606	andq	$-4,%r13
7607	movq	%r9,%r14
7608	shrdq	$2,%r9,%r15
7609	shrq	$2,%r9
7610	addq	%r13,%r15
7611	adcq	%r14,%r9
7612	addq	%r15,%r10
7613	adcq	%r9,%r11
7614	adcq	$0,%r12
7615	vpaddd	%ymm4,%ymm0,%ymm0
7616	vpxor	%ymm0,%ymm12,%ymm12
7617	vpshufb	.Lrol16(%rip),%ymm12,%ymm12
7618	vpaddd	%ymm12,%ymm8,%ymm8
7619	vpxor	%ymm8,%ymm4,%ymm4
7620	vpsrld	$20,%ymm4,%ymm3
7621	vpslld	$12,%ymm4,%ymm4
7622	vpxor	%ymm3,%ymm4,%ymm4
7623	vpaddd	%ymm4,%ymm0,%ymm0
7624	vpxor	%ymm0,%ymm12,%ymm12
7625	vpshufb	.Lrol8(%rip),%ymm12,%ymm12
7626	vpaddd	%ymm12,%ymm8,%ymm8
7627	vpxor	%ymm8,%ymm4,%ymm4
7628	vpslld	$7,%ymm4,%ymm3
7629	vpsrld	$25,%ymm4,%ymm4
7630	vpxor	%ymm3,%ymm4,%ymm4
7631	vpalignr	$4,%ymm12,%ymm12,%ymm12
7632	vpalignr	$8,%ymm8,%ymm8,%ymm8
7633	vpalignr	$12,%ymm4,%ymm4,%ymm4
7634	vpaddd	%ymm5,%ymm1,%ymm1
7635	vpxor	%ymm1,%ymm13,%ymm13
7636	vpshufb	.Lrol16(%rip),%ymm13,%ymm13
7637	vpaddd	%ymm13,%ymm9,%ymm9
7638	vpxor	%ymm9,%ymm5,%ymm5
7639	vpsrld	$20,%ymm5,%ymm3
7640	vpslld	$12,%ymm5,%ymm5
7641	vpxor	%ymm3,%ymm5,%ymm5
7642	vpaddd	%ymm5,%ymm1,%ymm1
7643	vpxor	%ymm1,%ymm13,%ymm13
7644	vpshufb	.Lrol8(%rip),%ymm13,%ymm13
7645	vpaddd	%ymm13,%ymm9,%ymm9
7646	vpxor	%ymm9,%ymm5,%ymm5
7647	vpslld	$7,%ymm5,%ymm3
7648	vpsrld	$25,%ymm5,%ymm5
7649	vpxor	%ymm3,%ymm5,%ymm5
7650	vpalignr	$4,%ymm13,%ymm13,%ymm13
7651	vpalignr	$8,%ymm9,%ymm9,%ymm9
7652	vpalignr	$12,%ymm5,%ymm5,%ymm5
7653	addq	0+16(%rdi),%r10
7654	adcq	8+16(%rdi),%r11
7655	adcq	$1,%r12
7656	movq	0+0+0(%rbp),%rax
7657	movq	%rax,%r15
7658	mulq	%r10
7659	movq	%rax,%r13
7660	movq	%rdx,%r14
7661	movq	0+0+0(%rbp),%rax
7662	mulq	%r11
7663	imulq	%r12,%r15
7664	addq	%rax,%r14
7665	adcq	%rdx,%r15
7666	movq	8+0+0(%rbp),%rax
7667	movq	%rax,%r9
7668	mulq	%r10
7669	addq	%rax,%r14
7670	adcq	$0,%rdx
7671	movq	%rdx,%r10
7672	movq	8+0+0(%rbp),%rax
7673	mulq	%r11
7674	addq	%rax,%r15
7675	adcq	$0,%rdx
7676	imulq	%r12,%r9
7677	addq	%r10,%r15
7678	adcq	%rdx,%r9
7679	movq	%r13,%r10
7680	movq	%r14,%r11
7681	movq	%r15,%r12
7682	andq	$3,%r12
7683	movq	%r15,%r13
7684	andq	$-4,%r13
7685	movq	%r9,%r14
7686	shrdq	$2,%r9,%r15
7687	shrq	$2,%r9
7688	addq	%r13,%r15
7689	adcq	%r14,%r9
7690	addq	%r15,%r10
7691	adcq	%r9,%r11
7692	adcq	$0,%r12
7693
7694	leaq	32(%rdi),%rdi
7695	decq	%rcx
7696	jg	.Lseal_avx2_tail_256_rounds_and_3xhash
7697	decq	%r8
7698	jge	.Lseal_avx2_tail_256_rounds_and_2xhash
7699	vpaddd	.Lchacha20_consts(%rip),%ymm1,%ymm1
7700	vpaddd	0+64(%rbp),%ymm5,%ymm5
7701	vpaddd	0+96(%rbp),%ymm9,%ymm9
7702	vpaddd	0+192(%rbp),%ymm13,%ymm13
7703	vpaddd	.Lchacha20_consts(%rip),%ymm0,%ymm0
7704	vpaddd	0+64(%rbp),%ymm4,%ymm4
7705	vpaddd	0+96(%rbp),%ymm8,%ymm8
7706	vpaddd	0+160(%rbp),%ymm12,%ymm12
7707	vperm2i128	$0x02,%ymm1,%ymm5,%ymm3
7708	vperm2i128	$0x13,%ymm1,%ymm5,%ymm5
7709	vperm2i128	$0x02,%ymm9,%ymm13,%ymm1
7710	vperm2i128	$0x13,%ymm9,%ymm13,%ymm9
7711	vpxor	0+0(%rsi),%ymm3,%ymm3
7712	vpxor	32+0(%rsi),%ymm1,%ymm1
7713	vpxor	64+0(%rsi),%ymm5,%ymm5
7714	vpxor	96+0(%rsi),%ymm9,%ymm9
7715	vmovdqu	%ymm3,0+0(%rdi)
7716	vmovdqu	%ymm1,32+0(%rdi)
7717	vmovdqu	%ymm5,64+0(%rdi)
7718	vmovdqu	%ymm9,96+0(%rdi)
7719	vperm2i128	$0x13,%ymm0,%ymm4,%ymm3
7720	vperm2i128	$0x02,%ymm0,%ymm4,%ymm0
7721	vperm2i128	$0x02,%ymm8,%ymm12,%ymm4
7722	vperm2i128	$0x13,%ymm8,%ymm12,%ymm12
7723	vmovdqa	%ymm3,%ymm8
7724
7725	movq	$128,%rcx
7726	leaq	128(%rsi),%rsi
7727	subq	$128,%rbx
7728	jmp	.Lseal_avx2_short_hash_remainder
7729
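// More than 256 and at most 384 bytes left: three two-block ChaCha20 states.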
7730.Lseal_avx2_tail_384:
7731	vmovdqa	.Lchacha20_consts(%rip),%ymm0
7732	vmovdqa	0+64(%rbp),%ymm4
7733	vmovdqa	0+96(%rbp),%ymm8
7734	vmovdqa	%ymm0,%ymm1
7735	vmovdqa	%ymm4,%ymm5
7736	vmovdqa	%ymm8,%ymm9
7737	vmovdqa	%ymm0,%ymm2
7738	vmovdqa	%ymm4,%ymm6
7739	vmovdqa	%ymm8,%ymm10
7740	vmovdqa	.Lavx2_inc(%rip),%ymm12
7741	vpaddd	0+160(%rbp),%ymm12,%ymm14
7742	vpaddd	%ymm14,%ymm12,%ymm13
7743	vpaddd	%ymm13,%ymm12,%ymm12
7744	vmovdqa	%ymm12,0+160(%rbp)
7745	vmovdqa	%ymm13,0+192(%rbp)
7746	vmovdqa	%ymm14,0+224(%rbp)
7747
7748.Lseal_avx2_tail_384_rounds_and_3xhash:
7749	addq	0+0(%rdi),%r10
7750	adcq	8+0(%rdi),%r11
7751	adcq	$1,%r12
7752	movq	0+0+0(%rbp),%rax
7753	movq	%rax,%r15
7754	mulq	%r10
7755	movq	%rax,%r13
7756	movq	%rdx,%r14
7757	movq	0+0+0(%rbp),%rax
7758	mulq	%r11
7759	imulq	%r12,%r15
7760	addq	%rax,%r14
7761	adcq	%rdx,%r15
7762	movq	8+0+0(%rbp),%rax
7763	movq	%rax,%r9
7764	mulq	%r10
7765	addq	%rax,%r14
7766	adcq	$0,%rdx
7767	movq	%rdx,%r10
7768	movq	8+0+0(%rbp),%rax
7769	mulq	%r11
7770	addq	%rax,%r15
7771	adcq	$0,%rdx
7772	imulq	%r12,%r9
7773	addq	%r10,%r15
7774	adcq	%rdx,%r9
7775	movq	%r13,%r10
7776	movq	%r14,%r11
7777	movq	%r15,%r12
7778	andq	$3,%r12
7779	movq	%r15,%r13
7780	andq	$-4,%r13
7781	movq	%r9,%r14
7782	shrdq	$2,%r9,%r15
7783	shrq	$2,%r9
7784	addq	%r13,%r15
7785	adcq	%r14,%r9
7786	addq	%r15,%r10
7787	adcq	%r9,%r11
7788	adcq	$0,%r12
7789
7790	leaq	16(%rdi),%rdi
7791.Lseal_avx2_tail_384_rounds_and_2xhash:
7792	vpaddd	%ymm4,%ymm0,%ymm0
7793	vpxor	%ymm0,%ymm12,%ymm12
7794	vpshufb	.Lrol16(%rip),%ymm12,%ymm12
7795	vpaddd	%ymm12,%ymm8,%ymm8
7796	vpxor	%ymm8,%ymm4,%ymm4
7797	vpsrld	$20,%ymm4,%ymm3
7798	vpslld	$12,%ymm4,%ymm4
7799	vpxor	%ymm3,%ymm4,%ymm4
7800	vpaddd	%ymm4,%ymm0,%ymm0
7801	vpxor	%ymm0,%ymm12,%ymm12
7802	vpshufb	.Lrol8(%rip),%ymm12,%ymm12
7803	vpaddd	%ymm12,%ymm8,%ymm8
7804	vpxor	%ymm8,%ymm4,%ymm4
7805	vpslld	$7,%ymm4,%ymm3
7806	vpsrld	$25,%ymm4,%ymm4
7807	vpxor	%ymm3,%ymm4,%ymm4
7808	vpalignr	$12,%ymm12,%ymm12,%ymm12
7809	vpalignr	$8,%ymm8,%ymm8,%ymm8
7810	vpalignr	$4,%ymm4,%ymm4,%ymm4
7811	vpaddd	%ymm5,%ymm1,%ymm1
7812	vpxor	%ymm1,%ymm13,%ymm13
7813	vpshufb	.Lrol16(%rip),%ymm13,%ymm13
7814	vpaddd	%ymm13,%ymm9,%ymm9
7815	vpxor	%ymm9,%ymm5,%ymm5
7816	vpsrld	$20,%ymm5,%ymm3
7817	vpslld	$12,%ymm5,%ymm5
7818	vpxor	%ymm3,%ymm5,%ymm5
7819	vpaddd	%ymm5,%ymm1,%ymm1
7820	vpxor	%ymm1,%ymm13,%ymm13
7821	vpshufb	.Lrol8(%rip),%ymm13,%ymm13
7822	vpaddd	%ymm13,%ymm9,%ymm9
7823	vpxor	%ymm9,%ymm5,%ymm5
7824	vpslld	$7,%ymm5,%ymm3
7825	vpsrld	$25,%ymm5,%ymm5
7826	vpxor	%ymm3,%ymm5,%ymm5
7827	vpalignr	$12,%ymm13,%ymm13,%ymm13
7828	vpalignr	$8,%ymm9,%ymm9,%ymm9
7829	vpalignr	$4,%ymm5,%ymm5,%ymm5
7830	addq	0+0(%rdi),%r10
7831	adcq	8+0(%rdi),%r11
7832	adcq	$1,%r12
7833	movq	0+0+0(%rbp),%rax
7834	movq	%rax,%r15
7835	mulq	%r10
7836	movq	%rax,%r13
7837	movq	%rdx,%r14
7838	movq	0+0+0(%rbp),%rax
7839	mulq	%r11
7840	imulq	%r12,%r15
7841	addq	%rax,%r14
7842	adcq	%rdx,%r15
7843	movq	8+0+0(%rbp),%rax
7844	movq	%rax,%r9
7845	mulq	%r10
7846	addq	%rax,%r14
7847	adcq	$0,%rdx
7848	movq	%rdx,%r10
7849	movq	8+0+0(%rbp),%rax
7850	mulq	%r11
7851	addq	%rax,%r15
7852	adcq	$0,%rdx
7853	imulq	%r12,%r9
7854	addq	%r10,%r15
7855	adcq	%rdx,%r9
7856	movq	%r13,%r10
7857	movq	%r14,%r11
7858	movq	%r15,%r12
7859	andq	$3,%r12
7860	movq	%r15,%r13
7861	andq	$-4,%r13
7862	movq	%r9,%r14
7863	shrdq	$2,%r9,%r15
7864	shrq	$2,%r9
7865	addq	%r13,%r15
7866	adcq	%r14,%r9
7867	addq	%r15,%r10
7868	adcq	%r9,%r11
7869	adcq	$0,%r12
7870	vpaddd	%ymm6,%ymm2,%ymm2
7871	vpxor	%ymm2,%ymm14,%ymm14
7872	vpshufb	.Lrol16(%rip),%ymm14,%ymm14
7873	vpaddd	%ymm14,%ymm10,%ymm10
7874	vpxor	%ymm10,%ymm6,%ymm6
7875	vpsrld	$20,%ymm6,%ymm3
7876	vpslld	$12,%ymm6,%ymm6
7877	vpxor	%ymm3,%ymm6,%ymm6
7878	vpaddd	%ymm6,%ymm2,%ymm2
7879	vpxor	%ymm2,%ymm14,%ymm14
7880	vpshufb	.Lrol8(%rip),%ymm14,%ymm14
7881	vpaddd	%ymm14,%ymm10,%ymm10
7882	vpxor	%ymm10,%ymm6,%ymm6
7883	vpslld	$7,%ymm6,%ymm3
7884	vpsrld	$25,%ymm6,%ymm6
7885	vpxor	%ymm3,%ymm6,%ymm6
7886	vpalignr	$12,%ymm14,%ymm14,%ymm14
7887	vpalignr	$8,%ymm10,%ymm10,%ymm10
7888	vpalignr	$4,%ymm6,%ymm6,%ymm6
7889	vpaddd	%ymm4,%ymm0,%ymm0
7890	vpxor	%ymm0,%ymm12,%ymm12
7891	vpshufb	.Lrol16(%rip),%ymm12,%ymm12
7892	vpaddd	%ymm12,%ymm8,%ymm8
7893	vpxor	%ymm8,%ymm4,%ymm4
7894	vpsrld	$20,%ymm4,%ymm3
7895	vpslld	$12,%ymm4,%ymm4
7896	vpxor	%ymm3,%ymm4,%ymm4
7897	vpaddd	%ymm4,%ymm0,%ymm0
7898	vpxor	%ymm0,%ymm12,%ymm12
7899	vpshufb	.Lrol8(%rip),%ymm12,%ymm12
7900	vpaddd	%ymm12,%ymm8,%ymm8
7901	vpxor	%ymm8,%ymm4,%ymm4
7902	vpslld	$7,%ymm4,%ymm3
7903	vpsrld	$25,%ymm4,%ymm4
7904	vpxor	%ymm3,%ymm4,%ymm4
7905	vpalignr	$4,%ymm12,%ymm12,%ymm12
7906	vpalignr	$8,%ymm8,%ymm8,%ymm8
7907	vpalignr	$12,%ymm4,%ymm4,%ymm4
7908	addq	0+16(%rdi),%r10
7909	adcq	8+16(%rdi),%r11
7910	adcq	$1,%r12
7911	movq	0+0+0(%rbp),%rax
7912	movq	%rax,%r15
7913	mulq	%r10
7914	movq	%rax,%r13
7915	movq	%rdx,%r14
7916	movq	0+0+0(%rbp),%rax
7917	mulq	%r11
7918	imulq	%r12,%r15
7919	addq	%rax,%r14
7920	adcq	%rdx,%r15
7921	movq	8+0+0(%rbp),%rax
7922	movq	%rax,%r9
7923	mulq	%r10
7924	addq	%rax,%r14
7925	adcq	$0,%rdx
7926	movq	%rdx,%r10
7927	movq	8+0+0(%rbp),%rax
7928	mulq	%r11
7929	addq	%rax,%r15
7930	adcq	$0,%rdx
7931	imulq	%r12,%r9
7932	addq	%r10,%r15
7933	adcq	%rdx,%r9
7934	movq	%r13,%r10
7935	movq	%r14,%r11
7936	movq	%r15,%r12
7937	andq	$3,%r12
7938	movq	%r15,%r13
7939	andq	$-4,%r13
7940	movq	%r9,%r14
7941	shrdq	$2,%r9,%r15
7942	shrq	$2,%r9
7943	addq	%r13,%r15
7944	adcq	%r14,%r9
7945	addq	%r15,%r10
7946	adcq	%r9,%r11
7947	adcq	$0,%r12
7948	vpaddd	%ymm5,%ymm1,%ymm1
7949	vpxor	%ymm1,%ymm13,%ymm13
7950	vpshufb	.Lrol16(%rip),%ymm13,%ymm13
7951	vpaddd	%ymm13,%ymm9,%ymm9
7952	vpxor	%ymm9,%ymm5,%ymm5
7953	vpsrld	$20,%ymm5,%ymm3
7954	vpslld	$12,%ymm5,%ymm5
7955	vpxor	%ymm3,%ymm5,%ymm5
7956	vpaddd	%ymm5,%ymm1,%ymm1
7957	vpxor	%ymm1,%ymm13,%ymm13
7958	vpshufb	.Lrol8(%rip),%ymm13,%ymm13
7959	vpaddd	%ymm13,%ymm9,%ymm9
7960	vpxor	%ymm9,%ymm5,%ymm5
7961	vpslld	$7,%ymm5,%ymm3
7962	vpsrld	$25,%ymm5,%ymm5
7963	vpxor	%ymm3,%ymm5,%ymm5
7964	vpalignr	$4,%ymm13,%ymm13,%ymm13
7965	vpalignr	$8,%ymm9,%ymm9,%ymm9
7966	vpalignr	$12,%ymm5,%ymm5,%ymm5
7967	vpaddd	%ymm6,%ymm2,%ymm2
7968	vpxor	%ymm2,%ymm14,%ymm14
7969	vpshufb	.Lrol16(%rip),%ymm14,%ymm14
7970	vpaddd	%ymm14,%ymm10,%ymm10
7971	vpxor	%ymm10,%ymm6,%ymm6
7972	vpsrld	$20,%ymm6,%ymm3
7973	vpslld	$12,%ymm6,%ymm6
7974	vpxor	%ymm3,%ymm6,%ymm6
7975	vpaddd	%ymm6,%ymm2,%ymm2
7976	vpxor	%ymm2,%ymm14,%ymm14
7977	vpshufb	.Lrol8(%rip),%ymm14,%ymm14
7978	vpaddd	%ymm14,%ymm10,%ymm10
7979	vpxor	%ymm10,%ymm6,%ymm6
7980	vpslld	$7,%ymm6,%ymm3
7981	vpsrld	$25,%ymm6,%ymm6
7982	vpxor	%ymm3,%ymm6,%ymm6
7983	vpalignr	$4,%ymm14,%ymm14,%ymm14
7984	vpalignr	$8,%ymm10,%ymm10,%ymm10
7985	vpalignr	$12,%ymm6,%ymm6,%ymm6
7986
7987	leaq	32(%rdi),%rdi
7988	decq	%rcx
7989	jg	.Lseal_avx2_tail_384_rounds_and_3xhash
7990	decq	%r8
7991	jge	.Lseal_avx2_tail_384_rounds_and_2xhash
7992	vpaddd	.Lchacha20_consts(%rip),%ymm2,%ymm2
7993	vpaddd	0+64(%rbp),%ymm6,%ymm6
7994	vpaddd	0+96(%rbp),%ymm10,%ymm10
7995	vpaddd	0+224(%rbp),%ymm14,%ymm14
7996	vpaddd	.Lchacha20_consts(%rip),%ymm1,%ymm1
7997	vpaddd	0+64(%rbp),%ymm5,%ymm5
7998	vpaddd	0+96(%rbp),%ymm9,%ymm9
7999	vpaddd	0+192(%rbp),%ymm13,%ymm13
8000	vpaddd	.Lchacha20_consts(%rip),%ymm0,%ymm0
8001	vpaddd	0+64(%rbp),%ymm4,%ymm4
8002	vpaddd	0+96(%rbp),%ymm8,%ymm8
8003	vpaddd	0+160(%rbp),%ymm12,%ymm12
8004	vperm2i128	$0x02,%ymm2,%ymm6,%ymm3
8005	vperm2i128	$0x13,%ymm2,%ymm6,%ymm6
8006	vperm2i128	$0x02,%ymm10,%ymm14,%ymm2
8007	vperm2i128	$0x13,%ymm10,%ymm14,%ymm10
8008	vpxor	0+0(%rsi),%ymm3,%ymm3
8009	vpxor	32+0(%rsi),%ymm2,%ymm2
8010	vpxor	64+0(%rsi),%ymm6,%ymm6
8011	vpxor	96+0(%rsi),%ymm10,%ymm10
8012	vmovdqu	%ymm3,0+0(%rdi)
8013	vmovdqu	%ymm2,32+0(%rdi)
8014	vmovdqu	%ymm6,64+0(%rdi)
8015	vmovdqu	%ymm10,96+0(%rdi)
8016	vperm2i128	$0x02,%ymm1,%ymm5,%ymm3
8017	vperm2i128	$0x13,%ymm1,%ymm5,%ymm5
8018	vperm2i128	$0x02,%ymm9,%ymm13,%ymm1
8019	vperm2i128	$0x13,%ymm9,%ymm13,%ymm9
8020	vpxor	0+128(%rsi),%ymm3,%ymm3
8021	vpxor	32+128(%rsi),%ymm1,%ymm1
8022	vpxor	64+128(%rsi),%ymm5,%ymm5
8023	vpxor	96+128(%rsi),%ymm9,%ymm9
8024	vmovdqu	%ymm3,0+128(%rdi)
8025	vmovdqu	%ymm1,32+128(%rdi)
8026	vmovdqu	%ymm5,64+128(%rdi)
8027	vmovdqu	%ymm9,96+128(%rdi)
8028	vperm2i128	$0x13,%ymm0,%ymm4,%ymm3
8029	vperm2i128	$0x02,%ymm0,%ymm4,%ymm0
8030	vperm2i128	$0x02,%ymm8,%ymm12,%ymm4
8031	vperm2i128	$0x13,%ymm8,%ymm12,%ymm12
8032	vmovdqa	%ymm3,%ymm8
8033
8034	movq	$256,%rcx
8035	leaq	256(%rsi),%rsi
8036	subq	$256,%rbx
8037	jmp	.Lseal_avx2_short_hash_remainder
8038
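// More than 384 and at most 512 bytes left: all four two-block states, with
// Poly1305 interleaved as in the main loop.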
8039.Lseal_avx2_tail_512:
8040	vmovdqa	.Lchacha20_consts(%rip),%ymm0
8041	vmovdqa	0+64(%rbp),%ymm4
8042	vmovdqa	0+96(%rbp),%ymm8
8043	vmovdqa	%ymm0,%ymm1
8044	vmovdqa	%ymm4,%ymm5
8045	vmovdqa	%ymm8,%ymm9
8046	vmovdqa	%ymm0,%ymm2
8047	vmovdqa	%ymm4,%ymm6
8048	vmovdqa	%ymm8,%ymm10
8049	vmovdqa	%ymm0,%ymm3
8050	vmovdqa	%ymm4,%ymm7
8051	vmovdqa	%ymm8,%ymm11
8052	vmovdqa	.Lavx2_inc(%rip),%ymm12
8053	vpaddd	0+160(%rbp),%ymm12,%ymm15
8054	vpaddd	%ymm15,%ymm12,%ymm14
8055	vpaddd	%ymm14,%ymm12,%ymm13
8056	vpaddd	%ymm13,%ymm12,%ymm12
8057	vmovdqa	%ymm15,0+256(%rbp)
8058	vmovdqa	%ymm14,0+224(%rbp)
8059	vmovdqa	%ymm13,0+192(%rbp)
8060	vmovdqa	%ymm12,0+160(%rbp)
8061
8062.Lseal_avx2_tail_512_rounds_and_3xhash:
8063	addq	0+0(%rdi),%r10
8064	adcq	8+0(%rdi),%r11
8065	adcq	$1,%r12
8066	movq	0+0+0(%rbp),%rdx
8067	movq	%rdx,%r15
8068	mulxq	%r10,%r13,%r14
8069	mulxq	%r11,%rax,%rdx
8070	imulq	%r12,%r15
8071	addq	%rax,%r14
8072	adcq	%rdx,%r15
8073	movq	8+0+0(%rbp),%rdx
8074	mulxq	%r10,%r10,%rax
8075	addq	%r10,%r14
8076	mulxq	%r11,%r11,%r9
8077	adcq	%r11,%r15
8078	adcq	$0,%r9
8079	imulq	%r12,%rdx
8080	addq	%rax,%r15
8081	adcq	%rdx,%r9
8082	movq	%r13,%r10
8083	movq	%r14,%r11
8084	movq	%r15,%r12
8085	andq	$3,%r12
8086	movq	%r15,%r13
8087	andq	$-4,%r13
8088	movq	%r9,%r14
8089	shrdq	$2,%r9,%r15
8090	shrq	$2,%r9
8091	addq	%r13,%r15
8092	adcq	%r14,%r9
8093	addq	%r15,%r10
8094	adcq	%r9,%r11
8095	adcq	$0,%r12
8096
8097	leaq	16(%rdi),%rdi
8098.Lseal_avx2_tail_512_rounds_and_2xhash:
8099	vmovdqa	%ymm8,0+128(%rbp)
8100	vmovdqa	.Lrol16(%rip),%ymm8
8101	vpaddd	%ymm7,%ymm3,%ymm3
8102	vpaddd	%ymm6,%ymm2,%ymm2
8103	vpaddd	%ymm5,%ymm1,%ymm1
8104	vpaddd	%ymm4,%ymm0,%ymm0
8105	vpxor	%ymm3,%ymm15,%ymm15
8106	vpxor	%ymm2,%ymm14,%ymm14
8107	vpxor	%ymm1,%ymm13,%ymm13
8108	vpxor	%ymm0,%ymm12,%ymm12
8109	vpshufb	%ymm8,%ymm15,%ymm15
8110	vpshufb	%ymm8,%ymm14,%ymm14
8111	vpshufb	%ymm8,%ymm13,%ymm13
8112	vpshufb	%ymm8,%ymm12,%ymm12
8113	vpaddd	%ymm15,%ymm11,%ymm11
8114	vpaddd	%ymm14,%ymm10,%ymm10
8115	vpaddd	%ymm13,%ymm9,%ymm9
8116	vpaddd	0+128(%rbp),%ymm12,%ymm8
8117	vpxor	%ymm11,%ymm7,%ymm7
8118	vpxor	%ymm10,%ymm6,%ymm6
8119	addq	0+0(%rdi),%r10
8120	adcq	8+0(%rdi),%r11
8121	adcq	$1,%r12
8122	vpxor	%ymm9,%ymm5,%ymm5
8123	vpxor	%ymm8,%ymm4,%ymm4
8124	vmovdqa	%ymm8,0+128(%rbp)
8125	vpsrld	$20,%ymm7,%ymm8
8126	vpslld	$32-20,%ymm7,%ymm7
8127	vpxor	%ymm8,%ymm7,%ymm7
8128	vpsrld	$20,%ymm6,%ymm8
8129	vpslld	$32-20,%ymm6,%ymm6
8130	vpxor	%ymm8,%ymm6,%ymm6
8131	vpsrld	$20,%ymm5,%ymm8
8132	vpslld	$32-20,%ymm5,%ymm5
8133	vpxor	%ymm8,%ymm5,%ymm5
8134	vpsrld	$20,%ymm4,%ymm8
8135	vpslld	$32-20,%ymm4,%ymm4
8136	vpxor	%ymm8,%ymm4,%ymm4
8137	vmovdqa	.Lrol8(%rip),%ymm8
8138	vpaddd	%ymm7,%ymm3,%ymm3
8139	vpaddd	%ymm6,%ymm2,%ymm2
8140	vpaddd	%ymm5,%ymm1,%ymm1
8141	vpaddd	%ymm4,%ymm0,%ymm0
8142	movq	0+0+0(%rbp),%rdx
8143	movq	%rdx,%r15
8144	mulxq	%r10,%r13,%r14
8145	mulxq	%r11,%rax,%rdx
8146	imulq	%r12,%r15
8147	addq	%rax,%r14
8148	adcq	%rdx,%r15
8149	vpxor	%ymm3,%ymm15,%ymm15
8150	vpxor	%ymm2,%ymm14,%ymm14
8151	vpxor	%ymm1,%ymm13,%ymm13
8152	vpxor	%ymm0,%ymm12,%ymm12
8153	vpshufb	%ymm8,%ymm15,%ymm15
8154	vpshufb	%ymm8,%ymm14,%ymm14
8155	vpshufb	%ymm8,%ymm13,%ymm13
8156	vpshufb	%ymm8,%ymm12,%ymm12
8157	vpaddd	%ymm15,%ymm11,%ymm11
8158	vpaddd	%ymm14,%ymm10,%ymm10
8159	vpaddd	%ymm13,%ymm9,%ymm9
8160	vpaddd	0+128(%rbp),%ymm12,%ymm8
8161	vpxor	%ymm11,%ymm7,%ymm7
8162	vpxor	%ymm10,%ymm6,%ymm6
8163	vpxor	%ymm9,%ymm5,%ymm5
8164	vpxor	%ymm8,%ymm4,%ymm4
8165	vmovdqa	%ymm8,0+128(%rbp)
8166	vpsrld	$25,%ymm7,%ymm8
8167	vpslld	$32-25,%ymm7,%ymm7
8168	vpxor	%ymm8,%ymm7,%ymm7
8169	movq	8+0+0(%rbp),%rdx
8170	mulxq	%r10,%r10,%rax
8171	addq	%r10,%r14
8172	mulxq	%r11,%r11,%r9
8173	adcq	%r11,%r15
8174	adcq	$0,%r9
8175	imulq	%r12,%rdx
8176	vpsrld	$25,%ymm6,%ymm8
8177	vpslld	$32-25,%ymm6,%ymm6
8178	vpxor	%ymm8,%ymm6,%ymm6
8179	vpsrld	$25,%ymm5,%ymm8
8180	vpslld	$32-25,%ymm5,%ymm5
8181	vpxor	%ymm8,%ymm5,%ymm5
8182	vpsrld	$25,%ymm4,%ymm8
8183	vpslld	$32-25,%ymm4,%ymm4
8184	vpxor	%ymm8,%ymm4,%ymm4
8185	vmovdqa	0+128(%rbp),%ymm8
8186	vpalignr	$4,%ymm7,%ymm7,%ymm7
8187	vpalignr	$8,%ymm11,%ymm11,%ymm11
8188	vpalignr	$12,%ymm15,%ymm15,%ymm15
8189	vpalignr	$4,%ymm6,%ymm6,%ymm6
8190	vpalignr	$8,%ymm10,%ymm10,%ymm10
8191	vpalignr	$12,%ymm14,%ymm14,%ymm14
8192	vpalignr	$4,%ymm5,%ymm5,%ymm5
8193	vpalignr	$8,%ymm9,%ymm9,%ymm9
8194	vpalignr	$12,%ymm13,%ymm13,%ymm13
8195	vpalignr	$4,%ymm4,%ymm4,%ymm4
8196	addq	%rax,%r15
8197	adcq	%rdx,%r9
8198	vpalignr	$8,%ymm8,%ymm8,%ymm8
8199	vpalignr	$12,%ymm12,%ymm12,%ymm12
8200	vmovdqa	%ymm8,0+128(%rbp)
8201	vmovdqa	.Lrol16(%rip),%ymm8
8202	vpaddd	%ymm7,%ymm3,%ymm3
8203	vpaddd	%ymm6,%ymm2,%ymm2
8204	vpaddd	%ymm5,%ymm1,%ymm1
8205	vpaddd	%ymm4,%ymm0,%ymm0
8206	vpxor	%ymm3,%ymm15,%ymm15
8207	vpxor	%ymm2,%ymm14,%ymm14
8208	vpxor	%ymm1,%ymm13,%ymm13
8209	vpxor	%ymm0,%ymm12,%ymm12
8210	vpshufb	%ymm8,%ymm15,%ymm15
8211	vpshufb	%ymm8,%ymm14,%ymm14
8212	vpshufb	%ymm8,%ymm13,%ymm13
8213	vpshufb	%ymm8,%ymm12,%ymm12
8214	vpaddd	%ymm15,%ymm11,%ymm11
8215	vpaddd	%ymm14,%ymm10,%ymm10
8216	vpaddd	%ymm13,%ymm9,%ymm9
8217	vpaddd	0+128(%rbp),%ymm12,%ymm8
8218	movq	%r13,%r10
8219	movq	%r14,%r11
8220	movq	%r15,%r12
8221	andq	$3,%r12
8222	movq	%r15,%r13
8223	andq	$-4,%r13
8224	movq	%r9,%r14
8225	shrdq	$2,%r9,%r15
8226	shrq	$2,%r9
8227	addq	%r13,%r15
8228	adcq	%r14,%r9
8229	addq	%r15,%r10
8230	adcq	%r9,%r11
8231	adcq	$0,%r12
8232	vpxor	%ymm11,%ymm7,%ymm7
8233	vpxor	%ymm10,%ymm6,%ymm6
8234	vpxor	%ymm9,%ymm5,%ymm5
8235	vpxor	%ymm8,%ymm4,%ymm4
8236	vmovdqa	%ymm8,0+128(%rbp)
8237	vpsrld	$20,%ymm7,%ymm8
8238	vpslld	$32-20,%ymm7,%ymm7
8239	vpxor	%ymm8,%ymm7,%ymm7
8240	vpsrld	$20,%ymm6,%ymm8
8241	vpslld	$32-20,%ymm6,%ymm6
8242	vpxor	%ymm8,%ymm6,%ymm6
8243	vpsrld	$20,%ymm5,%ymm8
8244	vpslld	$32-20,%ymm5,%ymm5
8245	vpxor	%ymm8,%ymm5,%ymm5
8246	vpsrld	$20,%ymm4,%ymm8
8247	vpslld	$32-20,%ymm4,%ymm4
8248	vpxor	%ymm8,%ymm4,%ymm4
8249	vmovdqa	.Lrol8(%rip),%ymm8
8250	vpaddd	%ymm7,%ymm3,%ymm3
8251	vpaddd	%ymm6,%ymm2,%ymm2
8252	addq	0+16(%rdi),%r10
8253	adcq	8+16(%rdi),%r11
8254	adcq	$1,%r12
8255	vpaddd	%ymm5,%ymm1,%ymm1
8256	vpaddd	%ymm4,%ymm0,%ymm0
8257	vpxor	%ymm3,%ymm15,%ymm15
8258	vpxor	%ymm2,%ymm14,%ymm14
8259	vpxor	%ymm1,%ymm13,%ymm13
8260	vpxor	%ymm0,%ymm12,%ymm12
8261	vpshufb	%ymm8,%ymm15,%ymm15
8262	vpshufb	%ymm8,%ymm14,%ymm14
8263	vpshufb	%ymm8,%ymm13,%ymm13
8264	vpshufb	%ymm8,%ymm12,%ymm12
8265	vpaddd	%ymm15,%ymm11,%ymm11
8266	vpaddd	%ymm14,%ymm10,%ymm10
8267	vpaddd	%ymm13,%ymm9,%ymm9
8268	vpaddd	0+128(%rbp),%ymm12,%ymm8
8269	vpxor	%ymm11,%ymm7,%ymm7
8270	vpxor	%ymm10,%ymm6,%ymm6
8271	vpxor	%ymm9,%ymm5,%ymm5
8272	vpxor	%ymm8,%ymm4,%ymm4
8273	vmovdqa	%ymm8,0+128(%rbp)
8274	vpsrld	$25,%ymm7,%ymm8
8275	movq	0+0+0(%rbp),%rdx
8276	movq	%rdx,%r15
8277	mulxq	%r10,%r13,%r14
8278	mulxq	%r11,%rax,%rdx
8279	imulq	%r12,%r15
8280	addq	%rax,%r14
8281	adcq	%rdx,%r15
8282	vpslld	$32-25,%ymm7,%ymm7
8283	vpxor	%ymm8,%ymm7,%ymm7
8284	vpsrld	$25,%ymm6,%ymm8
8285	vpslld	$32-25,%ymm6,%ymm6
8286	vpxor	%ymm8,%ymm6,%ymm6
8287	vpsrld	$25,%ymm5,%ymm8
8288	vpslld	$32-25,%ymm5,%ymm5
8289	vpxor	%ymm8,%ymm5,%ymm5
8290	vpsrld	$25,%ymm4,%ymm8
8291	vpslld	$32-25,%ymm4,%ymm4
8292	vpxor	%ymm8,%ymm4,%ymm4
8293	vmovdqa	0+128(%rbp),%ymm8
8294	vpalignr	$12,%ymm7,%ymm7,%ymm7
8295	vpalignr	$8,%ymm11,%ymm11,%ymm11
8296	vpalignr	$4,%ymm15,%ymm15,%ymm15
8297	vpalignr	$12,%ymm6,%ymm6,%ymm6
8298	vpalignr	$8,%ymm10,%ymm10,%ymm10
8299	vpalignr	$4,%ymm14,%ymm14,%ymm14
8300	vpalignr	$12,%ymm5,%ymm5,%ymm5
8301	vpalignr	$8,%ymm9,%ymm9,%ymm9
8302	movq	8+0+0(%rbp),%rdx
8303	mulxq	%r10,%r10,%rax
8304	addq	%r10,%r14
8305	mulxq	%r11,%r11,%r9
8306	adcq	%r11,%r15
8307	adcq	$0,%r9
8308	imulq	%r12,%rdx
8309	vpalignr	$4,%ymm13,%ymm13,%ymm13
8310	vpalignr	$12,%ymm4,%ymm4,%ymm4
8311	vpalignr	$8,%ymm8,%ymm8,%ymm8
8312	vpalignr	$4,%ymm12,%ymm12,%ymm12
8313
8314
8315
8316
8317
8318
8319
8320
8321
8322
8323
8324
8325
8326
8327
8328
8329	addq	%rax,%r15
8330	adcq	%rdx,%r9
8331
8332
8333
8334
8335
8336
8337
8338
8339
8340
8341
8342
8343
8344
8345
8346
8347
8348
8349
8350
8351	movq	%r13,%r10
8352	movq	%r14,%r11
8353	movq	%r15,%r12
8354	andq	$3,%r12
8355	movq	%r15,%r13
8356	andq	$-4,%r13
8357	movq	%r9,%r14
8358	shrdq	$2,%r9,%r15
8359	shrq	$2,%r9
8360	addq	%r13,%r15
8361	adcq	%r14,%r9
8362	addq	%r15,%r10
8363	adcq	%r9,%r11
8364	adcq	$0,%r12
8365
8366	leaq	32(%rdi),%rdi
8367	decq	%rcx
8368	jg	.Lseal_avx2_tail_512_rounds_and_3xhash
8369	decq	%r8
8370	jge	.Lseal_avx2_tail_512_rounds_and_2xhash
8371	vpaddd	.Lchacha20_consts(%rip),%ymm3,%ymm3
8372	vpaddd	0+64(%rbp),%ymm7,%ymm7
8373	vpaddd	0+96(%rbp),%ymm11,%ymm11
8374	vpaddd	0+256(%rbp),%ymm15,%ymm15
8375	vpaddd	.Lchacha20_consts(%rip),%ymm2,%ymm2
8376	vpaddd	0+64(%rbp),%ymm6,%ymm6
8377	vpaddd	0+96(%rbp),%ymm10,%ymm10
8378	vpaddd	0+224(%rbp),%ymm14,%ymm14
8379	vpaddd	.Lchacha20_consts(%rip),%ymm1,%ymm1
8380	vpaddd	0+64(%rbp),%ymm5,%ymm5
8381	vpaddd	0+96(%rbp),%ymm9,%ymm9
8382	vpaddd	0+192(%rbp),%ymm13,%ymm13
8383	vpaddd	.Lchacha20_consts(%rip),%ymm0,%ymm0
8384	vpaddd	0+64(%rbp),%ymm4,%ymm4
8385	vpaddd	0+96(%rbp),%ymm8,%ymm8
8386	vpaddd	0+160(%rbp),%ymm12,%ymm12
8387
8388	vmovdqa	%ymm0,0+128(%rbp)
8389	vperm2i128	$0x02,%ymm3,%ymm7,%ymm0
8390	vperm2i128	$0x13,%ymm3,%ymm7,%ymm7
8391	vperm2i128	$0x02,%ymm11,%ymm15,%ymm3
8392	vperm2i128	$0x13,%ymm11,%ymm15,%ymm11
8393	vpxor	0+0(%rsi),%ymm0,%ymm0
8394	vpxor	32+0(%rsi),%ymm3,%ymm3
8395	vpxor	64+0(%rsi),%ymm7,%ymm7
8396	vpxor	96+0(%rsi),%ymm11,%ymm11
8397	vmovdqu	%ymm0,0+0(%rdi)
8398	vmovdqu	%ymm3,32+0(%rdi)
8399	vmovdqu	%ymm7,64+0(%rdi)
8400	vmovdqu	%ymm11,96+0(%rdi)
8401
8402	vmovdqa	0+128(%rbp),%ymm0
8403	vperm2i128	$0x02,%ymm2,%ymm6,%ymm3
8404	vperm2i128	$0x13,%ymm2,%ymm6,%ymm6
8405	vperm2i128	$0x02,%ymm10,%ymm14,%ymm2
8406	vperm2i128	$0x13,%ymm10,%ymm14,%ymm10
8407	vpxor	0+128(%rsi),%ymm3,%ymm3
8408	vpxor	32+128(%rsi),%ymm2,%ymm2
8409	vpxor	64+128(%rsi),%ymm6,%ymm6
8410	vpxor	96+128(%rsi),%ymm10,%ymm10
8411	vmovdqu	%ymm3,0+128(%rdi)
8412	vmovdqu	%ymm2,32+128(%rdi)
8413	vmovdqu	%ymm6,64+128(%rdi)
8414	vmovdqu	%ymm10,96+128(%rdi)
8415	vperm2i128	$0x02,%ymm1,%ymm5,%ymm3
8416	vperm2i128	$0x13,%ymm1,%ymm5,%ymm5
8417	vperm2i128	$0x02,%ymm9,%ymm13,%ymm1
8418	vperm2i128	$0x13,%ymm9,%ymm13,%ymm9
8419	vpxor	0+256(%rsi),%ymm3,%ymm3
8420	vpxor	32+256(%rsi),%ymm1,%ymm1
8421	vpxor	64+256(%rsi),%ymm5,%ymm5
8422	vpxor	96+256(%rsi),%ymm9,%ymm9
8423	vmovdqu	%ymm3,0+256(%rdi)
8424	vmovdqu	%ymm1,32+256(%rdi)
8425	vmovdqu	%ymm5,64+256(%rdi)
8426	vmovdqu	%ymm9,96+256(%rdi)
8427	vperm2i128	$0x13,%ymm0,%ymm4,%ymm3
8428	vperm2i128	$0x02,%ymm0,%ymm4,%ymm0
8429	vperm2i128	$0x02,%ymm8,%ymm12,%ymm4
8430	vperm2i128	$0x13,%ymm8,%ymm12,%ymm12
8431	vmovdqa	%ymm3,%ymm8
8432
8433	movq	$384,%rcx
8434	leaq	384(%rsi),%rsi
8435	subq	$384,%rbx
8436	jmp	.Lseal_avx2_short_hash_remainder
8437
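// Short-input path: build keystream from three two-block states (up to 320
// bytes usable for encryption); the first 32 bytes are clamped and stored at
// 0(%rbp) as the Poly1305 key.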
8438.Lseal_avx2_320:
8439	vmovdqa	%ymm0,%ymm1
8440	vmovdqa	%ymm0,%ymm2
8441	vmovdqa	%ymm4,%ymm5
8442	vmovdqa	%ymm4,%ymm6
8443	vmovdqa	%ymm8,%ymm9
8444	vmovdqa	%ymm8,%ymm10
8445	vpaddd	.Lavx2_inc(%rip),%ymm12,%ymm13
8446	vpaddd	.Lavx2_inc(%rip),%ymm13,%ymm14
8447	vmovdqa	%ymm4,%ymm7
8448	vmovdqa	%ymm8,%ymm11
8449	vmovdqa	%ymm12,0+160(%rbp)
8450	vmovdqa	%ymm13,0+192(%rbp)
8451	vmovdqa	%ymm14,0+224(%rbp)
8452	movq	$10,%r10
8453.Lseal_avx2_320_rounds:
8454	vpaddd	%ymm4,%ymm0,%ymm0
8455	vpxor	%ymm0,%ymm12,%ymm12
8456	vpshufb	.Lrol16(%rip),%ymm12,%ymm12
8457	vpaddd	%ymm12,%ymm8,%ymm8
8458	vpxor	%ymm8,%ymm4,%ymm4
8459	vpsrld	$20,%ymm4,%ymm3
8460	vpslld	$12,%ymm4,%ymm4
8461	vpxor	%ymm3,%ymm4,%ymm4
8462	vpaddd	%ymm4,%ymm0,%ymm0
8463	vpxor	%ymm0,%ymm12,%ymm12
8464	vpshufb	.Lrol8(%rip),%ymm12,%ymm12
8465	vpaddd	%ymm12,%ymm8,%ymm8
8466	vpxor	%ymm8,%ymm4,%ymm4
8467	vpslld	$7,%ymm4,%ymm3
8468	vpsrld	$25,%ymm4,%ymm4
8469	vpxor	%ymm3,%ymm4,%ymm4
8470	vpalignr	$12,%ymm12,%ymm12,%ymm12
8471	vpalignr	$8,%ymm8,%ymm8,%ymm8
8472	vpalignr	$4,%ymm4,%ymm4,%ymm4
8473	vpaddd	%ymm5,%ymm1,%ymm1
8474	vpxor	%ymm1,%ymm13,%ymm13
8475	vpshufb	.Lrol16(%rip),%ymm13,%ymm13
8476	vpaddd	%ymm13,%ymm9,%ymm9
8477	vpxor	%ymm9,%ymm5,%ymm5
8478	vpsrld	$20,%ymm5,%ymm3
8479	vpslld	$12,%ymm5,%ymm5
8480	vpxor	%ymm3,%ymm5,%ymm5
8481	vpaddd	%ymm5,%ymm1,%ymm1
8482	vpxor	%ymm1,%ymm13,%ymm13
8483	vpshufb	.Lrol8(%rip),%ymm13,%ymm13
8484	vpaddd	%ymm13,%ymm9,%ymm9
8485	vpxor	%ymm9,%ymm5,%ymm5
8486	vpslld	$7,%ymm5,%ymm3
8487	vpsrld	$25,%ymm5,%ymm5
8488	vpxor	%ymm3,%ymm5,%ymm5
8489	vpalignr	$12,%ymm13,%ymm13,%ymm13
8490	vpalignr	$8,%ymm9,%ymm9,%ymm9
8491	vpalignr	$4,%ymm5,%ymm5,%ymm5
8492	vpaddd	%ymm6,%ymm2,%ymm2
8493	vpxor	%ymm2,%ymm14,%ymm14
8494	vpshufb	.Lrol16(%rip),%ymm14,%ymm14
8495	vpaddd	%ymm14,%ymm10,%ymm10
8496	vpxor	%ymm10,%ymm6,%ymm6
8497	vpsrld	$20,%ymm6,%ymm3
8498	vpslld	$12,%ymm6,%ymm6
8499	vpxor	%ymm3,%ymm6,%ymm6
8500	vpaddd	%ymm6,%ymm2,%ymm2
8501	vpxor	%ymm2,%ymm14,%ymm14
8502	vpshufb	.Lrol8(%rip),%ymm14,%ymm14
8503	vpaddd	%ymm14,%ymm10,%ymm10
8504	vpxor	%ymm10,%ymm6,%ymm6
8505	vpslld	$7,%ymm6,%ymm3
8506	vpsrld	$25,%ymm6,%ymm6
8507	vpxor	%ymm3,%ymm6,%ymm6
8508	vpalignr	$12,%ymm14,%ymm14,%ymm14
8509	vpalignr	$8,%ymm10,%ymm10,%ymm10
8510	vpalignr	$4,%ymm6,%ymm6,%ymm6
8511	vpaddd	%ymm4,%ymm0,%ymm0
8512	vpxor	%ymm0,%ymm12,%ymm12
8513	vpshufb	.Lrol16(%rip),%ymm12,%ymm12
8514	vpaddd	%ymm12,%ymm8,%ymm8
8515	vpxor	%ymm8,%ymm4,%ymm4
8516	vpsrld	$20,%ymm4,%ymm3
8517	vpslld	$12,%ymm4,%ymm4
8518	vpxor	%ymm3,%ymm4,%ymm4
8519	vpaddd	%ymm4,%ymm0,%ymm0
8520	vpxor	%ymm0,%ymm12,%ymm12
8521	vpshufb	.Lrol8(%rip),%ymm12,%ymm12
8522	vpaddd	%ymm12,%ymm8,%ymm8
8523	vpxor	%ymm8,%ymm4,%ymm4
8524	vpslld	$7,%ymm4,%ymm3
8525	vpsrld	$25,%ymm4,%ymm4
8526	vpxor	%ymm3,%ymm4,%ymm4
8527	vpalignr	$4,%ymm12,%ymm12,%ymm12
8528	vpalignr	$8,%ymm8,%ymm8,%ymm8
8529	vpalignr	$12,%ymm4,%ymm4,%ymm4
8530	vpaddd	%ymm5,%ymm1,%ymm1
8531	vpxor	%ymm1,%ymm13,%ymm13
8532	vpshufb	.Lrol16(%rip),%ymm13,%ymm13
8533	vpaddd	%ymm13,%ymm9,%ymm9
8534	vpxor	%ymm9,%ymm5,%ymm5
8535	vpsrld	$20,%ymm5,%ymm3
8536	vpslld	$12,%ymm5,%ymm5
8537	vpxor	%ymm3,%ymm5,%ymm5
8538	vpaddd	%ymm5,%ymm1,%ymm1
8539	vpxor	%ymm1,%ymm13,%ymm13
8540	vpshufb	.Lrol8(%rip),%ymm13,%ymm13
8541	vpaddd	%ymm13,%ymm9,%ymm9
8542	vpxor	%ymm9,%ymm5,%ymm5
8543	vpslld	$7,%ymm5,%ymm3
8544	vpsrld	$25,%ymm5,%ymm5
8545	vpxor	%ymm3,%ymm5,%ymm5
8546	vpalignr	$4,%ymm13,%ymm13,%ymm13
8547	vpalignr	$8,%ymm9,%ymm9,%ymm9
8548	vpalignr	$12,%ymm5,%ymm5,%ymm5
8549	vpaddd	%ymm6,%ymm2,%ymm2
8550	vpxor	%ymm2,%ymm14,%ymm14
8551	vpshufb	.Lrol16(%rip),%ymm14,%ymm14
8552	vpaddd	%ymm14,%ymm10,%ymm10
8553	vpxor	%ymm10,%ymm6,%ymm6
8554	vpsrld	$20,%ymm6,%ymm3
8555	vpslld	$12,%ymm6,%ymm6
8556	vpxor	%ymm3,%ymm6,%ymm6
8557	vpaddd	%ymm6,%ymm2,%ymm2
8558	vpxor	%ymm2,%ymm14,%ymm14
8559	vpshufb	.Lrol8(%rip),%ymm14,%ymm14
8560	vpaddd	%ymm14,%ymm10,%ymm10
8561	vpxor	%ymm10,%ymm6,%ymm6
8562	vpslld	$7,%ymm6,%ymm3
8563	vpsrld	$25,%ymm6,%ymm6
8564	vpxor	%ymm3,%ymm6,%ymm6
8565	vpalignr	$4,%ymm14,%ymm14,%ymm14
8566	vpalignr	$8,%ymm10,%ymm10,%ymm10
8567	vpalignr	$12,%ymm6,%ymm6,%ymm6
8568
8569	decq	%r10
8570	jne	.Lseal_avx2_320_rounds
8571	vpaddd	.Lchacha20_consts(%rip),%ymm0,%ymm0
8572	vpaddd	.Lchacha20_consts(%rip),%ymm1,%ymm1
8573	vpaddd	.Lchacha20_consts(%rip),%ymm2,%ymm2
8574	vpaddd	%ymm7,%ymm4,%ymm4
8575	vpaddd	%ymm7,%ymm5,%ymm5
8576	vpaddd	%ymm7,%ymm6,%ymm6
8577	vpaddd	%ymm11,%ymm8,%ymm8
8578	vpaddd	%ymm11,%ymm9,%ymm9
8579	vpaddd	%ymm11,%ymm10,%ymm10
8580	vpaddd	0+160(%rbp),%ymm12,%ymm12
8581	vpaddd	0+192(%rbp),%ymm13,%ymm13
8582	vpaddd	0+224(%rbp),%ymm14,%ymm14
8583	vperm2i128	$0x02,%ymm0,%ymm4,%ymm3
8584
8585	vpand	.Lclamp(%rip),%ymm3,%ymm3
8586	vmovdqa	%ymm3,0+0(%rbp)
8587
8588	vperm2i128	$0x13,%ymm0,%ymm4,%ymm0
8589	vperm2i128	$0x13,%ymm8,%ymm12,%ymm4
8590	vperm2i128	$0x02,%ymm1,%ymm5,%ymm8
8591	vperm2i128	$0x02,%ymm9,%ymm13,%ymm12
8592	vperm2i128	$0x13,%ymm1,%ymm5,%ymm1
8593	vperm2i128	$0x13,%ymm9,%ymm13,%ymm5
8594	vperm2i128	$0x02,%ymm2,%ymm6,%ymm9
8595	vperm2i128	$0x02,%ymm10,%ymm14,%ymm13
8596	vperm2i128	$0x13,%ymm2,%ymm6,%ymm2
8597	vperm2i128	$0x13,%ymm10,%ymm14,%ymm6
8598	jmp	.Lseal_avx2_short
8599
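// Short-input path: 192 bytes of usable keystream from two two-block states;
// the first 32 bytes, clamped, become the Poly1305 key at 0(%rbp).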
8600.Lseal_avx2_192:
8601	vmovdqa	%ymm0,%ymm1
8602	vmovdqa	%ymm0,%ymm2
8603	vmovdqa	%ymm4,%ymm5
8604	vmovdqa	%ymm4,%ymm6
8605	vmovdqa	%ymm8,%ymm9
8606	vmovdqa	%ymm8,%ymm10
8607	vpaddd	.Lavx2_inc(%rip),%ymm12,%ymm13
8608	vmovdqa	%ymm12,%ymm11
8609	vmovdqa	%ymm13,%ymm15
8610	movq	$10,%r10
8611.Lseal_avx2_192_rounds:
8612	vpaddd	%ymm4,%ymm0,%ymm0
8613	vpxor	%ymm0,%ymm12,%ymm12
8614	vpshufb	.Lrol16(%rip),%ymm12,%ymm12
8615	vpaddd	%ymm12,%ymm8,%ymm8
8616	vpxor	%ymm8,%ymm4,%ymm4
8617	vpsrld	$20,%ymm4,%ymm3
8618	vpslld	$12,%ymm4,%ymm4
8619	vpxor	%ymm3,%ymm4,%ymm4
8620	vpaddd	%ymm4,%ymm0,%ymm0
8621	vpxor	%ymm0,%ymm12,%ymm12
8622	vpshufb	.Lrol8(%rip),%ymm12,%ymm12
8623	vpaddd	%ymm12,%ymm8,%ymm8
8624	vpxor	%ymm8,%ymm4,%ymm4
8625	vpslld	$7,%ymm4,%ymm3
8626	vpsrld	$25,%ymm4,%ymm4
8627	vpxor	%ymm3,%ymm4,%ymm4
8628	vpalignr	$12,%ymm12,%ymm12,%ymm12
8629	vpalignr	$8,%ymm8,%ymm8,%ymm8
8630	vpalignr	$4,%ymm4,%ymm4,%ymm4
8631	vpaddd	%ymm5,%ymm1,%ymm1
8632	vpxor	%ymm1,%ymm13,%ymm13
8633	vpshufb	.Lrol16(%rip),%ymm13,%ymm13
8634	vpaddd	%ymm13,%ymm9,%ymm9
8635	vpxor	%ymm9,%ymm5,%ymm5
8636	vpsrld	$20,%ymm5,%ymm3
8637	vpslld	$12,%ymm5,%ymm5
8638	vpxor	%ymm3,%ymm5,%ymm5
8639	vpaddd	%ymm5,%ymm1,%ymm1
8640	vpxor	%ymm1,%ymm13,%ymm13
8641	vpshufb	.Lrol8(%rip),%ymm13,%ymm13
8642	vpaddd	%ymm13,%ymm9,%ymm9
8643	vpxor	%ymm9,%ymm5,%ymm5
8644	vpslld	$7,%ymm5,%ymm3
8645	vpsrld	$25,%ymm5,%ymm5
8646	vpxor	%ymm3,%ymm5,%ymm5
8647	vpalignr	$12,%ymm13,%ymm13,%ymm13
8648	vpalignr	$8,%ymm9,%ymm9,%ymm9
8649	vpalignr	$4,%ymm5,%ymm5,%ymm5
8650	vpaddd	%ymm4,%ymm0,%ymm0
8651	vpxor	%ymm0,%ymm12,%ymm12
8652	vpshufb	.Lrol16(%rip),%ymm12,%ymm12
8653	vpaddd	%ymm12,%ymm8,%ymm8
8654	vpxor	%ymm8,%ymm4,%ymm4
8655	vpsrld	$20,%ymm4,%ymm3
8656	vpslld	$12,%ymm4,%ymm4
8657	vpxor	%ymm3,%ymm4,%ymm4
8658	vpaddd	%ymm4,%ymm0,%ymm0
8659	vpxor	%ymm0,%ymm12,%ymm12
8660	vpshufb	.Lrol8(%rip),%ymm12,%ymm12
8661	vpaddd	%ymm12,%ymm8,%ymm8
8662	vpxor	%ymm8,%ymm4,%ymm4
8663	vpslld	$7,%ymm4,%ymm3
8664	vpsrld	$25,%ymm4,%ymm4
8665	vpxor	%ymm3,%ymm4,%ymm4
8666	vpalignr	$4,%ymm12,%ymm12,%ymm12
8667	vpalignr	$8,%ymm8,%ymm8,%ymm8
8668	vpalignr	$12,%ymm4,%ymm4,%ymm4
8669	vpaddd	%ymm5,%ymm1,%ymm1
8670	vpxor	%ymm1,%ymm13,%ymm13
8671	vpshufb	.Lrol16(%rip),%ymm13,%ymm13
8672	vpaddd	%ymm13,%ymm9,%ymm9
8673	vpxor	%ymm9,%ymm5,%ymm5
8674	vpsrld	$20,%ymm5,%ymm3
8675	vpslld	$12,%ymm5,%ymm5
8676	vpxor	%ymm3,%ymm5,%ymm5
8677	vpaddd	%ymm5,%ymm1,%ymm1
8678	vpxor	%ymm1,%ymm13,%ymm13
8679	vpshufb	.Lrol8(%rip),%ymm13,%ymm13
8680	vpaddd	%ymm13,%ymm9,%ymm9
8681	vpxor	%ymm9,%ymm5,%ymm5
8682	vpslld	$7,%ymm5,%ymm3
8683	vpsrld	$25,%ymm5,%ymm5
8684	vpxor	%ymm3,%ymm5,%ymm5
8685	vpalignr	$4,%ymm13,%ymm13,%ymm13
8686	vpalignr	$8,%ymm9,%ymm9,%ymm9
8687	vpalignr	$12,%ymm5,%ymm5,%ymm5
8688
8689	decq	%r10
8690	jne	.Lseal_avx2_192_rounds
8691	vpaddd	%ymm2,%ymm0,%ymm0
8692	vpaddd	%ymm2,%ymm1,%ymm1
8693	vpaddd	%ymm6,%ymm4,%ymm4
8694	vpaddd	%ymm6,%ymm5,%ymm5
8695	vpaddd	%ymm10,%ymm8,%ymm8
8696	vpaddd	%ymm10,%ymm9,%ymm9
8697	vpaddd	%ymm11,%ymm12,%ymm12
8698	vpaddd	%ymm15,%ymm13,%ymm13
8699	vperm2i128	$0x02,%ymm0,%ymm4,%ymm3
8700
8701	vpand	.Lclamp(%rip),%ymm3,%ymm3
8702	vmovdqa	%ymm3,0+0(%rbp)
8703
8704	vperm2i128	$0x13,%ymm0,%ymm4,%ymm0
8705	vperm2i128	$0x13,%ymm8,%ymm12,%ymm4
8706	vperm2i128	$0x02,%ymm1,%ymm5,%ymm8
8707	vperm2i128	$0x02,%ymm9,%ymm13,%ymm12
8708	vperm2i128	$0x13,%ymm1,%ymm5,%ymm1
8709	vperm2i128	$0x13,%ymm9,%ymm13,%ymm5
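// Common short path: hash the additional data via poly_hash_ad_internal, catch
// up on any ciphertext still to be hashed (%rcx bytes at %rdi), then encrypt
// and hash 32 bytes per iteration, rotating the prepared keystream through %ymm0.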
8710.Lseal_avx2_short:
8711	movq	%r8,%r8
8712	call	poly_hash_ad_internal
8713	xorq	%rcx,%rcx
8714.Lseal_avx2_short_hash_remainder:
8715	cmpq	$16,%rcx
8716	jb	.Lseal_avx2_short_loop
8717	addq	0+0(%rdi),%r10
8718	adcq	8+0(%rdi),%r11
8719	adcq	$1,%r12
8720	movq	0+0+0(%rbp),%rax
8721	movq	%rax,%r15
8722	mulq	%r10
8723	movq	%rax,%r13
8724	movq	%rdx,%r14
8725	movq	0+0+0(%rbp),%rax
8726	mulq	%r11
8727	imulq	%r12,%r15
8728	addq	%rax,%r14
8729	adcq	%rdx,%r15
8730	movq	8+0+0(%rbp),%rax
8731	movq	%rax,%r9
8732	mulq	%r10
8733	addq	%rax,%r14
8734	adcq	$0,%rdx
8735	movq	%rdx,%r10
8736	movq	8+0+0(%rbp),%rax
8737	mulq	%r11
8738	addq	%rax,%r15
8739	adcq	$0,%rdx
8740	imulq	%r12,%r9
8741	addq	%r10,%r15
8742	adcq	%rdx,%r9
8743	movq	%r13,%r10
8744	movq	%r14,%r11
8745	movq	%r15,%r12
8746	andq	$3,%r12
8747	movq	%r15,%r13
8748	andq	$-4,%r13
8749	movq	%r9,%r14
8750	shrdq	$2,%r9,%r15
8751	shrq	$2,%r9
8752	addq	%r13,%r15
8753	adcq	%r14,%r9
8754	addq	%r15,%r10
8755	adcq	%r9,%r11
8756	adcq	$0,%r12
8757
8758	subq	$16,%rcx
8759	addq	$16,%rdi
8760	jmp	.Lseal_avx2_short_hash_remainder
8761.Lseal_avx2_short_loop:
8762	cmpq	$32,%rbx
8763	jb	.Lseal_avx2_short_tail
8764	subq	$32,%rbx
8765
8766	vpxor	(%rsi),%ymm0,%ymm0
8767	vmovdqu	%ymm0,(%rdi)
8768	leaq	32(%rsi),%rsi
8769
8770	addq	0+0(%rdi),%r10
8771	adcq	8+0(%rdi),%r11
8772	adcq	$1,%r12
8773	movq	0+0+0(%rbp),%rax
8774	movq	%rax,%r15
8775	mulq	%r10
8776	movq	%rax,%r13
8777	movq	%rdx,%r14
8778	movq	0+0+0(%rbp),%rax
8779	mulq	%r11
8780	imulq	%r12,%r15
8781	addq	%rax,%r14
8782	adcq	%rdx,%r15
8783	movq	8+0+0(%rbp),%rax
8784	movq	%rax,%r9
8785	mulq	%r10
8786	addq	%rax,%r14
8787	adcq	$0,%rdx
8788	movq	%rdx,%r10
8789	movq	8+0+0(%rbp),%rax
8790	mulq	%r11
8791	addq	%rax,%r15
8792	adcq	$0,%rdx
8793	imulq	%r12,%r9
8794	addq	%r10,%r15
8795	adcq	%rdx,%r9
8796	movq	%r13,%r10
8797	movq	%r14,%r11
8798	movq	%r15,%r12
8799	andq	$3,%r12
8800	movq	%r15,%r13
8801	andq	$-4,%r13
8802	movq	%r9,%r14
8803	shrdq	$2,%r9,%r15
8804	shrq	$2,%r9
8805	addq	%r13,%r15
8806	adcq	%r14,%r9
8807	addq	%r15,%r10
8808	adcq	%r9,%r11
8809	adcq	$0,%r12
8810	addq	0+16(%rdi),%r10
8811	adcq	8+16(%rdi),%r11
8812	adcq	$1,%r12
8813	movq	0+0+0(%rbp),%rax
8814	movq	%rax,%r15
8815	mulq	%r10
8816	movq	%rax,%r13
8817	movq	%rdx,%r14
8818	movq	0+0+0(%rbp),%rax
8819	mulq	%r11
8820	imulq	%r12,%r15
8821	addq	%rax,%r14
8822	adcq	%rdx,%r15
8823	movq	8+0+0(%rbp),%rax
8824	movq	%rax,%r9
8825	mulq	%r10
8826	addq	%rax,%r14
8827	adcq	$0,%rdx
8828	movq	%rdx,%r10
8829	movq	8+0+0(%rbp),%rax
8830	mulq	%r11
8831	addq	%rax,%r15
8832	adcq	$0,%rdx
8833	imulq	%r12,%r9
8834	addq	%r10,%r15
8835	adcq	%rdx,%r9
8836	movq	%r13,%r10
8837	movq	%r14,%r11
8838	movq	%r15,%r12
8839	andq	$3,%r12
8840	movq	%r15,%r13
8841	andq	$-4,%r13
8842	movq	%r9,%r14
8843	shrdq	$2,%r9,%r15
8844	shrq	$2,%r9
8845	addq	%r13,%r15
8846	adcq	%r14,%r9
8847	addq	%r15,%r10
8848	adcq	%r9,%r11
8849	adcq	$0,%r12
8850
8851	leaq	32(%rdi),%rdi
8852
8853	vmovdqa	%ymm4,%ymm0
8854	vmovdqa	%ymm8,%ymm4
8855	vmovdqa	%ymm12,%ymm8
8856	vmovdqa	%ymm1,%ymm12
8857	vmovdqa	%ymm5,%ymm1
8858	vmovdqa	%ymm9,%ymm5
8859	vmovdqa	%ymm13,%ymm9
8860	vmovdqa	%ymm2,%ymm13
8861	vmovdqa	%ymm6,%ymm2
8862	jmp	.Lseal_avx2_short_loop
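// Fewer than 32 bytes left: handle one 16-byte block using the low xmm lane,
// then move the upper lane down for the final partial block.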
8863.Lseal_avx2_short_tail:
8864	cmpq	$16,%rbx
8865	jb	.Lseal_avx2_exit
8866	subq	$16,%rbx
8867	vpxor	(%rsi),%xmm0,%xmm3
8868	vmovdqu	%xmm3,(%rdi)
8869	leaq	16(%rsi),%rsi
8870	addq	0+0(%rdi),%r10
8871	adcq	8+0(%rdi),%r11
8872	adcq	$1,%r12
8873	movq	0+0+0(%rbp),%rax
8874	movq	%rax,%r15
8875	mulq	%r10
8876	movq	%rax,%r13
8877	movq	%rdx,%r14
8878	movq	0+0+0(%rbp),%rax
8879	mulq	%r11
8880	imulq	%r12,%r15
8881	addq	%rax,%r14
8882	adcq	%rdx,%r15
8883	movq	8+0+0(%rbp),%rax
8884	movq	%rax,%r9
8885	mulq	%r10
8886	addq	%rax,%r14
8887	adcq	$0,%rdx
8888	movq	%rdx,%r10
8889	movq	8+0+0(%rbp),%rax
8890	mulq	%r11
8891	addq	%rax,%r15
8892	adcq	$0,%rdx
8893	imulq	%r12,%r9
8894	addq	%r10,%r15
8895	adcq	%rdx,%r9
8896	movq	%r13,%r10
8897	movq	%r14,%r11
8898	movq	%r15,%r12
8899	andq	$3,%r12
8900	movq	%r15,%r13
8901	andq	$-4,%r13
8902	movq	%r9,%r14
8903	shrdq	$2,%r9,%r15
8904	shrq	$2,%r9
8905	addq	%r13,%r15
8906	adcq	%r14,%r9
8907	addq	%r15,%r10
8908	adcq	%r9,%r11
8909	adcq	$0,%r12
8910
8911	leaq	16(%rdi),%rdi
8912	vextracti128	$1,%ymm0,%xmm0
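// Done with AVX2: clear the upper YMM state and hand off to the SSE tail for the
// final partial block and the tag.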
8913.Lseal_avx2_exit:
8914	vzeroupper
8915	jmp	.Lseal_sse_tail_16
8916.cfi_endproc
8917.size	chacha20_poly1305_seal_avx2, .-chacha20_poly1305_seal_avx2
8918#endif
8919