1; This file is generated from a similarly-named Perl script in the BoringSSL
2; source tree. Do not edit by hand.
3
4%include "ring_core_generated/prefix_symbols_nasm.inc"
; Everything from here to the matching %else near the end of the file is
; assembled only when the output format is win32.
5%ifidn __OUTPUT_FORMAT__, win32
; Section selection. Inside the win32-only region the `obj` test below cannot
; match, so the win32 branch is the one that takes effect.
6%ifidn __OUTPUT_FORMAT__,obj
7section	code	use32 class=code align=64
8%elifidn __OUTPUT_FORMAT__,win32
; NOTE(review): presumably the SAFESEH-compatibility marker described in the
; NASM manual for win32 objects -- confirm.
9$@feat.00 equ 1
10section	.text	code align=64
11%else
12section	.text	code
13%endif
; Exported entry point; its first instruction is aligned to 16 bytes.
14global	_ChaCha20_ctr32
15align	16
; _ChaCha20_ctr32 -- public entry point.
; Five dword arguments are read from the stack. After the four pushes below
; they sit at:
;   [20+esp]  output pointer
;   [24+esp]  input pointer
;   [28+esp]  byte count (zero means return immediately)
;   [32+esp]  pointer to 8 dwords (the key words)
;   [36+esp]  pointer to 4 dwords (the counter block)
; ebp, ebx, esi and edi are saved here and restored before returning.
16_ChaCha20_ctr32:
17L$_ChaCha20_ctr32_begin:
18	push	ebp
19	push	ebx
20	push	esi
21	push	edi
; Nothing to do when the byte count is zero.
22	xor	eax,eax
23	cmp	eax,DWORD [28+esp]
24	je	NEAR L$000no_data
; call/pop leaves the address of L$pic_point in eax. The SSSE3 path adds a
; link-time offset to it to locate L$ssse3_data.
25	call	L$pic_point
26L$pic_point:
27	pop	eax
; Dispatch on two capability bits: bit 24 (16777216) of the first dword of
; _OPENSSL_ia32cap_P and bit 9 (512) of the second dword. Both must be set to
; take the SSSE3 path; otherwise execution continues in the integer-only code.
; NOTE(review): presumably the FXSR and SSSE3 feature bits -- confirm against
; the ia32cap layout.
28	lea	ebp,[_OPENSSL_ia32cap_P]
29	test	DWORD [ebp],16777216
30	jz	NEAR L$001x86
31	test	DWORD [4+ebp],512
32	jz	NEAR L$001x86
33	jmp	NEAR L$ssse3_shortcut
; Integer-only path. Builds a 132-byte frame laid out as:
;   [0..60+esp]     working state, 16 dwords
;   [80..108+esp]   copy of the 8 key dwords
;   [112..124+esp]  copy of the 4 counter-block dwords
;   [128+esp]       round counter spill slot (written inside L$004loop)
; With the frame in place the caller's arguments are found at [152+esp]
; (output), [156+esp] (input) and [160+esp] (byte count).
34L$001x86:
; esi = key pointer, edi = counter-block pointer (read before esp moves).
35	mov	esi,DWORD [32+esp]
36	mov	edi,DWORD [36+esp]
37	sub	esp,132
; Copy key dwords 0..3.
38	mov	eax,DWORD [esi]
39	mov	ebx,DWORD [4+esi]
40	mov	ecx,DWORD [8+esi]
41	mov	edx,DWORD [12+esi]
42	mov	DWORD [80+esp],eax
43	mov	DWORD [84+esp],ebx
44	mov	DWORD [88+esp],ecx
45	mov	DWORD [92+esp],edx
; Copy key dwords 4..7.
46	mov	eax,DWORD [16+esi]
47	mov	ebx,DWORD [20+esi]
48	mov	ecx,DWORD [24+esi]
49	mov	edx,DWORD [28+esi]
50	mov	DWORD [96+esp],eax
51	mov	DWORD [100+esp],ebx
52	mov	DWORD [104+esp],ecx
53	mov	DWORD [108+esp],edx
; Copy the counter block. Its first dword is stored minus one because
; L$002entry adds one to it before every block, including the first.
54	mov	eax,DWORD [edi]
55	mov	ebx,DWORD [4+edi]
56	mov	ecx,DWORD [8+edi]
57	mov	edx,DWORD [12+edi]
58	sub	eax,1
59	mov	DWORD [112+esp],eax
60	mov	DWORD [116+esp],ebx
61	mov	DWORD [120+esp],ecx
62	mov	DWORD [124+esp],edx
; The first block skips the pointer write-back at L$003outer_loop.
63	jmp	NEAR L$002entry
64align	16
; Re-entry for the second and later blocks: write the advanced input pointer
; (ebx), output pointer (eax) and remaining byte count (ecx) back into the
; argument slots.
65L$003outer_loop:
66	mov	DWORD [156+esp],ebx
67	mov	DWORD [152+esp],eax
68	mov	DWORD [160+esp],ecx
; Per-block setup. The working state is seeded from the four constants, the
; saved key and the saved counter block. Words 0, 4, 8, 9 and 12 start out in
; eax, ebp, ecx, esi and edx; the remaining words are written to [..+esp].
69L$002entry:
; The four constants are the little-endian ASCII of "expand 32-byte k".
70	mov	eax,1634760805
71	mov	DWORD [4+esp],857760878
72	mov	DWORD [8+esp],2036477234
73	mov	DWORD [12+esp],1797285236
74	mov	ebx,DWORD [84+esp]
75	mov	ebp,DWORD [88+esp]
76	mov	ecx,DWORD [104+esp]
77	mov	esi,DWORD [108+esp]
78	mov	edx,DWORD [116+esp]
79	mov	edi,DWORD [120+esp]
80	mov	DWORD [20+esp],ebx
81	mov	DWORD [24+esp],ebp
82	mov	DWORD [40+esp],ecx
83	mov	DWORD [44+esp],esi
84	mov	DWORD [52+esp],edx
85	mov	DWORD [56+esp],edi
86	mov	ebx,DWORD [92+esp]
87	mov	edi,DWORD [124+esp]
88	mov	edx,DWORD [112+esp]
89	mov	ebp,DWORD [80+esp]
90	mov	ecx,DWORD [96+esp]
91	mov	esi,DWORD [100+esp]
; Advance the block counter and keep the new value for the next block.
92	add	edx,1
93	mov	DWORD [28+esp],ebx
94	mov	DWORD [60+esp],edi
95	mov	DWORD [112+esp],edx
; ebx = number of passes through L$004loop.
96	mov	ebx,10
97	jmp	NEAR L$004loop
98align	16
; Integer round loop, executed 10 times.
; Each pass contains eight interleaved add/xor/rotate groups using rotate
; counts 16, 12, 8 and 7. State words that do not fit in registers are
; spilled to and reloaded from [0..60+esp] as the groups progress.
; NOTE(review): this looks like four column plus four diagonal ChaCha
; quarter-rounds (one double round) per pass -- confirm against the
; generator script.
99L$004loop:
100	add	eax,ebp
; ebx is needed as a state register, so park the pass counter in the frame.
101	mov	DWORD [128+esp],ebx
102	mov	ebx,ebp
103	xor	edx,eax
104	rol	edx,16
105	add	ecx,edx
106	xor	ebx,ecx
107	mov	edi,DWORD [52+esp]
108	rol	ebx,12
109	mov	ebp,DWORD [20+esp]
110	add	eax,ebx
111	xor	edx,eax
112	mov	DWORD [esp],eax
113	rol	edx,8
114	mov	eax,DWORD [4+esp]
115	add	ecx,edx
116	mov	DWORD [48+esp],edx
117	xor	ebx,ecx
118	add	eax,ebp
119	rol	ebx,7
120	xor	edi,eax
121	mov	DWORD [32+esp],ecx
122	rol	edi,16
123	mov	DWORD [16+esp],ebx
124	add	esi,edi
125	mov	ecx,DWORD [40+esp]
126	xor	ebp,esi
127	mov	edx,DWORD [56+esp]
128	rol	ebp,12
129	mov	ebx,DWORD [24+esp]
130	add	eax,ebp
131	xor	edi,eax
132	mov	DWORD [4+esp],eax
133	rol	edi,8
134	mov	eax,DWORD [8+esp]
135	add	esi,edi
136	mov	DWORD [52+esp],edi
137	xor	ebp,esi
138	add	eax,ebx
139	rol	ebp,7
140	xor	edx,eax
141	mov	DWORD [36+esp],esi
142	rol	edx,16
143	mov	DWORD [20+esp],ebp
144	add	ecx,edx
145	mov	esi,DWORD [44+esp]
146	xor	ebx,ecx
147	mov	edi,DWORD [60+esp]
148	rol	ebx,12
149	mov	ebp,DWORD [28+esp]
150	add	eax,ebx
151	xor	edx,eax
152	mov	DWORD [8+esp],eax
153	rol	edx,8
154	mov	eax,DWORD [12+esp]
155	add	ecx,edx
156	mov	DWORD [56+esp],edx
157	xor	ebx,ecx
158	add	eax,ebp
159	rol	ebx,7
160	xor	edi,eax
161	rol	edi,16
162	mov	DWORD [24+esp],ebx
163	add	esi,edi
164	xor	ebp,esi
165	rol	ebp,12
166	mov	ebx,DWORD [20+esp]
167	add	eax,ebp
168	xor	edi,eax
169	mov	DWORD [12+esp],eax
170	rol	edi,8
171	mov	eax,DWORD [esp]
172	add	esi,edi
173	mov	edx,edi
174	xor	ebp,esi
175	add	eax,ebx
176	rol	ebp,7
177	xor	edx,eax
178	rol	edx,16
179	mov	DWORD [28+esp],ebp
180	add	ecx,edx
181	xor	ebx,ecx
182	mov	edi,DWORD [48+esp]
183	rol	ebx,12
184	mov	ebp,DWORD [24+esp]
185	add	eax,ebx
186	xor	edx,eax
187	mov	DWORD [esp],eax
188	rol	edx,8
189	mov	eax,DWORD [4+esp]
190	add	ecx,edx
191	mov	DWORD [60+esp],edx
192	xor	ebx,ecx
193	add	eax,ebp
194	rol	ebx,7
195	xor	edi,eax
196	mov	DWORD [40+esp],ecx
197	rol	edi,16
198	mov	DWORD [20+esp],ebx
199	add	esi,edi
200	mov	ecx,DWORD [32+esp]
201	xor	ebp,esi
202	mov	edx,DWORD [52+esp]
203	rol	ebp,12
204	mov	ebx,DWORD [28+esp]
205	add	eax,ebp
206	xor	edi,eax
207	mov	DWORD [4+esp],eax
208	rol	edi,8
209	mov	eax,DWORD [8+esp]
210	add	esi,edi
211	mov	DWORD [48+esp],edi
212	xor	ebp,esi
213	add	eax,ebx
214	rol	ebp,7
215	xor	edx,eax
216	mov	DWORD [44+esp],esi
217	rol	edx,16
218	mov	DWORD [24+esp],ebp
219	add	ecx,edx
220	mov	esi,DWORD [36+esp]
221	xor	ebx,ecx
222	mov	edi,DWORD [56+esp]
223	rol	ebx,12
224	mov	ebp,DWORD [16+esp]
225	add	eax,ebx
226	xor	edx,eax
227	mov	DWORD [8+esp],eax
228	rol	edx,8
229	mov	eax,DWORD [12+esp]
230	add	ecx,edx
231	mov	DWORD [52+esp],edx
232	xor	ebx,ecx
233	add	eax,ebp
234	rol	ebx,7
235	xor	edi,eax
236	rol	edi,16
237	mov	DWORD [28+esp],ebx
238	add	esi,edi
239	xor	ebp,esi
240	mov	edx,DWORD [48+esp]
241	rol	ebp,12
; Reload the pass counter parked at the top of the loop.
242	mov	ebx,DWORD [128+esp]
243	add	eax,ebp
244	xor	edi,eax
245	mov	DWORD [12+esp],eax
246	rol	edi,8
247	mov	eax,DWORD [esp]
248	add	esi,edi
249	mov	DWORD [56+esp],edi
250	xor	ebp,esi
251	rol	ebp,7
252	dec	ebx
253	jnz	NEAR L$004loop
; Rounds finished. ebx = remaining byte count. Start adding the initial state
; words back onto the permuted words still held in registers.
254	mov	ebx,DWORD [160+esp]
255	add	eax,1634760805
256	add	ebp,DWORD [80+esp]
257	add	ecx,DWORD [96+esp]
258	add	esi,DWORD [100+esp]
; Fewer than 64 bytes left: finish through the byte-wise tail instead.
259	cmp	ebx,64
260	jb	NEAR L$005tail
; Full 64-byte block. ebx = input pointer; eax becomes the output pointer once
; word 0 has been xored with the input and parked at [esp].
261	mov	ebx,DWORD [156+esp]
262	add	edx,DWORD [112+esp]
263	add	edi,DWORD [120+esp]
264	xor	eax,DWORD [ebx]
265	xor	ebp,DWORD [16+ebx]
266	mov	DWORD [esp],eax
267	mov	eax,DWORD [152+esp]
268	xor	ecx,DWORD [32+ebx]
269	xor	esi,DWORD [36+ebx]
270	xor	edx,DWORD [48+ebx]
271	xor	edi,DWORD [56+ebx]
272	mov	DWORD [16+eax],ebp
273	mov	DWORD [32+eax],ecx
274	mov	DWORD [36+eax],esi
275	mov	DWORD [48+eax],edx
276	mov	DWORD [56+eax],edi
; Next five words (1, 2, 3, 5, 6): reload, add the initial value, xor with the
; input, store.
277	mov	ebp,DWORD [4+esp]
278	mov	ecx,DWORD [8+esp]
279	mov	esi,DWORD [12+esp]
280	mov	edx,DWORD [20+esp]
281	mov	edi,DWORD [24+esp]
282	add	ebp,857760878
283	add	ecx,2036477234
284	add	esi,1797285236
285	add	edx,DWORD [84+esp]
286	add	edi,DWORD [88+esp]
287	xor	ebp,DWORD [4+ebx]
288	xor	ecx,DWORD [8+ebx]
289	xor	esi,DWORD [12+ebx]
290	xor	edx,DWORD [20+ebx]
291	xor	edi,DWORD [24+ebx]
292	mov	DWORD [4+eax],ebp
293	mov	DWORD [8+eax],ecx
294	mov	DWORD [12+eax],esi
295	mov	DWORD [20+eax],edx
296	mov	DWORD [24+eax],edi
; Last five words (7, 10, 11, 13, 15).
297	mov	ebp,DWORD [28+esp]
298	mov	ecx,DWORD [40+esp]
299	mov	esi,DWORD [44+esp]
300	mov	edx,DWORD [52+esp]
301	mov	edi,DWORD [60+esp]
302	add	ebp,DWORD [92+esp]
303	add	ecx,DWORD [104+esp]
304	add	esi,DWORD [108+esp]
305	add	edx,DWORD [116+esp]
306	add	edi,DWORD [124+esp]
307	xor	ebp,DWORD [28+ebx]
308	xor	ecx,DWORD [40+ebx]
309	xor	esi,DWORD [44+ebx]
310	xor	edx,DWORD [52+ebx]
311	xor	edi,DWORD [60+ebx]
; Advance the input pointer by one block.
312	lea	ebx,[64+ebx]
313	mov	DWORD [28+eax],ebp
; ebp = output word 0 parked earlier; ecx = remaining byte count.
314	mov	ebp,DWORD [esp]
315	mov	DWORD [40+eax],ecx
316	mov	ecx,DWORD [160+esp]
317	mov	DWORD [44+eax],esi
318	mov	DWORD [52+eax],edx
319	mov	DWORD [60+eax],edi
320	mov	DWORD [eax],ebp
; Advance the output pointer, consume 64 bytes, and loop while any remain.
321	lea	eax,[64+eax]
322	sub	ecx,64
323	jnz	NEAR L$003outer_loop
324	jmp	NEAR L$006done
; Partial final block (ebx = remaining bytes, between 1 and 63).
; The finished keystream block is materialised in [0..60+esp], then xored
; into the output one byte at a time.
325L$005tail:
326	add	edx,DWORD [112+esp]
327	add	edi,DWORD [120+esp]
328	mov	DWORD [esp],eax
329	mov	DWORD [16+esp],ebp
330	mov	DWORD [32+esp],ecx
331	mov	DWORD [36+esp],esi
332	mov	DWORD [48+esp],edx
333	mov	DWORD [56+esp],edi
334	mov	ebp,DWORD [4+esp]
335	mov	ecx,DWORD [8+esp]
336	mov	esi,DWORD [12+esp]
337	mov	edx,DWORD [20+esp]
338	mov	edi,DWORD [24+esp]
339	add	ebp,857760878
340	add	ecx,2036477234
341	add	esi,1797285236
342	add	edx,DWORD [84+esp]
343	add	edi,DWORD [88+esp]
344	mov	DWORD [4+esp],ebp
345	mov	DWORD [8+esp],ecx
346	mov	DWORD [12+esp],esi
347	mov	DWORD [20+esp],edx
348	mov	DWORD [24+esp],edi
349	mov	ebp,DWORD [28+esp]
350	mov	ecx,DWORD [40+esp]
351	mov	esi,DWORD [44+esp]
352	mov	edx,DWORD [52+esp]
353	mov	edi,DWORD [60+esp]
354	add	ebp,DWORD [92+esp]
355	add	ecx,DWORD [104+esp]
356	add	esi,DWORD [108+esp]
357	add	edx,DWORD [116+esp]
358	add	edi,DWORD [124+esp]
359	mov	DWORD [28+esp],ebp
; ebp = input pointer, ecx = output pointer, esi = byte index (starts at 0).
360	mov	ebp,DWORD [156+esp]
361	mov	DWORD [40+esp],ecx
362	mov	ecx,DWORD [152+esp]
363	mov	DWORD [44+esp],esi
364	xor	esi,esi
365	mov	DWORD [52+esp],edx
366	mov	DWORD [60+esp],edi
367	xor	eax,eax
368	xor	edx,edx
; out[i] = in[i] xor keystream[i] for i in 0..ebx-1. esi is incremented before
; the store, hence the -1 in the store address.
369L$007tail_loop:
370	mov	al,BYTE [ebp*1+esi]
371	mov	dl,BYTE [esi*1+esp]
372	lea	esi,[1+esi]
373	xor	al,dl
374	mov	BYTE [esi*1+ecx-1],al
375	dec	ebx
376	jnz	NEAR L$007tail_loop
; Release the 132-byte frame.
377L$006done:
378	add	esp,132
; Common exit; also reached directly when the byte count is zero, before any
; frame was allocated.
379L$000no_data:
380	pop	edi
381	pop	esi
382	pop	ebx
383	pop	ebp
384	ret
385align	16
; SSSE3 implementation. Reads the same five stack arguments as
; _ChaCha20_ctr32.
; NOTE(review): the code after L$ssse3_shortcut expects eax to hold the
; address of L$pic_point. Entering through this label does not set eax, and
; no caller of it is visible in this file -- confirm it is only reached via
; the shortcut.
386__ChaCha20_ssse3:
387	push	ebp
388	push	ebx
389	push	esi
390	push	edi
; Jump target used by _ChaCha20_ctr32 after it has pushed the same four
; registers.
391L$ssse3_shortcut:
; edi = output, esi = input, ecx = byte count, edx = key pointer,
; ebx = counter-block pointer.
392	mov	edi,DWORD [20+esp]
393	mov	esi,DWORD [24+esp]
394	mov	ecx,DWORD [28+esp]
395	mov	edx,DWORD [32+esp]
396	mov	ebx,DWORD [36+esp]
; Build a 64-byte-aligned frame; the caller's esp is kept at [512+esp] so the
; epilogue can undo the alignment.
397	mov	ebp,esp
398	sub	esp,524
399	and	esp,-64
400	mov	DWORD [512+esp],ebp
; eax = run-time address of L$ssse3_data.
401	lea	eax,[(L$ssse3_data-L$pic_point)+eax]
; xmm3 = the 16-byte counter block.
402	movdqu	xmm3,[ebx]
; Fewer than 256 bytes: use the one-block-at-a-time path.
403	cmp	ecx,256
404	jb	NEAR L$0081x
; Four-blocks-at-a-time path. Keep the key and counter pointers for the
; leftover code, and bias ecx by -256 so the loop can test the borrow.
405	mov	DWORD [516+esp],edx
406	mov	DWORD [520+esp],ebx
407	sub	ecx,256
; ebp addresses the base state: 16 vectors at [ebp-128]..[ebp+112], each
; holding one state word replicated across its four dword lanes.
408	lea	ebp,[384+esp]
409	movdqu	xmm7,[edx]
; Broadcast the four counter-block dwords (pshufd 0/85/170/255).
410	pshufd	xmm0,xmm3,0
411	pshufd	xmm1,xmm3,85
412	pshufd	xmm2,xmm3,170
413	pshufd	xmm3,xmm3,255
; Counter lanes become counter+{0,1,2,3}, then minus {4,4,4,4} because
; L$009outer_loop adds 4 to every lane before each batch.
414	paddd	xmm0,[48+eax]
415	pshufd	xmm4,xmm7,0
416	pshufd	xmm5,xmm7,85
417	psubd	xmm0,[64+eax]
418	pshufd	xmm6,xmm7,170
419	pshufd	xmm7,xmm7,255
420	movdqa	[64+ebp],xmm0
421	movdqa	[80+ebp],xmm1
422	movdqa	[96+ebp],xmm2
423	movdqa	[112+ebp],xmm3
; Second half of the key.
424	movdqu	xmm3,[16+edx]
425	movdqa	[ebp-64],xmm4
426	movdqa	[ebp-48],xmm5
427	movdqa	[ebp-32],xmm6
428	movdqa	[ebp-16],xmm7
; The four constants from the data table.
429	movdqa	xmm7,[32+eax]
; ebx addresses the working state, laid out like the base state.
430	lea	ebx,[128+esp]
431	pshufd	xmm0,xmm3,0
432	pshufd	xmm1,xmm3,85
433	pshufd	xmm2,xmm3,170
434	pshufd	xmm3,xmm3,255
435	pshufd	xmm4,xmm7,0
436	pshufd	xmm5,xmm7,85
437	pshufd	xmm6,xmm7,170
438	pshufd	xmm7,xmm7,255
439	movdqa	[ebp],xmm0
440	movdqa	[16+ebp],xmm1
441	movdqa	[32+ebp],xmm2
442	movdqa	[48+ebp],xmm3
443	movdqa	[ebp-128],xmm4
444	movdqa	[ebp-112],xmm5
445	movdqa	[ebp-96],xmm6
446	movdqa	[ebp-80],xmm7
; Bias both data pointers by +128; the four-block code addresses the four
; blocks at displacements -128, -64, 0 and +64.
447	lea	esi,[128+esi]
448	lea	edi,[128+edi]
449	jmp	NEAR L$009outer_loop
450align	16
; Start of one four-block batch: copy the base state (ebp) into the working
; state (ebx). Rows -128, -64, 0 and +16 are not copied because they are
; loaded straight into xmm0, xmm3, xmm4 and xmm5 at the end of this block.
451L$009outer_loop:
452	movdqa	xmm1,[ebp-112]
453	movdqa	xmm2,[ebp-96]
454	movdqa	xmm3,[ebp-80]
455	movdqa	xmm5,[ebp-48]
456	movdqa	xmm6,[ebp-32]
457	movdqa	xmm7,[ebp-16]
458	movdqa	[ebx-112],xmm1
459	movdqa	[ebx-96],xmm2
460	movdqa	[ebx-80],xmm3
461	movdqa	[ebx-48],xmm5
462	movdqa	[ebx-32],xmm6
463	movdqa	[ebx-16],xmm7
464	movdqa	xmm2,[32+ebp]
465	movdqa	xmm3,[48+ebp]
466	movdqa	xmm4,[64+ebp]
467	movdqa	xmm5,[80+ebp]
468	movdqa	xmm6,[96+ebp]
469	movdqa	xmm7,[112+ebp]
; Advance every counter lane by 4 (one per block in the batch).
470	paddd	xmm4,[64+eax]
471	movdqa	[32+ebx],xmm2
472	movdqa	[48+ebx],xmm3
473	movdqa	[64+ebx],xmm4
474	movdqa	[80+ebx],xmm5
475	movdqa	[96+ebx],xmm6
476	movdqa	[112+ebx],xmm7
; Keep the advanced counters as the new base for this and later batches.
477	movdqa	[64+ebp],xmm4
; Preload the registers the first group of L$010loop starts with.
478	movdqa	xmm0,[ebp-128]
479	movdqa	xmm6,xmm4
480	movdqa	xmm3,[ebp-64]
481	movdqa	xmm4,[ebp]
482	movdqa	xmm5,[16+ebp]
; edx = number of passes through L$010loop.
483	mov	edx,10
484	nop
485align	16
; Four-block round loop, executed 10 times. Every vector holds the same state
; word for four blocks, one block per dword lane.
; Rotations are done as follows:
;   by 16 and by 8 -- pshufb with the byte-shuffle masks at [eax] and [16+eax]
;   by 12 and by 7 -- pslld / psrld / por
; Working-state rows that are not in registers live at [ebx-128]..[ebx+112].
; NOTE(review): each pass contains eight add/xor/rotate groups, which looks
; like one ChaCha double round -- confirm against the generator script.
486L$010loop:
487	paddd	xmm0,xmm3
488	movdqa	xmm2,xmm3
489	pxor	xmm6,xmm0
490	pshufb	xmm6,[eax]
491	paddd	xmm4,xmm6
492	pxor	xmm2,xmm4
493	movdqa	xmm3,[ebx-48]
494	movdqa	xmm1,xmm2
495	pslld	xmm2,12
496	psrld	xmm1,20
497	por	xmm2,xmm1
498	movdqa	xmm1,[ebx-112]
499	paddd	xmm0,xmm2
500	movdqa	xmm7,[80+ebx]
501	pxor	xmm6,xmm0
502	movdqa	[ebx-128],xmm0
503	pshufb	xmm6,[16+eax]
504	paddd	xmm4,xmm6
505	movdqa	[64+ebx],xmm6
506	pxor	xmm2,xmm4
507	paddd	xmm1,xmm3
508	movdqa	xmm0,xmm2
509	pslld	xmm2,7
510	psrld	xmm0,25
511	pxor	xmm7,xmm1
512	por	xmm2,xmm0
513	movdqa	[ebx],xmm4
514	pshufb	xmm7,[eax]
515	movdqa	[ebx-64],xmm2
516	paddd	xmm5,xmm7
517	movdqa	xmm4,[32+ebx]
518	pxor	xmm3,xmm5
519	movdqa	xmm2,[ebx-32]
520	movdqa	xmm0,xmm3
521	pslld	xmm3,12
522	psrld	xmm0,20
523	por	xmm3,xmm0
524	movdqa	xmm0,[ebx-96]
525	paddd	xmm1,xmm3
526	movdqa	xmm6,[96+ebx]
527	pxor	xmm7,xmm1
528	movdqa	[ebx-112],xmm1
529	pshufb	xmm7,[16+eax]
530	paddd	xmm5,xmm7
531	movdqa	[80+ebx],xmm7
532	pxor	xmm3,xmm5
533	paddd	xmm0,xmm2
534	movdqa	xmm1,xmm3
535	pslld	xmm3,7
536	psrld	xmm1,25
537	pxor	xmm6,xmm0
538	por	xmm3,xmm1
539	movdqa	[16+ebx],xmm5
540	pshufb	xmm6,[eax]
541	movdqa	[ebx-48],xmm3
542	paddd	xmm4,xmm6
543	movdqa	xmm5,[48+ebx]
544	pxor	xmm2,xmm4
545	movdqa	xmm3,[ebx-16]
546	movdqa	xmm1,xmm2
547	pslld	xmm2,12
548	psrld	xmm1,20
549	por	xmm2,xmm1
550	movdqa	xmm1,[ebx-80]
551	paddd	xmm0,xmm2
552	movdqa	xmm7,[112+ebx]
553	pxor	xmm6,xmm0
554	movdqa	[ebx-96],xmm0
555	pshufb	xmm6,[16+eax]
556	paddd	xmm4,xmm6
557	movdqa	[96+ebx],xmm6
558	pxor	xmm2,xmm4
559	paddd	xmm1,xmm3
560	movdqa	xmm0,xmm2
561	pslld	xmm2,7
562	psrld	xmm0,25
563	pxor	xmm7,xmm1
564	por	xmm2,xmm0
565	pshufb	xmm7,[eax]
566	movdqa	[ebx-32],xmm2
567	paddd	xmm5,xmm7
568	pxor	xmm3,xmm5
569	movdqa	xmm2,[ebx-48]
570	movdqa	xmm0,xmm3
571	pslld	xmm3,12
572	psrld	xmm0,20
573	por	xmm3,xmm0
574	movdqa	xmm0,[ebx-128]
575	paddd	xmm1,xmm3
576	pxor	xmm7,xmm1
577	movdqa	[ebx-80],xmm1
578	pshufb	xmm7,[16+eax]
579	paddd	xmm5,xmm7
580	movdqa	xmm6,xmm7
581	pxor	xmm3,xmm5
582	paddd	xmm0,xmm2
583	movdqa	xmm1,xmm3
584	pslld	xmm3,7
585	psrld	xmm1,25
586	pxor	xmm6,xmm0
587	por	xmm3,xmm1
588	pshufb	xmm6,[eax]
589	movdqa	[ebx-16],xmm3
590	paddd	xmm4,xmm6
591	pxor	xmm2,xmm4
592	movdqa	xmm3,[ebx-32]
593	movdqa	xmm1,xmm2
594	pslld	xmm2,12
595	psrld	xmm1,20
596	por	xmm2,xmm1
597	movdqa	xmm1,[ebx-112]
598	paddd	xmm0,xmm2
599	movdqa	xmm7,[64+ebx]
600	pxor	xmm6,xmm0
601	movdqa	[ebx-128],xmm0
602	pshufb	xmm6,[16+eax]
603	paddd	xmm4,xmm6
604	movdqa	[112+ebx],xmm6
605	pxor	xmm2,xmm4
606	paddd	xmm1,xmm3
607	movdqa	xmm0,xmm2
608	pslld	xmm2,7
609	psrld	xmm0,25
610	pxor	xmm7,xmm1
611	por	xmm2,xmm0
612	movdqa	[32+ebx],xmm4
613	pshufb	xmm7,[eax]
614	movdqa	[ebx-48],xmm2
615	paddd	xmm5,xmm7
616	movdqa	xmm4,[ebx]
617	pxor	xmm3,xmm5
618	movdqa	xmm2,[ebx-16]
619	movdqa	xmm0,xmm3
620	pslld	xmm3,12
621	psrld	xmm0,20
622	por	xmm3,xmm0
623	movdqa	xmm0,[ebx-96]
624	paddd	xmm1,xmm3
625	movdqa	xmm6,[80+ebx]
626	pxor	xmm7,xmm1
627	movdqa	[ebx-112],xmm1
628	pshufb	xmm7,[16+eax]
629	paddd	xmm5,xmm7
630	movdqa	[64+ebx],xmm7
631	pxor	xmm3,xmm5
632	paddd	xmm0,xmm2
633	movdqa	xmm1,xmm3
634	pslld	xmm3,7
635	psrld	xmm1,25
636	pxor	xmm6,xmm0
637	por	xmm3,xmm1
638	movdqa	[48+ebx],xmm5
639	pshufb	xmm6,[eax]
640	movdqa	[ebx-32],xmm3
641	paddd	xmm4,xmm6
642	movdqa	xmm5,[16+ebx]
643	pxor	xmm2,xmm4
644	movdqa	xmm3,[ebx-64]
645	movdqa	xmm1,xmm2
646	pslld	xmm2,12
647	psrld	xmm1,20
648	por	xmm2,xmm1
649	movdqa	xmm1,[ebx-80]
650	paddd	xmm0,xmm2
651	movdqa	xmm7,[96+ebx]
652	pxor	xmm6,xmm0
653	movdqa	[ebx-96],xmm0
654	pshufb	xmm6,[16+eax]
655	paddd	xmm4,xmm6
656	movdqa	[80+ebx],xmm6
657	pxor	xmm2,xmm4
658	paddd	xmm1,xmm3
659	movdqa	xmm0,xmm2
660	pslld	xmm2,7
661	psrld	xmm0,25
662	pxor	xmm7,xmm1
663	por	xmm2,xmm0
664	pshufb	xmm7,[eax]
665	movdqa	[ebx-16],xmm2
666	paddd	xmm5,xmm7
667	pxor	xmm3,xmm5
668	movdqa	xmm0,xmm3
669	pslld	xmm3,12
670	psrld	xmm0,20
671	por	xmm3,xmm0
672	movdqa	xmm0,[ebx-128]
673	paddd	xmm1,xmm3
674	movdqa	xmm6,[64+ebx]
675	pxor	xmm7,xmm1
676	movdqa	[ebx-80],xmm1
677	pshufb	xmm7,[16+eax]
678	paddd	xmm5,xmm7
679	movdqa	[96+ebx],xmm7
680	pxor	xmm3,xmm5
681	movdqa	xmm1,xmm3
682	pslld	xmm3,7
683	psrld	xmm1,25
684	por	xmm3,xmm1
685	dec	edx
686	jnz	NEAR L$010loop
; Rounds finished: flush the rows still held in registers to the working
; state.
687	movdqa	[ebx-64],xmm3
688	movdqa	[ebx],xmm4
689	movdqa	[16+ebx],xmm5
690	movdqa	[64+ebx],xmm6
691	movdqa	[96+ebx],xmm7
; The state is emitted in four groups of four rows. Each group:
;   1. adds the base state (ebp) to the working rows,
;   2. transposes the 4x4 dword matrix with the punpck* sequence so that each
;      register holds 16 consecutive keystream bytes of one block,
;   3. xors with the input at displacements -128, -64, 0 and +64 (blocks
;      0..3, given the +128 pointer bias) and stores to the output.
; Group 1: state words 0..3.
692	movdqa	xmm1,[ebx-112]
693	movdqa	xmm2,[ebx-96]
694	movdqa	xmm3,[ebx-80]
695	paddd	xmm0,[ebp-128]
696	paddd	xmm1,[ebp-112]
697	paddd	xmm2,[ebp-96]
698	paddd	xmm3,[ebp-80]
699	movdqa	xmm6,xmm0
700	punpckldq	xmm0,xmm1
701	movdqa	xmm7,xmm2
702	punpckldq	xmm2,xmm3
703	punpckhdq	xmm6,xmm1
704	punpckhdq	xmm7,xmm3
705	movdqa	xmm1,xmm0
706	punpcklqdq	xmm0,xmm2
707	movdqa	xmm3,xmm6
708	punpcklqdq	xmm6,xmm7
709	punpckhqdq	xmm1,xmm2
710	punpckhqdq	xmm3,xmm7
711	movdqu	xmm4,[esi-128]
712	movdqu	xmm5,[esi-64]
713	movdqu	xmm2,[esi]
714	movdqu	xmm7,[64+esi]
715	lea	esi,[16+esi]
716	pxor	xmm4,xmm0
717	movdqa	xmm0,[ebx-64]
718	pxor	xmm5,xmm1
719	movdqa	xmm1,[ebx-48]
720	pxor	xmm6,xmm2
721	movdqa	xmm2,[ebx-32]
722	pxor	xmm7,xmm3
723	movdqa	xmm3,[ebx-16]
724	movdqu	[edi-128],xmm4
725	movdqu	[edi-64],xmm5
726	movdqu	[edi],xmm6
727	movdqu	[64+edi],xmm7
728	lea	edi,[16+edi]
; Group 2: state words 4..7.
729	paddd	xmm0,[ebp-64]
730	paddd	xmm1,[ebp-48]
731	paddd	xmm2,[ebp-32]
732	paddd	xmm3,[ebp-16]
733	movdqa	xmm6,xmm0
734	punpckldq	xmm0,xmm1
735	movdqa	xmm7,xmm2
736	punpckldq	xmm2,xmm3
737	punpckhdq	xmm6,xmm1
738	punpckhdq	xmm7,xmm3
739	movdqa	xmm1,xmm0
740	punpcklqdq	xmm0,xmm2
741	movdqa	xmm3,xmm6
742	punpcklqdq	xmm6,xmm7
743	punpckhqdq	xmm1,xmm2
744	punpckhqdq	xmm3,xmm7
745	movdqu	xmm4,[esi-128]
746	movdqu	xmm5,[esi-64]
747	movdqu	xmm2,[esi]
748	movdqu	xmm7,[64+esi]
749	lea	esi,[16+esi]
750	pxor	xmm4,xmm0
751	movdqa	xmm0,[ebx]
752	pxor	xmm5,xmm1
753	movdqa	xmm1,[16+ebx]
754	pxor	xmm6,xmm2
755	movdqa	xmm2,[32+ebx]
756	pxor	xmm7,xmm3
757	movdqa	xmm3,[48+ebx]
758	movdqu	[edi-128],xmm4
759	movdqu	[edi-64],xmm5
760	movdqu	[edi],xmm6
761	movdqu	[64+edi],xmm7
762	lea	edi,[16+edi]
; Group 3: state words 8..11.
763	paddd	xmm0,[ebp]
764	paddd	xmm1,[16+ebp]
765	paddd	xmm2,[32+ebp]
766	paddd	xmm3,[48+ebp]
767	movdqa	xmm6,xmm0
768	punpckldq	xmm0,xmm1
769	movdqa	xmm7,xmm2
770	punpckldq	xmm2,xmm3
771	punpckhdq	xmm6,xmm1
772	punpckhdq	xmm7,xmm3
773	movdqa	xmm1,xmm0
774	punpcklqdq	xmm0,xmm2
775	movdqa	xmm3,xmm6
776	punpcklqdq	xmm6,xmm7
777	punpckhqdq	xmm1,xmm2
778	punpckhqdq	xmm3,xmm7
779	movdqu	xmm4,[esi-128]
780	movdqu	xmm5,[esi-64]
781	movdqu	xmm2,[esi]
782	movdqu	xmm7,[64+esi]
783	lea	esi,[16+esi]
784	pxor	xmm4,xmm0
785	movdqa	xmm0,[64+ebx]
786	pxor	xmm5,xmm1
787	movdqa	xmm1,[80+ebx]
788	pxor	xmm6,xmm2
789	movdqa	xmm2,[96+ebx]
790	pxor	xmm7,xmm3
791	movdqa	xmm3,[112+ebx]
792	movdqu	[edi-128],xmm4
793	movdqu	[edi-64],xmm5
794	movdqu	[edi],xmm6
795	movdqu	[64+edi],xmm7
796	lea	edi,[16+edi]
; Group 4: state words 12..15.
797	paddd	xmm0,[64+ebp]
798	paddd	xmm1,[80+ebp]
799	paddd	xmm2,[96+ebp]
800	paddd	xmm3,[112+ebp]
801	movdqa	xmm6,xmm0
802	punpckldq	xmm0,xmm1
803	movdqa	xmm7,xmm2
804	punpckldq	xmm2,xmm3
805	punpckhdq	xmm6,xmm1
806	punpckhdq	xmm7,xmm3
807	movdqa	xmm1,xmm0
808	punpcklqdq	xmm0,xmm2
809	movdqa	xmm3,xmm6
810	punpcklqdq	xmm6,xmm7
811	punpckhqdq	xmm1,xmm2
812	punpckhqdq	xmm3,xmm7
813	movdqu	xmm4,[esi-128]
814	movdqu	xmm5,[esi-64]
815	movdqu	xmm2,[esi]
816	movdqu	xmm7,[64+esi]
; 16+16+16+208 = 256: the pointer now addresses the next four-block batch.
817	lea	esi,[208+esi]
818	pxor	xmm4,xmm0
819	pxor	xmm5,xmm1
820	pxor	xmm6,xmm2
821	pxor	xmm7,xmm3
822	movdqu	[edi-128],xmm4
823	movdqu	[edi-64],xmm5
824	movdqu	[edi],xmm6
825	movdqu	[64+edi],xmm7
826	lea	edi,[208+edi]
; ecx was pre-biased by -256, so "no borrow" means at least 256 bytes remain.
827	sub	ecx,256
828	jnc	NEAR L$009outer_loop
; Four-block loop exit: undo the bias so ecx is the number of bytes still to
; process (0..255).
829	add	ecx,256
830	jz	NEAR L$011done
; Leftover bytes go through the one-block path. Reload the counter-block (ebx)
; and key (edx) pointers saved in the frame and remove the +128 bias from the
; data pointers.
831	mov	ebx,DWORD [520+esp]
832	lea	esi,[esi-128]
833	mov	edx,DWORD [516+esp]
834	lea	edi,[edi-128]
; Rebuild the counter block: dword 0 = lane 0 of the last batch's counters
; plus 4 ([96+eax] is {4,0,0,0}); dwords 1..3 come from the caller's block,
; whose dword 0 is cleared by the mask {0,-1,-1,-1} at [112+eax].
835	movd	xmm2,DWORD [64+ebp]
836	movdqu	xmm3,[ebx]
837	paddd	xmm2,[96+eax]
838	pand	xmm3,[112+eax]
839	por	xmm3,xmm2
; One-block path. Expects edx = key pointer, xmm3 = counter block,
; eax = L$ssse3_data, esi/edi = input/output, ecx = byte count.
; Register roles: xmm0..xmm3 = the four 16-byte state rows, xmm6/xmm7 = the
; byte-shuffle masks from [eax] and [16+eax] used for the 16- and 8-bit
; rotates.
840L$0081x:
841	movdqa	xmm0,[32+eax]
842	movdqu	xmm1,[edx]
843	movdqu	xmm2,[16+edx]
844	movdqa	xmm6,[eax]
845	movdqa	xmm7,[16+eax]
; NOTE(review): this dword store is overwritten by the movdqa to [48+esp] a
; few lines below, so it has no lasting effect.
846	mov	DWORD [48+esp],ebp
; Keep a copy of the initial rows at [0..63+esp] for the final add-back.
847	movdqa	[esp],xmm0
848	movdqa	[16+esp],xmm1
849	movdqa	[32+esp],xmm2
850	movdqa	[48+esp],xmm3
; edx = number of passes through L$012loop1x.
851	mov	edx,10
852	jmp	NEAR L$012loop1x
853align	16
; Next block on the one-block path: reload the saved rows and add {1,0,0,0}
; ([80+eax]) to the counter row, keeping the new counter row at [48+esp].
854L$013outer1x:
855	movdqa	xmm3,[80+eax]
856	movdqa	xmm0,[esp]
857	movdqa	xmm1,[16+esp]
858	movdqa	xmm2,[32+esp]
859	paddd	xmm3,[48+esp]
860	mov	edx,10
861	movdqa	[48+esp],xmm3
862	jmp	NEAR L$012loop1x
863align	16
; One-block round loop, executed 10 times. Each pass runs the add/xor/rotate
; sequence twice; the pshufd instructions after the first half rotate the
; dwords within rows 1..3, and the ones after the second half rotate them
; back.
; NOTE(review): presumably this realigns the rows so the second half works on
; the diagonals -- confirm.
; The `db 102,15,56,0,222` and `db 102,15,56,0,223` byte sequences are the
; hand-encoded forms of `pshufb xmm3,xmm6` (rotate by 16) and
; `pshufb xmm3,xmm7` (rotate by 8).
864L$012loop1x:
865	paddd	xmm0,xmm1
866	pxor	xmm3,xmm0
867db	102,15,56,0,222
868	paddd	xmm2,xmm3
869	pxor	xmm1,xmm2
870	movdqa	xmm4,xmm1
871	psrld	xmm1,20
872	pslld	xmm4,12
873	por	xmm1,xmm4
874	paddd	xmm0,xmm1
875	pxor	xmm3,xmm0
876db	102,15,56,0,223
877	paddd	xmm2,xmm3
878	pxor	xmm1,xmm2
879	movdqa	xmm4,xmm1
880	psrld	xmm1,25
881	pslld	xmm4,7
882	por	xmm1,xmm4
883	pshufd	xmm2,xmm2,78
884	pshufd	xmm1,xmm1,57
885	pshufd	xmm3,xmm3,147
886	nop
887	paddd	xmm0,xmm1
888	pxor	xmm3,xmm0
889db	102,15,56,0,222
890	paddd	xmm2,xmm3
891	pxor	xmm1,xmm2
892	movdqa	xmm4,xmm1
893	psrld	xmm1,20
894	pslld	xmm4,12
895	por	xmm1,xmm4
896	paddd	xmm0,xmm1
897	pxor	xmm3,xmm0
898db	102,15,56,0,223
899	paddd	xmm2,xmm3
900	pxor	xmm1,xmm2
901	movdqa	xmm4,xmm1
902	psrld	xmm1,25
903	pslld	xmm4,7
904	por	xmm1,xmm4
905	pshufd	xmm2,xmm2,78
906	pshufd	xmm1,xmm1,147
907	pshufd	xmm3,xmm3,57
908	dec	edx
909	jnz	NEAR L$012loop1x
; Add the initial rows back to obtain the 64-byte keystream block.
910	paddd	xmm0,[esp]
911	paddd	xmm1,[16+esp]
912	paddd	xmm2,[32+esp]
913	paddd	xmm3,[48+esp]
; Fewer than 64 bytes left: finish through the byte-wise tail.
914	cmp	ecx,64
915	jb	NEAR L$014tail
; Full block: xor 64 input bytes with the keystream and store.
916	movdqu	xmm4,[esi]
917	movdqu	xmm5,[16+esi]
918	pxor	xmm0,xmm4
919	movdqu	xmm4,[32+esi]
920	pxor	xmm1,xmm5
921	movdqu	xmm5,[48+esi]
922	pxor	xmm2,xmm4
923	pxor	xmm3,xmm5
924	lea	esi,[64+esi]
925	movdqu	[edi],xmm0
926	movdqu	[16+edi],xmm1
927	movdqu	[32+edi],xmm2
928	movdqu	[48+edi],xmm3
929	lea	edi,[64+edi]
; Consume 64 bytes and loop while any remain.
930	sub	ecx,64
931	jnz	NEAR L$013outer1x
932	jmp	NEAR L$011done
; Partial final block (ecx = remaining bytes, between 1 and 63): spill the
; keystream block to [0..63+esp] and xor it in one byte at a time.
933L$014tail:
934	movdqa	[esp],xmm0
935	movdqa	[16+esp],xmm1
936	movdqa	[32+esp],xmm2
937	movdqa	[48+esp],xmm3
; ebp = byte index, starting at 0.
938	xor	eax,eax
939	xor	edx,edx
940	xor	ebp,ebp
; out[i] = keystream[i] xor in[i]. ebp is incremented before the store, hence
; the -1 in the store address.
941L$015tail_loop:
942	mov	al,BYTE [ebp*1+esp]
943	mov	dl,BYTE [ebp*1+esi]
944	lea	ebp,[1+ebp]
945	xor	al,dl
946	mov	BYTE [ebp*1+edi-1],al
947	dec	ecx
948	jnz	NEAR L$015tail_loop
; Common SSSE3 exit: restore the caller's esp saved at [512+esp], then the
; four saved registers.
949L$011done:
950	mov	esp,DWORD [512+esp]
951	pop	edi
952	pop	esi
953	pop	ebx
954	pop	ebp
955	ret
956align	64
; Constant table used by the SSSE3 code through eax. Byte offsets:
;     0  pshufb mask that rotates each dword left by 16 bits
;    16  pshufb mask that rotates each dword left by 8 bits
;    32  the four constants (little-endian ASCII "expand 32-byte k")
;    48  {0,1,2,3}     per-lane counter offsets for the four-block path
;    64  {4,4,4,4}     per-batch counter increment for the four-block path
;    80  {1,0,0,0}     counter increment for the one-block path
;    96  {4,0,0,0}     counter adjustment when leaving the four-block path
;   112  {0,-1,-1,-1}  mask that clears dword 0 of the counter block
957L$ssse3_data:
958db	2,3,0,1,6,7,4,5,10,11,8,9,14,15,12,13
959db	3,0,1,2,7,4,5,6,11,8,9,10,15,12,13,14
960dd	1634760805,857760878,2036477234,1797285236
961dd	0,1,2,3
962dd	4,4,4,4
963dd	1,0,0,0
964dd	4,0,0,0
965dd	0,-1,-1,-1
966align	64
; NUL-terminated ASCII banner: "ChaCha20 for x86, CRYPTOGAMS by" followed by
; the author's e-mail address in angle brackets. No visible code refers to it.
967db	67,104,97,67,104,97,50,48,32,102,111,114,32,120,56,54
968db	44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32
969db	60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111
970db	114,103,62,0
; _OPENSSL_ia32cap_P: 16-byte common symbol. Its first two dwords are tested
; by the dispatch code at the top of _ChaCha20_ctr32.
971segment	.bss
972common	_OPENSSL_ia32cap_P 16
; For any output format other than win32 the file assembles to the single
; instruction below.
973%else
974; Work around https://bugzilla.nasm.us/show_bug.cgi?id=3392738
975ret
976%endif
977