1; This file is generated from a similarly-named Perl script in the BoringSSL
2; source tree. Do not edit by hand.
3
4%ifdef BORINGSSL_PREFIX
5%include "boringssl_prefix_symbols_nasm.inc"
6%endif
7%ifidn __OUTPUT_FORMAT__,obj
8section	code	use32 class=code align=64
9%elifidn __OUTPUT_FORMAT__,win32
10$@feat.00 equ 1
11section	.text	code align=64
12%else
13section	.text	code
14%endif
; ---------------------------------------------------------------------------
; _ChaCha20_ctr32 -- ChaCha20 keystream XOR for 32-bit x86 (scalar path plus
; dispatch to the SSSE3 path).
;
; Arguments are read from the stack.  After the four register pushes below:
;   [20+esp] arg0  destination pointer (written)
;   [24+esp] arg1  source pointer (read, XORed with the keystream)
;   [28+esp] arg2  byte count; the function returns immediately when it is 0
;   [32+esp] arg3  pointer to 8 dwords that become state words 4..11
;   [36+esp] arg4  pointer to 4 dwords that become state words 12..15; only
;                  the first of them is advanced, by 1 per 64-byte block,
;                  with no carry into the following word
; NOTE(review): presumably the C prototype is
;   void ChaCha20_ctr32(uint8_t *out, const uint8_t *in, size_t len,
;                       const uint32_t key[8], const uint32_t counter[4]);
; confirm against the C header, which is not part of this file.
;
; ebp, ebx, esi and edi are saved and restored.  eax and flags are always
; clobbered; ecx and edx are clobbered whenever the byte count is non-zero.
;
; Scalar stack frame (after "sub esp,132"):
;   [0..60+esp]     working copy of the 16 state words x0..x15
;   [80..124+esp]   input words 4..15 (arg3 data followed by arg4 data)
;   [128+esp]       round-loop counter
;   [152+esp], [156+esp], [160+esp]
;                   the caller's arg0/arg1/arg2 slots (20/24/28 + 132); they
;                   are overwritten with the advancing destination, source
;                   and remaining length
; ---------------------------------------------------------------------------
15global	_ChaCha20_ctr32
16align	16
17_ChaCha20_ctr32:
18L$_ChaCha20_ctr32_begin:
19	push	ebp
20	push	ebx
21	push	esi
22	push	edi
; Nothing to do for a zero byte count.
23	xor	eax,eax
24	cmp	eax,DWORD [28+esp]
25	je	NEAR L$000no_data
; call/pop idiom: eax = run-time address of L$pic_point.
26	call	L$pic_point
27L$pic_point:
28	pop	eax
; Take the SSSE3 path only when bit 24 of dword 0 and bit 9 of dword 1 of
; _OPENSSL_ia32cap_P are both set.
; NOTE(review): presumably CPUID.1:EDX.FXSR and CPUID.1:ECX.SSSE3 -- the layout
; of that table is defined outside this file; confirm.
29	lea	ebp,[_OPENSSL_ia32cap_P]
30	test	DWORD [ebp],16777216
31	jz	NEAR L$001x86
32	test	DWORD [4+ebp],512
33	jz	NEAR L$001x86
; Enters the SSSE3 routine after its own register pushes; the stack layout
; at this point is identical to the one it would have built itself.
34	jmp	NEAR L$ssse3_shortcut
35L$001x86:
; Scalar path: copy the 8 dwords at arg3 and the 4 dwords at arg4 into the
; frame at [80..124+esp].
36	mov	esi,DWORD [32+esp]
37	mov	edi,DWORD [36+esp]
38	sub	esp,132
39	mov	eax,DWORD [esi]
40	mov	ebx,DWORD [4+esi]
41	mov	ecx,DWORD [8+esi]
42	mov	edx,DWORD [12+esi]
43	mov	DWORD [80+esp],eax
44	mov	DWORD [84+esp],ebx
45	mov	DWORD [88+esp],ecx
46	mov	DWORD [92+esp],edx
47	mov	eax,DWORD [16+esi]
48	mov	ebx,DWORD [20+esi]
49	mov	ecx,DWORD [24+esi]
50	mov	edx,DWORD [28+esi]
51	mov	DWORD [96+esp],eax
52	mov	DWORD [100+esp],ebx
53	mov	DWORD [104+esp],ecx
54	mov	DWORD [108+esp],edx
55	mov	eax,DWORD [edi]
56	mov	ebx,DWORD [4+edi]
57	mov	ecx,DWORD [8+edi]
58	mov	edx,DWORD [12+edi]
; The block counter is stored pre-decremented because L$002entry adds 1
; before every block, including the first one.
59	sub	eax,1
60	mov	DWORD [112+esp],eax
61	mov	DWORD [116+esp],ebx
62	mov	DWORD [120+esp],ecx
63	mov	DWORD [124+esp],edx
64	jmp	NEAR L$002entry
65align	16
66L$003outer_loop:
; Reached after each full 64-byte block: ebx = source, eax = destination,
; ecx = bytes remaining (non-zero).  Spill them to the argument slots.
67	mov	DWORD [156+esp],ebx
68	mov	DWORD [152+esp],eax
69	mov	DWORD [160+esp],ecx
70L$002entry:
; Build the working state for one block.  Words 0..3 are the constants
; 0x61707865 0x3320646e 0x79622d32 0x6b206574 (ASCII "expand 32-byte k").
; x0, x4, x8, x9 and x12 live in eax, ebp, ecx, esi and edx; the remaining
; words are placed in [0..60+esp].
71	mov	eax,1634760805
72	mov	DWORD [4+esp],857760878
73	mov	DWORD [8+esp],2036477234
74	mov	DWORD [12+esp],1797285236
75	mov	ebx,DWORD [84+esp]
76	mov	ebp,DWORD [88+esp]
77	mov	ecx,DWORD [104+esp]
78	mov	esi,DWORD [108+esp]
79	mov	edx,DWORD [116+esp]
80	mov	edi,DWORD [120+esp]
81	mov	DWORD [20+esp],ebx
82	mov	DWORD [24+esp],ebp
83	mov	DWORD [40+esp],ecx
84	mov	DWORD [44+esp],esi
85	mov	DWORD [52+esp],edx
86	mov	DWORD [56+esp],edi
87	mov	ebx,DWORD [92+esp]
88	mov	edi,DWORD [124+esp]
89	mov	edx,DWORD [112+esp]
90	mov	ebp,DWORD [80+esp]
91	mov	ecx,DWORD [96+esp]
92	mov	esi,DWORD [100+esp]
; Advance the 32-bit block counter (word 12) and write it back.
93	add	edx,1
94	mov	DWORD [28+esp],ebx
95	mov	DWORD [60+esp],edi
96	mov	DWORD [112+esp],edx
; 10 iterations, each containing eight quarter-rounds (rotations by
; 16, 12, 8 and 7).
97	mov	ebx,10
98	jmp	NEAR L$004loop
99align	16
100L$004loop:
; Loop invariant at this label: eax = x0, ebp = x4, ecx = x8, esi = x9,
; edx = x12, ebx = iteration counter (spilled to [128+esp] right away because
; ebx is needed as a scratch register).  The first four quarter-rounds work
; on (x0,x4,x8,x12) (x1,x5,x9,x13) (x2,x6,x10,x14) (x3,x7,x11,x15), the next
; four on (x0,x5,x10,x15) (x1,x6,x11,x12) (x2,x7,x8,x13) (x3,x4,x9,x14).
; Loads and stores of neighbouring quarter-rounds are interleaved, so the
; instruction order must not be changed.
101	add	eax,ebp
102	mov	DWORD [128+esp],ebx
103	mov	ebx,ebp
104	xor	edx,eax
105	rol	edx,16
106	add	ecx,edx
107	xor	ebx,ecx
108	mov	edi,DWORD [52+esp]
109	rol	ebx,12
110	mov	ebp,DWORD [20+esp]
111	add	eax,ebx
112	xor	edx,eax
113	mov	DWORD [esp],eax
114	rol	edx,8
115	mov	eax,DWORD [4+esp]
116	add	ecx,edx
117	mov	DWORD [48+esp],edx
118	xor	ebx,ecx
119	add	eax,ebp
120	rol	ebx,7
121	xor	edi,eax
122	mov	DWORD [32+esp],ecx
123	rol	edi,16
124	mov	DWORD [16+esp],ebx
125	add	esi,edi
126	mov	ecx,DWORD [40+esp]
127	xor	ebp,esi
128	mov	edx,DWORD [56+esp]
129	rol	ebp,12
130	mov	ebx,DWORD [24+esp]
131	add	eax,ebp
132	xor	edi,eax
133	mov	DWORD [4+esp],eax
134	rol	edi,8
135	mov	eax,DWORD [8+esp]
136	add	esi,edi
137	mov	DWORD [52+esp],edi
138	xor	ebp,esi
139	add	eax,ebx
140	rol	ebp,7
141	xor	edx,eax
142	mov	DWORD [36+esp],esi
143	rol	edx,16
144	mov	DWORD [20+esp],ebp
145	add	ecx,edx
146	mov	esi,DWORD [44+esp]
147	xor	ebx,ecx
148	mov	edi,DWORD [60+esp]
149	rol	ebx,12
150	mov	ebp,DWORD [28+esp]
151	add	eax,ebx
152	xor	edx,eax
153	mov	DWORD [8+esp],eax
154	rol	edx,8
155	mov	eax,DWORD [12+esp]
156	add	ecx,edx
157	mov	DWORD [56+esp],edx
158	xor	ebx,ecx
159	add	eax,ebp
160	rol	ebx,7
161	xor	edi,eax
162	rol	edi,16
163	mov	DWORD [24+esp],ebx
164	add	esi,edi
165	xor	ebp,esi
166	rol	ebp,12
167	mov	ebx,DWORD [20+esp]
168	add	eax,ebp
169	xor	edi,eax
170	mov	DWORD [12+esp],eax
171	rol	edi,8
172	mov	eax,DWORD [esp]
173	add	esi,edi
174	mov	edx,edi
175	xor	ebp,esi
176	add	eax,ebx
177	rol	ebp,7
178	xor	edx,eax
179	rol	edx,16
180	mov	DWORD [28+esp],ebp
181	add	ecx,edx
182	xor	ebx,ecx
183	mov	edi,DWORD [48+esp]
184	rol	ebx,12
185	mov	ebp,DWORD [24+esp]
186	add	eax,ebx
187	xor	edx,eax
188	mov	DWORD [esp],eax
189	rol	edx,8
190	mov	eax,DWORD [4+esp]
191	add	ecx,edx
192	mov	DWORD [60+esp],edx
193	xor	ebx,ecx
194	add	eax,ebp
195	rol	ebx,7
196	xor	edi,eax
197	mov	DWORD [40+esp],ecx
198	rol	edi,16
199	mov	DWORD [20+esp],ebx
200	add	esi,edi
201	mov	ecx,DWORD [32+esp]
202	xor	ebp,esi
203	mov	edx,DWORD [52+esp]
204	rol	ebp,12
205	mov	ebx,DWORD [28+esp]
206	add	eax,ebp
207	xor	edi,eax
208	mov	DWORD [4+esp],eax
209	rol	edi,8
210	mov	eax,DWORD [8+esp]
211	add	esi,edi
212	mov	DWORD [48+esp],edi
213	xor	ebp,esi
214	add	eax,ebx
215	rol	ebp,7
216	xor	edx,eax
217	mov	DWORD [44+esp],esi
218	rol	edx,16
219	mov	DWORD [24+esp],ebp
220	add	ecx,edx
221	mov	esi,DWORD [36+esp]
222	xor	ebx,ecx
223	mov	edi,DWORD [56+esp]
224	rol	ebx,12
225	mov	ebp,DWORD [16+esp]
226	add	eax,ebx
227	xor	edx,eax
228	mov	DWORD [8+esp],eax
229	rol	edx,8
230	mov	eax,DWORD [12+esp]
231	add	ecx,edx
232	mov	DWORD [52+esp],edx
233	xor	ebx,ecx
234	add	eax,ebp
235	rol	ebx,7
236	xor	edi,eax
237	rol	edi,16
238	mov	DWORD [28+esp],ebx
239	add	esi,edi
240	xor	ebp,esi
241	mov	edx,DWORD [48+esp]
242	rol	ebp,12
243	mov	ebx,DWORD [128+esp]
244	add	eax,ebp
245	xor	edi,eax
246	mov	DWORD [12+esp],eax
247	rol	edi,8
248	mov	eax,DWORD [esp]
249	add	esi,edi
250	mov	DWORD [56+esp],edi
251	xor	ebp,esi
252	rol	ebp,7
253	dec	ebx
254	jnz	NEAR L$004loop
; Rounds finished.  Add the input block back in (feed-forward), starting
; with the words still held in registers: x0, x4, x8, x9 here, x12 and x14
; (edx, edi) on whichever branch is taken.  ebx = bytes remaining.
255	mov	ebx,DWORD [160+esp]
256	add	eax,1634760805
257	add	ebp,DWORD [80+esp]
258	add	ecx,DWORD [96+esp]
259	add	esi,DWORD [100+esp]
260	cmp	ebx,64
261	jb	NEAR L$005tail
; Full block: XOR 64 source bytes (ebx) with the keystream and store them to
; the destination (eax), five words at a time.  Word 0 is parked in [esp]
; while eax is used as the destination pointer.
262	mov	ebx,DWORD [156+esp]
263	add	edx,DWORD [112+esp]
264	add	edi,DWORD [120+esp]
265	xor	eax,DWORD [ebx]
266	xor	ebp,DWORD [16+ebx]
267	mov	DWORD [esp],eax
268	mov	eax,DWORD [152+esp]
269	xor	ecx,DWORD [32+ebx]
270	xor	esi,DWORD [36+ebx]
271	xor	edx,DWORD [48+ebx]
272	xor	edi,DWORD [56+ebx]
273	mov	DWORD [16+eax],ebp
274	mov	DWORD [32+eax],ecx
275	mov	DWORD [36+eax],esi
276	mov	DWORD [48+eax],edx
277	mov	DWORD [56+eax],edi
; Words 1, 2, 3, 5, 6.
278	mov	ebp,DWORD [4+esp]
279	mov	ecx,DWORD [8+esp]
280	mov	esi,DWORD [12+esp]
281	mov	edx,DWORD [20+esp]
282	mov	edi,DWORD [24+esp]
283	add	ebp,857760878
284	add	ecx,2036477234
285	add	esi,1797285236
286	add	edx,DWORD [84+esp]
287	add	edi,DWORD [88+esp]
288	xor	ebp,DWORD [4+ebx]
289	xor	ecx,DWORD [8+ebx]
290	xor	esi,DWORD [12+ebx]
291	xor	edx,DWORD [20+ebx]
292	xor	edi,DWORD [24+ebx]
293	mov	DWORD [4+eax],ebp
294	mov	DWORD [8+eax],ecx
295	mov	DWORD [12+eax],esi
296	mov	DWORD [20+eax],edx
297	mov	DWORD [24+eax],edi
; Words 7, 10, 11, 13, 15, then the parked word 0.
298	mov	ebp,DWORD [28+esp]
299	mov	ecx,DWORD [40+esp]
300	mov	esi,DWORD [44+esp]
301	mov	edx,DWORD [52+esp]
302	mov	edi,DWORD [60+esp]
303	add	ebp,DWORD [92+esp]
304	add	ecx,DWORD [104+esp]
305	add	esi,DWORD [108+esp]
306	add	edx,DWORD [116+esp]
307	add	edi,DWORD [124+esp]
308	xor	ebp,DWORD [28+ebx]
309	xor	ecx,DWORD [40+ebx]
310	xor	esi,DWORD [44+ebx]
311	xor	edx,DWORD [52+ebx]
312	xor	edi,DWORD [60+ebx]
313	lea	ebx,[64+ebx]
314	mov	DWORD [28+eax],ebp
315	mov	ebp,DWORD [esp]
316	mov	DWORD [40+eax],ecx
317	mov	ecx,DWORD [160+esp]
318	mov	DWORD [44+eax],esi
319	mov	DWORD [52+eax],edx
320	mov	DWORD [60+eax],edi
321	mov	DWORD [eax],ebp
; Advance the destination, subtract 64 from the remaining length and loop
; while bytes remain.
322	lea	eax,[64+eax]
323	sub	ecx,64
324	jnz	NEAR L$003outer_loop
325	jmp	NEAR L$006done
326L$005tail:
; Fewer than 64 bytes remain (ebx = count, non-zero).  Finish the
; feed-forward into [0..60+esp] so the frame holds one keystream block.
327	add	edx,DWORD [112+esp]
328	add	edi,DWORD [120+esp]
329	mov	DWORD [esp],eax
330	mov	DWORD [16+esp],ebp
331	mov	DWORD [32+esp],ecx
332	mov	DWORD [36+esp],esi
333	mov	DWORD [48+esp],edx
334	mov	DWORD [56+esp],edi
335	mov	ebp,DWORD [4+esp]
336	mov	ecx,DWORD [8+esp]
337	mov	esi,DWORD [12+esp]
338	mov	edx,DWORD [20+esp]
339	mov	edi,DWORD [24+esp]
340	add	ebp,857760878
341	add	ecx,2036477234
342	add	esi,1797285236
343	add	edx,DWORD [84+esp]
344	add	edi,DWORD [88+esp]
345	mov	DWORD [4+esp],ebp
346	mov	DWORD [8+esp],ecx
347	mov	DWORD [12+esp],esi
348	mov	DWORD [20+esp],edx
349	mov	DWORD [24+esp],edi
350	mov	ebp,DWORD [28+esp]
351	mov	ecx,DWORD [40+esp]
352	mov	esi,DWORD [44+esp]
353	mov	edx,DWORD [52+esp]
354	mov	edi,DWORD [60+esp]
355	add	ebp,DWORD [92+esp]
356	add	ecx,DWORD [104+esp]
357	add	esi,DWORD [108+esp]
358	add	edx,DWORD [116+esp]
359	add	edi,DWORD [124+esp]
360	mov	DWORD [28+esp],ebp
361	mov	ebp,DWORD [156+esp]
362	mov	DWORD [40+esp],ecx
363	mov	ecx,DWORD [152+esp]
364	mov	DWORD [44+esp],esi
365	xor	esi,esi
366	mov	DWORD [52+esp],edx
367	mov	DWORD [60+esp],edi
368	xor	eax,eax
369	xor	edx,edx
370L$007tail_loop:
; Byte-wise XOR: ebp = source, ecx = destination, esi = index, ebx = count.
371	mov	al,BYTE [ebp*1+esi]
372	mov	dl,BYTE [esi*1+esp]
373	lea	esi,[1+esi]
374	xor	al,dl
375	mov	BYTE [esi*1+ecx-1],al
376	dec	ebx
377	jnz	NEAR L$007tail_loop
378L$006done:
379	add	esp,132
380L$000no_data:
381	pop	edi
382	pop	esi
383	pop	ebx
384	pop	ebp
385	ret
; ---------------------------------------------------------------------------
; _ChaCha20_ssse3 -- SSE2/SSSE3 ChaCha20 keystream XOR for 32-bit x86.
;
; Reached either through its own global entry point or from _ChaCha20_ctr32
; via "jmp L$ssse3_shortcut" (after that function's four register pushes).
; Stack arguments as read at L$ssse3_shortcut:
;   [20+esp] arg0  destination pointer (written)
;   [24+esp] arg1  source pointer (read, XORed with the keystream)
;   [28+esp] arg2  byte count
;   [32+esp] arg3  pointer to 32 bytes loaded as state words 4..11
;   [36+esp] arg4  pointer to 16 bytes loaded as state words 12..15
; ebp, ebx, esi and edi are restored on exit; eax, ecx, edx, xmm0-xmm7 and
; flags are clobbered.
;
; Changes relative to the generated code (NOTE: the file header says this file
; is generated from a Perl script, so the same changes have to be mirrored
; there or they will be lost on regeneration):
;   * eax, the base register for the L$ssse3_data table, is now derived from
;     a call/pop anchor inside this routine.  Previously the "lea" below
;     relied on eax already holding the address of L$pic_point, which is only
;     true on the shortcut path; a direct call used an undefined eax.
;   * A zero byte count now returns immediately.  Previously it fell into the
;     1x tail loop, whose "dec ecx / jnz" would wrap around from 0.
;
; Frame (esp is rounded down to a multiple of 64):
;   [0..63+esp]     1x path: input block, later the keystream for the tail
;   [0..255+esp]    4x path: working state, addressed as [ebx-128..ebx+127]
;   [256..511+esp]  4x path: broadcast input state, [ebp-128..ebp+127]
;   [512+esp]       caller's esp
;   [516+esp]       arg3 pointer, [520+esp] arg4 pointer (4x path only)
; ---------------------------------------------------------------------------
386global	_ChaCha20_ssse3
387align	16
388_ChaCha20_ssse3:
389L$_ChaCha20_ssse3_begin:
390	push	ebp
391	push	ebx
392	push	esi
393	push	edi
394L$ssse3_shortcut:
; call/pop idiom: eax = run-time address of L$ssse3_pic_point.  Done here so
; that both entry paths get a valid table base.
	call	L$ssse3_pic_point
L$ssse3_pic_point:
	pop	eax
395	mov	edi,DWORD [20+esp]
396	mov	esi,DWORD [24+esp]
397	mov	ecx,DWORD [28+esp]
398	mov	edx,DWORD [32+esp]
399	mov	ebx,DWORD [36+esp]
; Nothing to do for a zero byte count; esp still points at the pushed
; registers, so jump straight to the pops.
	test	ecx,ecx
	jz	NEAR L$ssse3_no_data
400	mov	ebp,esp
401	sub	esp,524
402	and	esp,-64
403	mov	DWORD [512+esp],ebp
; eax = address of L$ssse3_data.
404	lea	eax,[(L$ssse3_data-L$ssse3_pic_point)+eax]
; xmm3 = state words 12..15 (the 16 bytes at arg4).
405	movdqu	xmm3,[ebx]
; Fewer than 256 bytes: go straight to the one-block-at-a-time path.
406	cmp	ecx,256
407	jb	NEAR L$0081x
; ---- 4x path: four 64-byte blocks per iteration, one block per dword lane --
408	mov	DWORD [516+esp],edx
409	mov	DWORD [520+esp],ebx
; ecx is kept biased by -256 so that "sub ecx,256 / jnc" can drive the loop.
410	sub	ecx,256
411	lea	ebp,[384+esp]
412	movdqu	xmm7,[edx]
; Broadcast each of words 12..15 across a register.  Word 12 gets the lane
; offsets 0,1,2,3 added and is pre-decremented by 4 because the outer loop
; adds 4 before every iteration, including the first.
413	pshufd	xmm0,xmm3,0
414	pshufd	xmm1,xmm3,85
415	pshufd	xmm2,xmm3,170
416	pshufd	xmm3,xmm3,255
417	paddd	xmm0,[48+eax]
418	pshufd	xmm4,xmm7,0
419	pshufd	xmm5,xmm7,85
420	psubd	xmm0,[64+eax]
421	pshufd	xmm6,xmm7,170
422	pshufd	xmm7,xmm7,255
423	movdqa	[64+ebp],xmm0
424	movdqa	[80+ebp],xmm1
425	movdqa	[96+ebp],xmm2
426	movdqa	[112+ebp],xmm3
427	movdqu	xmm3,[16+edx]
; Words 4..7.
428	movdqa	[ebp-64],xmm4
429	movdqa	[ebp-48],xmm5
430	movdqa	[ebp-32],xmm6
431	movdqa	[ebp-16],xmm7
432	movdqa	xmm7,[32+eax]
433	lea	ebx,[128+esp]
434	pshufd	xmm0,xmm3,0
435	pshufd	xmm1,xmm3,85
436	pshufd	xmm2,xmm3,170
437	pshufd	xmm3,xmm3,255
438	pshufd	xmm4,xmm7,0
439	pshufd	xmm5,xmm7,85
440	pshufd	xmm6,xmm7,170
441	pshufd	xmm7,xmm7,255
; Words 8..11, then the four constants as words 0..3.
442	movdqa	[ebp],xmm0
443	movdqa	[16+ebp],xmm1
444	movdqa	[32+ebp],xmm2
445	movdqa	[48+ebp],xmm3
446	movdqa	[ebp-128],xmm4
447	movdqa	[ebp-112],xmm5
448	movdqa	[ebp-96],xmm6
449	movdqa	[ebp-80],xmm7
; esi/edi are biased by +128 so the four blocks are reachable with the
; displacements -128, -64, 0 and +64.
450	lea	esi,[128+esi]
451	lea	edi,[128+edi]
452	jmp	NEAR L$009outer_loop
453align	16
454L$009outer_loop:
; Copy the input state (ebp area) into the working state (ebx area), adding
; 4 to every lane of the block counter (word 12) and writing it back.  Words
; 0, 4, 8, 9 and 12 stay in xmm0, xmm3, xmm4, xmm5 and xmm6.
455	movdqa	xmm1,[ebp-112]
456	movdqa	xmm2,[ebp-96]
457	movdqa	xmm3,[ebp-80]
458	movdqa	xmm5,[ebp-48]
459	movdqa	xmm6,[ebp-32]
460	movdqa	xmm7,[ebp-16]
461	movdqa	[ebx-112],xmm1
462	movdqa	[ebx-96],xmm2
463	movdqa	[ebx-80],xmm3
464	movdqa	[ebx-48],xmm5
465	movdqa	[ebx-32],xmm6
466	movdqa	[ebx-16],xmm7
467	movdqa	xmm2,[32+ebp]
468	movdqa	xmm3,[48+ebp]
469	movdqa	xmm4,[64+ebp]
470	movdqa	xmm5,[80+ebp]
471	movdqa	xmm6,[96+ebp]
472	movdqa	xmm7,[112+ebp]
473	paddd	xmm4,[64+eax]
474	movdqa	[32+ebx],xmm2
475	movdqa	[48+ebx],xmm3
476	movdqa	[64+ebx],xmm4
477	movdqa	[80+ebx],xmm5
478	movdqa	[96+ebx],xmm6
479	movdqa	[112+ebx],xmm7
480	movdqa	[64+ebp],xmm4
481	movdqa	xmm0,[ebp-128]
482	movdqa	xmm6,xmm4
483	movdqa	xmm3,[ebp-64]
484	movdqa	xmm4,[ebp]
485	movdqa	xmm5,[16+ebp]
; 10 iterations of eight quarter-rounds each.
486	mov	edx,10
487	nop
488align	16
489L$010loop:
; Loop invariant at this label: xmm0 = x0, xmm3 = x4, xmm4 = x8, xmm5 = x9,
; xmm6 = x12; all other words are in the working state at [ebx-128+16*i].
; Rotations by 16 and 8 use pshufb with the masks at [eax] and [16+eax];
; rotations by 12 and 7 use a pslld/psrld/por triple.  Loads and stores of
; neighbouring quarter-rounds are interleaved; do not reorder.
490	paddd	xmm0,xmm3
491	movdqa	xmm2,xmm3
492	pxor	xmm6,xmm0
493	pshufb	xmm6,[eax]
494	paddd	xmm4,xmm6
495	pxor	xmm2,xmm4
496	movdqa	xmm3,[ebx-48]
497	movdqa	xmm1,xmm2
498	pslld	xmm2,12
499	psrld	xmm1,20
500	por	xmm2,xmm1
501	movdqa	xmm1,[ebx-112]
502	paddd	xmm0,xmm2
503	movdqa	xmm7,[80+ebx]
504	pxor	xmm6,xmm0
505	movdqa	[ebx-128],xmm0
506	pshufb	xmm6,[16+eax]
507	paddd	xmm4,xmm6
508	movdqa	[64+ebx],xmm6
509	pxor	xmm2,xmm4
510	paddd	xmm1,xmm3
511	movdqa	xmm0,xmm2
512	pslld	xmm2,7
513	psrld	xmm0,25
514	pxor	xmm7,xmm1
515	por	xmm2,xmm0
516	movdqa	[ebx],xmm4
517	pshufb	xmm7,[eax]
518	movdqa	[ebx-64],xmm2
519	paddd	xmm5,xmm7
520	movdqa	xmm4,[32+ebx]
521	pxor	xmm3,xmm5
522	movdqa	xmm2,[ebx-32]
523	movdqa	xmm0,xmm3
524	pslld	xmm3,12
525	psrld	xmm0,20
526	por	xmm3,xmm0
527	movdqa	xmm0,[ebx-96]
528	paddd	xmm1,xmm3
529	movdqa	xmm6,[96+ebx]
530	pxor	xmm7,xmm1
531	movdqa	[ebx-112],xmm1
532	pshufb	xmm7,[16+eax]
533	paddd	xmm5,xmm7
534	movdqa	[80+ebx],xmm7
535	pxor	xmm3,xmm5
536	paddd	xmm0,xmm2
537	movdqa	xmm1,xmm3
538	pslld	xmm3,7
539	psrld	xmm1,25
540	pxor	xmm6,xmm0
541	por	xmm3,xmm1
542	movdqa	[16+ebx],xmm5
543	pshufb	xmm6,[eax]
544	movdqa	[ebx-48],xmm3
545	paddd	xmm4,xmm6
546	movdqa	xmm5,[48+ebx]
547	pxor	xmm2,xmm4
548	movdqa	xmm3,[ebx-16]
549	movdqa	xmm1,xmm2
550	pslld	xmm2,12
551	psrld	xmm1,20
552	por	xmm2,xmm1
553	movdqa	xmm1,[ebx-80]
554	paddd	xmm0,xmm2
555	movdqa	xmm7,[112+ebx]
556	pxor	xmm6,xmm0
557	movdqa	[ebx-96],xmm0
558	pshufb	xmm6,[16+eax]
559	paddd	xmm4,xmm6
560	movdqa	[96+ebx],xmm6
561	pxor	xmm2,xmm4
562	paddd	xmm1,xmm3
563	movdqa	xmm0,xmm2
564	pslld	xmm2,7
565	psrld	xmm0,25
566	pxor	xmm7,xmm1
567	por	xmm2,xmm0
568	pshufb	xmm7,[eax]
569	movdqa	[ebx-32],xmm2
570	paddd	xmm5,xmm7
571	pxor	xmm3,xmm5
572	movdqa	xmm2,[ebx-48]
573	movdqa	xmm0,xmm3
574	pslld	xmm3,12
575	psrld	xmm0,20
576	por	xmm3,xmm0
577	movdqa	xmm0,[ebx-128]
578	paddd	xmm1,xmm3
579	pxor	xmm7,xmm1
580	movdqa	[ebx-80],xmm1
581	pshufb	xmm7,[16+eax]
582	paddd	xmm5,xmm7
583	movdqa	xmm6,xmm7
584	pxor	xmm3,xmm5
585	paddd	xmm0,xmm2
586	movdqa	xmm1,xmm3
587	pslld	xmm3,7
588	psrld	xmm1,25
589	pxor	xmm6,xmm0
590	por	xmm3,xmm1
591	pshufb	xmm6,[eax]
592	movdqa	[ebx-16],xmm3
593	paddd	xmm4,xmm6
594	pxor	xmm2,xmm4
595	movdqa	xmm3,[ebx-32]
596	movdqa	xmm1,xmm2
597	pslld	xmm2,12
598	psrld	xmm1,20
599	por	xmm2,xmm1
600	movdqa	xmm1,[ebx-112]
601	paddd	xmm0,xmm2
602	movdqa	xmm7,[64+ebx]
603	pxor	xmm6,xmm0
604	movdqa	[ebx-128],xmm0
605	pshufb	xmm6,[16+eax]
606	paddd	xmm4,xmm6
607	movdqa	[112+ebx],xmm6
608	pxor	xmm2,xmm4
609	paddd	xmm1,xmm3
610	movdqa	xmm0,xmm2
611	pslld	xmm2,7
612	psrld	xmm0,25
613	pxor	xmm7,xmm1
614	por	xmm2,xmm0
615	movdqa	[32+ebx],xmm4
616	pshufb	xmm7,[eax]
617	movdqa	[ebx-48],xmm2
618	paddd	xmm5,xmm7
619	movdqa	xmm4,[ebx]
620	pxor	xmm3,xmm5
621	movdqa	xmm2,[ebx-16]
622	movdqa	xmm0,xmm3
623	pslld	xmm3,12
624	psrld	xmm0,20
625	por	xmm3,xmm0
626	movdqa	xmm0,[ebx-96]
627	paddd	xmm1,xmm3
628	movdqa	xmm6,[80+ebx]
629	pxor	xmm7,xmm1
630	movdqa	[ebx-112],xmm1
631	pshufb	xmm7,[16+eax]
632	paddd	xmm5,xmm7
633	movdqa	[64+ebx],xmm7
634	pxor	xmm3,xmm5
635	paddd	xmm0,xmm2
636	movdqa	xmm1,xmm3
637	pslld	xmm3,7
638	psrld	xmm1,25
639	pxor	xmm6,xmm0
640	por	xmm3,xmm1
641	movdqa	[48+ebx],xmm5
642	pshufb	xmm6,[eax]
643	movdqa	[ebx-32],xmm3
644	paddd	xmm4,xmm6
645	movdqa	xmm5,[16+ebx]
646	pxor	xmm2,xmm4
647	movdqa	xmm3,[ebx-64]
648	movdqa	xmm1,xmm2
649	pslld	xmm2,12
650	psrld	xmm1,20
651	por	xmm2,xmm1
652	movdqa	xmm1,[ebx-80]
653	paddd	xmm0,xmm2
654	movdqa	xmm7,[96+ebx]
655	pxor	xmm6,xmm0
656	movdqa	[ebx-96],xmm0
657	pshufb	xmm6,[16+eax]
658	paddd	xmm4,xmm6
659	movdqa	[80+ebx],xmm6
660	pxor	xmm2,xmm4
661	paddd	xmm1,xmm3
662	movdqa	xmm0,xmm2
663	pslld	xmm2,7
664	psrld	xmm0,25
665	pxor	xmm7,xmm1
666	por	xmm2,xmm0
667	pshufb	xmm7,[eax]
668	movdqa	[ebx-16],xmm2
669	paddd	xmm5,xmm7
670	pxor	xmm3,xmm5
671	movdqa	xmm0,xmm3
672	pslld	xmm3,12
673	psrld	xmm0,20
674	por	xmm3,xmm0
675	movdqa	xmm0,[ebx-128]
676	paddd	xmm1,xmm3
677	movdqa	xmm6,[64+ebx]
678	pxor	xmm7,xmm1
679	movdqa	[ebx-80],xmm1
680	pshufb	xmm7,[16+eax]
681	paddd	xmm5,xmm7
682	movdqa	[96+ebx],xmm7
683	pxor	xmm3,xmm5
684	movdqa	xmm1,xmm3
685	pslld	xmm3,7
686	psrld	xmm1,25
687	por	xmm3,xmm1
688	dec	edx
689	jnz	NEAR L$010loop
; Rounds finished: flush the words still held in registers (x4, x8, x9, x12,
; x14) back to the working state.
690	movdqa	[ebx-64],xmm3
691	movdqa	[ebx],xmm4
692	movdqa	[16+ebx],xmm5
693	movdqa	[64+ebx],xmm6
694	movdqa	[96+ebx],xmm7
; Output stage, repeated four times for words 0..3, 4..7, 8..11 and 12..15:
; add the input state, transpose the 4x4 dword matrix so that each register
; holds 16 consecutive keystream bytes of one block, XOR with the source at
; block displacements -128/-64/0/+64 and store.  esi/edi advance by 16 after
; each of the first three groups.
695	movdqa	xmm1,[ebx-112]
696	movdqa	xmm2,[ebx-96]
697	movdqa	xmm3,[ebx-80]
698	paddd	xmm0,[ebp-128]
699	paddd	xmm1,[ebp-112]
700	paddd	xmm2,[ebp-96]
701	paddd	xmm3,[ebp-80]
702	movdqa	xmm6,xmm0
703	punpckldq	xmm0,xmm1
704	movdqa	xmm7,xmm2
705	punpckldq	xmm2,xmm3
706	punpckhdq	xmm6,xmm1
707	punpckhdq	xmm7,xmm3
708	movdqa	xmm1,xmm0
709	punpcklqdq	xmm0,xmm2
710	movdqa	xmm3,xmm6
711	punpcklqdq	xmm6,xmm7
712	punpckhqdq	xmm1,xmm2
713	punpckhqdq	xmm3,xmm7
714	movdqu	xmm4,[esi-128]
715	movdqu	xmm5,[esi-64]
716	movdqu	xmm2,[esi]
717	movdqu	xmm7,[64+esi]
718	lea	esi,[16+esi]
719	pxor	xmm4,xmm0
720	movdqa	xmm0,[ebx-64]
721	pxor	xmm5,xmm1
722	movdqa	xmm1,[ebx-48]
723	pxor	xmm6,xmm2
724	movdqa	xmm2,[ebx-32]
725	pxor	xmm7,xmm3
726	movdqa	xmm3,[ebx-16]
727	movdqu	[edi-128],xmm4
728	movdqu	[edi-64],xmm5
729	movdqu	[edi],xmm6
730	movdqu	[64+edi],xmm7
731	lea	edi,[16+edi]
; Words 4..7.
732	paddd	xmm0,[ebp-64]
733	paddd	xmm1,[ebp-48]
734	paddd	xmm2,[ebp-32]
735	paddd	xmm3,[ebp-16]
736	movdqa	xmm6,xmm0
737	punpckldq	xmm0,xmm1
738	movdqa	xmm7,xmm2
739	punpckldq	xmm2,xmm3
740	punpckhdq	xmm6,xmm1
741	punpckhdq	xmm7,xmm3
742	movdqa	xmm1,xmm0
743	punpcklqdq	xmm0,xmm2
744	movdqa	xmm3,xmm6
745	punpcklqdq	xmm6,xmm7
746	punpckhqdq	xmm1,xmm2
747	punpckhqdq	xmm3,xmm7
748	movdqu	xmm4,[esi-128]
749	movdqu	xmm5,[esi-64]
750	movdqu	xmm2,[esi]
751	movdqu	xmm7,[64+esi]
752	lea	esi,[16+esi]
753	pxor	xmm4,xmm0
754	movdqa	xmm0,[ebx]
755	pxor	xmm5,xmm1
756	movdqa	xmm1,[16+ebx]
757	pxor	xmm6,xmm2
758	movdqa	xmm2,[32+ebx]
759	pxor	xmm7,xmm3
760	movdqa	xmm3,[48+ebx]
761	movdqu	[edi-128],xmm4
762	movdqu	[edi-64],xmm5
763	movdqu	[edi],xmm6
764	movdqu	[64+edi],xmm7
765	lea	edi,[16+edi]
; Words 8..11.
766	paddd	xmm0,[ebp]
767	paddd	xmm1,[16+ebp]
768	paddd	xmm2,[32+ebp]
769	paddd	xmm3,[48+ebp]
770	movdqa	xmm6,xmm0
771	punpckldq	xmm0,xmm1
772	movdqa	xmm7,xmm2
773	punpckldq	xmm2,xmm3
774	punpckhdq	xmm6,xmm1
775	punpckhdq	xmm7,xmm3
776	movdqa	xmm1,xmm0
777	punpcklqdq	xmm0,xmm2
778	movdqa	xmm3,xmm6
779	punpcklqdq	xmm6,xmm7
780	punpckhqdq	xmm1,xmm2
781	punpckhqdq	xmm3,xmm7
782	movdqu	xmm4,[esi-128]
783	movdqu	xmm5,[esi-64]
784	movdqu	xmm2,[esi]
785	movdqu	xmm7,[64+esi]
786	lea	esi,[16+esi]
787	pxor	xmm4,xmm0
788	movdqa	xmm0,[64+ebx]
789	pxor	xmm5,xmm1
790	movdqa	xmm1,[80+ebx]
791	pxor	xmm6,xmm2
792	movdqa	xmm2,[96+ebx]
793	pxor	xmm7,xmm3
794	movdqa	xmm3,[112+ebx]
795	movdqu	[edi-128],xmm4
796	movdqu	[edi-64],xmm5
797	movdqu	[edi],xmm6
798	movdqu	[64+edi],xmm7
799	lea	edi,[16+edi]
; Words 12..15.
800	paddd	xmm0,[64+ebp]
801	paddd	xmm1,[80+ebp]
802	paddd	xmm2,[96+ebp]
803	paddd	xmm3,[112+ebp]
804	movdqa	xmm6,xmm0
805	punpckldq	xmm0,xmm1
806	movdqa	xmm7,xmm2
807	punpckldq	xmm2,xmm3
808	punpckhdq	xmm6,xmm1
809	punpckhdq	xmm7,xmm3
810	movdqa	xmm1,xmm0
811	punpcklqdq	xmm0,xmm2
812	movdqa	xmm3,xmm6
813	punpcklqdq	xmm6,xmm7
814	punpckhqdq	xmm1,xmm2
815	punpckhqdq	xmm3,xmm7
816	movdqu	xmm4,[esi-128]
817	movdqu	xmm5,[esi-64]
818	movdqu	xmm2,[esi]
819	movdqu	xmm7,[64+esi]
; 3*16 + 208 = 256: esi/edi now point at the next group of four blocks.
820	lea	esi,[208+esi]
821	pxor	xmm4,xmm0
822	pxor	xmm5,xmm1
823	pxor	xmm6,xmm2
824	pxor	xmm7,xmm3
825	movdqu	[edi-128],xmm4
826	movdqu	[edi-64],xmm5
827	movdqu	[edi],xmm6
828	movdqu	[64+edi],xmm7
829	lea	edi,[208+edi]
; Loop while at least 256 more bytes remain; otherwise undo the bias so that
; ecx is the true remaining count (0..255).
830	sub	ecx,256
831	jnc	NEAR L$009outer_loop
832	add	ecx,256
833	jz	NEAR L$011done
; Hand over to the 1x path: remove the +128 pointer bias, reload the arg3 and
; arg4 pointers and rebuild words 12..15 in xmm3 with word 12 set to the
; lane-0 counter of the last 4x iteration plus 4.
834	mov	ebx,DWORD [520+esp]
835	lea	esi,[esi-128]
836	mov	edx,DWORD [516+esp]
837	lea	edi,[edi-128]
838	movd	xmm2,DWORD [64+ebp]
839	movdqu	xmm3,[ebx]
840	paddd	xmm2,[96+eax]
841	pand	xmm3,[112+eax]
842	por	xmm3,xmm2
843L$0081x:
; ---- 1x path: one 64-byte block per iteration, one state row per register --
; xmm0 = constants, xmm1/xmm2 = words 4..7/8..11 (from arg3), xmm3 = words
; 12..15; xmm6/xmm7 = pshufb masks for the rotations by 16 and 8.  The input
; block is kept at [0..63+esp] for the feed-forward.
844	movdqa	xmm0,[32+eax]
845	movdqu	xmm1,[edx]
846	movdqu	xmm2,[16+edx]
847	movdqa	xmm6,[eax]
848	movdqa	xmm7,[16+eax]
; NOTE(review): this dword store is overwritten by the 16-byte store to
; [48+esp] four instructions below before it is ever read.
849	mov	DWORD [48+esp],ebp
850	movdqa	[esp],xmm0
851	movdqa	[16+esp],xmm1
852	movdqa	[32+esp],xmm2
853	movdqa	[48+esp],xmm3
854	mov	edx,10
855	jmp	NEAR L$012loop1x
856align	16
857L$013outer1x:
; Next block: reload the input rows and add 1 to word 12.
858	movdqa	xmm3,[80+eax]
859	movdqa	xmm0,[esp]
860	movdqa	xmm1,[16+esp]
861	movdqa	xmm2,[32+esp]
862	paddd	xmm3,[48+esp]
863	mov	edx,10
864	movdqa	[48+esp],xmm3
865	jmp	NEAR L$012loop1x
866align	16
867L$012loop1x:
; Each iteration applies the quarter-round to all four columns at once, then
; rotates rows 1..3 with pshufd so the same code processes the diagonals, and
; finally rotates the rows back.  The "db 102,15,56,0,222" and
; "db 102,15,56,0,223" lines are the byte encodings of pshufb xmm3,xmm6
; (rotate by 16) and pshufb xmm3,xmm7 (rotate by 8).
868	paddd	xmm0,xmm1
869	pxor	xmm3,xmm0
870db	102,15,56,0,222
871	paddd	xmm2,xmm3
872	pxor	xmm1,xmm2
873	movdqa	xmm4,xmm1
874	psrld	xmm1,20
875	pslld	xmm4,12
876	por	xmm1,xmm4
877	paddd	xmm0,xmm1
878	pxor	xmm3,xmm0
879db	102,15,56,0,223
880	paddd	xmm2,xmm3
881	pxor	xmm1,xmm2
882	movdqa	xmm4,xmm1
883	psrld	xmm1,25
884	pslld	xmm4,7
885	por	xmm1,xmm4
886	pshufd	xmm2,xmm2,78
887	pshufd	xmm1,xmm1,57
888	pshufd	xmm3,xmm3,147
889	nop
890	paddd	xmm0,xmm1
891	pxor	xmm3,xmm0
892db	102,15,56,0,222
893	paddd	xmm2,xmm3
894	pxor	xmm1,xmm2
895	movdqa	xmm4,xmm1
896	psrld	xmm1,20
897	pslld	xmm4,12
898	por	xmm1,xmm4
899	paddd	xmm0,xmm1
900	pxor	xmm3,xmm0
901db	102,15,56,0,223
902	paddd	xmm2,xmm3
903	pxor	xmm1,xmm2
904	movdqa	xmm4,xmm1
905	psrld	xmm1,25
906	pslld	xmm4,7
907	por	xmm1,xmm4
908	pshufd	xmm2,xmm2,78
909	pshufd	xmm1,xmm1,147
910	pshufd	xmm3,xmm3,57
911	dec	edx
912	jnz	NEAR L$012loop1x
; Feed-forward: add the input block.
913	paddd	xmm0,[esp]
914	paddd	xmm1,[16+esp]
915	paddd	xmm2,[32+esp]
916	paddd	xmm3,[48+esp]
917	cmp	ecx,64
918	jb	NEAR L$014tail
; Full block: XOR 64 source bytes with the keystream and store.
919	movdqu	xmm4,[esi]
920	movdqu	xmm5,[16+esi]
921	pxor	xmm0,xmm4
922	movdqu	xmm4,[32+esi]
923	pxor	xmm1,xmm5
924	movdqu	xmm5,[48+esi]
925	pxor	xmm2,xmm4
926	pxor	xmm3,xmm5
927	lea	esi,[64+esi]
928	movdqu	[edi],xmm0
929	movdqu	[16+edi],xmm1
930	movdqu	[32+edi],xmm2
931	movdqu	[48+edi],xmm3
932	lea	edi,[64+edi]
933	sub	ecx,64
934	jnz	NEAR L$013outer1x
935	jmp	NEAR L$011done
936L$014tail:
; Fewer than 64 bytes remain (ecx = count, non-zero): spill the keystream
; block to [0..63+esp] and XOR byte by byte.
937	movdqa	[esp],xmm0
938	movdqa	[16+esp],xmm1
939	movdqa	[32+esp],xmm2
940	movdqa	[48+esp],xmm3
941	xor	eax,eax
942	xor	edx,edx
943	xor	ebp,ebp
944L$015tail_loop:
; esi = source, edi = destination, ebp = index, ecx = count.
945	mov	al,BYTE [ebp*1+esp]
946	mov	dl,BYTE [ebp*1+esi]
947	lea	ebp,[1+ebp]
948	xor	al,dl
949	mov	BYTE [ebp*1+edi-1],al
950	dec	ecx
951	jnz	NEAR L$015tail_loop
952L$011done:
; Restore the caller's esp saved in the prologue.
953	mov	esp,DWORD [512+esp]
; Zero-length early exit lands here with esp still at the pushed registers.
L$ssse3_no_data:
954	pop	edi
955	pop	esi
956	pop	ebx
957	pop	ebp
958	ret
; Constant table for the SSSE3 code, addressed through eax as
; [offset+eax].  It lives in the code section so that it can be located
; relative to a code label.
;   +0    pshufb mask: rotate every dword left by 16 bits
;   +16   pshufb mask: rotate every dword left by 8 bits
;   +32   state words 0..3: 0x61707865 0x3320646e 0x79622d32 0x6b206574
;         (ASCII "expand 32-byte k")
;   +48   per-lane block offsets 0,1,2,3 for the 4x path
;   +64   counter increment for four blocks, one copy per lane
;   +80   counter increment for one block (word 12 only)
;   +96   +4 for word 12 only, used when switching from the 4x to the 1x path
;   +112  mask that clears word 12 and keeps words 13..15
959align	64
960L$ssse3_data:
961db	2,3,0,1,6,7,4,5,10,11,8,9,14,15,12,13
962db	3,0,1,2,7,4,5,6,11,8,9,10,15,12,13,14
963dd	1634760805,857760878,2036477234,1797285236
964dd	0,1,2,3
965dd	4,4,4,4
966dd	1,0,0,0
967dd	4,0,0,0
968dd	0,-1,-1,-1
969align	64
; NUL-terminated ASCII identification string:
; "ChaCha20 for x86, CRYPTOGAMS by <appro@openssl.org>"
970db	67,104,97,67,104,97,50,48,32,102,111,114,32,120,56,54
971db	44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32
972db	60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111
973db	114,103,62,0
; 16-byte common symbol in .bss.  _ChaCha20_ctr32 tests bits in its first
; two dwords to choose between the scalar and the SSSE3 code path.
; NOTE(review): presumably the CPU capability vector filled in elsewhere in
; the library -- nothing in this file writes to it; confirm.
974segment	.bss
975common	_OPENSSL_ia32cap_P 16
976