xref: /aosp_15_r20/external/cronet/third_party/boringssl/src/gen/crypto/chacha-x86-apple.S (revision 6777b5387eb2ff775bb5750e3f5d96f37fb7352b)
1// This file is generated from a similarly-named Perl script in the BoringSSL
2// source tree. Do not edit by hand.
3
4#include <openssl/asm_base.h>
5
6#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86) && defined(__APPLE__)
7.text
// _ChaCha20_ctr32_nohw: ChaCha20 XOR-encryption using only general-purpose
// registers (32-bit x86, AT&T syntax, arguments read from the stack).
//
// Stack arguments (offsets are relative to %esp at function entry):
//    4(%esp)  arg1  destination pointer (keystream^input is stored here)
//    8(%esp)  arg2  source pointer (XORed with the keystream)
//   12(%esp)  arg3  byte count
//   16(%esp)  arg4  pointer to 8 dwords that are copied into state words 4..11
//   20(%esp)  arg5  pointer to 4 dwords that are copied into state words 12..15;
//                   word 0 of it is incremented by one per 64-byte block
// NOTE(review): presumably the C prototype is
//   void ChaCha20_ctr32_nohw(uint8_t *out, const uint8_t *in, size_t in_len,
//                            const uint32_t key[8], const uint32_t counter[4]);
// confirm against the C header that declares it.
//
// Precondition: the byte count must be non-zero.  There is no zero-length
// check in this routine; with a count of 0 the tail loop's `decl %ebx` would
// wrap and the loop would run far past the buffers.
//
// Saves/restores %ebp, %ebx, %esi, %edi.  Clobbers %eax, %ecx, %edx, flags.
//
// Frame layout after `subl $132,%esp`:
//     0..60(%esp)   working state x0..x15 (x[i] at 4*i)
//    80..108(%esp)  copy of the 8 dwords from arg4
//   112..124(%esp)  copy of the 4 dwords from arg5 (112 = running block counter)
//   128(%esp)       round-loop counter spill
//   152(%esp)       arg1 (dst), 156(%esp) arg2 (src), 160(%esp) arg3 (len);
//                   these caller slots are rewritten with the advanced values
//                   on every trip through L001outer_loop.
8.globl	_ChaCha20_ctr32_nohw
9.private_extern	_ChaCha20_ctr32_nohw
10.align	4
11_ChaCha20_ctr32_nohw:
12L_ChaCha20_ctr32_nohw_begin:
13	pushl	%ebp
14	pushl	%ebx
15	pushl	%esi
16	pushl	%edi
// After four pushes: 32(%esp) is arg4, 36(%esp) is arg5.
17	movl	32(%esp),%esi
18	movl	36(%esp),%edi
19	subl	$132,%esp
// Copy the eight dwords at arg4 to 80..108(%esp).
20	movl	(%esi),%eax
21	movl	4(%esi),%ebx
22	movl	8(%esi),%ecx
23	movl	12(%esi),%edx
24	movl	%eax,80(%esp)
25	movl	%ebx,84(%esp)
26	movl	%ecx,88(%esp)
27	movl	%edx,92(%esp)
28	movl	16(%esi),%eax
29	movl	20(%esi),%ebx
30	movl	24(%esi),%ecx
31	movl	28(%esi),%edx
32	movl	%eax,96(%esp)
33	movl	%ebx,100(%esp)
34	movl	%ecx,104(%esp)
35	movl	%edx,108(%esp)
// Copy the four dwords at arg5 to 112..124(%esp).  Word 0 is stored
// decremented by one because the per-block setup below adds one before use,
// so the first block sees the caller's value.
36	movl	(%edi),%eax
37	movl	4(%edi),%ebx
38	movl	8(%edi),%ecx
39	movl	12(%edi),%edx
40	subl	$1,%eax
41	movl	%eax,112(%esp)
42	movl	%ebx,116(%esp)
43	movl	%ecx,120(%esp)
44	movl	%edx,124(%esp)
45	jmp	L000entry
46.align	4,0x90
// Reached after each full 64-byte block with %eax = advanced dst,
// %ebx = advanced src, %ecx = remaining byte count; write them back to the
// argument slots.
47L001outer_loop:
48	movl	%ebx,156(%esp)
49	movl	%eax,152(%esp)
50	movl	%ecx,160(%esp)
// Per-block setup: build the initial working state.  Words 0..3 are the four
// immediates below (0x61707865, 0x3320646e, 0x79622d32, 0x6b206574, i.e. the
// ASCII string "expand 32-byte k" read as little-endian dwords).  Words that
// the first quarter-rounds consume straight from registers (x0 in %eax,
// x4 in %ebp, x8 in %ecx, x9 in %esi, x12 in %edx) are not stored to the
// working area here.
51L000entry:
52	movl	$1634760805,%eax
53	movl	$857760878,4(%esp)
54	movl	$2036477234,8(%esp)
55	movl	$1797285236,12(%esp)
56	movl	84(%esp),%ebx
57	movl	88(%esp),%ebp
58	movl	104(%esp),%ecx
59	movl	108(%esp),%esi
60	movl	116(%esp),%edx
61	movl	120(%esp),%edi
62	movl	%ebx,20(%esp)
63	movl	%ebp,24(%esp)
64	movl	%ecx,40(%esp)
65	movl	%esi,44(%esp)
66	movl	%edx,52(%esp)
67	movl	%edi,56(%esp)
68	movl	92(%esp),%ebx
69	movl	124(%esp),%edi
70	movl	112(%esp),%edx
71	movl	80(%esp),%ebp
72	movl	96(%esp),%ecx
73	movl	100(%esp),%esi
// Advance the block counter and store it back; %edx keeps it as x12.
74	addl	$1,%edx
75	movl	%ebx,28(%esp)
76	movl	%edi,60(%esp)
77	movl	%edx,112(%esp)
// %ebx = 10 iterations; each iteration is four "column" quarter-rounds
// followed by four "diagonal" quarter-rounds.
78	movl	$10,%ebx
79	jmp	L002loop
80.align	4,0x90
// Round loop.  Each quarter-round on (a,b,c,d) is
//   a+=b; d^=a; d<<<=16;  c+=d; b^=c; b<<<=12;
//   a+=b; d^=a; d<<<=8;   c+=d; b^=c; b<<<=7;
// The instructions of consecutive quarter-rounds are interleaved: loads for
// the next one and stores for the previous one are mixed into the current
// one, so the order of instructions matters.
// On entry to each iteration: %eax=x0, %ebp=x4, %ecx=x8, %edx=x12, %esi=x9.
81L002loop:
// Column quarter-round on (x0,x4,x8,x12): a=%eax b=%ebx c=%ecx d=%edx.
82	addl	%ebp,%eax
// Spill the loop counter; %ebx is needed as a working register.
83	movl	%ebx,128(%esp)
84	movl	%ebp,%ebx
85	xorl	%eax,%edx
86	roll	$16,%edx
87	addl	%edx,%ecx
88	xorl	%ecx,%ebx
89	movl	52(%esp),%edi
90	roll	$12,%ebx
91	movl	20(%esp),%ebp
92	addl	%ebx,%eax
93	xorl	%eax,%edx
94	movl	%eax,(%esp)
95	roll	$8,%edx
// Column quarter-round on (x1,x5,x9,x13) starts: a=%eax b=%ebp c=%esi d=%edi.
96	movl	4(%esp),%eax
97	addl	%edx,%ecx
98	movl	%edx,48(%esp)
99	xorl	%ecx,%ebx
100	addl	%ebp,%eax
101	roll	$7,%ebx
102	xorl	%eax,%edi
103	movl	%ecx,32(%esp)
104	roll	$16,%edi
105	movl	%ebx,16(%esp)
106	addl	%edi,%esi
107	movl	40(%esp),%ecx
108	xorl	%esi,%ebp
109	movl	56(%esp),%edx
110	roll	$12,%ebp
111	movl	24(%esp),%ebx
112	addl	%ebp,%eax
113	xorl	%eax,%edi
114	movl	%eax,4(%esp)
115	roll	$8,%edi
// Column quarter-round on (x2,x6,x10,x14) starts: a=%eax b=%ebx c=%ecx d=%edx.
116	movl	8(%esp),%eax
117	addl	%edi,%esi
118	movl	%edi,52(%esp)
119	xorl	%esi,%ebp
120	addl	%ebx,%eax
121	roll	$7,%ebp
122	xorl	%eax,%edx
123	movl	%esi,36(%esp)
124	roll	$16,%edx
125	movl	%ebp,20(%esp)
126	addl	%edx,%ecx
127	movl	44(%esp),%esi
128	xorl	%ecx,%ebx
129	movl	60(%esp),%edi
130	roll	$12,%ebx
131	movl	28(%esp),%ebp
132	addl	%ebx,%eax
133	xorl	%eax,%edx
134	movl	%eax,8(%esp)
135	roll	$8,%edx
// Column quarter-round on (x3,x7,x11,x15) starts: a=%eax b=%ebp c=%esi d=%edi.
136	movl	12(%esp),%eax
137	addl	%edx,%ecx
138	movl	%edx,56(%esp)
139	xorl	%ecx,%ebx
140	addl	%ebp,%eax
141	roll	$7,%ebx
142	xorl	%eax,%edi
143	roll	$16,%edi
144	movl	%ebx,24(%esp)
145	addl	%edi,%esi
146	xorl	%esi,%ebp
147	roll	$12,%ebp
148	movl	20(%esp),%ebx
149	addl	%ebp,%eax
150	xorl	%eax,%edi
151	movl	%eax,12(%esp)
152	roll	$8,%edi
// Diagonal quarter-round on (x0,x5,x10,x15) starts: a=%eax b=%ebx c=%ecx
// d=%edx (x15 is moved over from %edi, x10 is still live in %ecx).
153	movl	(%esp),%eax
154	addl	%edi,%esi
155	movl	%edi,%edx
156	xorl	%esi,%ebp
157	addl	%ebx,%eax
158	roll	$7,%ebp
159	xorl	%eax,%edx
160	roll	$16,%edx
161	movl	%ebp,28(%esp)
162	addl	%edx,%ecx
163	xorl	%ecx,%ebx
164	movl	48(%esp),%edi
165	roll	$12,%ebx
166	movl	24(%esp),%ebp
167	addl	%ebx,%eax
168	xorl	%eax,%edx
169	movl	%eax,(%esp)
170	roll	$8,%edx
// Diagonal quarter-round on (x1,x6,x11,x12) starts: a=%eax b=%ebp c=%esi d=%edi.
171	movl	4(%esp),%eax
172	addl	%edx,%ecx
173	movl	%edx,60(%esp)
174	xorl	%ecx,%ebx
175	addl	%ebp,%eax
176	roll	$7,%ebx
177	xorl	%eax,%edi
178	movl	%ecx,40(%esp)
179	roll	$16,%edi
180	movl	%ebx,20(%esp)
181	addl	%edi,%esi
182	movl	32(%esp),%ecx
183	xorl	%esi,%ebp
184	movl	52(%esp),%edx
185	roll	$12,%ebp
186	movl	28(%esp),%ebx
187	addl	%ebp,%eax
188	xorl	%eax,%edi
189	movl	%eax,4(%esp)
190	roll	$8,%edi
// Diagonal quarter-round on (x2,x7,x8,x13) starts: a=%eax b=%ebx c=%ecx d=%edx.
191	movl	8(%esp),%eax
192	addl	%edi,%esi
193	movl	%edi,48(%esp)
194	xorl	%esi,%ebp
195	addl	%ebx,%eax
196	roll	$7,%ebp
197	xorl	%eax,%edx
198	movl	%esi,44(%esp)
199	roll	$16,%edx
200	movl	%ebp,24(%esp)
201	addl	%edx,%ecx
202	movl	36(%esp),%esi
203	xorl	%ecx,%ebx
204	movl	56(%esp),%edi
205	roll	$12,%ebx
206	movl	16(%esp),%ebp
207	addl	%ebx,%eax
208	xorl	%eax,%edx
209	movl	%eax,8(%esp)
210	roll	$8,%edx
// Diagonal quarter-round on (x3,x4,x9,x14) starts: a=%eax b=%ebp c=%esi d=%edi.
211	movl	12(%esp),%eax
212	addl	%edx,%ecx
213	movl	%edx,52(%esp)
214	xorl	%ecx,%ebx
215	addl	%ebp,%eax
216	roll	$7,%ebx
217	xorl	%eax,%edi
218	roll	$16,%edi
219	movl	%ebx,28(%esp)
220	addl	%edi,%esi
221	xorl	%esi,%ebp
// Reload x12 for the next iteration and the spilled loop counter.
222	movl	48(%esp),%edx
223	roll	$12,%ebp
224	movl	128(%esp),%ebx
225	addl	%ebp,%eax
226	xorl	%eax,%edi
227	movl	%eax,12(%esp)
228	roll	$8,%edi
229	movl	(%esp),%eax
230	addl	%edi,%esi
231	movl	%edi,56(%esp)
232	xorl	%esi,%ebp
233	roll	$7,%ebp
234	decl	%ebx
235	jnz	L002loop
// Rounds done.  Live in registers: %eax=x0 %ebp=x4 %ecx=x8 %esi=x9 %edx=x12
// %edi=x14; the other words are in the working area.  Add the initial state
// back in, then either XOR a full block straight from src to dst, or fall to
// the tail path when fewer than 64 bytes remain (unsigned compare).
236	movl	160(%esp),%ebx
237	addl	$1634760805,%eax
238	addl	80(%esp),%ebp
239	addl	96(%esp),%ecx
240	addl	100(%esp),%esi
241	cmpl	$64,%ebx
242	jb	L003tail
// Full block: %ebx = src, %eax = dst (x0 is parked at (%esp) while %eax is
// used as the destination pointer).
243	movl	156(%esp),%ebx
244	addl	112(%esp),%edx
245	addl	120(%esp),%edi
246	xorl	(%ebx),%eax
247	xorl	16(%ebx),%ebp
248	movl	%eax,(%esp)
249	movl	152(%esp),%eax
250	xorl	32(%ebx),%ecx
251	xorl	36(%ebx),%esi
252	xorl	48(%ebx),%edx
253	xorl	56(%ebx),%edi
254	movl	%ebp,16(%eax)
255	movl	%ecx,32(%eax)
256	movl	%esi,36(%eax)
257	movl	%edx,48(%eax)
258	movl	%edi,56(%eax)
// Words x1, x2, x3, x5, x6: add initial state, XOR with input, store.
259	movl	4(%esp),%ebp
260	movl	8(%esp),%ecx
261	movl	12(%esp),%esi
262	movl	20(%esp),%edx
263	movl	24(%esp),%edi
264	addl	$857760878,%ebp
265	addl	$2036477234,%ecx
266	addl	$1797285236,%esi
267	addl	84(%esp),%edx
268	addl	88(%esp),%edi
269	xorl	4(%ebx),%ebp
270	xorl	8(%ebx),%ecx
271	xorl	12(%ebx),%esi
272	xorl	20(%ebx),%edx
273	xorl	24(%ebx),%edi
274	movl	%ebp,4(%eax)
275	movl	%ecx,8(%eax)
276	movl	%esi,12(%eax)
277	movl	%edx,20(%eax)
278	movl	%edi,24(%eax)
// Words x7, x10, x11, x13, x15: add initial state, XOR with input, store.
279	movl	28(%esp),%ebp
280	movl	40(%esp),%ecx
281	movl	44(%esp),%esi
282	movl	52(%esp),%edx
283	movl	60(%esp),%edi
284	addl	92(%esp),%ebp
285	addl	104(%esp),%ecx
286	addl	108(%esp),%esi
287	addl	116(%esp),%edx
288	addl	124(%esp),%edi
289	xorl	28(%ebx),%ebp
290	xorl	40(%ebx),%ecx
291	xorl	44(%ebx),%esi
292	xorl	52(%ebx),%edx
293	xorl	60(%ebx),%edi
// Advance src by 64; the already-XORed word 0 parked at (%esp) is written
// last, after which dst is advanced and the remaining length reduced.
294	leal	64(%ebx),%ebx
295	movl	%ebp,28(%eax)
296	movl	(%esp),%ebp
297	movl	%ecx,40(%eax)
298	movl	160(%esp),%ecx
299	movl	%esi,44(%eax)
300	movl	%edx,52(%eax)
301	movl	%edi,60(%eax)
302	movl	%ebp,(%eax)
303	leal	64(%eax),%eax
304	subl	$64,%ecx
305	jnz	L001outer_loop
306	jmp	L004done
// Tail: fewer than 64 bytes remain (%ebx = remaining count).  Finish adding
// the initial state, write the complete 64-byte keystream block to
// 0..63(%esp), then XOR it into the output one byte at a time.
307L003tail:
308	addl	112(%esp),%edx
309	addl	120(%esp),%edi
310	movl	%eax,(%esp)
311	movl	%ebp,16(%esp)
312	movl	%ecx,32(%esp)
313	movl	%esi,36(%esp)
314	movl	%edx,48(%esp)
315	movl	%edi,56(%esp)
316	movl	4(%esp),%ebp
317	movl	8(%esp),%ecx
318	movl	12(%esp),%esi
319	movl	20(%esp),%edx
320	movl	24(%esp),%edi
321	addl	$857760878,%ebp
322	addl	$2036477234,%ecx
323	addl	$1797285236,%esi
324	addl	84(%esp),%edx
325	addl	88(%esp),%edi
326	movl	%ebp,4(%esp)
327	movl	%ecx,8(%esp)
328	movl	%esi,12(%esp)
329	movl	%edx,20(%esp)
330	movl	%edi,24(%esp)
331	movl	28(%esp),%ebp
332	movl	40(%esp),%ecx
333	movl	44(%esp),%esi
334	movl	52(%esp),%edx
335	movl	60(%esp),%edi
336	addl	92(%esp),%ebp
337	addl	104(%esp),%ecx
338	addl	108(%esp),%esi
339	addl	116(%esp),%edx
340	addl	124(%esp),%edi
// From here: %ebp = src, %ecx = dst, %esi = byte index (starts at 0).
341	movl	%ebp,28(%esp)
342	movl	156(%esp),%ebp
343	movl	%ecx,40(%esp)
344	movl	152(%esp),%ecx
345	movl	%esi,44(%esp)
346	xorl	%esi,%esi
347	movl	%edx,52(%esp)
348	movl	%edi,60(%esp)
349	xorl	%eax,%eax
350	xorl	%edx,%edx
// dst[i] = src[i] ^ keystream[i]; the index is bumped before the store,
// hence the -1 displacement.  Runs %ebx times (%ebx must be non-zero).
351L005tail_loop:
352	movb	(%esi,%ebp,1),%al
353	movb	(%esp,%esi,1),%dl
354	leal	1(%esi),%esi
355	xorb	%dl,%al
356	movb	%al,-1(%ecx,%esi,1)
357	decl	%ebx
358	jnz	L005tail_loop
// Release the frame and restore the saved registers.
359L004done:
360	addl	$132,%esp
361	popl	%edi
362	popl	%esi
363	popl	%ebx
364	popl	%ebp
365	ret
// _ChaCha20_ctr32_ssse3: ChaCha20 XOR-encryption using SSE2/SSSE3 vector
// instructions (32-bit x86, AT&T syntax, arguments read from the stack).
//
// Stack arguments (offsets are relative to %esp at function entry):
//    4(%esp)  arg1 -> %edi  destination pointer
//    8(%esp)  arg2 -> %esi  source pointer
//   12(%esp)  arg3 -> %ecx  byte count
//   16(%esp)  arg4 -> %edx  pointer to 32 bytes loaded as state words 4..11
//   20(%esp)  arg5 -> %ebx  pointer to 16 bytes loaded as state words 12..15
// NOTE(review): presumably the same C prototype as the _nohw variant
// (out, in, in_len, key[8], counter[4]) - confirm against the C header.
//
// Precondition: the byte count must be non-zero.  There is no zero-length
// check; a count of 0 reaches the byte-wise tail loop, where `decl %ecx`
// would wrap.
//
// Two code paths:
//   * count >= 256: L007outer_loop produces four 64-byte blocks per
//     iteration; every xmm register / 16-byte stack slot holds the same state
//     word for four consecutive blocks (one block per 32-bit lane).
//   * the remainder, or the whole input when count < 256: L0061x handles one
//     block at a time with one state row per xmm register.
//
// Saves/restores %ebp, %ebx, %esi, %edi.  Clobbers %eax, %ecx, %edx,
// %xmm0-%xmm7, flags.
//
// Frame (after alignment %esp is a multiple of 64):
//     0..255(%esp)   4x working state; word i at (16*i-128)(%ebx), %ebx=%esp+128
//   256..511(%esp)   4x input state;   word i at (16*i-128)(%ebp), %ebp=%esp+384
//   512(%esp)        caller's %esp as it was before the frame was carved out
//   516(%esp)        saved arg4 pointer,  520(%esp) saved arg5 pointer
//   In the 1x path 0..63(%esp) instead holds the four 16-byte input rows.
366.globl	_ChaCha20_ctr32_ssse3
367.private_extern	_ChaCha20_ctr32_ssse3
368.align	4
369_ChaCha20_ctr32_ssse3:
370L_ChaCha20_ctr32_ssse3_begin:
371	pushl	%ebp
372	pushl	%ebx
373	pushl	%esi
374	pushl	%edi
// call/pop pair: %eax = run-time address of Lpic_point, used below to form a
// position-independent pointer to Lssse3_data.
375	call	Lpic_point
376Lpic_point:
377	popl	%eax
378	movl	20(%esp),%edi
379	movl	24(%esp),%esi
380	movl	28(%esp),%ecx
381	movl	32(%esp),%edx
382	movl	36(%esp),%ebx
// Remember the current %esp, reserve 524 bytes and round down to a 64-byte
// boundary so that movdqa can be used on the frame.
383	movl	%esp,%ebp
384	subl	$524,%esp
385	andl	$-64,%esp
386	movl	%ebp,512(%esp)
// %eax = &Lssse3_data for the rest of the function (until the tail path
// reuses %eax as scratch).
387	leal	Lssse3_data-Lpic_point(%eax),%eax
// xmm3 = the 16 bytes at arg5; the 1x path consumes it directly when fewer
// than 256 bytes were requested.
388	movdqu	(%ebx),%xmm3
389	cmpl	$256,%ecx
390	jb	L0061x
// ---- 4x path setup ----
// Keep the arg4/arg5 pointers for the hand-over to the 1x path, and bias the
// length by -256 so the loop-closing `subl $256 / jnc` tests "256 more bytes
// available".
391	movl	%edx,516(%esp)
392	movl	%ebx,520(%esp)
393	subl	$256,%ecx
394	leal	384(%esp),%ebp
395	movdqu	(%edx),%xmm7
// Broadcast each of the four arg5 words across a register (words 12..15).
396	pshufd	$0,%xmm3,%xmm0
397	pshufd	$85,%xmm3,%xmm1
398	pshufd	$170,%xmm3,%xmm2
399	pshufd	$255,%xmm3,%xmm3
// Word 12 lanes become counter+{0,1,2,3}, then minus {4,4,4,4} because the
// loop adds {4,4,4,4} at the top of every iteration.
400	paddd	48(%eax),%xmm0
// Broadcast the first four arg4 words (state words 4..7).
401	pshufd	$0,%xmm7,%xmm4
402	pshufd	$85,%xmm7,%xmm5
403	psubd	64(%eax),%xmm0
404	pshufd	$170,%xmm7,%xmm6
405	pshufd	$255,%xmm7,%xmm7
406	movdqa	%xmm0,64(%ebp)
407	movdqa	%xmm1,80(%ebp)
408	movdqa	%xmm2,96(%ebp)
409	movdqa	%xmm3,112(%ebp)
410	movdqu	16(%edx),%xmm3
411	movdqa	%xmm4,-64(%ebp)
412	movdqa	%xmm5,-48(%ebp)
413	movdqa	%xmm6,-32(%ebp)
414	movdqa	%xmm7,-16(%ebp)
// xmm7 = the four constant words at Lssse3_data+32 (state words 0..3);
// xmm3 = the last four arg4 words (state words 8..11).  Broadcast both.
415	movdqa	32(%eax),%xmm7
416	leal	128(%esp),%ebx
417	pshufd	$0,%xmm3,%xmm0
418	pshufd	$85,%xmm3,%xmm1
419	pshufd	$170,%xmm3,%xmm2
420	pshufd	$255,%xmm3,%xmm3
421	pshufd	$0,%xmm7,%xmm4
422	pshufd	$85,%xmm7,%xmm5
423	pshufd	$170,%xmm7,%xmm6
424	pshufd	$255,%xmm7,%xmm7
425	movdqa	%xmm0,(%ebp)
426	movdqa	%xmm1,16(%ebp)
427	movdqa	%xmm2,32(%ebp)
428	movdqa	%xmm3,48(%ebp)
429	movdqa	%xmm4,-128(%ebp)
430	movdqa	%xmm5,-112(%ebp)
431	movdqa	%xmm6,-96(%ebp)
432	movdqa	%xmm7,-80(%ebp)
// Bias src/dst by +128; the output stage addresses the four blocks with
// displacements -128, -64, 0 and 64.
433	leal	128(%esi),%esi
434	leal	128(%edi),%edi
435	jmp	L007outer_loop
436.align	4,0x90
// One iteration = four 64-byte blocks.  Copy the input state into the
// working area, except the words the first quarter-rounds take directly from
// registers (0, 4, 8, 9 and 12).
437L007outer_loop:
438	movdqa	-112(%ebp),%xmm1
439	movdqa	-96(%ebp),%xmm2
440	movdqa	-80(%ebp),%xmm3
441	movdqa	-48(%ebp),%xmm5
442	movdqa	-32(%ebp),%xmm6
443	movdqa	-16(%ebp),%xmm7
444	movdqa	%xmm1,-112(%ebx)
445	movdqa	%xmm2,-96(%ebx)
446	movdqa	%xmm3,-80(%ebx)
447	movdqa	%xmm5,-48(%ebx)
448	movdqa	%xmm6,-32(%ebx)
449	movdqa	%xmm7,-16(%ebx)
450	movdqa	32(%ebp),%xmm2
451	movdqa	48(%ebp),%xmm3
452	movdqa	64(%ebp),%xmm4
453	movdqa	80(%ebp),%xmm5
454	movdqa	96(%ebp),%xmm6
455	movdqa	112(%ebp),%xmm7
// Advance the four lane counters by 4 and store them back to the input state.
456	paddd	64(%eax),%xmm4
457	movdqa	%xmm2,32(%ebx)
458	movdqa	%xmm3,48(%ebx)
459	movdqa	%xmm4,64(%ebx)
460	movdqa	%xmm5,80(%ebx)
461	movdqa	%xmm6,96(%ebx)
462	movdqa	%xmm7,112(%ebx)
463	movdqa	%xmm4,64(%ebp)
// Register state entering the round loop:
//   xmm0=word0  xmm3=word4  xmm4=word8  xmm5=word9  xmm6=word12
464	movdqa	-128(%ebp),%xmm0
465	movdqa	%xmm4,%xmm6
466	movdqa	-64(%ebp),%xmm3
467	movdqa	(%ebp),%xmm4
468	movdqa	16(%ebp),%xmm5
// %edx = 10 iterations, each four column plus four diagonal quarter-rounds.
469	movl	$10,%edx
470	nop
471.align	4,0x90
// Vector round loop.  Rotations by 16 and 8 use pshufb with the masks at
// (%eax) and 16(%eax); rotations by 12 and 7 use a pslld/psrld/por triple.
// As in the scalar code, consecutive quarter-rounds are interleaved, so
// instruction order matters.
472L008loop:
// Column quarter-round on words (0,4,8,12): a=xmm0 b=xmm2 c=xmm4 d=xmm6.
473	paddd	%xmm3,%xmm0
474	movdqa	%xmm3,%xmm2
475	pxor	%xmm0,%xmm6
476	pshufb	(%eax),%xmm6
477	paddd	%xmm6,%xmm4
478	pxor	%xmm4,%xmm2
479	movdqa	-48(%ebx),%xmm3
480	movdqa	%xmm2,%xmm1
481	pslld	$12,%xmm2
482	psrld	$20,%xmm1
483	por	%xmm1,%xmm2
484	movdqa	-112(%ebx),%xmm1
485	paddd	%xmm2,%xmm0
486	movdqa	80(%ebx),%xmm7
487	pxor	%xmm0,%xmm6
488	movdqa	%xmm0,-128(%ebx)
489	pshufb	16(%eax),%xmm6
490	paddd	%xmm6,%xmm4
491	movdqa	%xmm6,64(%ebx)
492	pxor	%xmm4,%xmm2
// Column quarter-round on words (1,5,9,13) starts: a=xmm1 b=xmm3 c=xmm5 d=xmm7.
493	paddd	%xmm3,%xmm1
494	movdqa	%xmm2,%xmm0
495	pslld	$7,%xmm2
496	psrld	$25,%xmm0
497	pxor	%xmm1,%xmm7
498	por	%xmm0,%xmm2
499	movdqa	%xmm4,(%ebx)
500	pshufb	(%eax),%xmm7
501	movdqa	%xmm2,-64(%ebx)
502	paddd	%xmm7,%xmm5
503	movdqa	32(%ebx),%xmm4
504	pxor	%xmm5,%xmm3
505	movdqa	-32(%ebx),%xmm2
506	movdqa	%xmm3,%xmm0
507	pslld	$12,%xmm3
508	psrld	$20,%xmm0
509	por	%xmm0,%xmm3
510	movdqa	-96(%ebx),%xmm0
511	paddd	%xmm3,%xmm1
512	movdqa	96(%ebx),%xmm6
513	pxor	%xmm1,%xmm7
514	movdqa	%xmm1,-112(%ebx)
515	pshufb	16(%eax),%xmm7
516	paddd	%xmm7,%xmm5
517	movdqa	%xmm7,80(%ebx)
518	pxor	%xmm5,%xmm3
// Column quarter-round on words (2,6,10,14) starts: a=xmm0 b=xmm2 c=xmm4 d=xmm6.
519	paddd	%xmm2,%xmm0
520	movdqa	%xmm3,%xmm1
521	pslld	$7,%xmm3
522	psrld	$25,%xmm1
523	pxor	%xmm0,%xmm6
524	por	%xmm1,%xmm3
525	movdqa	%xmm5,16(%ebx)
526	pshufb	(%eax),%xmm6
527	movdqa	%xmm3,-48(%ebx)
528	paddd	%xmm6,%xmm4
529	movdqa	48(%ebx),%xmm5
530	pxor	%xmm4,%xmm2
531	movdqa	-16(%ebx),%xmm3
532	movdqa	%xmm2,%xmm1
533	pslld	$12,%xmm2
534	psrld	$20,%xmm1
535	por	%xmm1,%xmm2
536	movdqa	-80(%ebx),%xmm1
537	paddd	%xmm2,%xmm0
538	movdqa	112(%ebx),%xmm7
539	pxor	%xmm0,%xmm6
540	movdqa	%xmm0,-96(%ebx)
541	pshufb	16(%eax),%xmm6
542	paddd	%xmm6,%xmm4
543	movdqa	%xmm6,96(%ebx)
544	pxor	%xmm4,%xmm2
// Column quarter-round on words (3,7,11,15) starts: a=xmm1 b=xmm3 c=xmm5 d=xmm7.
545	paddd	%xmm3,%xmm1
546	movdqa	%xmm2,%xmm0
547	pslld	$7,%xmm2
548	psrld	$25,%xmm0
549	pxor	%xmm1,%xmm7
550	por	%xmm0,%xmm2
551	pshufb	(%eax),%xmm7
552	movdqa	%xmm2,-32(%ebx)
553	paddd	%xmm7,%xmm5
554	pxor	%xmm5,%xmm3
555	movdqa	-48(%ebx),%xmm2
556	movdqa	%xmm3,%xmm0
557	pslld	$12,%xmm3
558	psrld	$20,%xmm0
559	por	%xmm0,%xmm3
560	movdqa	-128(%ebx),%xmm0
561	paddd	%xmm3,%xmm1
562	pxor	%xmm1,%xmm7
563	movdqa	%xmm1,-80(%ebx)
564	pshufb	16(%eax),%xmm7
565	paddd	%xmm7,%xmm5
// Word 15 moves to xmm6 to serve as d of the first diagonal quarter-round.
566	movdqa	%xmm7,%xmm6
567	pxor	%xmm5,%xmm3
// Diagonal quarter-round on words (0,5,10,15) starts: a=xmm0 b=xmm2 c=xmm4 d=xmm6.
568	paddd	%xmm2,%xmm0
569	movdqa	%xmm3,%xmm1
570	pslld	$7,%xmm3
571	psrld	$25,%xmm1
572	pxor	%xmm0,%xmm6
573	por	%xmm1,%xmm3
574	pshufb	(%eax),%xmm6
575	movdqa	%xmm3,-16(%ebx)
576	paddd	%xmm6,%xmm4
577	pxor	%xmm4,%xmm2
578	movdqa	-32(%ebx),%xmm3
579	movdqa	%xmm2,%xmm1
580	pslld	$12,%xmm2
581	psrld	$20,%xmm1
582	por	%xmm1,%xmm2
583	movdqa	-112(%ebx),%xmm1
584	paddd	%xmm2,%xmm0
585	movdqa	64(%ebx),%xmm7
586	pxor	%xmm0,%xmm6
587	movdqa	%xmm0,-128(%ebx)
588	pshufb	16(%eax),%xmm6
589	paddd	%xmm6,%xmm4
590	movdqa	%xmm6,112(%ebx)
591	pxor	%xmm4,%xmm2
// Diagonal quarter-round on words (1,6,11,12) starts: a=xmm1 b=xmm3 c=xmm5 d=xmm7.
592	paddd	%xmm3,%xmm1
593	movdqa	%xmm2,%xmm0
594	pslld	$7,%xmm2
595	psrld	$25,%xmm0
596	pxor	%xmm1,%xmm7
597	por	%xmm0,%xmm2
598	movdqa	%xmm4,32(%ebx)
599	pshufb	(%eax),%xmm7
600	movdqa	%xmm2,-48(%ebx)
601	paddd	%xmm7,%xmm5
602	movdqa	(%ebx),%xmm4
603	pxor	%xmm5,%xmm3
604	movdqa	-16(%ebx),%xmm2
605	movdqa	%xmm3,%xmm0
606	pslld	$12,%xmm3
607	psrld	$20,%xmm0
608	por	%xmm0,%xmm3
609	movdqa	-96(%ebx),%xmm0
610	paddd	%xmm3,%xmm1
611	movdqa	80(%ebx),%xmm6
612	pxor	%xmm1,%xmm7
613	movdqa	%xmm1,-112(%ebx)
614	pshufb	16(%eax),%xmm7
615	paddd	%xmm7,%xmm5
616	movdqa	%xmm7,64(%ebx)
617	pxor	%xmm5,%xmm3
// Diagonal quarter-round on words (2,7,8,13) starts: a=xmm0 b=xmm2 c=xmm4 d=xmm6.
618	paddd	%xmm2,%xmm0
619	movdqa	%xmm3,%xmm1
620	pslld	$7,%xmm3
621	psrld	$25,%xmm1
622	pxor	%xmm0,%xmm6
623	por	%xmm1,%xmm3
624	movdqa	%xmm5,48(%ebx)
625	pshufb	(%eax),%xmm6
626	movdqa	%xmm3,-32(%ebx)
627	paddd	%xmm6,%xmm4
628	movdqa	16(%ebx),%xmm5
629	pxor	%xmm4,%xmm2
630	movdqa	-64(%ebx),%xmm3
631	movdqa	%xmm2,%xmm1
632	pslld	$12,%xmm2
633	psrld	$20,%xmm1
634	por	%xmm1,%xmm2
635	movdqa	-80(%ebx),%xmm1
636	paddd	%xmm2,%xmm0
637	movdqa	96(%ebx),%xmm7
638	pxor	%xmm0,%xmm6
639	movdqa	%xmm0,-96(%ebx)
640	pshufb	16(%eax),%xmm6
641	paddd	%xmm6,%xmm4
642	movdqa	%xmm6,80(%ebx)
643	pxor	%xmm4,%xmm2
// Diagonal quarter-round on words (3,4,9,14) starts: a=xmm1 b=xmm3 c=xmm5 d=xmm7.
644	paddd	%xmm3,%xmm1
645	movdqa	%xmm2,%xmm0
646	pslld	$7,%xmm2
647	psrld	$25,%xmm0
648	pxor	%xmm1,%xmm7
649	por	%xmm0,%xmm2
650	pshufb	(%eax),%xmm7
651	movdqa	%xmm2,-16(%ebx)
652	paddd	%xmm7,%xmm5
653	pxor	%xmm5,%xmm3
654	movdqa	%xmm3,%xmm0
655	pslld	$12,%xmm3
656	psrld	$20,%xmm0
657	por	%xmm0,%xmm3
// Reload words 0 and 12 so the next iteration starts with the expected
// register state.
658	movdqa	-128(%ebx),%xmm0
659	paddd	%xmm3,%xmm1
660	movdqa	64(%ebx),%xmm6
661	pxor	%xmm1,%xmm7
662	movdqa	%xmm1,-80(%ebx)
663	pshufb	16(%eax),%xmm7
664	paddd	%xmm7,%xmm5
665	movdqa	%xmm7,96(%ebx)
666	pxor	%xmm5,%xmm3
667	movdqa	%xmm3,%xmm1
668	pslld	$7,%xmm3
669	psrld	$25,%xmm1
670	por	%xmm1,%xmm3
671	decl	%edx
672	jnz	L008loop
// Rounds done: flush the words still held in registers to the working area.
673	movdqa	%xmm3,-64(%ebx)
674	movdqa	%xmm4,(%ebx)
675	movdqa	%xmm5,16(%ebx)
676	movdqa	%xmm6,64(%ebx)
677	movdqa	%xmm7,96(%ebx)
// Output stage, done four times (words 0..3, 4..7, 8..11, 12..15).  Each
// pass: add the input state, transpose the 4x4 dword matrix so every
// register holds 16 consecutive keystream bytes of ONE block, XOR with the
// input of blocks 0..3 (displacements -128, -64, 0, 64) and store.  src/dst
// advance by 16 after each of the first three passes.
678	movdqa	-112(%ebx),%xmm1
679	movdqa	-96(%ebx),%xmm2
680	movdqa	-80(%ebx),%xmm3
681	paddd	-128(%ebp),%xmm0
682	paddd	-112(%ebp),%xmm1
683	paddd	-96(%ebp),%xmm2
684	paddd	-80(%ebp),%xmm3
// Transpose: afterwards xmm0, xmm1, xmm6, xmm3 belong to blocks 0, 1, 2, 3.
685	movdqa	%xmm0,%xmm6
686	punpckldq	%xmm1,%xmm0
687	movdqa	%xmm2,%xmm7
688	punpckldq	%xmm3,%xmm2
689	punpckhdq	%xmm1,%xmm6
690	punpckhdq	%xmm3,%xmm7
691	movdqa	%xmm0,%xmm1
692	punpcklqdq	%xmm2,%xmm0
693	movdqa	%xmm6,%xmm3
694	punpcklqdq	%xmm7,%xmm6
695	punpckhqdq	%xmm2,%xmm1
696	punpckhqdq	%xmm7,%xmm3
697	movdqu	-128(%esi),%xmm4
698	movdqu	-64(%esi),%xmm5
699	movdqu	(%esi),%xmm2
700	movdqu	64(%esi),%xmm7
701	leal	16(%esi),%esi
// XOR with input; the loads of words 4..7 for the next pass are interleaved.
702	pxor	%xmm0,%xmm4
703	movdqa	-64(%ebx),%xmm0
704	pxor	%xmm1,%xmm5
705	movdqa	-48(%ebx),%xmm1
706	pxor	%xmm2,%xmm6
707	movdqa	-32(%ebx),%xmm2
708	pxor	%xmm3,%xmm7
709	movdqa	-16(%ebx),%xmm3
710	movdqu	%xmm4,-128(%edi)
711	movdqu	%xmm5,-64(%edi)
712	movdqu	%xmm6,(%edi)
713	movdqu	%xmm7,64(%edi)
714	leal	16(%edi),%edi
// Pass 2: words 4..7.
715	paddd	-64(%ebp),%xmm0
716	paddd	-48(%ebp),%xmm1
717	paddd	-32(%ebp),%xmm2
718	paddd	-16(%ebp),%xmm3
719	movdqa	%xmm0,%xmm6
720	punpckldq	%xmm1,%xmm0
721	movdqa	%xmm2,%xmm7
722	punpckldq	%xmm3,%xmm2
723	punpckhdq	%xmm1,%xmm6
724	punpckhdq	%xmm3,%xmm7
725	movdqa	%xmm0,%xmm1
726	punpcklqdq	%xmm2,%xmm0
727	movdqa	%xmm6,%xmm3
728	punpcklqdq	%xmm7,%xmm6
729	punpckhqdq	%xmm2,%xmm1
730	punpckhqdq	%xmm7,%xmm3
731	movdqu	-128(%esi),%xmm4
732	movdqu	-64(%esi),%xmm5
733	movdqu	(%esi),%xmm2
734	movdqu	64(%esi),%xmm7
735	leal	16(%esi),%esi
736	pxor	%xmm0,%xmm4
737	movdqa	(%ebx),%xmm0
738	pxor	%xmm1,%xmm5
739	movdqa	16(%ebx),%xmm1
740	pxor	%xmm2,%xmm6
741	movdqa	32(%ebx),%xmm2
742	pxor	%xmm3,%xmm7
743	movdqa	48(%ebx),%xmm3
744	movdqu	%xmm4,-128(%edi)
745	movdqu	%xmm5,-64(%edi)
746	movdqu	%xmm6,(%edi)
747	movdqu	%xmm7,64(%edi)
748	leal	16(%edi),%edi
// Pass 3: words 8..11.
749	paddd	(%ebp),%xmm0
750	paddd	16(%ebp),%xmm1
751	paddd	32(%ebp),%xmm2
752	paddd	48(%ebp),%xmm3
753	movdqa	%xmm0,%xmm6
754	punpckldq	%xmm1,%xmm0
755	movdqa	%xmm2,%xmm7
756	punpckldq	%xmm3,%xmm2
757	punpckhdq	%xmm1,%xmm6
758	punpckhdq	%xmm3,%xmm7
759	movdqa	%xmm0,%xmm1
760	punpcklqdq	%xmm2,%xmm0
761	movdqa	%xmm6,%xmm3
762	punpcklqdq	%xmm7,%xmm6
763	punpckhqdq	%xmm2,%xmm1
764	punpckhqdq	%xmm7,%xmm3
765	movdqu	-128(%esi),%xmm4
766	movdqu	-64(%esi),%xmm5
767	movdqu	(%esi),%xmm2
768	movdqu	64(%esi),%xmm7
769	leal	16(%esi),%esi
770	pxor	%xmm0,%xmm4
771	movdqa	64(%ebx),%xmm0
772	pxor	%xmm1,%xmm5
773	movdqa	80(%ebx),%xmm1
774	pxor	%xmm2,%xmm6
775	movdqa	96(%ebx),%xmm2
776	pxor	%xmm3,%xmm7
777	movdqa	112(%ebx),%xmm3
778	movdqu	%xmm4,-128(%edi)
779	movdqu	%xmm5,-64(%edi)
780	movdqu	%xmm6,(%edi)
781	movdqu	%xmm7,64(%edi)
782	leal	16(%edi),%edi
// Pass 4: words 12..15.
783	paddd	64(%ebp),%xmm0
784	paddd	80(%ebp),%xmm1
785	paddd	96(%ebp),%xmm2
786	paddd	112(%ebp),%xmm3
787	movdqa	%xmm0,%xmm6
788	punpckldq	%xmm1,%xmm0
789	movdqa	%xmm2,%xmm7
790	punpckldq	%xmm3,%xmm2
791	punpckhdq	%xmm1,%xmm6
792	punpckhdq	%xmm3,%xmm7
793	movdqa	%xmm0,%xmm1
794	punpcklqdq	%xmm2,%xmm0
795	movdqa	%xmm6,%xmm3
796	punpcklqdq	%xmm7,%xmm6
797	punpckhqdq	%xmm2,%xmm1
798	punpckhqdq	%xmm7,%xmm3
799	movdqu	-128(%esi),%xmm4
800	movdqu	-64(%esi),%xmm5
801	movdqu	(%esi),%xmm2
802	movdqu	64(%esi),%xmm7
// 3*16 + 208 = 256: src (and dst below) end up one full 4-block group ahead.
803	leal	208(%esi),%esi
804	pxor	%xmm0,%xmm4
805	pxor	%xmm1,%xmm5
806	pxor	%xmm2,%xmm6
807	pxor	%xmm3,%xmm7
808	movdqu	%xmm4,-128(%edi)
809	movdqu	%xmm5,-64(%edi)
810	movdqu	%xmm6,(%edi)
811	movdqu	%xmm7,64(%edi)
812	leal	208(%edi),%edi
// Loop while another 256 bytes are available (no borrow); otherwise undo the
// bias to get the true remaining count and finish if it is zero.
813	subl	$256,%ecx
814	jnc	L007outer_loop
815	addl	$256,%ecx
816	jz	L009done
// Hand over to the 1x path: restore the arg4/arg5 pointers, remove the +128
// bias from src/dst, and rebuild the 16-byte counter row in xmm3: lane 0 of
// the stored 4x counter vector plus 4 ({4,0,0,0}), combined with words 1..3
// reloaded from arg5 (word 0 masked off with {0,-1,-1,-1}).
817	movl	520(%esp),%ebx
818	leal	-128(%esi),%esi
819	movl	516(%esp),%edx
820	leal	-128(%edi),%edi
821	movd	64(%ebp),%xmm2
822	movdqu	(%ebx),%xmm3
823	paddd	96(%eax),%xmm2
824	pand	112(%eax),%xmm3
825	por	%xmm2,%xmm3
// ---- 1x path ----
// Rows: xmm0 = constants (words 0..3), xmm1 = words 4..7, xmm2 = words 8..11,
// xmm3 = words 12..15.  xmm6 / xmm7 = the rotate-by-16 / rotate-by-8 pshufb
// masks.  The four input rows are kept at 0..63(%esp).
826L0061x:
827	movdqa	32(%eax),%xmm0
828	movdqu	(%edx),%xmm1
829	movdqu	16(%edx),%xmm2
830	movdqa	(%eax),%xmm6
831	movdqa	16(%eax),%xmm7
// Note: this dword store to 48(%esp) is overwritten by the xmm3 store three
// instructions below.
832	movl	%ebp,48(%esp)
833	movdqa	%xmm0,(%esp)
834	movdqa	%xmm1,16(%esp)
835	movdqa	%xmm2,32(%esp)
836	movdqa	%xmm3,48(%esp)
837	movl	$10,%edx
838	jmp	L010loop1x
839.align	4,0x90
// Next single block: reload the input rows and add {1,0,0,0} to the counter
// row, storing the incremented row back.
840L011outer1x:
841	movdqa	80(%eax),%xmm3
842	movdqa	(%esp),%xmm0
843	movdqa	16(%esp),%xmm1
844	movdqa	32(%esp),%xmm2
845	paddd	48(%esp),%xmm3
846	movl	$10,%edx
847	movdqa	%xmm3,48(%esp)
848	jmp	L010loop1x
849.align	4,0x90
// 1x round loop (10 iterations).  All four quarter-rounds of a round run in
// parallel across the lanes; a=xmm0 b=xmm1 c=xmm2 d=xmm3, xmm4 is scratch.
850L010loop1x:
851	paddd	%xmm1,%xmm0
852	pxor	%xmm0,%xmm3
// Encoded instruction: pshufb %xmm6,%xmm3 (66 0F 38 00 DE) - rotate by 16.
853.byte	102,15,56,0,222
854	paddd	%xmm3,%xmm2
855	pxor	%xmm2,%xmm1
856	movdqa	%xmm1,%xmm4
857	psrld	$20,%xmm1
858	pslld	$12,%xmm4
859	por	%xmm4,%xmm1
860	paddd	%xmm1,%xmm0
861	pxor	%xmm0,%xmm3
// Encoded instruction: pshufb %xmm7,%xmm3 (66 0F 38 00 DF) - rotate by 8.
862.byte	102,15,56,0,223
863	paddd	%xmm3,%xmm2
864	pxor	%xmm2,%xmm1
865	movdqa	%xmm1,%xmm4
866	psrld	$25,%xmm1
867	pslld	$7,%xmm4
868	por	%xmm4,%xmm1
// Rotate the lanes of rows c, b and d so that the second half of the
// iteration operates on the diagonals.
869	pshufd	$78,%xmm2,%xmm2
870	pshufd	$57,%xmm1,%xmm1
871	pshufd	$147,%xmm3,%xmm3
872	nop
873	paddd	%xmm1,%xmm0
874	pxor	%xmm0,%xmm3
// pshufb %xmm6,%xmm3
875.byte	102,15,56,0,222
876	paddd	%xmm3,%xmm2
877	pxor	%xmm2,%xmm1
878	movdqa	%xmm1,%xmm4
879	psrld	$20,%xmm1
880	pslld	$12,%xmm4
881	por	%xmm4,%xmm1
882	paddd	%xmm1,%xmm0
883	pxor	%xmm0,%xmm3
// pshufb %xmm7,%xmm3
884.byte	102,15,56,0,223
885	paddd	%xmm3,%xmm2
886	pxor	%xmm2,%xmm1
887	movdqa	%xmm1,%xmm4
888	psrld	$25,%xmm1
889	pslld	$7,%xmm4
890	por	%xmm4,%xmm1
// Inverse lane rotations ($147 and $57 swapped relative to the ones above):
// rows are back in column order.
891	pshufd	$78,%xmm2,%xmm2
892	pshufd	$147,%xmm1,%xmm1
893	pshufd	$57,%xmm3,%xmm3
894	decl	%edx
895	jnz	L010loop1x
// Add the input rows to obtain the 64-byte keystream block in xmm0..xmm3.
896	paddd	(%esp),%xmm0
897	paddd	16(%esp),%xmm1
898	paddd	32(%esp),%xmm2
899	paddd	48(%esp),%xmm3
// Fewer than 64 bytes left (unsigned compare): byte-wise tail.
900	cmpl	$64,%ecx
901	jb	L012tail
// Full block: dst[0..63] = src[0..63] ^ keystream; advance and loop.
902	movdqu	(%esi),%xmm4
903	movdqu	16(%esi),%xmm5
904	pxor	%xmm4,%xmm0
905	movdqu	32(%esi),%xmm4
906	pxor	%xmm5,%xmm1
907	movdqu	48(%esi),%xmm5
908	pxor	%xmm4,%xmm2
909	pxor	%xmm5,%xmm3
910	leal	64(%esi),%esi
911	movdqu	%xmm0,(%edi)
912	movdqu	%xmm1,16(%edi)
913	movdqu	%xmm2,32(%edi)
914	movdqu	%xmm3,48(%edi)
915	leal	64(%edi),%edi
916	subl	$64,%ecx
917	jnz	L011outer1x
918	jmp	L009done
// Tail: spill the keystream block to 0..63(%esp) (overwriting the input
// rows, which are no longer needed) and XOR %ecx bytes one at a time.
// %eax is reused as scratch here, so the data pointer is gone after this.
919L012tail:
920	movdqa	%xmm0,(%esp)
921	movdqa	%xmm1,16(%esp)
922	movdqa	%xmm2,32(%esp)
923	movdqa	%xmm3,48(%esp)
924	xorl	%eax,%eax
925	xorl	%edx,%edx
926	xorl	%ebp,%ebp
// dst[i] = keystream[i] ^ src[i]; %ebp is the byte index, bumped before the
// store (hence the -1 displacement).  %ecx must be non-zero on entry.
927L013tail_loop:
928	movb	(%esp,%ebp,1),%al
929	movb	(%esi,%ebp,1),%dl
930	leal	1(%ebp),%ebp
931	xorb	%dl,%al
932	movb	%al,-1(%edi,%ebp,1)
933	decl	%ecx
934	jnz	L013tail_loop
// Restore the pre-alignment stack pointer saved at 512(%esp), then the
// saved registers.
935L009done:
936	movl	512(%esp),%esp
937	popl	%edi
938	popl	%esi
939	popl	%ebx
940	popl	%ebp
941	ret
// Constant table used by _ChaCha20_ctr32_ssse3, 64-byte aligned (.align 6)
// so the 16-byte rows can be read with aligned loads.  The function keeps
// the table's address in %eax; row offsets below are relative to
// Lssse3_data.
942.align	6,0x90
943Lssse3_data:
//   +0: pshufb mask - rotates every 32-bit lane by 16 bits.
944.byte	2,3,0,1,6,7,4,5,10,11,8,9,14,15,12,13
//  +16: pshufb mask - rotates every 32-bit lane left by 8 bits.
945.byte	3,0,1,2,7,4,5,6,11,8,9,10,15,12,13,14
//  +32: state words 0..3 (0x61707865, 0x3320646e, 0x79622d32, 0x6b206574;
//       the ASCII bytes "expand 32-byte k").
946.long	1634760805,857760878,2036477234,1797285236
//  +48: per-lane counter offsets for the four parallel blocks.
947.long	0,1,2,3
//  +64: counter step for one 4-block iteration.
948.long	4,4,4,4
//  +80: counter step for one block in the 1x path.
949.long	1,0,0,0
//  +96: added to lane 0 of the 4x counter when handing over to the 1x path.
950.long	4,0,0,0
// +112: mask that clears word 0 and keeps words 1..3 of the counter row.
951.long	0,-1,-1,-1
952.align	6,0x90
// NUL-terminated ASCII banner:
// "ChaCha20 for x86, CRYPTOGAMS by <appro@openssl.org>"
953.byte	67,104,97,67,104,97,50,48,32,102,111,114,32,120,56,54
954.byte	44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32
955.byte	60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111
956.byte	114,103,62,0
957#endif  // !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86) && defined(__APPLE__)
958