xref: /aosp_15_r20/bionic/libc/arch-x86/string/sse2-memmove-slm.S (revision 8d67ca893c1523eb926b9080dbe4e2ffd2a27ba1)
1*8d67ca89SAndroid Build Coastguard Worker/*
2*8d67ca89SAndroid Build Coastguard WorkerCopyright (c) 2014, Intel Corporation
3*8d67ca89SAndroid Build Coastguard WorkerAll rights reserved.
4*8d67ca89SAndroid Build Coastguard Worker
5*8d67ca89SAndroid Build Coastguard WorkerRedistribution and use in source and binary forms, with or without
6*8d67ca89SAndroid Build Coastguard Workermodification, are permitted provided that the following conditions are met:
7*8d67ca89SAndroid Build Coastguard Worker
8*8d67ca89SAndroid Build Coastguard Worker    * Redistributions of source code must retain the above copyright notice,
9*8d67ca89SAndroid Build Coastguard Worker    * this list of conditions and the following disclaimer.
10*8d67ca89SAndroid Build Coastguard Worker
11*8d67ca89SAndroid Build Coastguard Worker    * Redistributions in binary form must reproduce the above copyright notice,
12*8d67ca89SAndroid Build Coastguard Worker    * this list of conditions and the following disclaimer in the documentation
13*8d67ca89SAndroid Build Coastguard Worker    * and/or other materials provided with the distribution.
14*8d67ca89SAndroid Build Coastguard Worker
15*8d67ca89SAndroid Build Coastguard Worker    * Neither the name of Intel Corporation nor the names of its contributors
16*8d67ca89SAndroid Build Coastguard Worker    * may be used to endorse or promote products derived from this software
17*8d67ca89SAndroid Build Coastguard Worker    * without specific prior written permission.
18*8d67ca89SAndroid Build Coastguard Worker
19*8d67ca89SAndroid Build Coastguard WorkerTHIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
20*8d67ca89SAndroid Build Coastguard WorkerANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
21*8d67ca89SAndroid Build Coastguard WorkerWARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
22*8d67ca89SAndroid Build Coastguard WorkerDISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
23*8d67ca89SAndroid Build Coastguard WorkerANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
24*8d67ca89SAndroid Build Coastguard Worker(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
25*8d67ca89SAndroid Build Coastguard WorkerLOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
26*8d67ca89SAndroid Build Coastguard WorkerANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27*8d67ca89SAndroid Build Coastguard Worker(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
28*8d67ca89SAndroid Build Coastguard WorkerSOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29*8d67ca89SAndroid Build Coastguard Worker*/
30*8d67ca89SAndroid Build Coastguard Worker
/* Build-variant marker.  Nothing in the visible part of this file tests it.
   NOTE(review): presumably consumed by code outside this view -- confirm.  */
31*8d67ca89SAndroid Build Coastguard Worker#define FOR_SILVERMONT
32*8d67ca89SAndroid Build Coastguard Worker
/* Symbol name of the routine; it is the argument passed to ENTRY below.
   Defaults to memmove; a definition made before this point wins.  */
33*8d67ca89SAndroid Build Coastguard Worker#ifndef MEMMOVE
34*8d67ca89SAndroid Build Coastguard Worker# define MEMMOVE	memmove
35*8d67ca89SAndroid Build Coastguard Worker#endif
36*8d67ca89SAndroid Build Coastguard Worker
/* L(label) token-pastes a ".L" prefix onto LABEL.  Every branch target in
   the routine is named through this macro, so the labels are assembler-local
   rather than exported symbols.  */
37*8d67ca89SAndroid Build Coastguard Worker#ifndef L
38*8d67ca89SAndroid Build Coastguard Worker# define L(label)	.L##label
39*8d67ca89SAndroid Build Coastguard Worker#endif
40*8d67ca89SAndroid Build Coastguard Worker
/* Function-like spellings of the assembler's call-frame-information
   directives.  Each wrapper expands to the .cfi_* directive of the same name
   with its arguments passed through unchanged, and each is guarded by #ifndef
   so that a definition made before this point is kept.  */
41*8d67ca89SAndroid Build Coastguard Worker#ifndef cfi_startproc
42*8d67ca89SAndroid Build Coastguard Worker# define cfi_startproc	.cfi_startproc
43*8d67ca89SAndroid Build Coastguard Worker#endif
44*8d67ca89SAndroid Build Coastguard Worker
45*8d67ca89SAndroid Build Coastguard Worker#ifndef cfi_endproc
46*8d67ca89SAndroid Build Coastguard Worker# define cfi_endproc	.cfi_endproc
47*8d67ca89SAndroid Build Coastguard Worker#endif
48*8d67ca89SAndroid Build Coastguard Worker
49*8d67ca89SAndroid Build Coastguard Worker#ifndef cfi_rel_offset
50*8d67ca89SAndroid Build Coastguard Worker# define cfi_rel_offset(reg, off)	.cfi_rel_offset reg, off
51*8d67ca89SAndroid Build Coastguard Worker#endif
52*8d67ca89SAndroid Build Coastguard Worker
53*8d67ca89SAndroid Build Coastguard Worker#ifndef cfi_restore
54*8d67ca89SAndroid Build Coastguard Worker# define cfi_restore(reg)	.cfi_restore reg
55*8d67ca89SAndroid Build Coastguard Worker#endif
56*8d67ca89SAndroid Build Coastguard Worker
57*8d67ca89SAndroid Build Coastguard Worker#ifndef cfi_adjust_cfa_offset
58*8d67ca89SAndroid Build Coastguard Worker# define cfi_adjust_cfa_offset(off)	.cfi_adjust_cfa_offset off
59*8d67ca89SAndroid Build Coastguard Worker#endif
60*8d67ca89SAndroid Build Coastguard Worker
/* ENTRY(name) opens a function: it marks NAME as a function-type symbol,
   exports it with .globl, aligns the entry point to 2^4 = 16 bytes, places
   the NAME label and starts a CFI procedure.  Skipped if ENTRY is already
   defined.  */
61*8d67ca89SAndroid Build Coastguard Worker#ifndef ENTRY
62*8d67ca89SAndroid Build Coastguard Worker# define ENTRY(name)		\
63*8d67ca89SAndroid Build Coastguard Worker	.type name,  @function;		\
64*8d67ca89SAndroid Build Coastguard Worker	.globl name;		\
65*8d67ca89SAndroid Build Coastguard Worker	.p2align 4;		\
66*8d67ca89SAndroid Build Coastguard Workername:		\
67*8d67ca89SAndroid Build Coastguard Worker	cfi_startproc
68*8d67ca89SAndroid Build Coastguard Worker#endif
69*8d67ca89SAndroid Build Coastguard Worker
/* END(name) closes a function: it ends the CFI procedure and records the
   symbol size as the distance from the NAME label to the current location.
   Skipped if END is already defined.  */
70*8d67ca89SAndroid Build Coastguard Worker#ifndef END
71*8d67ca89SAndroid Build Coastguard Worker# define END(name)		\
72*8d67ca89SAndroid Build Coastguard Worker	cfi_endproc;		\
73*8d67ca89SAndroid Build Coastguard Worker	.size name, .-name
74*8d67ca89SAndroid Build Coastguard Worker#endif
75*8d67ca89SAndroid Build Coastguard Worker
/* Displacements from %esp of the three stack-passed arguments, in the order
   destination, source, length, each 4 bytes apart.  They are used as
   DEST(%esp) etc. right after ENTRANCE.  The macros expand textually, so
   LEN(%esp) becomes PARMS+4+4(%esp).  */
76*8d67ca89SAndroid Build Coastguard Worker#define DEST		PARMS
77*8d67ca89SAndroid Build Coastguard Worker#define SRC		DEST+4
78*8d67ca89SAndroid Build Coastguard Worker#define LEN		SRC+4
79*8d67ca89SAndroid Build Coastguard Worker
/* Stack push/pop helpers that keep the unwind information in step with
   %esp.

   CFI_PUSH(REG): unwind bookkeeping only -- the CFA offset grows by 4 and
		  REG is recorded as saved at offset 0 from the current CFA
		  register.
   CFI_POP(REG):  unwind bookkeeping only -- the CFA offset shrinks by 4 and
		  REG is marked as restored.
   PUSH(REG):	  pushl REG followed by CFI_PUSH (REG).
   POP(REG):	  popl REG followed by CFI_POP (REG).

   CFI_PUSH/CFI_POP emit no instructions; they are also used on their own to
   correct the unwind state at points reached only by a jump.  */
80*8d67ca89SAndroid Build Coastguard Worker#define CFI_PUSH(REG)		\
81*8d67ca89SAndroid Build Coastguard Worker  cfi_adjust_cfa_offset (4);		\
82*8d67ca89SAndroid Build Coastguard Worker  cfi_rel_offset (REG, 0)
83*8d67ca89SAndroid Build Coastguard Worker
84*8d67ca89SAndroid Build Coastguard Worker#define CFI_POP(REG)		\
85*8d67ca89SAndroid Build Coastguard Worker  cfi_adjust_cfa_offset (-4);		\
86*8d67ca89SAndroid Build Coastguard Worker  cfi_restore (REG)
87*8d67ca89SAndroid Build Coastguard Worker
88*8d67ca89SAndroid Build Coastguard Worker#define PUSH(REG)	pushl REG; CFI_PUSH (REG)
89*8d67ca89SAndroid Build Coastguard Worker#define POP(REG)	popl REG; CFI_POP (REG)
90*8d67ca89SAndroid Build Coastguard Worker
/* Prologue/epilogue for a frame whose only saved register is %ebx.

   PARMS:      displacement of the first argument from %esp once ENTRANCE
	       has run: 4 bytes of return address plus 4 bytes of saved %ebx.
   ENTRANCE:   saves %ebx (with matching CFI).
   RETURN_END: restores %ebx and returns.
   RETURN:     RETURN_END followed by CFI_PUSH (%ebx).  The trailing CFI_PUSH
	       emits no instruction; it re-declares %ebx as saved for whatever
	       code is assembled after the ret.
	       NOTE(review): presumably intended for returns in the middle of
	       the routine -- confirm against the uses outside this view.  */
91*8d67ca89SAndroid Build Coastguard Worker#define PARMS		8		/* Preserve EBX.  */
92*8d67ca89SAndroid Build Coastguard Worker#define ENTRANCE	PUSH (%ebx);
93*8d67ca89SAndroid Build Coastguard Worker#define RETURN_END	POP (%ebx); ret
94*8d67ca89SAndroid Build Coastguard Worker#define RETURN		RETURN_END; CFI_PUSH (%ebx)
95*8d67ca89SAndroid Build Coastguard Worker
/* SETUP_PIC_REG(x) calls the helper __x86.get_pc_thunk.<x>, which is defined
   outside this file.  In this file it is invoked as SETUP_PIC_REG(bx) and
   followed by "add $_GLOBAL_OFFSET_TABLE_, %ebx" so that a @GOTOFF operand
   can be addressed relative to %ebx.
   NOTE(review): assumes the thunk returns the caller's PC in the named
   register -- confirm against the thunk's definition.  */
96*8d67ca89SAndroid Build Coastguard Worker#define SETUP_PIC_REG(x)	call	__x86.get_pc_thunk.x
97*8d67ca89SAndroid Build Coastguard Worker
98*8d67ca89SAndroid Build Coastguard Worker	.section .text.sse2,"ax",@progbits
99*8d67ca89SAndroid Build Coastguard WorkerENTRY (MEMMOVE)
100*8d67ca89SAndroid Build Coastguard Worker	ENTRANCE
101*8d67ca89SAndroid Build Coastguard Worker	movl	LEN(%esp), %ecx
102*8d67ca89SAndroid Build Coastguard Worker	movl	SRC(%esp), %eax
103*8d67ca89SAndroid Build Coastguard Worker	movl	DEST(%esp), %edx
104*8d67ca89SAndroid Build Coastguard Worker
105*8d67ca89SAndroid Build Coastguard Worker/* Check whether we should copy backward or forward.  */
106*8d67ca89SAndroid Build Coastguard Worker	cmp	%eax, %edx
107*8d67ca89SAndroid Build Coastguard Worker	je	L(mm_return)
108*8d67ca89SAndroid Build Coastguard Worker	jg	L(mm_len_0_or_more_backward)
109*8d67ca89SAndroid Build Coastguard Worker
110*8d67ca89SAndroid Build Coastguard Worker/* Now do checks for lengths. We do [0..16], [16..32], [32..64], [64..128]
111*8d67ca89SAndroid Build Coastguard Worker	separately.  */
112*8d67ca89SAndroid Build Coastguard Worker	cmp	$16, %ecx
113*8d67ca89SAndroid Build Coastguard Worker	jbe	L(mm_len_0_16_bytes_forward)
114*8d67ca89SAndroid Build Coastguard Worker
115*8d67ca89SAndroid Build Coastguard Worker	cmpl	$32, %ecx
116*8d67ca89SAndroid Build Coastguard Worker	ja	L(mm_len_32_or_more_forward)
117*8d67ca89SAndroid Build Coastguard Worker
118*8d67ca89SAndroid Build Coastguard Worker/* Copy [0..32] and return.  */
119*8d67ca89SAndroid Build Coastguard Worker	movdqu	(%eax), %xmm0
120*8d67ca89SAndroid Build Coastguard Worker	movdqu	-16(%eax, %ecx), %xmm1
121*8d67ca89SAndroid Build Coastguard Worker	movdqu	%xmm0, (%edx)
122*8d67ca89SAndroid Build Coastguard Worker	movdqu	%xmm1, -16(%edx, %ecx)
123*8d67ca89SAndroid Build Coastguard Worker	jmp	L(mm_return)
124*8d67ca89SAndroid Build Coastguard Worker
125*8d67ca89SAndroid Build Coastguard WorkerL(mm_len_32_or_more_forward):
126*8d67ca89SAndroid Build Coastguard Worker	cmpl	$64, %ecx
127*8d67ca89SAndroid Build Coastguard Worker	ja	L(mm_len_64_or_more_forward)
128*8d67ca89SAndroid Build Coastguard Worker
129*8d67ca89SAndroid Build Coastguard Worker/* Copy [0..64] and return.  */
130*8d67ca89SAndroid Build Coastguard Worker	movdqu	(%eax), %xmm0
131*8d67ca89SAndroid Build Coastguard Worker	movdqu	16(%eax), %xmm1
132*8d67ca89SAndroid Build Coastguard Worker	movdqu	-16(%eax, %ecx), %xmm2
133*8d67ca89SAndroid Build Coastguard Worker	movdqu	-32(%eax, %ecx), %xmm3
134*8d67ca89SAndroid Build Coastguard Worker	movdqu	%xmm0, (%edx)
135*8d67ca89SAndroid Build Coastguard Worker	movdqu	%xmm1, 16(%edx)
136*8d67ca89SAndroid Build Coastguard Worker	movdqu	%xmm2, -16(%edx, %ecx)
137*8d67ca89SAndroid Build Coastguard Worker	movdqu	%xmm3, -32(%edx, %ecx)
138*8d67ca89SAndroid Build Coastguard Worker	jmp	L(mm_return)
139*8d67ca89SAndroid Build Coastguard Worker
140*8d67ca89SAndroid Build Coastguard WorkerL(mm_len_64_or_more_forward):
141*8d67ca89SAndroid Build Coastguard Worker	cmpl	$128, %ecx
142*8d67ca89SAndroid Build Coastguard Worker	ja	L(mm_len_128_or_more_forward)
143*8d67ca89SAndroid Build Coastguard Worker
144*8d67ca89SAndroid Build Coastguard Worker/* Copy [0..128] and return.  */
145*8d67ca89SAndroid Build Coastguard Worker	movdqu	(%eax), %xmm0
146*8d67ca89SAndroid Build Coastguard Worker	movdqu	16(%eax), %xmm1
147*8d67ca89SAndroid Build Coastguard Worker	movdqu	32(%eax), %xmm2
148*8d67ca89SAndroid Build Coastguard Worker	movdqu	48(%eax), %xmm3
149*8d67ca89SAndroid Build Coastguard Worker	movdqu	-64(%eax, %ecx), %xmm4
150*8d67ca89SAndroid Build Coastguard Worker	movdqu	-48(%eax, %ecx), %xmm5
151*8d67ca89SAndroid Build Coastguard Worker	movdqu	-32(%eax, %ecx), %xmm6
152*8d67ca89SAndroid Build Coastguard Worker	movdqu	-16(%eax, %ecx), %xmm7
153*8d67ca89SAndroid Build Coastguard Worker	movdqu	%xmm0, (%edx)
154*8d67ca89SAndroid Build Coastguard Worker	movdqu	%xmm1, 16(%edx)
155*8d67ca89SAndroid Build Coastguard Worker	movdqu	%xmm2, 32(%edx)
156*8d67ca89SAndroid Build Coastguard Worker	movdqu	%xmm3, 48(%edx)
157*8d67ca89SAndroid Build Coastguard Worker	movdqu	%xmm4, -64(%edx, %ecx)
158*8d67ca89SAndroid Build Coastguard Worker	movdqu	%xmm5, -48(%edx, %ecx)
159*8d67ca89SAndroid Build Coastguard Worker	movdqu	%xmm6, -32(%edx, %ecx)
160*8d67ca89SAndroid Build Coastguard Worker	movdqu	%xmm7, -16(%edx, %ecx)
161*8d67ca89SAndroid Build Coastguard Worker	jmp	L(mm_return)
162*8d67ca89SAndroid Build Coastguard Worker
163*8d67ca89SAndroid Build Coastguard WorkerL(mm_len_128_or_more_forward):
164*8d67ca89SAndroid Build Coastguard Worker	PUSH (%esi)
165*8d67ca89SAndroid Build Coastguard Worker	PUSH (%edi)
166*8d67ca89SAndroid Build Coastguard Worker
167*8d67ca89SAndroid Build Coastguard Worker/* Aligning the address of destination.  */
168*8d67ca89SAndroid Build Coastguard Worker	movdqu	(%eax), %xmm0
169*8d67ca89SAndroid Build Coastguard Worker	movdqu	16(%eax), %xmm1
170*8d67ca89SAndroid Build Coastguard Worker	movdqu	32(%eax), %xmm2
171*8d67ca89SAndroid Build Coastguard Worker	movdqu	48(%eax), %xmm3
172*8d67ca89SAndroid Build Coastguard Worker
173*8d67ca89SAndroid Build Coastguard Worker	leal	64(%edx), %edi
174*8d67ca89SAndroid Build Coastguard Worker	andl	$-64, %edi
175*8d67ca89SAndroid Build Coastguard Worker	subl	%edx, %eax
176*8d67ca89SAndroid Build Coastguard Worker
177*8d67ca89SAndroid Build Coastguard Worker	movdqu	(%eax, %edi), %xmm4
178*8d67ca89SAndroid Build Coastguard Worker	movdqu	16(%eax, %edi), %xmm5
179*8d67ca89SAndroid Build Coastguard Worker	movdqu	32(%eax, %edi), %xmm6
180*8d67ca89SAndroid Build Coastguard Worker	movdqu	48(%eax, %edi), %xmm7
181*8d67ca89SAndroid Build Coastguard Worker
182*8d67ca89SAndroid Build Coastguard Worker	movdqu	%xmm0, (%edx)
183*8d67ca89SAndroid Build Coastguard Worker	movdqu	%xmm1, 16(%edx)
184*8d67ca89SAndroid Build Coastguard Worker	movdqu	%xmm2, 32(%edx)
185*8d67ca89SAndroid Build Coastguard Worker	movdqu	%xmm3, 48(%edx)
186*8d67ca89SAndroid Build Coastguard Worker	movdqa	%xmm4, (%edi)
187*8d67ca89SAndroid Build Coastguard Worker	movaps	%xmm5, 16(%edi)
188*8d67ca89SAndroid Build Coastguard Worker	movaps	%xmm6, 32(%edi)
189*8d67ca89SAndroid Build Coastguard Worker	movaps	%xmm7, 48(%edi)
190*8d67ca89SAndroid Build Coastguard Worker	addl	$64, %edi
191*8d67ca89SAndroid Build Coastguard Worker
192*8d67ca89SAndroid Build Coastguard Worker	leal	(%edx, %ecx), %ebx
193*8d67ca89SAndroid Build Coastguard Worker	andl	$-64, %ebx
194*8d67ca89SAndroid Build Coastguard Worker	cmp	%edi, %ebx
195*8d67ca89SAndroid Build Coastguard Worker	jbe	L(mm_copy_remaining_forward)
196*8d67ca89SAndroid Build Coastguard Worker
197*8d67ca89SAndroid Build Coastguard Worker	PUSH(%ebx)
198*8d67ca89SAndroid Build Coastguard Worker	SETUP_PIC_REG(bx)
199*8d67ca89SAndroid Build Coastguard Worker	add	$_GLOBAL_OFFSET_TABLE_, %ebx
200*8d67ca89SAndroid Build Coastguard Worker	cmp	__x86_shared_cache_size_half@GOTOFF(%ebx), %ecx
201*8d67ca89SAndroid Build Coastguard Worker	/* Restore ebx. We can place a pop before jump as it doesn't affect any flags. */
202*8d67ca89SAndroid Build Coastguard Worker	POP(%ebx)
203*8d67ca89SAndroid Build Coastguard Worker
204*8d67ca89SAndroid Build Coastguard Worker	jae	L(mm_large_page_loop_forward)
205*8d67ca89SAndroid Build Coastguard Worker
206*8d67ca89SAndroid Build Coastguard Worker	.p2align 4
207*8d67ca89SAndroid Build Coastguard WorkerL(mm_main_loop_forward):
208*8d67ca89SAndroid Build Coastguard Worker
209*8d67ca89SAndroid Build Coastguard Worker	prefetcht0 128(%eax, %edi)
210*8d67ca89SAndroid Build Coastguard Worker
211*8d67ca89SAndroid Build Coastguard Worker	movdqu	(%eax, %edi), %xmm0
212*8d67ca89SAndroid Build Coastguard Worker	movdqu	16(%eax, %edi), %xmm1
213*8d67ca89SAndroid Build Coastguard Worker	movdqu	32(%eax, %edi), %xmm2
214*8d67ca89SAndroid Build Coastguard Worker	movdqu	48(%eax, %edi), %xmm3
215*8d67ca89SAndroid Build Coastguard Worker	movdqa	%xmm0, (%edi)
216*8d67ca89SAndroid Build Coastguard Worker	movaps	%xmm1, 16(%edi)
217*8d67ca89SAndroid Build Coastguard Worker	movaps	%xmm2, 32(%edi)
218*8d67ca89SAndroid Build Coastguard Worker	movaps	%xmm3, 48(%edi)
219*8d67ca89SAndroid Build Coastguard Worker	leal	64(%edi), %edi
220*8d67ca89SAndroid Build Coastguard Worker	cmp	%edi, %ebx
221*8d67ca89SAndroid Build Coastguard Worker	ja	L(mm_main_loop_forward)
222*8d67ca89SAndroid Build Coastguard Worker
223*8d67ca89SAndroid Build Coastguard WorkerL(mm_copy_remaining_forward):
224*8d67ca89SAndroid Build Coastguard Worker	addl	%edx, %ecx
225*8d67ca89SAndroid Build Coastguard Worker	subl	%edi, %ecx
226*8d67ca89SAndroid Build Coastguard Worker/* We copied all up till %edi position in the dst.
227*8d67ca89SAndroid Build Coastguard Worker	In %ecx now is how many bytes are left to copy.
228*8d67ca89SAndroid Build Coastguard Worker	Now we need to advance %esi. */
229*8d67ca89SAndroid Build Coastguard Worker	leal	(%edi, %eax), %esi
230*8d67ca89SAndroid Build Coastguard Worker
231*8d67ca89SAndroid Build Coastguard WorkerL(mm_remaining_0_64_bytes_forward):
232*8d67ca89SAndroid Build Coastguard Worker	cmp	$32, %ecx
233*8d67ca89SAndroid Build Coastguard Worker	ja	L(mm_remaining_33_64_bytes_forward)
234*8d67ca89SAndroid Build Coastguard Worker	cmp	$16, %ecx
235*8d67ca89SAndroid Build Coastguard Worker	ja	L(mm_remaining_17_32_bytes_forward)
236*8d67ca89SAndroid Build Coastguard Worker	testl	%ecx, %ecx
237*8d67ca89SAndroid Build Coastguard Worker	.p2align 4,,2
238*8d67ca89SAndroid Build Coastguard Worker	je	L(mm_return_pop_all)
239*8d67ca89SAndroid Build Coastguard Worker
240*8d67ca89SAndroid Build Coastguard Worker	cmpb	$8, %cl
241*8d67ca89SAndroid Build Coastguard Worker	ja	L(mm_remaining_9_16_bytes_forward)
242*8d67ca89SAndroid Build Coastguard Worker	cmpb	$4, %cl
243*8d67ca89SAndroid Build Coastguard Worker	.p2align 4,,5
244*8d67ca89SAndroid Build Coastguard Worker	ja	L(mm_remaining_5_8_bytes_forward)
245*8d67ca89SAndroid Build Coastguard Worker	cmpb	$2, %cl
246*8d67ca89SAndroid Build Coastguard Worker	.p2align 4,,1
247*8d67ca89SAndroid Build Coastguard Worker	ja	L(mm_remaining_3_4_bytes_forward)
248*8d67ca89SAndroid Build Coastguard Worker	movzbl	-1(%esi,%ecx), %eax
249*8d67ca89SAndroid Build Coastguard Worker	movzbl	(%esi), %ebx
250*8d67ca89SAndroid Build Coastguard Worker	movb	%al, -1(%edi,%ecx)
251*8d67ca89SAndroid Build Coastguard Worker	movb	%bl, (%edi)
252*8d67ca89SAndroid Build Coastguard Worker	jmp	L(mm_return_pop_all)
253*8d67ca89SAndroid Build Coastguard Worker
254*8d67ca89SAndroid Build Coastguard WorkerL(mm_remaining_33_64_bytes_forward):
255*8d67ca89SAndroid Build Coastguard Worker	movdqu	(%esi), %xmm0
256*8d67ca89SAndroid Build Coastguard Worker	movdqu	16(%esi), %xmm1
257*8d67ca89SAndroid Build Coastguard Worker	movdqu	-32(%esi, %ecx), %xmm2
258*8d67ca89SAndroid Build Coastguard Worker	movdqu	-16(%esi, %ecx), %xmm3
259*8d67ca89SAndroid Build Coastguard Worker	movdqu	%xmm0, (%edi)
260*8d67ca89SAndroid Build Coastguard Worker	movdqu	%xmm1, 16(%edi)
261*8d67ca89SAndroid Build Coastguard Worker	movdqu	%xmm2, -32(%edi, %ecx)
262*8d67ca89SAndroid Build Coastguard Worker	movdqu	%xmm3, -16(%edi, %ecx)
263*8d67ca89SAndroid Build Coastguard Worker	jmp	L(mm_return_pop_all)
264*8d67ca89SAndroid Build Coastguard Worker
265*8d67ca89SAndroid Build Coastguard WorkerL(mm_remaining_17_32_bytes_forward):
266*8d67ca89SAndroid Build Coastguard Worker	movdqu	(%esi), %xmm0
267*8d67ca89SAndroid Build Coastguard Worker	movdqu	-16(%esi, %ecx), %xmm1
268*8d67ca89SAndroid Build Coastguard Worker	movdqu	%xmm0, (%edi)
269*8d67ca89SAndroid Build Coastguard Worker	movdqu	%xmm1, -16(%edi, %ecx)
270*8d67ca89SAndroid Build Coastguard Worker	jmp	L(mm_return_pop_all)
271*8d67ca89SAndroid Build Coastguard Worker
272*8d67ca89SAndroid Build Coastguard WorkerL(mm_remaining_9_16_bytes_forward):
273*8d67ca89SAndroid Build Coastguard Worker	movq	(%esi), %xmm0
274*8d67ca89SAndroid Build Coastguard Worker	movq	-8(%esi, %ecx), %xmm1
275*8d67ca89SAndroid Build Coastguard Worker	movq	%xmm0, (%edi)
276*8d67ca89SAndroid Build Coastguard Worker	movq	%xmm1, -8(%edi, %ecx)
277*8d67ca89SAndroid Build Coastguard Worker	jmp	L(mm_return_pop_all)
278*8d67ca89SAndroid Build Coastguard Worker
279*8d67ca89SAndroid Build Coastguard WorkerL(mm_remaining_5_8_bytes_forward):
280*8d67ca89SAndroid Build Coastguard Worker	movl	(%esi), %eax
281*8d67ca89SAndroid Build Coastguard Worker	movl	-4(%esi,%ecx), %ebx
282*8d67ca89SAndroid Build Coastguard Worker	movl	%eax, (%edi)
283*8d67ca89SAndroid Build Coastguard Worker	movl	%ebx, -4(%edi,%ecx)
284*8d67ca89SAndroid Build Coastguard Worker	jmp	L(mm_return_pop_all)
285*8d67ca89SAndroid Build Coastguard Worker
286*8d67ca89SAndroid Build Coastguard WorkerL(mm_remaining_3_4_bytes_forward):
287*8d67ca89SAndroid Build Coastguard Worker	movzwl	-2(%esi,%ecx), %eax
288*8d67ca89SAndroid Build Coastguard Worker	movzwl	(%esi), %ebx
289*8d67ca89SAndroid Build Coastguard Worker	movw	%ax, -2(%edi,%ecx)
290*8d67ca89SAndroid Build Coastguard Worker	movw	%bx, (%edi)
291*8d67ca89SAndroid Build Coastguard Worker	jmp	L(mm_return_pop_all)
292*8d67ca89SAndroid Build Coastguard Worker
293*8d67ca89SAndroid Build Coastguard WorkerL(mm_len_0_16_bytes_forward):
294*8d67ca89SAndroid Build Coastguard Worker	testb	$24, %cl
295*8d67ca89SAndroid Build Coastguard Worker	jne	L(mm_len_9_16_bytes_forward)
296*8d67ca89SAndroid Build Coastguard Worker	testb	$4, %cl
297*8d67ca89SAndroid Build Coastguard Worker	.p2align 4,,5
298*8d67ca89SAndroid Build Coastguard Worker	jne	L(mm_len_5_8_bytes_forward)
299*8d67ca89SAndroid Build Coastguard Worker	testl	%ecx, %ecx
300*8d67ca89SAndroid Build Coastguard Worker	.p2align 4,,2
301*8d67ca89SAndroid Build Coastguard Worker	je	L(mm_return)
302*8d67ca89SAndroid Build Coastguard Worker	testb	$2, %cl
303*8d67ca89SAndroid Build Coastguard Worker	.p2align 4,,1
304*8d67ca89SAndroid Build Coastguard Worker	jne	L(mm_len_2_4_bytes_forward)
305*8d67ca89SAndroid Build Coastguard Worker	movzbl	-1(%eax,%ecx), %ebx
306*8d67ca89SAndroid Build Coastguard Worker	movzbl	(%eax), %eax
307*8d67ca89SAndroid Build Coastguard Worker	movb	%bl, -1(%edx,%ecx)
308*8d67ca89SAndroid Build Coastguard Worker	movb	%al, (%edx)
309*8d67ca89SAndroid Build Coastguard Worker	jmp	L(mm_return)
310*8d67ca89SAndroid Build Coastguard Worker
311*8d67ca89SAndroid Build Coastguard WorkerL(mm_len_2_4_bytes_forward):
312*8d67ca89SAndroid Build Coastguard Worker	movzwl	-2(%eax,%ecx), %ebx
313*8d67ca89SAndroid Build Coastguard Worker	movzwl	(%eax), %eax
314*8d67ca89SAndroid Build Coastguard Worker	movw	%bx, -2(%edx,%ecx)
315*8d67ca89SAndroid Build Coastguard Worker	movw	%ax, (%edx)
316*8d67ca89SAndroid Build Coastguard Worker	jmp	L(mm_return)
317*8d67ca89SAndroid Build Coastguard Worker
318*8d67ca89SAndroid Build Coastguard WorkerL(mm_len_5_8_bytes_forward):
319*8d67ca89SAndroid Build Coastguard Worker	movl	(%eax), %ebx
320*8d67ca89SAndroid Build Coastguard Worker	movl	-4(%eax,%ecx), %eax
321*8d67ca89SAndroid Build Coastguard Worker	movl	%ebx, (%edx)
322*8d67ca89SAndroid Build Coastguard Worker	movl	%eax, -4(%edx,%ecx)
323*8d67ca89SAndroid Build Coastguard Worker	jmp	L(mm_return)
324*8d67ca89SAndroid Build Coastguard Worker
325*8d67ca89SAndroid Build Coastguard WorkerL(mm_len_9_16_bytes_forward):
326*8d67ca89SAndroid Build Coastguard Worker	movq	(%eax), %xmm0
327*8d67ca89SAndroid Build Coastguard Worker	movq	-8(%eax, %ecx), %xmm1
328*8d67ca89SAndroid Build Coastguard Worker	movq	%xmm0, (%edx)
329*8d67ca89SAndroid Build Coastguard Worker	movq	%xmm1, -8(%edx, %ecx)
330*8d67ca89SAndroid Build Coastguard Worker	jmp	L(mm_return)
331*8d67ca89SAndroid Build Coastguard Worker
332*8d67ca89SAndroid Build Coastguard Worker	CFI_POP (%edi)
333*8d67ca89SAndroid Build Coastguard Worker	CFI_POP (%esi)
334*8d67ca89SAndroid Build Coastguard Worker
335*8d67ca89SAndroid Build Coastguard WorkerL(mm_recalc_len):
336*8d67ca89SAndroid Build Coastguard Worker/* Compute in %ecx how many bytes are left to copy after
337*8d67ca89SAndroid Build Coastguard Worker	the main loop stops.  */
338*8d67ca89SAndroid Build Coastguard Worker	movl	%ebx, %ecx
339*8d67ca89SAndroid Build Coastguard Worker	subl	%edx, %ecx
340*8d67ca89SAndroid Build Coastguard Worker/* The code for copying backwards.  */
341*8d67ca89SAndroid Build Coastguard WorkerL(mm_len_0_or_more_backward):
342*8d67ca89SAndroid Build Coastguard Worker
343*8d67ca89SAndroid Build Coastguard Worker/* Now do checks for lengths. We do [0..16], [16..32], [32..64], [64..128]
344*8d67ca89SAndroid Build Coastguard Worker	separately.  */
345*8d67ca89SAndroid Build Coastguard Worker	cmp	$16, %ecx
346*8d67ca89SAndroid Build Coastguard Worker	jbe	L(mm_len_0_16_bytes_backward)
347*8d67ca89SAndroid Build Coastguard Worker
348*8d67ca89SAndroid Build Coastguard Worker	cmpl	$32, %ecx
349*8d67ca89SAndroid Build Coastguard Worker	jg	L(mm_len_32_or_more_backward)
350*8d67ca89SAndroid Build Coastguard Worker
351*8d67ca89SAndroid Build Coastguard Worker/* Copy [0..32] and return.  */
352*8d67ca89SAndroid Build Coastguard Worker	movdqu	(%eax), %xmm0
353*8d67ca89SAndroid Build Coastguard Worker	movdqu	-16(%eax, %ecx), %xmm1
354*8d67ca89SAndroid Build Coastguard Worker	movdqu	%xmm0, (%edx)
355*8d67ca89SAndroid Build Coastguard Worker	movdqu	%xmm1, -16(%edx, %ecx)
356*8d67ca89SAndroid Build Coastguard Worker	jmp	L(mm_return)
357*8d67ca89SAndroid Build Coastguard Worker
358*8d67ca89SAndroid Build Coastguard WorkerL(mm_len_32_or_more_backward):
359*8d67ca89SAndroid Build Coastguard Worker	cmpl	$64, %ecx
360*8d67ca89SAndroid Build Coastguard Worker	jg	L(mm_len_64_or_more_backward)
361*8d67ca89SAndroid Build Coastguard Worker
362*8d67ca89SAndroid Build Coastguard Worker/* Copy [0..64] and return.  */
363*8d67ca89SAndroid Build Coastguard Worker	movdqu	(%eax), %xmm0
364*8d67ca89SAndroid Build Coastguard Worker	movdqu	16(%eax), %xmm1
365*8d67ca89SAndroid Build Coastguard Worker	movdqu	-16(%eax, %ecx), %xmm2
366*8d67ca89SAndroid Build Coastguard Worker	movdqu	-32(%eax, %ecx), %xmm3
367*8d67ca89SAndroid Build Coastguard Worker	movdqu	%xmm0, (%edx)
368*8d67ca89SAndroid Build Coastguard Worker	movdqu	%xmm1, 16(%edx)
369*8d67ca89SAndroid Build Coastguard Worker	movdqu	%xmm2, -16(%edx, %ecx)
370*8d67ca89SAndroid Build Coastguard Worker	movdqu	%xmm3, -32(%edx, %ecx)
371*8d67ca89SAndroid Build Coastguard Worker	jmp	L(mm_return)
372*8d67ca89SAndroid Build Coastguard Worker
373*8d67ca89SAndroid Build Coastguard WorkerL(mm_len_64_or_more_backward):
374*8d67ca89SAndroid Build Coastguard Worker	cmpl	$128, %ecx
375*8d67ca89SAndroid Build Coastguard Worker	jg	L(mm_len_128_or_more_backward)
376*8d67ca89SAndroid Build Coastguard Worker
377*8d67ca89SAndroid Build Coastguard Worker/* Copy [0..128] and return.  */
378*8d67ca89SAndroid Build Coastguard Worker	movdqu	(%eax), %xmm0
379*8d67ca89SAndroid Build Coastguard Worker	movdqu	16(%eax), %xmm1
380*8d67ca89SAndroid Build Coastguard Worker	movdqu	32(%eax), %xmm2
381*8d67ca89SAndroid Build Coastguard Worker	movdqu	48(%eax), %xmm3
382*8d67ca89SAndroid Build Coastguard Worker	movdqu	-64(%eax, %ecx), %xmm4
383*8d67ca89SAndroid Build Coastguard Worker	movdqu	-48(%eax, %ecx), %xmm5
384*8d67ca89SAndroid Build Coastguard Worker	movdqu	-32(%eax, %ecx), %xmm6
385*8d67ca89SAndroid Build Coastguard Worker	movdqu	-16(%eax, %ecx), %xmm7
386*8d67ca89SAndroid Build Coastguard Worker	movdqu	%xmm0, (%edx)
387*8d67ca89SAndroid Build Coastguard Worker	movdqu	%xmm1, 16(%edx)
388*8d67ca89SAndroid Build Coastguard Worker	movdqu	%xmm2, 32(%edx)
389*8d67ca89SAndroid Build Coastguard Worker	movdqu	%xmm3, 48(%edx)
390*8d67ca89SAndroid Build Coastguard Worker	movdqu	%xmm4, -64(%edx, %ecx)
391*8d67ca89SAndroid Build Coastguard Worker	movdqu	%xmm5, -48(%edx, %ecx)
392*8d67ca89SAndroid Build Coastguard Worker	movdqu	%xmm6, -32(%edx, %ecx)
393*8d67ca89SAndroid Build Coastguard Worker	movdqu	%xmm7, -16(%edx, %ecx)
394*8d67ca89SAndroid Build Coastguard Worker	jmp	L(mm_return)
395*8d67ca89SAndroid Build Coastguard Worker
396*8d67ca89SAndroid Build Coastguard WorkerL(mm_len_128_or_more_backward):
397*8d67ca89SAndroid Build Coastguard Worker	PUSH (%esi)
398*8d67ca89SAndroid Build Coastguard Worker	PUSH (%edi)
399*8d67ca89SAndroid Build Coastguard Worker
400*8d67ca89SAndroid Build Coastguard Worker/* Aligning the address of destination. We need to save the last
401*8d67ca89SAndroid Build Coastguard Worker	64 bytes of the source in order not to overwrite them.  */
402*8d67ca89SAndroid Build Coastguard Worker	movdqu	-16(%eax, %ecx), %xmm0
403*8d67ca89SAndroid Build Coastguard Worker	movdqu	-32(%eax, %ecx), %xmm1
404*8d67ca89SAndroid Build Coastguard Worker	movdqu	-48(%eax, %ecx), %xmm2
405*8d67ca89SAndroid Build Coastguard Worker	movdqu	-64(%eax, %ecx), %xmm3
406*8d67ca89SAndroid Build Coastguard Worker
407*8d67ca89SAndroid Build Coastguard Worker	leal	(%edx, %ecx), %edi
408*8d67ca89SAndroid Build Coastguard Worker	andl	$-64, %edi
409*8d67ca89SAndroid Build Coastguard Worker
410*8d67ca89SAndroid Build Coastguard Worker	movl	%eax, %esi
411*8d67ca89SAndroid Build Coastguard Worker	subl	%edx, %esi
412*8d67ca89SAndroid Build Coastguard Worker
413*8d67ca89SAndroid Build Coastguard Worker	movdqu	-16(%edi, %esi), %xmm4
414*8d67ca89SAndroid Build Coastguard Worker	movdqu	-32(%edi, %esi), %xmm5
415*8d67ca89SAndroid Build Coastguard Worker	movdqu	-48(%edi, %esi), %xmm6
416*8d67ca89SAndroid Build Coastguard Worker	movdqu	-64(%edi, %esi), %xmm7
417*8d67ca89SAndroid Build Coastguard Worker
418*8d67ca89SAndroid Build Coastguard Worker	movdqu	%xmm0, -16(%edx, %ecx)
419*8d67ca89SAndroid Build Coastguard Worker	movdqu	%xmm1, -32(%edx, %ecx)
420*8d67ca89SAndroid Build Coastguard Worker	movdqu	%xmm2, -48(%edx, %ecx)
421*8d67ca89SAndroid Build Coastguard Worker	movdqu	%xmm3, -64(%edx, %ecx)
422*8d67ca89SAndroid Build Coastguard Worker	movdqa	%xmm4, -16(%edi)
423*8d67ca89SAndroid Build Coastguard Worker	movdqa	%xmm5, -32(%edi)
424*8d67ca89SAndroid Build Coastguard Worker	movdqa	%xmm6, -48(%edi)
425*8d67ca89SAndroid Build Coastguard Worker	movdqa	%xmm7, -64(%edi)
426*8d67ca89SAndroid Build Coastguard Worker	leal	-64(%edi), %edi
427*8d67ca89SAndroid Build Coastguard Worker
428*8d67ca89SAndroid Build Coastguard Worker	leal	64(%edx), %ebx
429*8d67ca89SAndroid Build Coastguard Worker	andl	$-64, %ebx
430*8d67ca89SAndroid Build Coastguard Worker
431*8d67ca89SAndroid Build Coastguard Worker	cmp	%edi, %ebx
432*8d67ca89SAndroid Build Coastguard Worker	jae	L(mm_main_loop_backward_end)
433*8d67ca89SAndroid Build Coastguard Worker
434*8d67ca89SAndroid Build Coastguard Worker	PUSH(%ebx)
435*8d67ca89SAndroid Build Coastguard Worker	SETUP_PIC_REG(bx)
436*8d67ca89SAndroid Build Coastguard Worker	add	$_GLOBAL_OFFSET_TABLE_, %ebx
437*8d67ca89SAndroid Build Coastguard Worker	cmp	__x86_shared_cache_size_half@GOTOFF(%ebx), %ecx
438*8d67ca89SAndroid Build Coastguard Worker	/* Restore ebx. We can place a pop before jump as it doesn't affect any flags. */
439*8d67ca89SAndroid Build Coastguard Worker	POP(%ebx)
440*8d67ca89SAndroid Build Coastguard Worker
441*8d67ca89SAndroid Build Coastguard Worker	jae	L(mm_large_page_loop_backward)
442*8d67ca89SAndroid Build Coastguard Worker
443*8d67ca89SAndroid Build Coastguard Worker	.p2align 4
444*8d67ca89SAndroid Build Coastguard WorkerL(mm_main_loop_backward):
445*8d67ca89SAndroid Build Coastguard Worker
446*8d67ca89SAndroid Build Coastguard Worker	prefetcht0 -128(%edi, %esi)
447*8d67ca89SAndroid Build Coastguard Worker
448*8d67ca89SAndroid Build Coastguard Worker	movdqu	-64(%edi, %esi), %xmm0
449*8d67ca89SAndroid Build Coastguard Worker	movdqu	-48(%edi, %esi), %xmm1
450*8d67ca89SAndroid Build Coastguard Worker	movdqu	-32(%edi, %esi), %xmm2
451*8d67ca89SAndroid Build Coastguard Worker	movdqu	-16(%edi, %esi), %xmm3
452*8d67ca89SAndroid Build Coastguard Worker	movdqa	%xmm0, -64(%edi)
453*8d67ca89SAndroid Build Coastguard Worker	movdqa	%xmm1, -48(%edi)
454*8d67ca89SAndroid Build Coastguard Worker	movdqa	%xmm2, -32(%edi)
455*8d67ca89SAndroid Build Coastguard Worker	movdqa	%xmm3, -16(%edi)
456*8d67ca89SAndroid Build Coastguard Worker	leal	-64(%edi), %edi
457*8d67ca89SAndroid Build Coastguard Worker	cmp	%edi, %ebx
458*8d67ca89SAndroid Build Coastguard Worker	jb	L(mm_main_loop_backward)
459*8d67ca89SAndroid Build Coastguard WorkerL(mm_main_loop_backward_end):
460*8d67ca89SAndroid Build Coastguard Worker	POP (%edi)
461*8d67ca89SAndroid Build Coastguard Worker	POP (%esi)
462*8d67ca89SAndroid Build Coastguard Worker	jmp	L(mm_recalc_len)
463*8d67ca89SAndroid Build Coastguard Worker
464*8d67ca89SAndroid Build Coastguard Worker/* Copy [0..16] and return.  */
/* Dispatches on the byte count in %ecx by testing bits of %cl from the
   highest downwards.  %eax is used as the source address and %edx as the
   destination address.  The bit tests identify the length only if %ecx
   really is within 0..16, as the comment above states.  The .p2align
   directives placed between a test and its branch can only insert
   padding, which leaves the flags untouched.  */
465*8d67ca89SAndroid Build Coastguard WorkerL(mm_len_0_16_bytes_backward):
	/* Bit 3 or bit 4 set: at least 8 bytes.  Despite the 9_16 in the
	   label name, a length of exactly 8 takes this branch as well.  */
466*8d67ca89SAndroid Build Coastguard Worker	testb	$24, %cl
467*8d67ca89SAndroid Build Coastguard Worker	jnz	L(mm_len_9_16_bytes_backward)
	/* Bit 2 set (bits 3 and 4 clear): 4 to 7 bytes.  */
468*8d67ca89SAndroid Build Coastguard Worker	testb	$4, %cl
469*8d67ca89SAndroid Build Coastguard Worker	.p2align 4,,5
470*8d67ca89SAndroid Build Coastguard Worker	jnz	L(mm_len_5_8_bytes_backward)
	/* Zero length: nothing to copy.  */
471*8d67ca89SAndroid Build Coastguard Worker	testl	%ecx, %ecx
472*8d67ca89SAndroid Build Coastguard Worker	.p2align 4,,2
473*8d67ca89SAndroid Build Coastguard Worker	je	L(mm_return)
	/* Bit 1 set (bits 2 to 4 clear): 2 or 3 bytes.  */
474*8d67ca89SAndroid Build Coastguard Worker	testb	$2, %cl
475*8d67ca89SAndroid Build Coastguard Worker	.p2align 4,,1
476*8d67ca89SAndroid Build Coastguard Worker	jne	L(mm_len_3_4_bytes_backward)
	/* Only a length of 1 is left.  The "last byte" and "first byte"
	   accesses then name the same byte.  Both loads precede both stores.
	   The second load overwrites %eax, which is no longer needed as a
	   pointer because L(mm_return) reloads %eax from %edx.  */
477*8d67ca89SAndroid Build Coastguard Worker	movzbl	-1(%eax,%ecx), %ebx
478*8d67ca89SAndroid Build Coastguard Worker	movzbl	(%eax), %eax
479*8d67ca89SAndroid Build Coastguard Worker	movb	%bl, -1(%edx,%ecx)
480*8d67ca89SAndroid Build Coastguard Worker	movb	%al, (%edx)
481*8d67ca89SAndroid Build Coastguard Worker	jmp	L(mm_return)
482*8d67ca89SAndroid Build Coastguard Worker
/* Copy a short range with two 16-bit moves: the last two bytes and the
   first two bytes of the %ecx-byte range at %eax, stored to the matching
   positions at %edx.  L(mm_len_0_16_bytes_backward) branches here when
   bit 1 of %cl is set and bits 2 to 4 are clear, i.e. for 2 or 3 bytes,
   so the two moves overlap or coincide.  Both loads precede both stores,
   and %eax is clobbered by the second load.  */
483*8d67ca89SAndroid Build Coastguard WorkerL(mm_len_3_4_bytes_backward):
484*8d67ca89SAndroid Build Coastguard Worker	movzwl	-2(%eax,%ecx), %ebx
485*8d67ca89SAndroid Build Coastguard Worker	movzwl	(%eax), %eax
486*8d67ca89SAndroid Build Coastguard Worker	movw	%bx, -2(%edx,%ecx)
487*8d67ca89SAndroid Build Coastguard Worker	movw	%ax, (%edx)
488*8d67ca89SAndroid Build Coastguard Worker	jmp	L(mm_return)
489*8d67ca89SAndroid Build Coastguard Worker
/* Copy the last 8 bytes of the %ecx-byte range as two 32-bit words, drop
   them from the count, and go back to L(mm_len_0_16_bytes_backward) to
   handle whatever is left.  %esi is borrowed as a second scratch register
   and restored before leaving; PUSH and POP are macros defined earlier in
   the file.  */
490*8d67ca89SAndroid Build Coastguard WorkerL(mm_len_9_16_bytes_backward):
491*8d67ca89SAndroid Build Coastguard Worker	PUSH (%esi)
	/* Both words are loaded before either one is stored.  */
492*8d67ca89SAndroid Build Coastguard Worker	movl	-4(%eax,%ecx), %ebx
493*8d67ca89SAndroid Build Coastguard Worker	movl	-8(%eax,%ecx), %esi
494*8d67ca89SAndroid Build Coastguard Worker	movl	%ebx, -4(%edx,%ecx)
495*8d67ca89SAndroid Build Coastguard Worker	movl	%esi, -8(%edx,%ecx)
	/* The trailing 8 bytes are done; %ecx now counts the leading part.  */
496*8d67ca89SAndroid Build Coastguard Worker	subl	$8, %ecx
497*8d67ca89SAndroid Build Coastguard Worker	POP (%esi)
498*8d67ca89SAndroid Build Coastguard Worker	jmp	L(mm_len_0_16_bytes_backward)
499*8d67ca89SAndroid Build Coastguard Worker
/* Copy a short range with two 32-bit moves: the first four bytes and the
   last four bytes of the %ecx-byte range at %eax, stored to the matching
   positions at %edx.  The two moves overlap whenever %ecx is below 8.
   Both loads precede both stores, and %eax is clobbered by the second
   load.  */
500*8d67ca89SAndroid Build Coastguard WorkerL(mm_len_5_8_bytes_backward):
501*8d67ca89SAndroid Build Coastguard Worker	movl	(%eax), %ebx
502*8d67ca89SAndroid Build Coastguard Worker	movl	-4(%eax,%ecx), %eax
503*8d67ca89SAndroid Build Coastguard Worker	movl	%ebx, (%edx)
504*8d67ca89SAndroid Build Coastguard Worker	movl	%eax, -4(%edx,%ecx)
	/* No jump here: execution falls through into L(mm_return).  */
505*8d67ca89SAndroid Build Coastguard Worker
/* Common exit: put the destination pointer held in %edx into %eax as the
   return value.  RETURN is a macro defined earlier in the file.  */
506*8d67ca89SAndroid Build Coastguard WorkerL(mm_return):
507*8d67ca89SAndroid Build Coastguard Worker	movl	%edx, %eax
508*8d67ca89SAndroid Build Coastguard Worker	RETURN
509*8d67ca89SAndroid Build Coastguard Worker
/* Exit for paths that still have %edi and %esi saved on the stack: set the
   return value from %edx, then restore %edi and %esi (in that order, so
   %esi must have been pushed first) before RETURN.  */
510*8d67ca89SAndroid Build Coastguard WorkerL(mm_return_pop_all):
511*8d67ca89SAndroid Build Coastguard Worker	movl	%edx, %eax
512*8d67ca89SAndroid Build Coastguard Worker	POP (%edi)
513*8d67ca89SAndroid Build Coastguard Worker	POP (%esi)
514*8d67ca89SAndroid Build Coastguard Worker	RETURN
515*8d67ca89SAndroid Build Coastguard Worker
516*8d67ca89SAndroid Build Coastguard Worker/* Big length copy forward part.  */
517*8d67ca89SAndroid Build Coastguard Worker
/* Forward copy loop, 64 bytes per pass, using non-temporal stores.  Each
   pass loads four unaligned 16-byte vectors starting at %eax + %edi,
   writes them starting at %edi, and then raises %edi by 64.
   NOTE(review): in this loop %eax is presumably (source - destination)
   rather than the raw source pointer, and %ebx the end address; both are
   set up before this chunk -- confirm against the caller of this label.  */
518*8d67ca89SAndroid Build Coastguard Worker	.p2align 4
519*8d67ca89SAndroid Build Coastguard WorkerL(mm_large_page_loop_forward):
	/* All four loads are issued before any store of this pass.  */
520*8d67ca89SAndroid Build Coastguard Worker	movdqu	(%eax, %edi), %xmm0
521*8d67ca89SAndroid Build Coastguard Worker	movdqu	16(%eax, %edi), %xmm1
522*8d67ca89SAndroid Build Coastguard Worker	movdqu	32(%eax, %edi), %xmm2
523*8d67ca89SAndroid Build Coastguard Worker	movdqu	48(%eax, %edi), %xmm3
	/* movntdq is a non-temporal store and needs a 16-byte aligned
	   address, so %edi has to be 16-byte aligned here (presumably
	   arranged before the loop -- TODO confirm).  */
524*8d67ca89SAndroid Build Coastguard Worker	movntdq	%xmm0, (%edi)
525*8d67ca89SAndroid Build Coastguard Worker	movntdq	%xmm1, 16(%edi)
526*8d67ca89SAndroid Build Coastguard Worker	movntdq	%xmm2, 32(%edi)
527*8d67ca89SAndroid Build Coastguard Worker	movntdq	%xmm3, 48(%edi)
528*8d67ca89SAndroid Build Coastguard Worker	leal	64(%edi), %edi
	/* Unsigned compare: keep looping while %ebx is above %edi.  */
529*8d67ca89SAndroid Build Coastguard Worker	cmp	%edi, %ebx
530*8d67ca89SAndroid Build Coastguard Worker	ja	L(mm_large_page_loop_forward)
	/* Order the non-temporal stores ahead of any later stores.  */
531*8d67ca89SAndroid Build Coastguard Worker	sfence
	/* L(mm_copy_remaining_forward) lies outside this chunk.  */
532*8d67ca89SAndroid Build Coastguard Worker	jmp	L(mm_copy_remaining_forward)
533*8d67ca89SAndroid Build Coastguard Worker
534*8d67ca89SAndroid Build Coastguard Worker/* Big length copy backward part.  */
/* Backward copy loop, 64 bytes per pass, using non-temporal stores.  Each
   pass loads four unaligned 16-byte vectors from the 64 bytes just below
   %edi + %esi, writes them to the 64 bytes just below %edi, and then
   lowers %edi by 64.  It mirrors L(mm_main_loop_backward) but has no
   prefetch and stores with movntdq instead of movdqa.
   NOTE(review): %esi is presumably (source - destination) and %ebx the
   address at which the loop must stop; both are set up before this chunk
   -- confirm.  */
535*8d67ca89SAndroid Build Coastguard Worker	.p2align 4
536*8d67ca89SAndroid Build Coastguard WorkerL(mm_large_page_loop_backward):
	/* All four loads are issued before any store of this pass.  */
537*8d67ca89SAndroid Build Coastguard Worker	movdqu	-64(%edi, %esi), %xmm0
538*8d67ca89SAndroid Build Coastguard Worker	movdqu	-48(%edi, %esi), %xmm1
539*8d67ca89SAndroid Build Coastguard Worker	movdqu	-32(%edi, %esi), %xmm2
540*8d67ca89SAndroid Build Coastguard Worker	movdqu	-16(%edi, %esi), %xmm3
	/* movntdq needs a 16-byte aligned address, so %edi has to be 16-byte
	   aligned here (presumably arranged before the loop -- TODO
	   confirm).  */
541*8d67ca89SAndroid Build Coastguard Worker	movntdq	%xmm0, -64(%edi)
542*8d67ca89SAndroid Build Coastguard Worker	movntdq	%xmm1, -48(%edi)
543*8d67ca89SAndroid Build Coastguard Worker	movntdq	%xmm2, -32(%edi)
544*8d67ca89SAndroid Build Coastguard Worker	movntdq	%xmm3, -16(%edi)
545*8d67ca89SAndroid Build Coastguard Worker	leal	-64(%edi), %edi
	/* Unsigned compare: keep looping while %ebx is below %edi.  */
546*8d67ca89SAndroid Build Coastguard Worker	cmp	%edi, %ebx
547*8d67ca89SAndroid Build Coastguard Worker	jb	L(mm_large_page_loop_backward)
	/* Order the non-temporal stores ahead of any later stores.  */
548*8d67ca89SAndroid Build Coastguard Worker	sfence
	/* Restore %edi and then %esi from the stack and continue at
	   L(mm_recalc_len), which lies outside this chunk.  */
549*8d67ca89SAndroid Build Coastguard Worker	POP (%edi)
550*8d67ca89SAndroid Build Coastguard Worker	POP (%esi)
551*8d67ca89SAndroid Build Coastguard Worker	jmp	L(mm_recalc_len)
552*8d67ca89SAndroid Build Coastguard Worker
553*8d67ca89SAndroid Build Coastguard WorkerEND (MEMMOVE)
554*8d67ca89SAndroid Build Coastguard Worker
555*8d67ca89SAndroid Build Coastguard Worker// N.B., `private/bionic_asm.h` provides ALIAS_SYMBOL, but that file provides
556*8d67ca89SAndroid Build Coastguard Worker// conflicting definitions for some macros in this file. Since ALIAS_SYMBOL is
557*8d67ca89SAndroid Build Coastguard Worker// small, inline it here.
// The two directives below export `memcpy` as a global symbol and give it the
// same value as MEMMOVE, so both names resolve to the routine that ends at
// END (MEMMOVE) above.
558*8d67ca89SAndroid Build Coastguard Worker.globl memcpy;
559*8d67ca89SAndroid Build Coastguard Worker.equ memcpy, MEMMOVE
560