xref: /aosp_15_r20/external/arm-optimized-routines/string/arm/strcpy.c (revision 412f47f9e737e10ed5cc46ec6a8d7fa2264f8a14)
1*412f47f9SXin Li /*
2*412f47f9SXin Li  * strcpy
3*412f47f9SXin Li  *
4*412f47f9SXin Li  * Copyright (c) 2008-2020, Arm Limited.
5*412f47f9SXin Li  * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
6*412f47f9SXin Li  */
7*412f47f9SXin Li 
8*412f47f9SXin Li #if defined (__thumb2__) && !defined (__thumb__)
9*412f47f9SXin Li 
10*412f47f9SXin Li /* For GLIBC:
11*412f47f9SXin Li #include <string.h>
12*412f47f9SXin Li #include <memcopy.h>
13*412f47f9SXin Li 
14*412f47f9SXin Li #undef strcpy
15*412f47f9SXin Li */
16*412f47f9SXin Li 
/* Operand text for the zero-byte test used by __strcpy_arm, which computes
   (w - 0x01010101) & ~w & 0x80808080 on each loaded word w.
   magic1 supplies the 0x01010101 operand and magic2 the 0x80808080 operand.
   When __thumb2__ is defined both expand to immediates and REG is ignored.
   Otherwise they expand to the stringized register name, so REG must already
   hold 0x01010101 ("REG, lsl #7" is then 0x80808080).  */
17*412f47f9SXin Li #ifdef __thumb2__
18*412f47f9SXin Li #define magic1(REG) "#0x01010101"
19*412f47f9SXin Li #define magic2(REG) "#0x80808080"
20*412f47f9SXin Li #else
21*412f47f9SXin Li #define magic1(REG) #REG
22*412f47f9SXin Li #define magic2(REG) #REG ", lsl #7"
23*412f47f9SXin Li #endif
24*412f47f9SXin Li 
/* Copy the NUL-terminated string at src to dst, terminator included.

   The function is declared naked, so the asm statement below is the whole
   body and performs its own returns (BX LR).  It reads dst from r0 and src
   from r1.  r0 is never written: ip is used as the running destination
   pointer, so r0 still holds dst when the function returns.

   Three strategies are used, selected by the low address bits:
     - label 4: dst and src differ in their low two bits -> byte copy;
     - label 3: same offset, but not on a word boundary -> copy up to one
       byte and one halfword to reach a word boundary, then join label 5;
     - label 5: word-at-a-time copy, two words per loop iteration, with a
       byte-wise tail (first label 1) for the word that contains the NUL.

   r4 (and r5 when __thumb2__ is not defined) are pushed only at label 5 and
   popped only on the exit after the first label 1; every other return
   happens before anything has been pushed.  */
25*412f47f9SXin Li char* __attribute__((naked))
__strcpy_arm(char * dst,const char * src)26*412f47f9SXin Li __strcpy_arm (char* dst, const char* src)
27*412f47f9SXin Li {
28*412f47f9SXin Li   __asm__ (
       /* Preload hint for the start of src.  */
29*412f47f9SXin Li        "pld	[r1, #0]\n\t"
       /* r2 = dst ^ src; its low two bits are nonzero when the two pointers
	  sit at different offsets within a word.  */
30*412f47f9SXin Li        "eor	r2, r0, r1\n\t"
       /* ip walks the destination; r0 is left alone.  */
31*412f47f9SXin Li        "mov	ip, r0\n\t"
32*412f47f9SXin Li        "tst	r2, #3\n\t"
33*412f47f9SXin Li        "bne	4f\n\t"
       /* Same offset: if src is not on a word boundary, align first.  */
34*412f47f9SXin Li        "tst	r1, #3\n\t"
35*412f47f9SXin Li        "bne	3f\n"
       /* Word copy.  src is word aligned whenever control reaches here.  */
36*412f47f9SXin Li   "5:\n\t"
       /* Without Thumb-2 immediates: save r5 and build 0x01010101 in it for
	  magic1/magic2.  */
37*412f47f9SXin Li # ifndef __thumb2__
38*412f47f9SXin Li        "str	r5, [sp, #-4]!\n\t"
39*412f47f9SXin Li        "mov	r5, #0x01\n\t"
40*412f47f9SXin Li        "orr	r5, r5, r5, lsl #8\n\t"
41*412f47f9SXin Li        "orr	r5, r5, r5, lsl #16\n\t"
42*412f47f9SXin Li # endif
43*412f47f9SXin Li 
       /* Save r4; the loop uses it for the second word of each pair.  */
44*412f47f9SXin Li        "str	r4, [sp, #-4]!\n\t"
       /* Load the first word into r3.  The beq still tests the result of the
	  tst above (the ldr in between is not a flag-setting form): if bit 2
	  of src was clear, enter the loop directly.  */
45*412f47f9SXin Li        "tst	r1, #4\n\t"
46*412f47f9SXin Li        "ldr	r3, [r1], #4\n\t"
47*412f47f9SXin Li        "beq	2f\n\t"
       /* Bit 2 was set: handle this single word on its own.
	  (r3 - 0x01010101) & ~r3 & 0x80808080 is nonzero exactly when r3
	  contains a zero byte.  With no zero byte, store r3 and load the next
	  word; otherwise finish byte-wise at label 1.  */
48*412f47f9SXin Li        "sub	r2, r3, "magic1(r5)"\n\t"
49*412f47f9SXin Li        "bics	r2, r2, r3\n\t"
50*412f47f9SXin Li        "tst	r2, "magic2(r5)"\n\t"
51*412f47f9SXin Li        "itt	eq\n\t"
52*412f47f9SXin Li        "streq	r3, [ip], #4\n\t"
53*412f47f9SXin Li        "ldreq	r3, [r1], #4\n"
54*412f47f9SXin Li        "bne	1f\n\t"
55*412f47f9SXin Li        /* Inner loop.  We now know that r1 is 64-bit aligned, so we
56*412f47f9SXin Li 	  can safely fetch up to two words.  This allows us to avoid
57*412f47f9SXin Li 	  load stalls.  */
       /* On entry to each iteration r3 already holds the first word of the
	  pair; r4 is loaded with the second.  */
58*412f47f9SXin Li        ".p2align 2\n"
59*412f47f9SXin Li   "2:\n\t"
60*412f47f9SXin Li        "pld	[r1, #8]\n\t"
61*412f47f9SXin Li        "ldr	r4, [r1], #4\n\t"
       /* Zero-byte test on r3.  */
62*412f47f9SXin Li        "sub	r2, r3, "magic1(r5)"\n\t"
63*412f47f9SXin Li        "bics	r2, r2, r3\n\t"
64*412f47f9SXin Li        "tst	r2, "magic2(r5)"\n\t"
       /* Start the test on r4 early.  This sub is not a flag-setting form,
	  so the bne below still reflects the test on r3.  */
65*412f47f9SXin Li        "sub	r2, r4, "magic1(r5)"\n\t"
       /* r3 contains the NUL: finish with r3 at label 1.  */
66*412f47f9SXin Li        "bne	1f\n\t"
67*412f47f9SXin Li        "str	r3, [ip], #4\n\t"
       /* Complete the zero-byte test on r4.  */
68*412f47f9SXin Li        "bics	r2, r2, r4\n\t"
69*412f47f9SXin Li        "tst	r2, "magic2(r5)"\n\t"
       /* No zero byte in r4: fetch the next first word, store r4, loop.  */
70*412f47f9SXin Li        "itt	eq\n\t"
71*412f47f9SXin Li        "ldreq	r3, [r1], #4\n\t"
72*412f47f9SXin Li        "streq	r4, [ip], #4\n\t"
73*412f47f9SXin Li        "beq	2b\n\t"
       /* r4 contains the NUL: make it the word handled by the tail.  */
74*412f47f9SXin Li        "mov	r3, r4\n"
       /* Tail: r3 holds a word that contains a zero byte.  Store its bytes
	  one at a time, in memory order, until the zero byte has been
	  written.  */
75*412f47f9SXin Li   "1:\n\t"
       /* Big-endian: rotate the next byte in memory order (the most
	  significant one) down to bits 7:0 before storing.  */
76*412f47f9SXin Li # ifdef __ARMEB__
77*412f47f9SXin Li        "rors	r3, r3, #24\n\t"
78*412f47f9SXin Li # endif
79*412f47f9SXin Li        "strb	r3, [ip], #1\n\t"
       /* Was the byte just stored the terminator?  */
80*412f47f9SXin Li        "tst	r3, #0xff\n\t"
       /* Little-endian: bring the next byte down to bits 7:0.  This ror is
	  not a flag-setting form, so the bne still reflects the tst.  */
81*412f47f9SXin Li # ifdef __ARMEL__
82*412f47f9SXin Li        "ror	r3, r3, #8\n\t"
83*412f47f9SXin Li # endif
84*412f47f9SXin Li        "bne	1b\n\t"
       /* Terminator written: pop what label 5 pushed (reverse order) and
	  return.  */
85*412f47f9SXin Li        "ldr	r4, [sp], #4\n\t"
86*412f47f9SXin Li # ifndef __thumb2__
87*412f47f9SXin Li        "ldr	r5, [sp], #4\n\t"
88*412f47f9SXin Li # endif
89*412f47f9SXin Li        "BX LR\n"
90*412f47f9SXin Li 
91*412f47f9SXin Li        /* Strings have the same offset from word alignment, but it's
92*412f47f9SXin Li 	  not zero.  */
93*412f47f9SXin Li   "3:\n\t"
       /* Odd src address: copy one byte.  If it was the terminator, return
	  right away (nothing has been pushed on this path).  */
94*412f47f9SXin Li        "tst	r1, #1\n\t"
95*412f47f9SXin Li        "beq	1f\n\t"
96*412f47f9SXin Li        "ldrb	r2, [r1], #1\n\t"
97*412f47f9SXin Li        "strb	r2, [ip], #1\n\t"
98*412f47f9SXin Li        "cmp	r2, #0\n\t"
99*412f47f9SXin Li        "it	eq\n"
100*412f47f9SXin Li        "BXEQ LR\n"
       /* src is now halfword aligned.  If bit 1 is clear it is also word
	  aligned, so go to the word copy; otherwise copy one halfword.  */
101*412f47f9SXin Li   "1:\n\t"
102*412f47f9SXin Li        "tst	r1, #2\n\t"
103*412f47f9SXin Li        "beq	5b\n\t"
104*412f47f9SXin Li        "ldrh	r2, [r1], #2\n\t"
       /* Examine the two bytes in memory order.  If the first is nonzero,
	  store the whole halfword and then test the second byte.  If the
	  first is zero, store only that zero byte; the flags stay "eq".
	  The bne after the endian-specific code continues to the word copy
	  only when both bytes were nonzero.  */
105*412f47f9SXin Li # ifdef __ARMEB__
       /* Big-endian: the first byte in memory is bits 15:8, so it is
	  shifted down before the single-byte store.  */
106*412f47f9SXin Li        "tst	r2, #0xff00\n\t"
107*412f47f9SXin Li        "iteet	ne\n\t"
108*412f47f9SXin Li        "strneh	r2, [ip], #2\n\t"
109*412f47f9SXin Li        "lsreq	r2, r2, #8\n\t"
110*412f47f9SXin Li        "streqb	r2, [ip]\n\t"
111*412f47f9SXin Li        "tstne	r2, #0xff\n\t"
112*412f47f9SXin Li # else
       /* Little-endian: the first byte in memory is bits 7:0.  */
113*412f47f9SXin Li        "tst	r2, #0xff\n\t"
114*412f47f9SXin Li        "itet	ne\n\t"
115*412f47f9SXin Li        "strneh	r2, [ip], #2\n\t"
116*412f47f9SXin Li        "streqb	r2, [ip]\n\t"
117*412f47f9SXin Li        "tstne	r2, #0xff00\n\t"
118*412f47f9SXin Li # endif
119*412f47f9SXin Li        "bne	5b\n\t"
       /* A terminator was written above; return (nothing was pushed).  */
120*412f47f9SXin Li        "BX LR\n"
121*412f47f9SXin Li 
122*412f47f9SXin Li        /* src and dst do not have a common word-alignment.  Fall back to
123*412f47f9SXin Li 	  byte copying.  */
       /* Copy one byte per iteration; stop after the zero byte has been
	  stored.  */
124*412f47f9SXin Li   "4:\n\t"
125*412f47f9SXin Li        "ldrb	r2, [r1], #1\n\t"
126*412f47f9SXin Li        "strb	r2, [ip], #1\n\t"
127*412f47f9SXin Li        "cmp	r2, #0\n\t"
128*412f47f9SXin Li        "bne	4b\n\t"
129*412f47f9SXin Li        "BX LR");
130*412f47f9SXin Li }
131*412f47f9SXin Li /* For GLIBC: libc_hidden_builtin_def (strcpy) */
132*412f47f9SXin Li 
133*412f47f9SXin Li #endif /* defined (__thumb2__) && !defined (__thumb__)  */
134