1*412f47f9SXin Li /*
2*412f47f9SXin Li * strcpy
3*412f47f9SXin Li *
4*412f47f9SXin Li * Copyright (c) 2008-2020, Arm Limited.
5*412f47f9SXin Li * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
6*412f47f9SXin Li */
7*412f47f9SXin Li
8*412f47f9SXin Li #if defined (__thumb2__) && !defined (__thumb__)
9*412f47f9SXin Li
10*412f47f9SXin Li /* For GLIBC:
11*412f47f9SXin Li #include <string.h>
12*412f47f9SXin Li #include <memcopy.h>
13*412f47f9SXin Li
#undef strcpy
15*412f47f9SXin Li */
16*412f47f9SXin Li
/* Assembler operand strings for the word-at-a-time zero-byte test
     (w - 0x01010101) & ~w & 0x80808080
   performed by __strcpy_arm.  magic1 supplies the 0x01010101 operand
   and magic2 the 0x80808080 operand.

   With Thumb-2 both are spelled as immediates and REG is ignored.
   Otherwise REG is stringized: it names a register that the caller of
   the macro has loaded with 0x01010101, and magic2 derives 0x80808080
   from that same register with a left shift by 7.  */
#ifdef __thumb2__
#define magic1(REG) "#0x01010101"
#define magic2(REG) "#0x80808080"
#else
#define magic1(REG) #REG
#define magic2(REG) #REG ", lsl #7"
#endif
24*412f47f9SXin Li
25*412f47f9SXin Li char* __attribute__((naked))
__strcpy_arm(char * dst,const char * src)26*412f47f9SXin Li __strcpy_arm (char* dst, const char* src)
27*412f47f9SXin Li {
28*412f47f9SXin Li __asm__ (
29*412f47f9SXin Li "pld [r1, #0]\n\t"
30*412f47f9SXin Li "eor r2, r0, r1\n\t"
31*412f47f9SXin Li "mov ip, r0\n\t"
32*412f47f9SXin Li "tst r2, #3\n\t"
33*412f47f9SXin Li "bne 4f\n\t"
34*412f47f9SXin Li "tst r1, #3\n\t"
35*412f47f9SXin Li "bne 3f\n"
36*412f47f9SXin Li "5:\n\t"
37*412f47f9SXin Li # ifndef __thumb2__
38*412f47f9SXin Li "str r5, [sp, #-4]!\n\t"
39*412f47f9SXin Li "mov r5, #0x01\n\t"
40*412f47f9SXin Li "orr r5, r5, r5, lsl #8\n\t"
41*412f47f9SXin Li "orr r5, r5, r5, lsl #16\n\t"
42*412f47f9SXin Li # endif
43*412f47f9SXin Li
44*412f47f9SXin Li "str r4, [sp, #-4]!\n\t"
45*412f47f9SXin Li "tst r1, #4\n\t"
46*412f47f9SXin Li "ldr r3, [r1], #4\n\t"
47*412f47f9SXin Li "beq 2f\n\t"
48*412f47f9SXin Li "sub r2, r3, "magic1(r5)"\n\t"
49*412f47f9SXin Li "bics r2, r2, r3\n\t"
50*412f47f9SXin Li "tst r2, "magic2(r5)"\n\t"
51*412f47f9SXin Li "itt eq\n\t"
52*412f47f9SXin Li "streq r3, [ip], #4\n\t"
53*412f47f9SXin Li "ldreq r3, [r1], #4\n"
54*412f47f9SXin Li "bne 1f\n\t"
55*412f47f9SXin Li /* Inner loop. We now know that r1 is 64-bit aligned, so we
56*412f47f9SXin Li can safely fetch up to two words. This allows us to avoid
57*412f47f9SXin Li load stalls. */
58*412f47f9SXin Li ".p2align 2\n"
59*412f47f9SXin Li "2:\n\t"
60*412f47f9SXin Li "pld [r1, #8]\n\t"
61*412f47f9SXin Li "ldr r4, [r1], #4\n\t"
62*412f47f9SXin Li "sub r2, r3, "magic1(r5)"\n\t"
63*412f47f9SXin Li "bics r2, r2, r3\n\t"
64*412f47f9SXin Li "tst r2, "magic2(r5)"\n\t"
65*412f47f9SXin Li "sub r2, r4, "magic1(r5)"\n\t"
66*412f47f9SXin Li "bne 1f\n\t"
67*412f47f9SXin Li "str r3, [ip], #4\n\t"
68*412f47f9SXin Li "bics r2, r2, r4\n\t"
69*412f47f9SXin Li "tst r2, "magic2(r5)"\n\t"
70*412f47f9SXin Li "itt eq\n\t"
71*412f47f9SXin Li "ldreq r3, [r1], #4\n\t"
72*412f47f9SXin Li "streq r4, [ip], #4\n\t"
73*412f47f9SXin Li "beq 2b\n\t"
74*412f47f9SXin Li "mov r3, r4\n"
75*412f47f9SXin Li "1:\n\t"
76*412f47f9SXin Li # ifdef __ARMEB__
77*412f47f9SXin Li "rors r3, r3, #24\n\t"
78*412f47f9SXin Li # endif
79*412f47f9SXin Li "strb r3, [ip], #1\n\t"
80*412f47f9SXin Li "tst r3, #0xff\n\t"
81*412f47f9SXin Li # ifdef __ARMEL__
82*412f47f9SXin Li "ror r3, r3, #8\n\t"
83*412f47f9SXin Li # endif
84*412f47f9SXin Li "bne 1b\n\t"
85*412f47f9SXin Li "ldr r4, [sp], #4\n\t"
86*412f47f9SXin Li # ifndef __thumb2__
87*412f47f9SXin Li "ldr r5, [sp], #4\n\t"
88*412f47f9SXin Li # endif
89*412f47f9SXin Li "BX LR\n"
90*412f47f9SXin Li
91*412f47f9SXin Li /* Strings have the same offset from word alignment, but it's
92*412f47f9SXin Li not zero. */
93*412f47f9SXin Li "3:\n\t"
94*412f47f9SXin Li "tst r1, #1\n\t"
95*412f47f9SXin Li "beq 1f\n\t"
96*412f47f9SXin Li "ldrb r2, [r1], #1\n\t"
97*412f47f9SXin Li "strb r2, [ip], #1\n\t"
98*412f47f9SXin Li "cmp r2, #0\n\t"
99*412f47f9SXin Li "it eq\n"
100*412f47f9SXin Li "BXEQ LR\n"
101*412f47f9SXin Li "1:\n\t"
102*412f47f9SXin Li "tst r1, #2\n\t"
103*412f47f9SXin Li "beq 5b\n\t"
104*412f47f9SXin Li "ldrh r2, [r1], #2\n\t"
105*412f47f9SXin Li # ifdef __ARMEB__
106*412f47f9SXin Li "tst r2, #0xff00\n\t"
107*412f47f9SXin Li "iteet ne\n\t"
108*412f47f9SXin Li "strneh r2, [ip], #2\n\t"
109*412f47f9SXin Li "lsreq r2, r2, #8\n\t"
110*412f47f9SXin Li "streqb r2, [ip]\n\t"
111*412f47f9SXin Li "tstne r2, #0xff\n\t"
112*412f47f9SXin Li # else
113*412f47f9SXin Li "tst r2, #0xff\n\t"
114*412f47f9SXin Li "itet ne\n\t"
115*412f47f9SXin Li "strneh r2, [ip], #2\n\t"
116*412f47f9SXin Li "streqb r2, [ip]\n\t"
117*412f47f9SXin Li "tstne r2, #0xff00\n\t"
118*412f47f9SXin Li # endif
119*412f47f9SXin Li "bne 5b\n\t"
120*412f47f9SXin Li "BX LR\n"
121*412f47f9SXin Li
122*412f47f9SXin Li /* src and dst do not have a common word-alignement. Fall back to
123*412f47f9SXin Li byte copying. */
124*412f47f9SXin Li "4:\n\t"
125*412f47f9SXin Li "ldrb r2, [r1], #1\n\t"
126*412f47f9SXin Li "strb r2, [ip], #1\n\t"
127*412f47f9SXin Li "cmp r2, #0\n\t"
128*412f47f9SXin Li "bne 4b\n\t"
129*412f47f9SXin Li "BX LR");
130*412f47f9SXin Li }
131*412f47f9SXin Li /* For GLIBC: libc_hidden_builtin_def (strcpy) */
132*412f47f9SXin Li
133*412f47f9SXin Li #endif /* defined (__thumb2__) && !defined (__thumb__) */
134