/*
 * strcmp for ARMv7
 *
 * Copyright (c) 2012-2022, Arm Limited.
 * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
 */

#if __ARM_ARCH >= 7 && __ARM_ARCH_ISA_ARM >= 1

/* Implementation of strcmp for ARMv7 when DSP instructions are
   available.  Use ldrd to support wider loads, provided the data
   is sufficiently aligned.  Use saturating arithmetic to optimize
   the compares.  */

#include "asmdefs.h"

/* Build Options:
   STRCMP_NO_PRECHECK: Don't run a quick pre-check of the first
   byte in the string.  If comparing completely random strings
   the pre-check will save time, since there is a very high
   probability of a mismatch in the first character: we save
   significant overhead if this is the common case.  However,
   if strings are likely to be identical (e.g. because we're
   verifying a hit in a hash table), then this check is largely
   redundant.  */

#define STRCMP_NO_PRECHECK	0

/* Ensure the .cantunwind directive is prepended to .fnend.
   Leaf functions cannot throw exceptions - EHABI only supports
   synchronous exceptions.  */
#define IS_LEAF

	/* This version uses Thumb-2 code.  */
	.thumb
	.syntax unified

#ifdef __ARM_BIG_ENDIAN
#define S2LO		lsl
#define S2LOEQ		lsleq
#define S2HI		lsr
#define MSB		0x000000ff
#define LSB		0xff000000
#define BYTE0_OFFSET	24
#define BYTE1_OFFSET	16
#define BYTE2_OFFSET	8
#define BYTE3_OFFSET	0
#else /* not __ARM_BIG_ENDIAN */
#define S2LO		lsr
#define S2LOEQ		lsreq
#define S2HI		lsl
#define BYTE0_OFFSET	0
#define BYTE1_OFFSET	8
#define BYTE2_OFFSET	16
#define BYTE3_OFFSET	24
#define MSB		0xff000000
#define LSB		0x000000ff
#endif /* not __ARM_BIG_ENDIAN */

/* Parameters and result.  */
#define src1		r0
#define src2		r1
#define result		r0	/* Overlaps src1.  */

/* Internal variables.  */
#define tmp1		r4
#define tmp2		r5
#define const_m1	r12

/* Additional internal variables for 64-bit aligned data.  */
#define data1a		r2
#define data1b		r3
#define data2a		r6
#define data2b		r7
#define syndrome_a	tmp1
#define syndrome_b	tmp2
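
/* Zero-byte and difference detection used throughout: const_m1 is kept
   at 0xffffffff.  UADD8 adds 0xff to each byte of a data word, setting
   that byte's GE flag exactly when the byte is non-zero; SEL then
   picks, per byte, either the EOR of the two data words (GE set) or
   0xff (GE clear).  A minimal C sketch of the per-byte logic, with
   hypothetical names, not part of the build:

	ge   = (data1_byte != 0);			(uadd8 with 0xff)
	synd = ge ? (data1_byte ^ data2_byte) : 0xff;	(sel)

   The syndrome word is therefore zero iff the words match and contain
   no NUL; a non-zero byte marks the first difference or the end of
   the string.  */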

/* Additional internal variables for 32-bit aligned data.  */
#define data1		r2
#define data2		r3
#define syndrome	tmp2


	/* Macro to compute and return the result value for word-aligned
	   cases.  */
	.macro strcmp_epilogue_aligned synd d1 d2 restore_r6
#ifdef __ARM_BIG_ENDIAN
	/* If data1 contains a zero byte, then syndrome will contain a 1 in
	   bit 7 of that byte.  Otherwise, the highest set bit in the
	   syndrome will highlight the first different bit.  It is therefore
	   sufficient to extract the eight bits starting with the syndrome
	   bit.  */
	clz	tmp1, \synd
	lsl	r1, \d2, tmp1
	.if \restore_r6
	ldrd	r6, r7, [sp, #8]
	.endif
	.cfi_restore 6
	.cfi_restore 7
	lsl	\d1, \d1, tmp1
	.cfi_remember_state
	lsr	result, \d1, #24
	ldrd	r4, r5, [sp], #16
	.cfi_restore 4
	.cfi_restore 5
	.cfi_adjust_cfa_offset -16
	sub	result, result, r1, lsr #24
	epilogue push_ip=HAVE_PAC_LEAF
#else
	/* To use the big-endian trick we'd have to reverse all three words,
	   and that's slower than this approach.  */
	rev	\synd, \synd
	clz	tmp1, \synd
	bic	tmp1, tmp1, #7	/* Round down to a byte boundary.  */
	lsr	r1, \d2, tmp1
	.cfi_remember_state
	.if \restore_r6
	ldrd	r6, r7, [sp, #8]
	.endif
	.cfi_restore 6
	.cfi_restore 7
	lsr	\d1, \d1, tmp1
	and	result, \d1, #255
	and	r1, r1, #255
	ldrd	r4, r5, [sp], #16
	.cfi_restore 4
	.cfi_restore 5
	.cfi_adjust_cfa_offset -16
	sub	result, result, r1

	epilogue push_ip=HAVE_PAC_LEAF
#endif
	.endm

ENTRY(__strcmp_arm)
	prologue push_ip=HAVE_PAC_LEAF
#if STRCMP_NO_PRECHECK == 0
	/* Quick pre-check: exit early if the first byte of src1 is NUL
	   or differs from the first byte of src2.  */
	ldrb	r2, [src1]
	ldrb	r3, [src2]
	cmp	r2, #1
	it	cs
	cmpcs	r2, r3
	bne	L(fastpath_exit)
#endif
	strd	r4, r5, [sp, #-16]!
	.cfi_adjust_cfa_offset 16
	.cfi_rel_offset 4, 0
	.cfi_rel_offset 5, 4
	orr	tmp1, src1, src2
	strd	r6, r7, [sp, #8]
	.cfi_rel_offset 6, 8
	.cfi_rel_offset 7, 12
	mvn	const_m1, #0
	lsl	r2, tmp1, #29	/* r2 == 0 iff both pointers are 8-byte aligned.  */
	cbz	r2, L(loop_aligned8)

L(not_aligned):
	eor	tmp1, src1, src2
	tst	tmp1, #7
	bne	L(misaligned8)

	/* Deal with mutual misalignment by aligning downwards and then
	   masking off the unwanted loaded data to prevent a difference.  */
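	/* A minimal sketch of the masking in C (little-endian case; the
	   names are hypothetical, not part of the build):

		unsigned off  = 8 * (misalign & 3);
		uint32_t keep = 0xffffffffu << off;	(S2HI of const_m1)
		data1 |= ~keep;				(ORN)
		data2 |= ~keep;

	   The bytes loaded from before the start of the strings become
	   0xff in both words, so they always compare equal and, being
	   non-zero, can never be mistaken for a NUL terminator.  */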
	and	tmp1, src1, #7
	bic	src1, src1, #7
	and	tmp2, tmp1, #3
	bic	src2, src2, #7
	lsl	tmp2, tmp2, #3	/* Bytes -> bits.  */
	ldrd	data1a, data1b, [src1], #16
	tst	tmp1, #4
	ldrd	data2a, data2b, [src2], #16
	/* In thumb code we can't use MVN with a register shift, but
	   we do have ORN.  */
	S2HI	tmp1, const_m1, tmp2
	orn	data1a, data1a, tmp1
	orn	data2a, data2a, tmp1
	beq	L(start_realigned8)
	orn	data1b, data1b, tmp1
	mov	data1a, const_m1
	orn	data2b, data2b, tmp1
	mov	data2a, const_m1
	b	L(start_realigned8)

	/* Unroll the inner loop by a factor of 2, giving 16 bytes per
	   pass.  */
	.p2align 5,,12	/* Don't start in the tail bytes of a cache line.  */
	.p2align 2	/* Always word aligned.  */
L(loop_aligned8):
	ldrd	data1a, data1b, [src1], #16
	ldrd	data2a, data2b, [src2], #16
L(start_realigned8):
	uadd8	syndrome_b, data1a, const_m1	/* Only want GE bits.  */
	eor	syndrome_a, data1a, data2a
	sel	syndrome_a, syndrome_a, const_m1
	cbnz	syndrome_a, L(diff_in_a)
	uadd8	syndrome_b, data1b, const_m1	/* Only want GE bits.  */
	eor	syndrome_b, data1b, data2b
	sel	syndrome_b, syndrome_b, const_m1
	cbnz	syndrome_b, L(diff_in_b)

	/* Second half of the unrolled loop: reload the two doublewords
	   just stepped over at negative offsets.  */
	ldrd	data1a, data1b, [src1, #-8]
	ldrd	data2a, data2b, [src2, #-8]
	uadd8	syndrome_b, data1a, const_m1	/* Only want GE bits.  */
	eor	syndrome_a, data1a, data2a
	sel	syndrome_a, syndrome_a, const_m1
	uadd8	syndrome_b, data1b, const_m1	/* Only want GE bits.  */
	eor	syndrome_b, data1b, data2b
	sel	syndrome_b, syndrome_b, const_m1
	/* Can't use CBZ for backwards branch.  */
	orrs	syndrome_b, syndrome_b, syndrome_a	/* Only need if s_a == 0 */
	beq	L(loop_aligned8)

L(diff_found):
	cbnz	syndrome_a, L(diff_in_a)

L(diff_in_b):
	strcmp_epilogue_aligned syndrome_b, data1b, data2b 1

L(diff_in_a):
	.cfi_restore_state
	strcmp_epilogue_aligned syndrome_a, data1a, data2a 1

	.cfi_restore_state
L(misaligned8):
	tst	tmp1, #3
	bne	L(misaligned4)
	ands	tmp1, src1, #3
	bne	L(mutual_align4)
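
	/* For reference, a rough C outline of the aligned word-compare
	   loops (the 8-byte loop above and the 4-byte loop below); the
	   names are hypothetical, not part of the build:

		do {
			w1 = *p1++;
			w2 = *p2++;
			synd = zero_or_diff (w1, w2);	(uadd8/eor/sel)
		} while (synd == 0);

	   The strcmp_epilogue_aligned macro then converts the syndrome
	   into the byte-difference result.  */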

	/* Unrolled by a factor of 2, to reduce the number of post-increment
	   operations.  */
L(loop_aligned4):
	ldr	data1, [src1], #8
	ldr	data2, [src2], #8
L(start_realigned4):
	uadd8	syndrome, data1, const_m1	/* Only need GE bits.  */
	eor	syndrome, data1, data2
	sel	syndrome, syndrome, const_m1
	cbnz	syndrome, L(aligned4_done)
	ldr	data1, [src1, #-4]
	ldr	data2, [src2, #-4]
	uadd8	syndrome, data1, const_m1
	eor	syndrome, data1, data2
	sel	syndrome, syndrome, const_m1
	cmp	syndrome, #0
	beq	L(loop_aligned4)

L(aligned4_done):
	strcmp_epilogue_aligned syndrome, data1, data2, 0

L(mutual_align4):
	.cfi_restore_state
	/* Deal with mutual misalignment by aligning downwards and then
	   masking off the unwanted loaded data to prevent a difference.  */
	lsl	tmp1, tmp1, #3	/* Bytes -> bits.  */
	bic	src1, src1, #3
	ldr	data1, [src1], #8
	bic	src2, src2, #3
	ldr	data2, [src2], #8

	/* In thumb code we can't use MVN with a register shift, but
	   we do have ORN.  */
	S2HI	tmp1, const_m1, tmp1
	orn	data1, data1, tmp1
	orn	data2, data2, tmp1
	b	L(start_realigned4)

L(misaligned4):
	ands	tmp1, src1, #3
	beq	L(src1_aligned)
	sub	src2, src2, tmp1
	bic	src1, src1, #3
	lsls	tmp1, tmp1, #31	/* C=src1[1], Z=src1[0].  */
	ldr	data1, [src1], #4
	beq	L(aligned_m2)
	bcs	L(aligned_m1)
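
	/* Byte-at-a-time catch-up on the word already loaded from src1,
	   until src2 reaches a word boundary.  A loose C equivalent, with
	   hypothetical names, not part of the build:

		for (; off < 4; off++) {
			c1 = (data1 >> (8 * off)) & 0xff;	(uxtb/ror)
			c2 = *src2++;
			if (c1 != c2 || c1 == 0)
				return c1 - c2;
		}
	   */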

#if STRCMP_NO_PRECHECK == 1
	ldrb	data2, [src2, #1]
	uxtb	tmp1, data1, ror #BYTE1_OFFSET
	subs	tmp1, tmp1, data2
	bne	L(misaligned_exit)
	cbz	data2, L(misaligned_exit)

L(aligned_m2):
	ldrb	data2, [src2, #2]
	uxtb	tmp1, data1, ror #BYTE2_OFFSET
	subs	tmp1, tmp1, data2
	bne	L(misaligned_exit)
	cbz	data2, L(misaligned_exit)

L(aligned_m1):
	ldrb	data2, [src2, #3]
	uxtb	tmp1, data1, ror #BYTE3_OFFSET
	subs	tmp1, tmp1, data2
	bne	L(misaligned_exit)
	add	src2, src2, #4
	cbnz	data2, L(src1_aligned)
#else /* STRCMP_NO_PRECHECK */
	/* If we've done the pre-check, then we don't need to check the
	   first byte again here.  */
	ldrb	data2, [src2, #2]
	uxtb	tmp1, data1, ror #BYTE2_OFFSET
	subs	tmp1, tmp1, data2
	bne	L(misaligned_exit)
	cbz	data2, L(misaligned_exit)

L(aligned_m2):
	ldrb	data2, [src2, #3]
	uxtb	tmp1, data1, ror #BYTE3_OFFSET
	subs	tmp1, tmp1, data2
	bne	L(misaligned_exit)
	cbnz	data2, L(aligned_m1)
#endif

L(misaligned_exit):
	.cfi_remember_state
	mov	result, tmp1
	ldr	r4, [sp], #16
	.cfi_restore 4
	.cfi_adjust_cfa_offset -16
	epilogue push_ip=HAVE_PAC_LEAF

#if STRCMP_NO_PRECHECK == 0
L(fastpath_exit):
	.cfi_restore_state
	.cfi_remember_state
	sub	r0, r2, r3
	epilogue push_ip=HAVE_PAC_LEAF

L(aligned_m1):
	.cfi_restore_state
	.cfi_remember_state
	add	src2, src2, #4
#endif
L(src1_aligned):
	.cfi_restore_state
	/* src1 is word aligned, but src2 has no common alignment
	   with it.  */
	ldr	data1, [src1], #4
	lsls	tmp1, src2, #31	/* C=src2[1], Z=src2[0].  */

	bic	src2, src2, #3
	ldr	data2, [src2], #4
	bhi	L(overlap1)	/* C=1, Z=0 => src2[1:0] = 0b11.  */
	bcs	L(overlap2)	/* C=1, Z=1 => src2[1:0] = 0b10.  */

	/* (overlap3) C=0, Z=0 => src2[1:0] = 0b01.  */
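	/* In L(overlapN), src1 is word-aligned and src2 is offset so that
	   N bytes of each aligned src2 word line up with the low N bytes
	   of data1.  Each pass compares those N bytes, then folds the
	   remaining 4 - N bytes in from the next src2 word using the
	   S2LO/S2HI shifts, so every load stays aligned.  */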
L(overlap3):
	bic	tmp1, data1, #MSB
	uadd8	syndrome, data1, const_m1
	eors	syndrome, tmp1, data2, S2LO #8
	sel	syndrome, syndrome, const_m1
	bne	4f
	cbnz	syndrome, 5f
	ldr	data2, [src2], #4
	eor	tmp1, tmp1, data1
	cmp	tmp1, data2, S2HI #24
	bne	6f
	ldr	data1, [src1], #4
	b	L(overlap3)
4:
	S2LO	data2, data2, #8
	b	L(strcmp_tail)

5:
	bics	syndrome, syndrome, #MSB
	bne	L(strcmp_done_equal)

	/* We can only get here if the MSB of data1 contains 0, so
	   fast-path the exit.  */
	ldrb	result, [src2]
	.cfi_remember_state
	ldrd	r4, r5, [sp], #16
	.cfi_restore 4
	.cfi_restore 5
	/* R6/7 not used in this sequence.  */
	.cfi_restore 6
	.cfi_restore 7
	.cfi_adjust_cfa_offset -16
	neg	result, result
	epilogue push_ip=HAVE_PAC_LEAF
6:
	.cfi_restore_state
	S2LO	data1, data1, #24
	and	data2, data2, #LSB
	b	L(strcmp_tail)

	.p2align 5,,12	/* Ensure at least 3 instructions in cache line.  */
L(overlap2):
	and	tmp1, data1, const_m1, S2LO #16
	uadd8	syndrome, data1, const_m1
	eors	syndrome, tmp1, data2, S2LO #16
	sel	syndrome, syndrome, const_m1
	bne	4f
	cbnz	syndrome, 5f
	ldr	data2, [src2], #4
	eor	tmp1, tmp1, data1
	cmp	tmp1, data2, S2HI #16
	bne	6f
	ldr	data1, [src1], #4
	b	L(overlap2)
4:
	S2LO	data2, data2, #16
	b	L(strcmp_tail)
5:
	ands	syndrome, syndrome, const_m1, S2LO #16
	bne	L(strcmp_done_equal)

	ldrh	data2, [src2]
	S2LO	data1, data1, #16
#ifdef __ARM_BIG_ENDIAN
	lsl	data2, data2, #16
#endif
	b	L(strcmp_tail)

6:
	S2LO	data1, data1, #16
	and	data2, data2, const_m1, S2LO #16
	b	L(strcmp_tail)

	.p2align 5,,12	/* Ensure at least 3 instructions in cache line.  */
L(overlap1):
	and	tmp1, data1, #LSB
	uadd8	syndrome, data1, const_m1
	eors	syndrome, tmp1, data2, S2LO #24
	sel	syndrome, syndrome, const_m1
	bne	4f
	cbnz	syndrome, 5f
	ldr	data2, [src2], #4
	eor	tmp1, tmp1, data1
	cmp	tmp1, data2, S2HI #8
	bne	6f
	ldr	data1, [src1], #4
	b	L(overlap1)
4:
	S2LO	data2, data2, #24
	b	L(strcmp_tail)
5:
	tst	syndrome, #LSB
	bne	L(strcmp_done_equal)
	ldr	data2, [src2]
6:
	S2LO	data1, data1, #8
	bic	data2, data2, #MSB
	b	L(strcmp_tail)

L(strcmp_done_equal):
	mov	result, #0
	.cfi_remember_state
	ldrd	r4, r5, [sp], #16
	.cfi_restore 4
	.cfi_restore 5
	/* R6/7 not used in this sequence.  */
	.cfi_restore 6
	.cfi_restore 7
	.cfi_adjust_cfa_offset -16
	epilogue push_ip=HAVE_PAC_LEAF

L(strcmp_tail):
	.cfi_restore_state
#ifndef __ARM_BIG_ENDIAN
	rev	data1, data1
	rev	data2, data2
	/* Now everything looks big-endian...  */
#endif
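	/* With both words now in big-endian byte order, extracting the
	   eight bits that start at the first mismatching (or NUL) bit is
	   enough to order the strings.  A minimal C sketch (hypothetical
	   names, not part of the build):

		shift = clz (syndrome);
		return ((data1 << shift) >> 24) - ((data2 << shift) >> 24);
	   */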
	uadd8	tmp1, data1, const_m1
	eor	tmp1, data1, data2
	sel	syndrome, tmp1, const_m1
	clz	tmp1, syndrome
	lsl	data1, data1, tmp1
	lsl	data2, data2, tmp1
	lsr	result, data1, #24
	ldrd	r4, r5, [sp], #16
	.cfi_restore 4
	.cfi_restore 5
	/* R6/7 not used in this sequence.  */
	.cfi_restore 6
	.cfi_restore 7
	.cfi_adjust_cfa_offset -16
	sub	result, result, data2, lsr #24
	epilogue push_ip=HAVE_PAC_LEAF

END (__strcmp_arm)

#endif /* __ARM_ARCH >= 7 && __ARM_ARCH_ISA_ARM >= 1 */