/* SPDX-License-Identifier: GPL-2.0-only */

/* This code originates from Linux 5.19 */

/*
 * Implement memmove(). This can handle overlap between src and dst.
 *
 * Input:
 * rdi: dest
 * rsi: src
 * rdx: count
 *
 * Output:
 * rax: dest
 */
.global memmove
memmove:

	mov %rdi, %rax

	/* Decide forward/backward copy mode */
	cmp %rdi, %rsi
	jge .Lmemmove_begin_forward
	mov %rsi, %r8
	add %rdx, %r8
	cmp %rdi, %r8
	jg 2f

	/* Unlike Linux, don't optimize for FSRM and ERMS */
.Lmemmove_begin_forward:
	cmp $0x20, %rdx
	jb 1f

	/*
	 * The movsq instruction has a high startup latency,
	 * so we handle small sizes with general-purpose registers.
	 */
	cmp $680, %rdx
	jb 3f
	/*
	 * movsq is only a win for the aligned case: equal low bytes
	 * mean src and dest have the same alignment (mod 8).
	 */

	cmpb %dil, %sil
	je 4f
3:
	/* Pre-bias the count; the loop below subtracts another 0x20 per pass */
	sub $0x20, %rdx
	/*
	 * We gobble 32 bytes forward in each loop iteration.
	 */
5:
	sub $0x20, %rdx
	movq 0*8(%rsi), %r11
	movq 1*8(%rsi), %r10
	movq 2*8(%rsi), %r9
	movq 3*8(%rsi), %r8
	leaq 4*8(%rsi), %rsi

	movq %r11, 0*8(%rdi)
	movq %r10, 1*8(%rdi)
	movq %r9, 2*8(%rdi)
	movq %r8, 3*8(%rdi)
	leaq 4*8(%rdi), %rdi
	/* mov/lea leave the flags alone, so this tests the sub above */
	jae 5b
	addq $0x20, %rdx
	jmp 1f
	/*
	 * Handle data forward by movsq.
	 */
	.p2align 4
4:
	movq %rdx, %rcx
	movq -8(%rsi, %rdx), %r11
	lea -8(%rdi, %rdx), %r10
	shrq $3, %rcx
	rep movsq
	/* Store the saved last qword to cover a non-multiple-of-8 tail */
	movq %r11, (%r10)
	jmp 13f
.Lmemmove_end_forward:

	/*
	 * Handle data backward by movsq.
	 */
	.p2align 4
7:
	movq %rdx, %rcx
	movq (%rsi), %r11
	movq %rdi, %r10
	leaq -8(%rsi, %rdx), %rsi
	leaq -8(%rdi, %rdx), %rdi
	shrq $3, %rcx
	std
	rep movsq
	cld
	movq %r11, (%r10)
	jmp 13f

	/*
	 * Start to prepare for the backward copy.
	 */
	.p2align 4
2:
	cmp $0x20, %rdx
	jb 1f
	cmp $680, %rdx
	jb 6f
	cmp %dil, %sil
	je 7b
6:
	/*
	 * Calculate the copy position at the tail.
	 */
	addq %rdx, %rsi
	addq %rdx, %rdi
	subq $0x20, %rdx
	/*
	 * We gobble 32 bytes backward in each loop iteration.
	 */
8:
	subq $0x20, %rdx
	movq -1*8(%rsi), %r11
	movq -2*8(%rsi), %r10
	movq -3*8(%rsi), %r9
	movq -4*8(%rsi), %r8
	leaq -4*8(%rsi), %rsi

	movq %r11, -1*8(%rdi)
	movq %r10, -2*8(%rdi)
	movq %r9, -3*8(%rdi)
	movq %r8, -4*8(%rdi)
	leaq -4*8(%rdi), %rdi
	jae 8b
	/*
	 * Calculate the copy position at the head.
	 */
	addq $0x20, %rdx
	subq %rdx, %rsi
	subq %rdx, %rdi
1:
	cmpq $16, %rdx
	jb 9f
	/*
	 * Move 16 to 31 bytes as two possibly-overlapping 16-byte chunks.
	 */
	movq 0*8(%rsi), %r11
	movq 1*8(%rsi), %r10
	movq -2*8(%rsi, %rdx), %r9
	movq -1*8(%rsi, %rdx), %r8
	movq %r11, 0*8(%rdi)
	movq %r10, 1*8(%rdi)
	movq %r9, -2*8(%rdi, %rdx)
	movq %r8, -1*8(%rdi, %rdx)
	jmp 13f
	.p2align 4
9:
	cmpq $8, %rdx
	jb 10f
	/*
	 * Move 8 to 15 bytes.
	 */
	movq 0*8(%rsi), %r11
	movq -1*8(%rsi, %rdx), %r10
	movq %r11, 0*8(%rdi)
	movq %r10, -1*8(%rdi, %rdx)
	jmp 13f
10:
	cmpq $4, %rdx
	jb 11f
	/*
	 * Move 4 to 7 bytes.
	 */
	movl (%rsi), %r11d
	movl -4(%rsi, %rdx), %r10d
	movl %r11d, (%rdi)
	movl %r10d, -4(%rdi, %rdx)
	jmp 13f
11:
	cmp $2, %rdx
	jb 12f
	/*
	 * Move 2 to 3 bytes.
	 */
	movw (%rsi), %r11w
	movw -2(%rsi, %rdx), %r10w
	movw %r11w, (%rdi)
	movw %r10w, -2(%rdi, %rdx)
	jmp 13f
12:
	cmp $1, %rdx
	jb 13f
	/*
	 * Move a single byte.
	 */
	movb (%rsi), %r11b
	movb %r11b, (%rdi)
13:
	ret
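
/*
 * Reference model (illustration only; this comment is not assembled):
 * a minimal C sketch of the forward/backward dispatch implemented
 * above, assuming flat pointers. The name memmove_ref is hypothetical;
 * the real entry point is the memmove label in this file. The unrolled
 * 32-byte loops, the 680-byte movsq cutoff, and the alignment check
 * are performance details layered on top of this basic shape.
 *
 *	#include <stddef.h>
 *	#include <stdint.h>
 *
 *	void *memmove_ref(void *dest, const void *src, size_t n)
 *	{
 *		unsigned char *d = dest;
 *		const unsigned char *s = src;
 *
 *		if ((uintptr_t)s >= (uintptr_t)d ||
 *		    (uintptr_t)s + n <= (uintptr_t)d) {
 *			// No destructive overlap: copy forward.
 *			for (size_t i = 0; i < n; i++)
 *				d[i] = s[i];
 *		} else {
 *			// src below dest with overlap: copy backward so
 *			// each byte is read before it is overwritten.
 *			for (size_t i = n; i > 0; i--)
 *				d[i - 1] = s[i - 1];
 *		}
 *		return dest;
 *	}
 */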