.text
.p2align 2
.global ixheaacd_complex_ifft_p2_asm
.type ixheaacd_complex_ifft_p2_asm, %function

ixheaacd_complex_ifft_p2_asm:
    STMFD sp!, {r0-r12, lr}
    SUB sp, sp, #0x44
    LDR r0, [sp, #0x48]
    EOR r0, r0, r0, ASR #31
    CLZ r0, r0
    SUB r12, r0, #16 @dig_rev_shift = norm32(npoints) + 1 -16@
    SUB r0, r0, #1
    RSB r0, r0, #0x1e
    AND r1, r0, #1
    STR r1, [sp, #0x30]
    MOV r1, r0, ASR #1
    LDR r0, [sp, #0x48] @npoints
    STR r1, [sp, #0x18]
    MOV lr, r0, LSL #1 @(npoints >>1) * 4
    MOV r0, #0

FIRST_STAGE_R4:
    MOVW r4, #0x3333
    MOVT r4, #0x3333
    MOVW r5, #0x0F0F
    MOVT r5, #0x0F0F
    AND r6, r4, r0
    AND r7, r4, r0, LSR #2
    ORR r4, r7, r6, LSL #2
    AND r6, r5, r4
    AND r7, r5, r4, LSR #4
    ORR r4, r7, r6, LSL #4
    BIC r6, r4, #0x0000FF00
    BIC r7, r4, #0x00FF0000
    MOV r7, r7, LSR #8
    ORR r4, r7, r6, LSL #8
    LDR r5, [sp, #0x30]
    MOV r10, r4, LSR r12
    CMP r5, #0
    ADDNE r10, r10, #1
    BICNE r10, r10, #1

    ADD r1, r2, r10, LSL #2
    LDRD r4, [r1] @r4=x0r, r5=x0i
    ADD r1, r1, lr
    LDRD r8, [r1] @r8=x1r, r9=x1i
    ADD r1, r1, lr
    LDRD r6, [r1] @r6=x2r, r7=x2i
    ADD r1, r1, lr
    LDRD r10, [r1] @r10=x3r, r11=x3i
    ADD r0, r0, #4
    CMP r0, lr, ASR #1

    ADD r4, r4, r6 @x0r = x0r + x2r@
    ADD r5, r5, r7 @x0i = x0i + x2i@
    SUB r6, r4, r6, lsl#1 @x2r = x0r - (x2r << 1)@
    SUB r7, r5, r7, lsl#1 @x2i = x0i - (x2i << 1)@
    ADD r8, r8, r10 @x1r = x1r + x3r@
    ADD r9, r9, r11 @x1i = x1i + x3i@
    SUB r1, r8, r10, lsl#1 @x3r = x1r - (x3r << 1)@
    SUB r11, r9, r11, lsl#1 @x3i = x1i - (x3i << 1)@

    ADD r4, r4, r8 @x0r = x0r + x1r@
    ADD r5, r5, r9 @x0i = x0i + x1i@
    SUB r8, r4, r8, lsl#1 @x1r = x0r - (x1r << 1)@
    SUB r9, r5, r9, lsl#1 @x1i = x0i - (x1i << 1)
    SUB r6, r6, r11 @x2r = x2r - x3i@
    ADD r7, r7, r1 @x2i = x2i + x3r@
    ADD r10, r6, r11, lsl#1 @x3i = x2r + (x3i << 1)@
    SUB r11, r7, r1, lsl#1 @x3r = x2i - (x3r << 1)@

    STMIA r3!, {r4-r11}
    BLT FIRST_STAGE_R4
    LDR r1, [sp, #0x18]
    LDR r0, [sp, #0x48]
    MOV r12, #0x40 @nodespacing = 64@
    STR r12, [sp, #0x38]
    LDR r12, [sp, #0x48]
    SUB r3, r3, r0, LSL #3
    SUBS r1, r1, #1
    STR r3, [sp, #0x50]
    MOV r4, r12, ASR #4
    MOV r0, #4
    STR r4, [sp, #0x34]
    STR r1, [sp, #0x3c]
    BLE RADIX2
OUTER_LOOP:
    LDR r1, [sp, #0x44]
    LDR r12, [sp, #0x50] @WORD32 *data = ptr_y@
    STR r1, [sp, #0x2c]
    LDR r1, [sp, #0x34]

    MOV r0, r0, LSL #3 @(del<<1) * 4
LOOP_TRIVIAL_TWIDDLE:
    LDRD r4, [r12] @r4=x0r, r5=x0i
    ADD r12, r12, r0
    LDRD r6, [r12] @r6=x1r, r7=x1i
    ADD r12, r12, r0
    LDRD r8, [r12] @r8=x2r, r9=x2i
    ADD r12, r12, r0
    LDRD r10, [r12] @r10=x3r, r11=x3i

    @MOV r4,r4,ASR #1
    @MOV r5,r5,ASR #1
    @MOV r6,r6,ASR #1
    @MOV r7,r7,ASR #1
    @MOV r8,r8,ASR #1
    @MOV r9,r9,ASR #1
    @MOV r10,r10,ASR #1
    @MOV r11,r11,ASR #1

    ADD r4, r4, r8 @x0r = x0r + x2r@
    ADD r5, r5, r9 @x0i = x0i + x2i@
    SUB r8, r4, r8, lsl #1 @x2r = x0r - (x2r << 1)@
    SUB r9, r5, r9, lsl #1 @x2i = x0i - (x2i << 1)@
    ADD r6, r6, r10 @x1r = x1r + x3r@
    ADD r7, r7, r11 @x1i = x1i + x3i@
    SUB r2, r6, r10, lsl #1 @x3r = x1r - (x3r << 1)@
    SUB r11, r7, r11, lsl #1 @x3i = x1i - (x3i << 1)@

    ADD r4, r4, r6 @x0r = x0r + x1r@
    ADD r5, r5, r7 @x0i = x0i + x1i@
    @MOV r4,r4,ASR #1
    @MOV r5,r5,ASR #1
    SUB r6, r4, r6, lsl #1 @x1r = x0r - (x1r << 1)@
    SUB r7, r5, r7, lsl #1 @x1i = x0i - (x1i << 1)
    SUB r8, r8, r11 @x2r = x2r - x3i@
    ADD r9, r9, r2 @x2i = x2i + x3r@
    ADD r10, r8, r11, lsl#1 @x3i = x2r + (x3i << 1)@
    SUB r11, r9, r2, lsl#1 @x3r = x2i - (x3r << 1)

    STRD r10, [r12] @r10=x3r, r11=x3i
    SUB r12, r12, r0
    STRD r6, [r12] @r6=x1r, r7=x1i
    SUB r12, r12, r0
    STRD r8, [r12] @r8=x2r, r9=x2i
    SUB r12, r12, r0
    STRD r4, [r12] @r4=x0r, r5=x0i
    ADD r12, r12, r0, lsl #2

    SUBS r1, r1, #1
    BNE LOOP_TRIVIAL_TWIDDLE

    MOV r0, r0, ASR #3
    LDR r4, [sp, #0x38]
    LDR r3, [sp, #0x50]
    MUL r1, r0, r4
    ADD r12, r3, #8
    STR r1, [sp, #0x40]
    MOV r3, r1, ASR #2
    ADD r3, r3, r1, ASR #3
    SUB r3, r3, r1, ASR #4
    ADD r3, r3, r1, ASR #5
    SUB r3, r3, r1, ASR #6
    ADD r3, r3, r1, ASR #7
    SUB r3, r3, r1, ASR #8
    STR r3, [sp, #0x18]
SECOND_LOOP:
    LDR r3, [sp, #0x2c]
    LDR r14, [sp, #0x34]
    MOV r0, r0, LSL #3 @(del<<1) * 4
    LDR r1, [r3, r4, LSL #3]! @ w1h = *(twiddles + 2*j)@
    LDR r2, [r3, #0x04] @w1l = *(twiddles + 2*j + 1)@
    LDR r5, [r3, r4, LSL #3]! @w2h = *(twiddles + 2*(j<<1))@
    LDR r6, [r3, #0x04] @w2l = *(twiddles + 2*(j<<1) + 1)@
    LDR r7, [r3, r4, LSL #3]! @w3h = *(twiddles + 2*j + 2*(j<<1))@
    LDR r8, [r3, #0x04] @w3l = *(twiddles + 2*j + 2*(j<<1) + 1)@

    STR r4, [sp, #0x24]
    STR r1, [sp, #0x14]
    STR r2, [sp, #0x10]
    STR r5, [sp, #0x0c]
    STR r6, [sp, #0x08]
    STR r7, [sp, #0x04]
    STR r8, [sp]

RADIX4_BFLY:

    LDRD r6, [r12, r0]! @r6=x1r, r7=x1i
    LDRD r8, [r12, r0]! @r8=x2r, r9=x2i
    LDRD r10, [r12, r0] @r10=x3r, r11=x3i
    SUBS r14, r14, #1

    LDR r1, [sp, #0x14]
    LDR r2, [sp, #0x10]

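    @ Each SMULL / LSR #31 / ORR ..., LSL#1 triple below keeps bits 62:31 of
    @ the 64-bit product, i.e. (a * b) >> 31 in 32-bit arithmetic; this is the
    @ Q31 twiddle multiply that the ixheaac_mult32 / ixheaac_mac32 comments
    @ refer to, and the same idiom is repeated for every twiddle factor in the
    @ butterflies of this and the later SECOND_LOOP variants.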
    SMULL r3, r4, r6, r2 @ixheaac_mult32(x1r,w1l)
    LSR r3, r3, #31
    ORR r4, r3, r4, LSL#1
    SMULL r3, r6, r6, r1 @mult32x16hin32(x1r,W1h)
    LSR r3, r3, #31
    ORR r6, r3, r6, LSL#1
    SMULL r3, r5, r7, r1 @mult32x16hin32(x1i,W1h)
    LSR r3, r3, #31
    ORR r5, r3, r5, LSL#1
    SMULL r3, r7, r7, r2 @ixheaac_mac32(ixheaac_mult32(x1r,w1h) ,x1i,w1l)
    LSR r3, r3, #31
    ORR r7, r3, r7, LSL#1
    SUB r7, r7, r6
    ADD r6, r4, r5 @

    LDR r1, [sp, #0x0c]
    LDR r2, [sp, #0x08]

    SMULL r3, r4, r8, r2 @ixheaac_mult32(x2r,w2l)
    LSR r3, r3, #31
    ORR r4, r3, r4, LSL#1
    SMULL r3, r8, r8, r1 @mult32x16hin32(x2r,W2h)
    LSR r3, r3, #31
    ORR r8, r3, r8, LSL#1
    SMULL r3, r5, r9, r1 @mult32x16hin32(x2i,W2h)
    LSR r3, r3, #31
    ORR r5, r3, r5, LSL#1
    SMULL r3, r9, r9, r2 @ixheaac_mac32(ixheacd_mult32(x1r,w1h) ,x1i,w1l)
    LSR r3, r3, #31
    ORR r9, r3, r9, LSL#1
    SUB r9, r9, r8
    ADD r8, r4, r5 @

    LDR r1, [sp, #0x04]
    LDR r2, [sp]

    SMULL r3, r4, r10, r2 @ixheaac_mult32(x3r,w3l)
    LSR r3, r3, #31
    ORR r4, r3, r4, LSL#1
    SMULL r3, r10, r10, r1 @mult32x16hin32(x3r,W3h)
    LSR r3, r3, #31
    ORR r10, r3, r10, LSL#1
    SMULL r3, r5, r11, r1 @mult32x16hin32(x3i,W3h)
    LSR r3, r3, #31
    ORR r5, r3, r5, LSL#1
    SMULL r3, r11, r11, r2 @ixheaac_mac32(ixheacd_mult32(x3r,w3h) ,x3i,w3l)
    LSR r3, r3, #31
    ORR r11, r3, r11, LSL#1
    SUB r11, r11, r10
    ADD r10, r4, r5 @

    @SUB r12,r12,r0,lsl #1
    @LDRD r4,[r12] @r4=x0r, r5=x0i
    LDR r4, [r12, -r0, lsl #1]! @
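    @ The pre-indexed load above rewinds r12 by two strides (r0, lsl #1) back
    @ to the x0 element, replacing the commented-out SUB/LDRD pair; two plain
    @ LDRs are used because LDRD has no shifted-register addressing mode.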
    LDR r5, [r12, #0x04]

    ADD r4, r8, r4 @x0r = x0r + x2r@
    ADD r5, r9, r5 @x0i = x0i + x2i@
    SUB r8, r4, r8, lsl#1 @x2r = x0r - (x2r << 1)@
    SUB r9, r5, r9, lsl#1 @x2i = x0i - (x2i << 1)@
    ADD r6, r6, r10 @x1r = x1r + x3r@
    ADD r7, r7, r11 @x1i = x1i + x3i@
    SUB r10, r6, r10, lsl#1 @x3r = x1r - (x3r << 1)@
    SUB r11, r7, r11, lsl#1 @x3i = x1i - (x3i << 1)@

    ADD r4, r4, r6 @x0r = x0r + x1r@
    ADD r5, r5, r7 @x0i = x0i + x1i@
    SUB r6, r4, r6, lsl#1 @x1r = x0r - (x1r << 1)@
    SUB r7, r5, r7, lsl#1 @x1i = x0i - (x1i << 1)
    STRD r4, [r12] @r4=x0r, r5=x0i
    ADD r12, r12, r0

    SUB r8, r8, r11 @x2r = x2r - x3i@
    ADD r9, r9, r10 @x2i = x2i + x3r@
    ADD r4, r8, r11, lsl#1 @x3i = x2r + (x3i << 1)@
    SUB r5, r9, r10, lsl#1 @x3r = x2i - (x3r << 1)

    STRD r8, [r12] @r8=x2r, r9=x2i
    ADD r12, r12, r0
    STRD r6, [r12] @r6=x1r, r7=x1i
    ADD r12, r12, r0
    STRD r4, [r12] @r10=x3r, r11=x3i
    ADD r12, r12, r0

    BNE RADIX4_BFLY
    MOV r0, r0, ASR #3

    LDR r1, [sp, #0x48]
    LDR r4, [sp, #0x24]
    SUB r1, r12, r1, LSL #3
    LDR r6, [sp, #0x38]
    ADD r12, r1, #8
    LDR r7, [sp, #0x18]
    ADD r4, r4, r6
    CMP r4, r7
    BLE SECOND_LOOP

SECOND_LOOP_2:
    LDR r3, [sp, #0x2c]
    LDR r14, [sp, #0x34]
    MOV r0, r0, LSL #3 @(del<<1) * 4

    LDR r1, [r3, r4, LSL #3]! @ w1h = *(twiddles + 2*j)@
    LDR r2, [r3, #0x04] @w1l = *(twiddles + 2*j + 1)@
    LDR r5, [r3, r4, LSL #3]! @w2h = *(twiddles + 2*(j<<1))@
    LDR r6, [r3, #0x04] @w2l = *(twiddles + 2*(j<<1) + 1)@
    SUB r3, r3, #2048 @ 512 *4
    LDR r7, [r3, r4, LSL #3]! @w3h = *(twiddles + 2*j + 2*(j<<1))@
    LDR r8, [r3, #0x04] @w3l = *(twiddles + 2*j + 2*(j<<1) + 1)@

    STR r4, [sp, #0x24]

    STR r1, [sp, #0x14]
    STR r2, [sp, #0x10]
    STR r5, [sp, #0x0c]
    STR r6, [sp, #0x08]
    STR r7, [sp, #0x04]
    STR r8, [sp]

RADIX4_BFLY_2:
    LDRD r6, [r12, r0]! @r6=x1r, r7=x1i
    LDRD r8, [r12, r0]! @r8=x2r, r9=x2i
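    @ The writeback loads above step r12 by one stride per butterfly leg
    @ (r0 = (del<<1)*4 bytes); x3 is fetched below without writeback, and r12
    @ is rewound to x0 once the twiddled products have been formed.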
    LDRD r10, [r12, r0] @r10=x3r, r11=x3i
    SUBS r14, r14, #1
    LDR r1, [sp, #0x14]
    LDR r2, [sp, #0x10]

    SMULL r3, r4, r6, r2 @ixheaac_mult32(x1r,w1l)
    LSR r3, r3, #31
    ORR r4, r3, r4, LSL#1
    SMULL r3, r6, r6, r1 @mult32x16hin32(x1r,W1h)
    LSR r3, r3, #31
    ORR r6, r3, r6, LSL#1
    SMULL r3, r5, r7, r1 @mult32x16hin32(x1i,W1h)
    LSR r3, r3, #31
    ORR r5, r3, r5, LSL#1
    SMULL r3, r7, r7, r2 @ixheaac_mac32(ixheaac_mult32(x1r,w1h) ,x1i,w1l)
    LSR r3, r3, #31
    ORR r7, r3, r7, LSL#1
    SUB r7, r7, r6
    ADD r6, r4, r5 @

    LDR r1, [sp, #0x0c]
    LDR r2, [sp, #0x08]

    SMULL r3, r4, r8, r2 @ixheaac_mult32(x2r,w2l)
    LSR r3, r3, #31
    ORR r4, r3, r4, LSL#1
    SMULL r3, r8, r8, r1 @mult32x16hin32(x2r,W2h)
    LSR r3, r3, #31
    ORR r8, r3, r8, LSL#1
    SMULL r3, r5, r9, r1 @mult32x16hin32(x2i,W2h)
    LSR r3, r3, #31
    ORR r5, r3, r5, LSL#1
    SMULL r3, r9, r9, r2 @ixheaac_mac32(ixheacd_mult32(x1r,w1h) ,x1i,w1l)
    LSR r3, r3, #31
    ORR r9, r3, r9, LSL#1
    SUB r9, r9, r8
    ADD r8, r4, r5 @

    LDR r1, [sp, #0x04]
    LDR r2, [sp]

    SMULL r3, r4, r10, r2 @ixheaac_mult32(x3r,w3l)
    LSR r3, r3, #31
    ORR r4, r3, r4, LSL#1
    SMULL r3, r10, r10, r1 @mult32x16hin32(x3r,W3h)
    LSR r3, r3, #31
    ORR r10, r3, r10, LSL#1
    SMULL r3, r5, r11, r1 @mult32x16hin32(x3i,W3h)
    LSR r3, r3, #31
    ORR r5, r3, r5, LSL#1
    SMULL r3, r11, r11, r2 @ixheaac_mac32(ixheacd_mult32(x3r,w3h) ,x3i,w3l)
    LSR r3, r3, #31
    ORR r11, r3, r11, LSL#1
    SUB r10, r10, r11
    ADD r11, r5, r4 @

    @SUB r12,r12,r0,lsl #1
    @LDRD r4,[r12] @r4=x0r, r5=x0i
    LDR r4, [r12, -r0, lsl #1]! @
    LDR r5, [r12, #0x04]

    ADD r4, r8, r4 @x0r = x0r + x2r@
    ADD r5, r9, r5 @x0i = x0i + x2i@
    SUB r8, r4, r8, lsl#1 @x2r = x0r - (x2r << 1)@
    SUB r9, r5, r9, lsl#1 @x2i = x0i - (x2i << 1)@
    ADD r6, r6, r10 @x1r = x1r + x3r@
    ADD r7, r7, r11 @x1i = x1i + x3i@
    SUB r10, r6, r10, lsl#1 @x3r = x1r - (x3r << 1)@
    SUB r11, r7, r11, lsl#1 @x3i = x1i - (x3i << 1)@

    ADD r4, r4, r6 @x0r = x0r + x1r@
    ADD r5, r5, r7 @x0i = x0i + x1i@
    SUB r6, r4, r6, lsl#1 @x1r = x0r - (x1r << 1)@
    SUB r7, r5, r7, lsl#1 @x1i = x0i - (x1i << 1)
    STRD r4, [r12] @r4=x0r, r5=x0i
    ADD r12, r12, r0

    SUB r8, r8, r11 @x2r = x2r - x3i@
    ADD r9, r9, r10 @x2i = x2i + x3r@
    ADD r4, r8, r11, lsl#1 @x3i = x2r + (x3i << 1)@
    SUB r5, r9, r10, lsl#1 @x3r = x2i - (x3r << 1)

    STRD r8, [r12] @r8=x2r, r9=x2i
    ADD r12, r12, r0
    STRD r6, [r12] @r6=x1r, r7=x1i
    ADD r12, r12, r0
    STRD r4, [r12] @r10=x3r, r11=x3i
    ADD r12, r12, r0

    BNE RADIX4_BFLY_2
    MOV r0, r0, ASR #3

    LDR r1, [sp, #0x48]
    LDR r4, [sp, #0x24]
    SUB r1, r12, r1, LSL #3
    LDR r6, [sp, #0x38]
    ADD r12, r1, #8
    LDR r7, [sp, #0x40]
    ADD r4, r4, r6
    CMP r4, r7, ASR #1
    BLE SECOND_LOOP_2
    LDR r7, [sp, #0x18]
    CMP r4, r7, LSL #1
    BGT SECOND_LOOP_4

SECOND_LOOP_3:
    LDR r3, [sp, #0x2c]
    LDR r14, [sp, #0x34]
    MOV r0, r0, LSL #3 @(del<<1) * 4

    LDR r1, [r3, r4, LSL #3]! @ w1h = *(twiddles + 2*j)@
    LDR r2, [r3, #0x04] @w1l = *(twiddles + 2*j + 1)@
    SUB r3, r3, #2048 @ 512 *4
    LDR r5, [r3, r4, LSL #3]! @w2h = *(twiddles + 2*(j<<1))@
    LDR r6, [r3, #0x04] @w2l = *(twiddles + 2*(j<<1) + 1)@
    LDR r7, [r3, r4, LSL #3]! @w3h = *(twiddles + 2*j + 2*(j<<1))@
    LDR r8, [r3, #0x04] @w3l = *(twiddles + 2*j + 2*(j<<1) + 1)@
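    @ In this range of j the w2/w3 indices appear to have run past the end of
    @ the twiddle table, so the pointer was pulled back by 2048 bytes (the
    @ "512 *4" above) before those loads; the SECOND_LOOP_2/3/4 variants seem
    @ to differ only in where this wrap is applied and in the sign handling of
    @ the twiddled terms in the butterflies that follow.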
    STR r4, [sp, #0x24]
    STR r1, [sp, #0x14]
    STR r2, [sp, #0x10]
    STR r5, [sp, #0x0c]
    STR r6, [sp, #0x08]
    STR r7, [sp, #0x04]
    STR r8, [sp]

RADIX4_BFLY_3:
    LDRD r6, [r12, r0]! @r6=x1r, r7=x1i
    LDRD r8, [r12, r0]! @r8=x2r, r9=x2i
    LDRD r10, [r12, r0] @r10=x3r, r11=x3i
    SUBS r14, r14, #1

    LDR r1, [sp, #0x14]
    LDR r2, [sp, #0x10]

    SMULL r3, r4, r6, r2 @ixheaac_mult32(x1r,w1l)
    LSR r3, r3, #31
    ORR r4, r3, r4, LSL#1
    SMULL r3, r6, r6, r1 @mult32x16hin32(x1r,W1h)
    LSR r3, r3, #31
    ORR r6, r3, r6, LSL#1
    SMULL r3, r5, r7, r1 @mult32x16hin32(x1i,W1h)
    LSR r3, r3, #31
    ORR r5, r3, r5, LSL#1
    SMULL r3, r7, r7, r2 @ixheaac_mac32(ixheaac_mult32(x1r,w1h) ,x1i,w1l)
    LSR r3, r3, #31
    ORR r7, r3, r7, LSL#1
    SUB r7, r7, r6
    ADD r6, r4, r5 @

    LDR r1, [sp, #0x0c]
    LDR r2, [sp, #0x08]

    SMULL r3, r4, r8, r2 @ixheaac_mult32(x2r,w2l)
    LSR r3, r3, #31
    ORR r4, r3, r4, LSL#1
    SMULL r3, r8, r8, r1 @mult32x16hin32(x2r,W2h)
    LSR r3, r3, #31
    ORR r8, r3, r8, LSL#1
    SMULL r3, r5, r9, r1 @mult32x16hin32(x2i,W2h)
    LSR r3, r3, #31
    ORR r5, r3, r5, LSL#1
    SMULL r3, r9, r9, r2 @ixheaac_mac32(ixheacd_mult32(x1r,w1h) ,x1i,w1l)
    LSR r3, r3, #31
    ORR r9, r3, r9, LSL#1
    SUB r8, r8, r9
    ADD r9, r5, r4 @

    LDR r1, [sp, #0x04]
    LDR r2, [sp]

    SMULL r3, r4, r10, r2 @ixheaac_mult32(x3r,w3l)
    LSR r3, r3, #31
    ORR r4, r3, r4, LSL#1
    SMULL r3, r10, r10, r1 @mult32x16hin32(x3r,W3h)
    LSR r3, r3, #31
    ORR r10, r3, r10, LSL#1
    SMULL r3, r5, r11, r1 @mult32x16hin32(x3i,W3h)
    LSR r3, r3, #31
    ORR r5, r3, r5, LSL#1
    SMULL r3, r11, r11, r2 @ixheaac_mac32(ixheacd_mult32(x3r,w3h) ,x3i,w3l)
    LSR r3, r3, #31
    ORR r11, r3, r11, LSL#1
    SUB r10, r10, r11
    ADD r11, r5, r4 @

    @SUB r12,r12,r0,lsl #1
    @LDRD r4,[r12] @r4=x0r, r5=x0i
    LDR r4, [r12, -r0, lsl #1]! @
    LDR r5, [r12, #0x04]

    ADD r4, r8, r4 @x0r = x0r + x2r@
    ADD r5, r9, r5 @x0i = x0i + x2i@
    SUB r8, r4, r8, lsl#1 @x2r = x0r - (x2r << 1)@
    SUB r9, r5, r9, lsl#1 @x2i = x0i - (x2i << 1)@
    ADD r6, r6, r10 @x1r = x1r + x3r@
    ADD r7, r7, r11 @x1i = x1i + x3i@
    SUB r10, r6, r10, lsl#1 @x3r = x1r - (x3r << 1)@
    SUB r11, r7, r11, lsl#1 @x3i = x1i - (x3i << 1)@

    ADD r4, r4, r6 @x0r = x0r + x1r@
    ADD r5, r5, r7 @x0i = x0i + x1i@
    SUB r6, r4, r6, lsl#1 @x1r = x0r - (x1r << 1)@
    SUB r7, r5, r7, lsl#1 @x1i = x0i - (x1i << 1)
    STRD r4, [r12] @r4=x0r, r5=x0i
    ADD r12, r12, r0

    SUB r8, r8, r11 @x2r = x2r - x3i@
    ADD r9, r9, r10 @x2i = x2i + x3r@
    ADD r4, r8, r11, lsl#1 @x3i = x2r + (x3i << 1)@
    SUB r5, r9, r10, lsl#1 @x3r = x2i - (x3r << 1)

    STRD r8, [r12] @r8=x2r, r9=x2i
    ADD r12, r12, r0
    STRD r6, [r12] @r6=x1r, r7=x1i
    ADD r12, r12, r0
    STRD r4, [r12] @r10=x3r, r11=x3i
    ADD r12, r12, r0

    BNE RADIX4_BFLY_3
    MOV r0, r0, ASR #3

    LDR r1, [sp, #0x48]
    LDR r4, [sp, #0x24]
    SUB r1, r12, r1, LSL #3
    LDR r6, [sp, #0x38]
    ADD r12, r1, #8
    LDR r7, [sp, #0x18]
    ADD r4, r4, r6
    CMP r4, r7, LSL #1
    BLE SECOND_LOOP_3

SECOND_LOOP_4:
    LDR r3, [sp, #0x2c]
    LDR r14, [sp, #0x34]
    MOV r0, r0, LSL #3 @(del<<1) * 4

    LDR r1, [r3, r4, LSL #3]! @ w1h = *(twiddles + 2*j)@
    LDR r2, [r3, #0x04] @w1l = *(twiddles + 2*j + 1)@
    SUB r3, r3, #2048 @ 512 *4
    LDR r5, [r3, r4, LSL #3]! @w2h = *(twiddles + 2*(j<<1))@
    LDR r6, [r3, #0x04] @w2l = *(twiddles + 2*(j<<1) + 1)@
    SUB r3, r3, #2048 @ 512 *4
    LDR r7, [r3, r4, LSL #3]! @w3h = *(twiddles + 2*j + 2*(j<<1))@
    LDR r8, [r3, #0x04] @w3l = *(twiddles + 2*j + 2*(j<<1) + 1)@

    STR r4, [sp, #0x24]
    STR r1, [sp, #0x14]
    STR r2, [sp, #0x10]
    STR r5, [sp, #0x0c]
    STR r6, [sp, #0x08]
    STR r7, [sp, #0x04]
    STR r8, [sp]

RADIX4_BFLY_4:
    LDRD r6, [r12, r0]! @r6=x1r, r7=x1i
    LDRD r8, [r12, r0]! @r8=x2r, r9=x2i
    LDRD r10, [r12, r0] @r10=x3r, r11=x3i
    SUBS r14, r14, #1

    LDR r1, [sp, #0x14]
    LDR r2, [sp, #0x10]

    SMULL r3, r4, r6, r2 @ixheaac_mult32(x1r,w1l)
    LSR r3, r3, #31
    ORR r4, r3, r4, LSL#1
    SMULL r3, r6, r6, r1 @mult32x16hin32(x1r,W1h)
    LSR r3, r3, #31
    ORR r6, r3, r6, LSL#1
    SMULL r3, r5, r7, r1 @mult32x16hin32(x1i,W1h)
    LSR r3, r3, #31
    ORR r5, r3, r5, LSL#1
    SMULL r3, r7, r7, r2 @ixheaac_mac32(ixheaac_mult32(x1r,w1h) ,x1i,w1l)
    LSR r3, r3, #31
    ORR r7, r3, r7, LSL#1
    SUB r7, r7, r6
    ADD r6, r4, r5 @

    LDR r1, [sp, #0x0c]
    LDR r2, [sp, #0x08]

    SMULL r3, r4, r8, r2 @ixheaac_mult32(x2r,w2l)
    LSR r3, r3, #31
    ORR r4, r3, r4, LSL#1
    SMULL r3, r8, r8, r1 @mult32x16hin32(x2r,W2h)
    LSR r3, r3, #31
    ORR r8, r3, r8, LSL#1
    SMULL r3, r5, r9, r1 @mult32x16hin32(x2i,W2h)
    LSR r3, r3, #31
    ORR r5, r3, r5, LSL#1
    SMULL r3, r9, r9, r2 @ixheaac_mac32(ixheacd_mult32(x1r,w1h) ,x1i,w1l)
    LSR r3, r3, #31
    ORR r9, r3, r9, LSL#1
    SUB r8, r8, r9
    ADD r9, r5, r4 @

    LDR r1, [sp, #0x04]
    LDR r2, [sp]

    SMULL r3, r4, r10, r2 @ixheaac_mult32(x3r,w3l)
    LSR r3, r3, #31
    ORR r4, r3, r4, LSL#1
    SMULL r3, r10, r10, r1 @mult32x16hin32(x3r,W3h)
    LSR r3, r3, #31
    ORR r10, r3, r10, LSL#1
    SMULL r3, r5, r11, r1 @mult32x16hin32(x3i,W3h)
    LSR r3, r3, #31
    ORR r5, r3, r5, LSL#1
    SMULL r3, r11, r11, r2 @ixheaac_mac32(ixheacd_mult32(x3r,w3h) ,x3i,w3l)
    LSR r3, r3, #31
    ORR r11, r3, r11, LSL#1
    SUB r11, r11, r10
    ADD r10, r5, r4 @
    RSB r10, r10, #0

    @SUB r12,r12,r0,lsl #1
    @LDRD r4,[r12] @r4=x0r, r5=x0i
    LDR r4, [r12, -r0, lsl #1]! @
    LDR r5, [r12, #0x04]

    ADD r4, r8, r4 @x0r = x0r + x2r@
    ADD r5, r9, r5 @x0i = x0i + x2i@
    SUB r8, r4, r8, lsl#1 @x2r = x0r - (x2r << 1)@
    SUB r9, r5, r9, lsl#1 @x2i = x0i - (x2i << 1)@
    ADD r6, r6, r10 @x1r = x1r + x3r@
    SUB r7, r7, r11 @x1i = x1i - x3i@
    SUB r10, r6, r10, lsl#1 @x3r = x1r - (x3r << 1)@
    ADD r11, r7, r11, lsl#1 @x3i = x1i + (x3i << 1)@

    ADD r4, r4, r6 @x0r = x0r + x1r@
    ADD r5, r5, r7 @x0i = x0i + x1i@
    SUB r6, r4, r6, lsl#1 @x1r = x0r - (x1r << 1)@
    SUB r7, r5, r7, lsl#1 @x1i = x0i - (x1i << 1)
    STRD r4, [r12] @r4=x0r, r5=x0i
    ADD r12, r12, r0

    SUB r8, r8, r11 @x2r = x2r - x3i@
    ADD r9, r9, r10 @x2i = x2i + x3r@
    ADD r4, r8, r11, lsl#1 @x3i = x2r + (x3i << 1)@
    SUB r5, r9, r10, lsl#1 @x3r = x2i - (x3r << 1)

    STRD r8, [r12] @r8=x2r, r9=x2i
    ADD r12, r12, r0
    STRD r6, [r12] @r6=x1r, r7=x1i
    ADD r12, r12, r0
    STRD r4, [r12] @r10=x3r, r11=x3i
    ADD r12, r12, r0

    BNE RADIX4_BFLY_4
    MOV r0, r0, ASR #3

    LDR r1, [sp, #0x48]
    LDR r4, [sp, #0x24]
    SUB r1, r12, r1, LSL #3
    LDR r6, [sp, #0x38]
    ADD r12, r1, #8
    LDR r7, [sp, #0x40]
    ADD r4, r4, r6
    CMP r4, r7
    BLT SECOND_LOOP_4

    LDR r1, [sp, #0x38]
    MOV r0, r0, LSL #2
    MOV r1, r1, ASR #2
    STR r1, [sp, #0x38]
    LDR r1, [sp, #0x34]
    MOV r1, r1, ASR #2
    STR r1, [sp, #0x34]
    LDR r1, [sp, #0x3c]
    SUBS r1, r1, #1
    STR r1, [sp, #0x3c]
    BGT OUTER_LOOP

RADIX2:
    LDR r1, [sp, #0x30]
    CMP r1, #0
    BEQ EXIT
    LDR r12, [sp, #0x38]
    LDR r1, [sp, #0x44]
    CMP r12, #0
    MOVEQ r4, #1
    MOVNE r4, r12, LSL #1
    MOVS r3, r0
    BEQ EXIT

    MOV r3, r3, ASR #1
    LDR r5, [sp, #0x50]
    MOV r0, r0, LSL #3 @(del<<1) * 4
    STR r1, [sp, #0x18]
RADIX2_BFLY:
    LDR r1, [sp, #0x18]
    LDRD r6, [r5] @r6 = x0r
    ADD r5, r5, r0
    LDRD r8, [r5] @r8 = x1r

    LDR r2, [r1]
    SUBS r3, r3, #1

    SMULL r1, r11, r8, r2 @mult32x16hin32(x1r,W1h)
    LSR r1, r1, #31
    ORR r11, r1, r11, LSL#1
    SMULL r1, r10, r9, r2 @mult32x16hin32(x1i,W1h)
    LSR r1, r1, #31
    ORR r10, r1, r10, LSL#1

    LDR r1, [sp, #0x18]
    LDR r2, [r1, #0x04]
    ADD r1, r1, r4, LSL #3
    STR r1, [sp, #0x18]

    SMULL r1, r8, r8, r2 @ixheaac_mult32(x1r,w1l)
    LSR r1, r1, #31
    ORR r8, r1, r8, LSL#1
    SMULL r1, r9, r9, r2 @ixheaac_mac32(ixheacd_mult32(x1r,w1h) ,x1i,w1l)
    LSR r1, r1, #31
    ORR r9, r1, r9, LSL#1

    ADD r8, r8, r10
    SUB r9, r9, r11

    ASR r8, r8, #1
    ASR r6, r6, #1
    ASR r9, r9, #1
    ASR r7, r7, #1
    ADD r10, r8, r6 @(x0r/2) + (x1r/2)
    ADD r11, r9, r7 @(x0i/2) + (x1i/2)@
    SUB r8, r6, r8 @(x0r/2) - (x1r/2)
    SUB r9, r7, r9 @(x0i/2) - (x1i/2)@

    STRD r8, [r5]
    SUB r5, r5, r0
    STRD r10, [r5], #8

    BNE RADIX2_BFLY
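    @ Second pass of the final radix-2 stage: the twiddle pointer is reloaded
    @ from the saved argument and the products are recombined with the roles
    @ of the high and low twiddle words swapped relative to RADIX2_BFLY
    @ (SUB r11, r11, r9 / ADD r9, r10, r8), which appears to cover the
    @ remaining portion of the twiddle range.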
    LDR r1, [sp, #0x44]
    MOV r3, r0, ASR #4
    STR r1, [sp, #0x18]
RADIX2_BFLY_2:
    LDR r1, [sp, #0x18]
    LDRD r6, [r5] @r6 = x0r
    ADD r5, r5, r0
    LDRD r8, [r5] @r8 = x1r

    LDR r2, [r1]
    SUBS r3, r3, #1

    SMULL r1, r11, r8, r2 @mult32x16hin32(x1r,W1h)
    LSR r1, r1, #31
    ORR r11, r1, r11, LSL#1
    SMULL r1, r10, r9, r2 @mult32x16hin32(x1i,W1h)
    LSR r1, r1, #31
    ORR r10, r1, r10, LSL#1

    LDR r1, [sp, #0x18]
    LDR r2, [r1, #0x04]
    ADD r1, r1, r4, LSL #3
    STR r1, [sp, #0x18]

    SMULL r1, r8, r8, r2 @ixheaac_mult32(x1r,w1l)
    LSR r1, r1, #31
    ORR r8, r1, r8, LSL#1
    SMULL r1, r9, r9, r2 @ixheaac_mac32(ixheacd_mult32(x1r,w1h) ,x1i,w1l)
    LSR r1, r1, #31
    ORR r9, r1, r9, LSL#1

    SUB r11, r11, r9
    ADD r9, r10, r8 @
    MOV r8, r11

    ASR r8, r8, #1
    ASR r6, r6, #1
    ASR r9, r9, #1
    ASR r7, r7, #1
    ADD r10, r8, r6 @(x0r>>1) + (x1r)
    ADD r11, r9, r7 @(x0i>>1) + (x1i)@
    SUB r8, r6, r8 @(x0r>>1) - (x1r)
    SUB r9, r7, r9 @(x0i>>1) - (x1i)@

    STRD r8, [r5]
    SUB r5, r5, r0
    STRD r10, [r5], #8

    BNE RADIX2_BFLY_2

EXIT:
    ADD sp, sp, #0x54
    LDMFD sp!, {r4-r12, pc}