@/******************************************************************************
@ *
@ * Copyright (C) 2018 The Android Open Source Project
@ *
@ * Licensed under the Apache License, Version 2.0 (the "License");
@ * you may not use this file except in compliance with the License.
@ * You may obtain a copy of the License at:
@ *
@ * http://www.apache.org/licenses/LICENSE-2.0
@ *
@ * Unless required by applicable law or agreed to in writing, software
@ * distributed under the License is distributed on an "AS IS" BASIS,
@ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@ * See the License for the specific language governing permissions and
@ * limitations under the License.
@ *
@ *****************************************************************************
@ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
@*/

.text
.p2align 2

@-----------------------------------------------------------------------------
@ ixheaacd_esbr_cos_sin_mod_loop1
@
@ NOTE(review): exact C prototype not visible in this file — confirm against
@ the decoder's header before relying on the argument description below.
@
@ ABI:  AAPCS, ARMv7-A with NEON
@ In:   r0 = ptr to 32-bit samples, read ascending; each part also reads the
@            sample 256 bytes (64 words) ahead of it
@       r1 = size parameter: the loop runs r1/4 times, and r1*8 bytes past
@            r0 / r3 locate the descending mirror pointers (r4 / r5)
@       r2 = ptr to 32-bit coefficient pairs, consumed sequentially
@            (8 values per iteration) — presumably {cos, sin} twiddle
@            factors; TODO(review) confirm ordering
@       r3 = ptr to 32-bit output, written interleaved; advanced by the
@            post-incrementing stores
@ Out:  results written through r3 (ascending) and r5 (descending mirror)
@ Saved/restored: r4-r12, lr (STMFD) and d8-d11 (VPUSH) per AAPCS;
@       r7 is scratch within the saved set
@
@ Each of the four unrolled "parts" performs two complex-style butterflies in
@ 32-bit fixed point: VMULL.S32 forms 64-bit products, VADD.I64 / VQSUB.S64
@ (saturating subtract) combine them, and VSHRN.I64 #32 keeps the high 32
@ bits of each 64-bit result, i.e. a Q31-style (a*b) >> 32 multiply.
@-----------------------------------------------------------------------------
        .global ixheaacd_esbr_cos_sin_mod_loop1
        .type ixheaacd_esbr_cos_sin_mod_loop1, %function
ixheaacd_esbr_cos_sin_mod_loop1:

        STMFD sp!, {r4-r12, r14}      @ save callee-saved core regs + lr
        VPUSH {D8-D11}                @ save callee-saved NEON regs (q4/q5 used)
@generating load addresses
        ADD r4, r0, r1, lsl #3        @ psubband1   = r0 + r1*8 bytes
        SUB r4, r4, #4                @ ... minus one word: descending read ptr
        ADD r5, r3, r1, lsl #3        @ psubband1_t = r3 + r1*8 bytes
        SUB r5, r5, #8                @ ... minus two words: descending write ptr
        MOV r6, r1, ASR #2            @ loop count = r1 / 4

LOOP1:
@first part
        vld1.32 {d0} , [r2]!          @ d0 = next coefficient pair {c0, c1}
        vrev64.32 d1, d0              @ d1 = swapped pair {c1, c0}
        vld1.32 {d2[0]}, [r0]!        @ d2[0] = *r0, advance r0 by 4
        ADD r7, r0, #252              @ r0 just advanced: +252 = old r0 + 256
        vld1.32 {d2[1]}, [r7]         @ d2[1] = sample 64 words ahead
        vld1.32 {d3[0]}, [r4]         @ d3[0] = *psubband1 (descending stream)
        ADD r7, r4, #256
        vld1.32 {d3[1]}, [r7]         @ d3[1] = sample 64 words ahead of r4
        SUB r4, r4, #4                @ step descending read ptr down one word

        VMULL.S32 q2, d0, d2 @qsub 2nd  : 64-bit products {c0*d2[0], c1*d2[1]}
        VMULL.S32 q3, d0, d3 @add 2nd   : {c0*d3[0], c1*d3[1]}
        VMULL.S32 q4, d1, d2 @add 1st   : {c1*d2[0], c0*d2[1]}
        VMULL.S32 q5, d1, d3 @qsub 1st  : {c1*d3[0], c0*d3[1]}

        vadd.I64 q0, q4, q3           @ 64-bit sums
        VQSUB.S64 Q1, Q5, Q2          @ 64-bit saturating differences

        VSHRN.I64 D0, Q0, #32         @ narrow: keep high word, (sum)  >> 32
        VSHRN.I64 D2, Q1, #32         @ narrow: keep high word, (diff) >> 32
        VMOV.32 D3, D0                @ copy so D3[1] can pair with D2[1] below
        VST2.32 {D0[0], D2[0]}, [R3]! @ store first result pair, advance r3 by 8
        ADD r7, r3, #248              @ r3 just advanced: +248 = old r3 + 256
        VST2.32 {D2[1], D3[1]}, [R7]  @ store second pair 64 words ahead

@second part
        vld1.32 {d0} , [r2]!          @ next coefficient pair
        vrev64.32 d1, d0              @ swapped pair
        vld1.32 {d2[0]}, [r0]!
        ADD R7, R0, #252              @ +252 compensates the post-increment
        vld1.32 {d2[1]}, [r7]
        vld1.32 {d3[0]}, [r4]
        ADD R7, R4, #256
        vld1.32 {d3[1]}, [r7]
        SUB r4, r4, #4                @ step descending read ptr

        VMULL.S32 q2, d0, d2 @add 2nd
        VMULL.S32 q3, d0, d3 @sub 2nd
        VMULL.S32 q4, d1, d2 @sub 1st
        VMULL.S32 q5, d1, d3 @add 1st

        VADD.I64 Q0, Q5, Q2           @ note: operand pairing differs from part 1
        VQSUB.S64 Q1, Q4, Q3          @ saturating differences

        VSHRN.I64 D0, Q0, #32
        VSHRN.I64 D2, Q1, #32
        VMOV.32 D3, D0
        VST2.32 {D0[0], D2[0]}, [R5]  @ store at descending ptr (no post-inc)
        ADD R7, R5, #256
        VST2.32 {D2[1], D3[1]}, [R7]  @ mirror pair 64 words ahead
        SUB r5, r5, #8                @ step descending write ptr down two words
@Third part
        vld1.32 {d0} , [r2]!          @ same pattern as first part
        vrev64.32 d1, d0
        vld1.32 {d2[0]}, [r0]!
        ADD r7, r0, #252
        vld1.32 {d2[1]}, [r7]
        vld1.32 {d3[0]}, [r4]
        ADD r7, r4, #256
        vld1.32 {d3[1]}, [r7]
        SUB r4, r4, #4

        VMULL.S32 q2, d0, d2 @qsub 2nd
        VMULL.S32 q3, d0, d3 @add 2nd
        VMULL.S32 q4, d1, d2 @add 1st
        VMULL.S32 q5, d1, d3 @qsub 1st

        vadd.I64 q0, q4, q3
        VQSUB.S64 Q1, Q5, Q2

        VSHRN.I64 D0, Q0, #32
        VSHRN.I64 D2, Q1, #32
        VMOV.32 D3, D0
        VST2.32 {D0[0], D2[0]}, [R3]! @ ascending store, advance r3
        ADD r7, r3, #248              @ +248 = old r3 + 256 (post-inc applied)
        VST2.32 {D2[1], D3[1]}, [R7]

@Fourth part
        vld1.32 {d0} , [r2]!          @ same pattern as second part
        vrev64.32 d1, d0
        vld1.32 {d2[0]}, [r0]!
        ADD R7, R0, #252
        vld1.32 {d2[1]}, [r7]
        vld1.32 {d3[0]}, [r4]
        ADD R7, R4, #256
        vld1.32 {d3[1]}, [r7]
        SUB r4, r4, #4

        VMULL.S32 q2, d0, d2 @add 2nd
        VMULL.S32 q3, d0, d3 @sub 2nd
        VMULL.S32 q4, d1, d2 @sub 1st
        VMULL.S32 q5, d1, d3 @add 1st

        VADD.I64 Q0, Q5, Q2
        VQSUB.S64 Q1, Q4, Q3

        VSHRN.I64 D0, Q0, #32
        VSHRN.I64 D2, Q1, #32
        VMOV.32 D3, D0
        VST2.32 {D0[0], D2[0]}, [R5]  @ descending store
        ADD R7, R5, #256
        SUBS R6, R6, #1               @ decrement loop count (scheduled early,
                                      @ before the final store, to hide latency)
        VST2.32 {D2[1], D3[1]}, [R7]
        SUB r5, r5, #8                @ step descending write ptr

        BGT LOOP1                     @ repeat while iterations remain
        VPOP {D8-D11}                 @ restore callee-saved NEON regs
        LDMFD sp!, {r4-r12, r15}      @ restore regs and return (pop into pc)