xref: /aosp_15_r20/external/libxaac/decoder/armv7/ixheaacd_esbr_cos_sin_mod_loop1.s (revision 15dc779a375ca8b5125643b829a8aa4b70d7f451)
1@/******************************************************************************
2@ *
3@ * Copyright (C) 2018 The Android Open Source Project
4@ *
5@ * Licensed under the Apache License, Version 2.0 (the "License");
6@ * you may not use this file except in compliance with the License.
7@ * You may obtain a copy of the License at:
8@ *
@ * http://www.apache.org/licenses/LICENSE-2.0
10@ *
11@ * Unless required by applicable law or agreed to in writing, software
12@ * distributed under the License is distributed on an "AS IS" BASIS,
13@ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14@ * See the License for the specific language governing permissions and
15@ * limitations under the License.
16@ *
17@ *****************************************************************************
18@ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
19@*/
20
21
22.text
23.p2align 2
24
@------------------------------------------------------------------------------
@ ixheaacd_esbr_cos_sin_mod_loop1
@
@ Target : ARMv7-A with NEON, ARM state, GNU as syntax ('@' starts a comment).
@ NOTE(review): presumably called from C as
@   (WORD32 *subband, WORD32 M, const WORD32 *p_sin_cos, WORD32 *subband_tmp)
@ -- confirm against the C prototype.
@
@ In:
@   r0 = input words, read forwards, one 32-bit word per part
@   r1 = M; the loop makes (M >> 2) passes of four parts each
@   r2 = coefficient pairs {w0, w1}, one pair (8 bytes) read per part
@   r3 = output words, written forwards, 8 bytes per odd-numbered part
@
@ Internal:
@   r4 = r0 + 8*M - 4, input read backwards, one word per part
@   r5 = r3 + 8*M - 8, output written backwards, 8 bytes per even part
@   r6 = remaining passes
@   r7 = scratch address for the words ESBR_LOOP1_UPPER_OFFSET bytes further on
@
@ Every part also reads/writes the word(s) ESBR_LOOP1_UPPER_OFFSET bytes above
@ each of the four pointers; these are called aU, bU (inputs) below.
@
@ Per part, with a = [r0], b = [r4]:
@   parts 1 and 3:  X = (w1*a + w0*b) >> 32,  Y = sat(w1*b - w0*a) >> 32
@   parts 2 and 4:  X = (w1*b + w0*a) >> 32,  Y = sat(w1*a - w0*b) >> 32
@ The upper lane computes the same with a -> aU, b -> bU and w0 <-> w1 swapped.
@ The 64-bit add wraps, the 64-bit subtract saturates.
@ Lower results are stored as {X, Y}; upper results are stored as {Y, X}.
@
@ Clobbers: r0, r2, r3 (advanced), flags, q0-q3.
@ Preserved: r4-r12, d8-d11 (pushed/popped here).
@
@ If (M >> 2) <= 0 nothing is read or written.
@------------------------------------------------------------------------------
    .equ            ESBR_LOOP1_UPPER_OFFSET, 256    @ bytes (64 words) to the upper set

    .global ixheaacd_esbr_cos_sin_mod_loop1
    .type ixheaacd_esbr_cos_sin_mod_loop1, %function
ixheaacd_esbr_cos_sin_mod_loop1:

    STMFD           sp!, {r4-r12, r14}
    VPUSH           {D8-D11}            @ q4/q5 are used as product registers
@generating load addresses
    ADD             r4, r0, r1, LSL #3  @psubband1   = r0 + 8*M
    SUB             r4, r4, #4          @              ... - 4
    ADD             r5, r3, r1, LSL #3  @psubband1_t = r3 + 8*M
    SUB             r5, r5, #8          @              ... - 8
    MOV             r6, r1, ASR #2      @ pass count = M >> 2

@ The loop below is bottom-tested, so without this check it would run one
@ full pass even for a zero or negative count and touch memory outside the
@ range implied by M (for M == 0 that includes [r0 - 4] and [r3 - 8]).
    CMP             r6, #0
    BLE             .Lesbr_cos_sin_mod_loop1_done

LOOP1:
@first part
    VLD1.32         {D0}, [r2]!         @ d0 = {w0, w1}
    VREV64.32       D1, D0              @ d1 = {w1, w0}
    VLD1.32         {D2[0]}, [r0]!      @ d2[0] = a, r0 += 4
    ADD             r7, r0, #(ESBR_LOOP1_UPPER_OFFSET - 4)  @ r0 already advanced by 4
    VLD1.32         {D2[1]}, [r7]       @ d2[1] = aU
    VLD1.32         {D3[0]}, [r4]       @ d3[0] = b
    ADD             r7, r4, #ESBR_LOOP1_UPPER_OFFSET
    VLD1.32         {D3[1]}, [r7]       @ d3[1] = bU
    SUB             r4, r4, #4

    VMULL.S32       Q2, D0, D2          @ {w0*a, w1*aU}  subtrahend
    VMULL.S32       Q3, D0, D3          @ {w0*b, w1*bU}  addend
    VMULL.S32       Q4, D1, D2          @ {w1*a, w0*aU}  addend
    VMULL.S32       Q5, D1, D3          @ {w1*b, w0*bU}  minuend

    VADD.I64        Q0, Q4, Q3          @ X (64-bit, wrapping)
    VQSUB.S64       Q1, Q5, Q2          @ Y (64-bit, saturating)

    VSHRN.I64       D0, Q0, #32         @ d0 = high words of X
    VSHRN.I64       D2, Q1, #32         @ d2 = high words of Y
    VMOV.32         D3, D0              @ copy X next to Y for the {Y, X} store
    VST2.32         {D0[0], D2[0]}, [r3]!   @ lower {X, Y}, r3 += 8
    ADD             r7, r3, #(ESBR_LOOP1_UPPER_OFFSET - 8)  @ r3 already advanced by 8
    VST2.32         {D2[1], D3[1]}, [r7]    @ upper {Y, X}

@second part
    VLD1.32         {D0}, [r2]!         @ d0 = {w0, w1}
    VREV64.32       D1, D0              @ d1 = {w1, w0}
    VLD1.32         {D2[0]}, [r0]!      @ d2[0] = a, r0 += 4
    ADD             r7, r0, #(ESBR_LOOP1_UPPER_OFFSET - 4)
    VLD1.32         {D2[1]}, [r7]       @ d2[1] = aU
    VLD1.32         {D3[0]}, [r4]       @ d3[0] = b
    ADD             r7, r4, #ESBR_LOOP1_UPPER_OFFSET
    VLD1.32         {D3[1]}, [r7]       @ d3[1] = bU
    SUB             r4, r4, #4

    VMULL.S32       Q2, D0, D2          @ {w0*a, w1*aU}  addend
    VMULL.S32       Q3, D0, D3          @ {w0*b, w1*bU}  subtrahend
    VMULL.S32       Q4, D1, D2          @ {w1*a, w0*aU}  minuend
    VMULL.S32       Q5, D1, D3          @ {w1*b, w0*bU}  addend

    VADD.I64        Q0, Q5, Q2          @ X (64-bit, wrapping)
    VQSUB.S64       Q1, Q4, Q3          @ Y (64-bit, saturating)

    VSHRN.I64       D0, Q0, #32         @ d0 = high words of X
    VSHRN.I64       D2, Q1, #32         @ d2 = high words of Y
    VMOV.32         D3, D0              @ copy X next to Y for the {Y, X} store
    VST2.32         {D0[0], D2[0]}, [r5]    @ lower {X, Y}
    ADD             r7, r5, #ESBR_LOOP1_UPPER_OFFSET
    VST2.32         {D2[1], D3[1]}, [r7]    @ upper {Y, X}
    SUB             r5, r5, #8          @ backward output pointer steps down

@Third part (same arithmetic as the first part)
    VLD1.32         {D0}, [r2]!         @ d0 = {w0, w1}
    VREV64.32       D1, D0              @ d1 = {w1, w0}
    VLD1.32         {D2[0]}, [r0]!      @ d2[0] = a, r0 += 4
    ADD             r7, r0, #(ESBR_LOOP1_UPPER_OFFSET - 4)
    VLD1.32         {D2[1]}, [r7]       @ d2[1] = aU
    VLD1.32         {D3[0]}, [r4]       @ d3[0] = b
    ADD             r7, r4, #ESBR_LOOP1_UPPER_OFFSET
    VLD1.32         {D3[1]}, [r7]       @ d3[1] = bU
    SUB             r4, r4, #4

    VMULL.S32       Q2, D0, D2          @ {w0*a, w1*aU}  subtrahend
    VMULL.S32       Q3, D0, D3          @ {w0*b, w1*bU}  addend
    VMULL.S32       Q4, D1, D2          @ {w1*a, w0*aU}  addend
    VMULL.S32       Q5, D1, D3          @ {w1*b, w0*bU}  minuend

    VADD.I64        Q0, Q4, Q3          @ X (64-bit, wrapping)
    VQSUB.S64       Q1, Q5, Q2          @ Y (64-bit, saturating)

    VSHRN.I64       D0, Q0, #32         @ d0 = high words of X
    VSHRN.I64       D2, Q1, #32         @ d2 = high words of Y
    VMOV.32         D3, D0              @ copy X next to Y for the {Y, X} store
    VST2.32         {D0[0], D2[0]}, [r3]!   @ lower {X, Y}, r3 += 8
    ADD             r7, r3, #(ESBR_LOOP1_UPPER_OFFSET - 8)
    VST2.32         {D2[1], D3[1]}, [r7]    @ upper {Y, X}

@Fourth part (same arithmetic as the second part)
    VLD1.32         {D0}, [r2]!         @ d0 = {w0, w1}
    VREV64.32       D1, D0              @ d1 = {w1, w0}
    VLD1.32         {D2[0]}, [r0]!      @ d2[0] = a, r0 += 4
    ADD             r7, r0, #(ESBR_LOOP1_UPPER_OFFSET - 4)
    VLD1.32         {D2[1]}, [r7]       @ d2[1] = aU
    VLD1.32         {D3[0]}, [r4]       @ d3[0] = b
    ADD             r7, r4, #ESBR_LOOP1_UPPER_OFFSET
    VLD1.32         {D3[1]}, [r7]       @ d3[1] = bU
    SUB             r4, r4, #4

    VMULL.S32       Q2, D0, D2          @ {w0*a, w1*aU}  addend
    VMULL.S32       Q3, D0, D3          @ {w0*b, w1*bU}  subtrahend
    VMULL.S32       Q4, D1, D2          @ {w1*a, w0*aU}  minuend
    VMULL.S32       Q5, D1, D3          @ {w1*b, w0*bU}  addend

    VADD.I64        Q0, Q5, Q2          @ X (64-bit, wrapping)
    VQSUB.S64       Q1, Q4, Q3          @ Y (64-bit, saturating)

    VSHRN.I64       D0, Q0, #32         @ d0 = high words of X
    VSHRN.I64       D2, Q1, #32         @ d2 = high words of Y
    VMOV.32         D3, D0              @ copy X next to Y for the {Y, X} store
    VST2.32         {D0[0], D2[0]}, [r5]    @ lower {X, Y}
    ADD             r7, r5, #ESBR_LOOP1_UPPER_OFFSET
    SUBS            r6, r6, #1          @ flags stay live until BGT: nothing below sets them
    VST2.32         {D2[1], D3[1]}, [r7]    @ upper {Y, X}
    SUB             r5, r5, #8          @ backward output pointer steps down

    BGT             LOOP1

.Lesbr_cos_sin_mod_loop1_done:
    VPOP            {D8-D11}
    LDMFD           sp!, {r4-r12, r15}  @ restoring into pc returns to the caller
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174