1 /******************************************************************************
2  *
3  *  Copyright 1999-2012 Broadcom Corporation
4  *
5  *  Licensed under the Apache License, Version 2.0 (the "License");
6  *  you may not use this file except in compliance with the License.
7  *  You may obtain a copy of the License at:
8  *
9  *  http://www.apache.org/licenses/LICENSE-2.0
10  *
11  *  Unless required by applicable law or agreed to in writing, software
12  *  distributed under the License is distributed on an "AS IS" BASIS,
13  *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  *  See the License for the specific language governing permissions and
15  *  limitations under the License.
16  *
17  ******************************************************************************/
18 
19 /******************************************************************************
20  *
21  *  This file contains the code that performs Analysis of the input audio
22  *  stream.
23  *
24  ******************************************************************************/
25 #include <string.h>
26 
27 #include "sbc_enc_func_declare.h"
28 #include "sbc_encoder.h"
29 /*#include <math.h>*/
30 
/*
 * Window coefficients consumed by the WINDOW_ACCU_* macros.
 *
 * Two variants are provided, selected by SBC_IS_64_MULT_IN_WINDOW_ACCU:
 *   - TRUE : 32-bit coefficients (int32_t).
 *   - else : 16-bit versions of the same coefficients (int16_t); the 32-bit
 *            value each one was derived from is quoted in its comment.
 *
 * The trailing comment of each entry lists the coefficient table indices that
 * share the value (for the 4-subband set the table is named
 * gas32CoeffFor4SBs in the original comments).
 *
 * Every replacement list is fully parenthesized so that a coefficient can be
 * used in any expression context without precedence surprises.
 */
#if (SBC_IS_64_MULT_IN_WINDOW_ACCU == TRUE)
#define WIND_4_SUBBANDS_0_1 ((int32_t)0x01659F45) /* gas32CoeffFor4SBs[8] = -gas32CoeffFor4SBs[32] = 0x01659F45 */
#define WIND_4_SUBBANDS_0_2 ((int32_t)0x115B1ED2) /* gas32CoeffFor4SBs[16] = -gas32CoeffFor4SBs[24] = 0x115B1ED2 */
#define WIND_4_SUBBANDS_1_0 ((int32_t)0x001194E6) /* gas32CoeffFor4SBs[1 and 39] = 0x001194E6 */
#define WIND_4_SUBBANDS_1_1 ((int32_t)0x029DBAA3) /* gas32CoeffFor4SBs[9 and 31] = 0x029DBAA3 */
#define WIND_4_SUBBANDS_1_2 ((int32_t)0x18F55C90) /* gas32CoeffFor4SBs[17 and 23] = 0x18F55C90 */
#define WIND_4_SUBBANDS_1_3 ((int32_t)0xF60FAF37) /* gas32CoeffFor4SBs[15 and 25] = 0xF60FAF37 */
#define WIND_4_SUBBANDS_1_4 ((int32_t)0xFF9BB9D5) /* gas32CoeffFor4SBs[7 and 33] = 0xFF9BB9D5 */
#define WIND_4_SUBBANDS_2_0 ((int32_t)0x0030E2D3) /* gas32CoeffFor4SBs[2 and 38] = 0x0030E2D3 */
#define WIND_4_SUBBANDS_2_1 ((int32_t)0x03B23341) /* gas32CoeffFor4SBs[10 and 30] = 0x03B23341 */
#define WIND_4_SUBBANDS_2_2 ((int32_t)0x1F91CA46) /* gas32CoeffFor4SBs[18 and 22] = 0x1F91CA46 */
#define WIND_4_SUBBANDS_2_3 ((int32_t)0xFC4F91D4) /* gas32CoeffFor4SBs[14 and 26] = 0xFC4F91D4 */
#define WIND_4_SUBBANDS_2_4 ((int32_t)0x003D239B) /* gas32CoeffFor4SBs[6 and 34] = 0x003D239B */
#define WIND_4_SUBBANDS_3_0 ((int32_t)0x00599403) /* gas32CoeffFor4SBs[3 and 37] = 0x00599403 */
#define WIND_4_SUBBANDS_3_1 ((int32_t)0x041EEE40) /* gas32CoeffFor4SBs[11 and 29] = 0x041EEE40 */
#define WIND_4_SUBBANDS_3_2 ((int32_t)0x2412F251) /* gas32CoeffFor4SBs[19 and 21] = 0x2412F251 */
#define WIND_4_SUBBANDS_3_3 ((int32_t)0x00C8F2BC) /* gas32CoeffFor4SBs[13 and 27] = 0x00C8F2BC */
#define WIND_4_SUBBANDS_3_4 ((int32_t)0x007F88E4) /* gas32CoeffFor4SBs[5 and 35] = 0x007F88E4 */
#define WIND_4_SUBBANDS_4_0 ((int32_t)0x007DBCC8) /* gas32CoeffFor4SBs[4 and 36] = 0x007DBCC8 */
#define WIND_4_SUBBANDS_4_1 ((int32_t)0x034FEE2C) /* gas32CoeffFor4SBs[12 and 28] = 0x034FEE2C */
#define WIND_4_SUBBANDS_4_2 ((int32_t)0x25AC1FF2) /* gas32CoeffFor4SBs[20] = 0x25AC1FF2 */

#define WIND_8_SUBBANDS_0_1 ((int32_t)0x00B97348) /* 16 = 0x00B97348 */
#define WIND_8_SUBBANDS_0_2 ((int32_t)0x08B4307A) /* 32 = 0x08B4307A */
#define WIND_8_SUBBANDS_1_0 ((int32_t)0x00052173) /* 1 and 79 = 0x00052173 */
#define WIND_8_SUBBANDS_1_1 ((int32_t)0x01071B96) /* 17 and 63 = 0x01071B96 */
#define WIND_8_SUBBANDS_1_2 ((int32_t)0x0A9F3E9A) /* 33 and 47 = 0x0A9F3E9A */
#define WIND_8_SUBBANDS_1_3 ((int32_t)0xF9312891) /* 31 and 49 = 0xF9312891 */
#define WIND_8_SUBBANDS_1_4 ((int32_t)0xFF8D6793) /* 15 and 65 = 0xFF8D6793 */
#define WIND_8_SUBBANDS_2_0 ((int32_t)0x000B3F71) /* 2 and 78 = 0x000B3F71 */
#define WIND_8_SUBBANDS_2_1 ((int32_t)0x0156B3CA) /* 18 and 62 = 0x0156B3CA */
#define WIND_8_SUBBANDS_2_2 ((int32_t)0x0C7D59B6) /* 34 and 46 = 0x0C7D59B6 */
#define WIND_8_SUBBANDS_2_3 ((int32_t)0xFAFF95FC) /* 30 and 50 = 0xFAFF95FC */
#define WIND_8_SUBBANDS_2_4 ((int32_t)0xFFC9F10E) /* 14 and 66 = 0xFFC9F10E */
#define WIND_8_SUBBANDS_3_0 ((int32_t)0x00122C7D) /* 3 and 77 = 0x00122C7D */
#define WIND_8_SUBBANDS_3_1 ((int32_t)0x01A1B38B) /* 19 and 61 = 0x01A1B38B */
#define WIND_8_SUBBANDS_3_2 ((int32_t)0x0E3BB16F) /* 35 and 45 = 0x0E3BB16F */
#define WIND_8_SUBBANDS_3_3 ((int32_t)0xFCA86E7E) /* 29 and 51 = 0xFCA86E7E */
#define WIND_8_SUBBANDS_3_4 ((int32_t)0xFFFA2413) /* 13 and 67 = 0xFFFA2413 */
#define WIND_8_SUBBANDS_4_0 ((int32_t)0x001AFF89) /* 4 and 76 = 0x001AFF89 */
#define WIND_8_SUBBANDS_4_1 ((int32_t)0x01E0224C) /* 20 and 60 = 0x01E0224C */
#define WIND_8_SUBBANDS_4_2 ((int32_t)0x0FC721F9) /* 36 and 44 = 0x0FC721F9 */
#define WIND_8_SUBBANDS_4_3 ((int32_t)0xFE20435D) /* 28 and 52 = 0xFE20435D */
#define WIND_8_SUBBANDS_4_4 ((int32_t)0x001D8FD2) /* 12 and 68 = 0x001D8FD2 */
#define WIND_8_SUBBANDS_5_0 ((int32_t)0x00255A62) /* 5 and 75 = 0x00255A62 */
#define WIND_8_SUBBANDS_5_1 ((int32_t)0x0209291F) /* 21 and 59 = 0x0209291F */
#define WIND_8_SUBBANDS_5_2 ((int32_t)0x110ECEF0) /* 37 and 43 = 0x110ECEF0 */
#define WIND_8_SUBBANDS_5_3 ((int32_t)0xFF5EEB73) /* 27 and 53 = 0xFF5EEB73 */
#define WIND_8_SUBBANDS_5_4 ((int32_t)0x0034F8B6) /* 11 and 69 = 0x0034F8B6 */
#define WIND_8_SUBBANDS_6_0 ((int32_t)0x003060F4) /* 6 and 74 = 0x003060F4 */
#define WIND_8_SUBBANDS_6_1 ((int32_t)0x02138653) /* 22 and 58 = 0x02138653 */
#define WIND_8_SUBBANDS_6_2 ((int32_t)0x120435FA) /* 38 and 42 = 0x120435FA */
#define WIND_8_SUBBANDS_6_3 ((int32_t)0x005FD0FF) /* 26 and 54 = 0x005FD0FF */
#define WIND_8_SUBBANDS_6_4 ((int32_t)0x00415B75) /* 10 and 70 = 0x00415B75 */
#define WIND_8_SUBBANDS_7_0 ((int32_t)0x003A72E7) /* 7 and 73 = 0x003A72E7 */
#define WIND_8_SUBBANDS_7_1 ((int32_t)0x01F5F424) /* 23 and 57 = 0x01F5F424 */
#define WIND_8_SUBBANDS_7_2 ((int32_t)0x129C226F) /* 39 and 41 = 0x129C226F */
#define WIND_8_SUBBANDS_7_3 ((int32_t)0x01223EBA) /* 25 and 55 = 0x01223EBA */
#define WIND_8_SUBBANDS_7_4 ((int32_t)0x0044EF48) /* 9 and 71 = 0x0044EF48 */
#define WIND_8_SUBBANDS_8_0 ((int32_t)0x0041EC6A) /* 8 and 72 = 0x0041EC6A */
#define WIND_8_SUBBANDS_8_1 ((int32_t)0x01A7ECEF) /* 24 and 56 = 0x01A7ECEF */
#define WIND_8_SUBBANDS_8_2 ((int32_t)0x12CF6C75) /* 40 = 0x12CF6C75 */
#else
#define WIND_4_SUBBANDS_0_1 ((int16_t)0x0166) /* gas32CoeffFor4SBs[8] = -gas32CoeffFor4SBs[32] = 0x01659F45 */
#define WIND_4_SUBBANDS_0_2 ((int16_t)0x115B) /* gas32CoeffFor4SBs[16] = -gas32CoeffFor4SBs[24] = 0x115B1ED2 */
#define WIND_4_SUBBANDS_1_0 ((int16_t)0x0012) /* gas32CoeffFor4SBs[1 and 39] = 0x001194E6 */
#define WIND_4_SUBBANDS_1_1 ((int16_t)0x029E) /* gas32CoeffFor4SBs[9 and 31] = 0x029DBAA3 */
#define WIND_4_SUBBANDS_1_2 ((int16_t)0x18F5) /* gas32CoeffFor4SBs[17 and 23] = 0x18F55C90 */
#define WIND_4_SUBBANDS_1_3 ((int16_t)0xF610) /* gas32CoeffFor4SBs[15 and 25] = 0xF60FAF37 */
#define WIND_4_SUBBANDS_1_4 ((int16_t)0xFF9C) /* gas32CoeffFor4SBs[7 and 33] = 0xFF9BB9D5 */
#define WIND_4_SUBBANDS_2_0 ((int16_t)0x0031) /* gas32CoeffFor4SBs[2 and 38] = 0x0030E2D3 */
#define WIND_4_SUBBANDS_2_1 ((int16_t)0x03B2) /* gas32CoeffFor4SBs[10 and 30] = 0x03B23341 */
#define WIND_4_SUBBANDS_2_2 ((int16_t)0x1F91) /* gas32CoeffFor4SBs[18 and 22] = 0x1F91CA46 */
#define WIND_4_SUBBANDS_2_3 ((int16_t)0xFC50) /* gas32CoeffFor4SBs[14 and 26] = 0xFC4F91D4 */
#define WIND_4_SUBBANDS_2_4 ((int16_t)0x003D) /* gas32CoeffFor4SBs[6 and 34] = 0x003D239B */
#define WIND_4_SUBBANDS_3_0 ((int16_t)0x005A) /* gas32CoeffFor4SBs[3 and 37] = 0x00599403 */
#define WIND_4_SUBBANDS_3_1 ((int16_t)0x041F) /* gas32CoeffFor4SBs[11 and 29] = 0x041EEE40 */
#define WIND_4_SUBBANDS_3_2 ((int16_t)0x2413) /* gas32CoeffFor4SBs[19 and 21] = 0x2412F251 */
#define WIND_4_SUBBANDS_3_3 ((int16_t)0x00C9) /* gas32CoeffFor4SBs[13 and 27] = 0x00C8F2BC */
#define WIND_4_SUBBANDS_3_4 ((int16_t)0x0080) /* gas32CoeffFor4SBs[5 and 35] = 0x007F88E4 */
#define WIND_4_SUBBANDS_4_0 ((int16_t)0x007E) /* gas32CoeffFor4SBs[4 and 36] = 0x007DBCC8 */
#define WIND_4_SUBBANDS_4_1 ((int16_t)0x0350) /* gas32CoeffFor4SBs[12 and 28] = 0x034FEE2C */
#define WIND_4_SUBBANDS_4_2 ((int16_t)0x25AC) /* gas32CoeffFor4SBs[20] = 0x25AC1FF2 */

#define WIND_8_SUBBANDS_0_1 ((int16_t)0x00B9) /* 16 = 0x00B97348 */
#define WIND_8_SUBBANDS_0_2 ((int16_t)0x08B4) /* 32 = 0x08B4307A */
#define WIND_8_SUBBANDS_1_0 ((int16_t)0x0005) /* 1 and 79 = 0x00052173 */
#define WIND_8_SUBBANDS_1_1 ((int16_t)0x0107) /* 17 and 63 = 0x01071B96 */
#define WIND_8_SUBBANDS_1_2 ((int16_t)0x0A9F) /* 33 and 47 = 0x0A9F3E9A */
#define WIND_8_SUBBANDS_1_3 ((int16_t)0xF931) /* 31 and 49 = 0xF9312891 */
#define WIND_8_SUBBANDS_1_4 ((int16_t)0xFF8D) /* 15 and 65 = 0xFF8D6793 */
#define WIND_8_SUBBANDS_2_0 ((int16_t)0x000B) /* 2 and 78 = 0x000B3F71 */
#define WIND_8_SUBBANDS_2_1 ((int16_t)0x0157) /* 18 and 62 = 0x0156B3CA */
#define WIND_8_SUBBANDS_2_2 ((int16_t)0x0C7D) /* 34 and 46 = 0x0C7D59B6 */
#define WIND_8_SUBBANDS_2_3 ((int16_t)0xFB00) /* 30 and 50 = 0xFAFF95FC */
#define WIND_8_SUBBANDS_2_4 ((int16_t)0xFFCA) /* 14 and 66 = 0xFFC9F10E */
#define WIND_8_SUBBANDS_3_0 ((int16_t)0x0012) /* 3 and 77 = 0x00122C7D */
#define WIND_8_SUBBANDS_3_1 ((int16_t)0x01A2) /* 19 and 61 = 0x01A1B38B */
#define WIND_8_SUBBANDS_3_2 ((int16_t)0x0E3C) /* 35 and 45 = 0x0E3BB16F */
#define WIND_8_SUBBANDS_3_3 ((int16_t)0xFCA8) /* 29 and 51 = 0xFCA86E7E */
#define WIND_8_SUBBANDS_3_4 ((int16_t)0xFFFA) /* 13 and 67 = 0xFFFA2413 */
#define WIND_8_SUBBANDS_4_0 ((int16_t)0x001B) /* 4 and 76 = 0x001AFF89 */
#define WIND_8_SUBBANDS_4_1 ((int16_t)0x01E0) /* 20 and 60 = 0x01E0224C */
#define WIND_8_SUBBANDS_4_2 ((int16_t)0x0FC7) /* 36 and 44 = 0x0FC721F9 */
#define WIND_8_SUBBANDS_4_3 ((int16_t)0xFE20) /* 28 and 52 = 0xFE20435D */
#define WIND_8_SUBBANDS_4_4 ((int16_t)0x001E) /* 12 and 68 = 0x001D8FD2 */
#define WIND_8_SUBBANDS_5_0 ((int16_t)0x0025) /* 5 and 75 = 0x00255A62 */
#define WIND_8_SUBBANDS_5_1 ((int16_t)0x0209) /* 21 and 59 = 0x0209291F */
#define WIND_8_SUBBANDS_5_2 ((int16_t)0x110F) /* 37 and 43 = 0x110ECEF0 */
#define WIND_8_SUBBANDS_5_3 ((int16_t)0xFF5F) /* 27 and 53 = 0xFF5EEB73 */
#define WIND_8_SUBBANDS_5_4 ((int16_t)0x0035) /* 11 and 69 = 0x0034F8B6 */
#define WIND_8_SUBBANDS_6_0 ((int16_t)0x0030) /* 6 and 74 = 0x003060F4 */
#define WIND_8_SUBBANDS_6_1 ((int16_t)0x0214) /* 22 and 58 = 0x02138653 */
#define WIND_8_SUBBANDS_6_2 ((int16_t)0x1204) /* 38 and 42 = 0x120435FA */
#define WIND_8_SUBBANDS_6_3 ((int16_t)0x0060) /* 26 and 54 = 0x005FD0FF */
#define WIND_8_SUBBANDS_6_4 ((int16_t)0x0041) /* 10 and 70 = 0x00415B75 */
#define WIND_8_SUBBANDS_7_0 ((int16_t)0x003A) /* 7 and 73 = 0x003A72E7 */
#define WIND_8_SUBBANDS_7_1 ((int16_t)0x01F6) /* 23 and 57 = 0x01F5F424 */
#define WIND_8_SUBBANDS_7_2 ((int16_t)0x129C) /* 39 and 41 = 0x129C226F */
#define WIND_8_SUBBANDS_7_3 ((int16_t)0x0122) /* 25 and 55 = 0x01223EBA */
#define WIND_8_SUBBANDS_7_4 ((int16_t)0x0045) /* 9 and 71 = 0x0044EF48 */
#define WIND_8_SUBBANDS_8_0 ((int16_t)0x0042) /* 8 and 72 = 0x0041EC6A */
#define WIND_8_SUBBANDS_8_1 ((int16_t)0x01A8) /* 24 and 56 = 0x01A7ECEF */
#define WIND_8_SUBBANDS_8_2 ((int16_t)0x12CF) /* 40 = 0x12CF6C75 */
#endif
164 
165 #if (SBC_USE_ARM_PRAGMA == TRUE)
166 #pragma arm section zidata = "sbc_s32_analysis_section"
167 #endif
168 static int32_t s32DCTY[16] = {0};
169 static int32_t s32X[ENC_VX_BUFFER_SIZE / 2];
170 static int16_t* s16X = (int16_t*)s32X; /* s16X must be 32 bits aligned cf  SHIFTUP_X8_2*/
171 #if (SBC_USE_ARM_PRAGMA == TRUE)
172 #pragma arm section zidata
173 #endif
174 
/*
 * SHIFTUP_X4 -- 4-subband variant, one buffer region.
 *
 * Copies 18 32-bit words (9 iterations, 2 words each), walking downwards from
 * the word at s16X + EncMaxShiftCounter + 38.  Every destination word is
 * loaded from the word located 2 + (ShiftCounter >> 1) words below it.
 *
 * Uses the caller's ps32X, i, ShiftCounter and EncMaxShiftCounter.  On exit
 * i == 9 and ps32X points one word below the last word written.
 */
#define SHIFTUP_X4 \
  { \
    ps32X = (int32_t*)(s16X + EncMaxShiftCounter + 38); \
    for (i = 0; i < 9; i++) { \
      *ps32X = *(ps32X - 2 - (ShiftCounter >> 1)); \
      *(ps32X - 1) = *(ps32X - 3 - (ShiftCounter >> 1)); \
      ps32X -= 2; \
    } \
  }
/*
 * SHIFTUP_X4_2 -- 4-subband variant, two buffer regions moved in lock-step.
 *
 * Same word move as SHIFTUP_X4 (18 words, source 2 + (ShiftCounter >> 1)
 * words below the destination), applied to two regions whose top words are
 * at s16X + EncMaxShiftCounter + 38 and s16X + 2 * EncMaxShiftCounter + 78.
 *
 * Uses the caller's ps32X, ps32X2, i, ShiftCounter and EncMaxShiftCounter.
 */
#define SHIFTUP_X4_2 \
  { \
    ps32X = (int32_t*)(s16X + EncMaxShiftCounter + 38); \
    ps32X2 = (int32_t*)(s16X + (EncMaxShiftCounter << 1) + 78); \
    for (i = 0; i < 9; i++) { \
      *ps32X = *(ps32X - 2 - (ShiftCounter >> 1)); \
      *ps32X2 = *(ps32X2 - 2 - (ShiftCounter >> 1)); \
      *(ps32X - 1) = *(ps32X - 3 - (ShiftCounter >> 1)); \
      *(ps32X2 - 1) = *(ps32X2 - 3 - (ShiftCounter >> 1)); \
      ps32X -= 2; \
      ps32X2 -= 2; \
    } \
  }
201 
/*
 * SHIFTUP_X8 -- 8-subband variant, one buffer region.
 *
 * Copies 36 32-bit words (9 iterations, 4 words each), walking downwards from
 * the word at s16X + EncMaxShiftCounter + 78.  Every destination word is
 * loaded from the word located 4 + (ShiftCounter >> 1) words below it.
 *
 * Uses the caller's ps32X, i, ShiftCounter and EncMaxShiftCounter.  On exit
 * i == 9 and ps32X points one word below the last word written.
 */
#define SHIFTUP_X8 \
  { \
    ps32X = (int32_t*)(s16X + EncMaxShiftCounter + 78); \
    for (i = 0; i < 9; i++) { \
      *ps32X = *(ps32X - 4 - (ShiftCounter >> 1)); \
      *(ps32X - 1) = *(ps32X - 5 - (ShiftCounter >> 1)); \
      *(ps32X - 2) = *(ps32X - 6 - (ShiftCounter >> 1)); \
      *(ps32X - 3) = *(ps32X - 7 - (ShiftCounter >> 1)); \
      ps32X -= 4; \
    } \
  }
/*
 * SHIFTUP_X8_2 -- 8-subband variant, two buffer regions moved in lock-step.
 *
 * Same word move as SHIFTUP_X8 (36 words, source 4 + (ShiftCounter >> 1)
 * words below the destination), applied to two regions whose top words are
 * at s16X + EncMaxShiftCounter + 78 and s16X + 2 * EncMaxShiftCounter + 158.
 *
 * Uses the caller's ps32X, ps32X2, i, ShiftCounter and EncMaxShiftCounter.
 */
#define SHIFTUP_X8_2 \
  { \
    ps32X = (int32_t*)(s16X + EncMaxShiftCounter + 78); \
    ps32X2 = (int32_t*)(s16X + (EncMaxShiftCounter << 1) + 158); \
    for (i = 0; i < 9; i++) { \
      *ps32X = *(ps32X - 4 - (ShiftCounter >> 1)); \
      *ps32X2 = *(ps32X2 - 4 - (ShiftCounter >> 1)); \
      *(ps32X - 1) = *(ps32X - 5 - (ShiftCounter >> 1)); \
      *(ps32X2 - 1) = *(ps32X2 - 5 - (ShiftCounter >> 1)); \
      *(ps32X - 2) = *(ps32X - 6 - (ShiftCounter >> 1)); \
      *(ps32X2 - 2) = *(ps32X2 - 6 - (ShiftCounter >> 1)); \
      *(ps32X - 3) = *(ps32X - 7 - (ShiftCounter >> 1)); \
      *(ps32X2 - 3) = *(ps32X2 - 7 - (ShiftCounter >> 1)); \
      ps32X -= 4; \
      ps32X2 -= 4; \
    } \
  }
240 
241 #if (SBC_ARM_ASM_OPT == TRUE)
/*
 * ARM inline-assembly variant, 8 subbands, output slot 0:
 *   s32DCTY[0] = WIND_8_SUBBANDS_0_1 * (s16X[ChOffset + 16] - s16X[ChOffset + 64])
 *              + WIND_8_SUBBANDS_0_2 * (s16X[ChOffset + 32] - s16X[ChOffset + 48])
 * Uses the caller's s32Hi and ChOffset.
 */
#define WINDOW_ACCU_8_0 \
  { \
    __asm { \
        MUL s32Hi, WIND_8_SUBBANDS_0_1, (s16X[ChOffset + 16] - s16X[ChOffset + 64]); \
        MLA s32Hi, WIND_8_SUBBANDS_0_2, (s16X[ChOffset + 32] - s16X[ChOffset + 48]), s32Hi; \
        MOV s32DCTY[0], s32Hi; \
    } \
  }
/*
 * ARM inline-assembly variant, 8 subbands, output slots 1 and 15.
 * Both sums use coefficients WIND_8_SUBBANDS_1_0..1_4 in that order:
 *   s32DCTY[1]  <- taps s16X[ChOffset + 1, 17, 33, 49, 65]
 *   s32DCTY[15] <- taps s16X[ChOffset + 79, 63, 47, 31, 15]
 * Uses the caller's s32Hi, s32Hi2 and ChOffset.
 */
#define WINDOW_ACCU_8_1_15 \
  { \
    __asm { \
        MUL s32Hi, WIND_8_SUBBANDS_1_0, s16X[ChOffset + 1]; \
        MUL s32Hi2, WIND_8_SUBBANDS_1_0, s16X[ChOffset + 79]; \
        MLA s32Hi, WIND_8_SUBBANDS_1_1, s16X[ChOffset + 17], s32Hi; \
        MLA s32Hi2, WIND_8_SUBBANDS_1_1, s16X[ChOffset + 63], s32Hi2; \
        MLA s32Hi, WIND_8_SUBBANDS_1_2, s16X[ChOffset + 33], s32Hi; \
        MLA s32Hi2, WIND_8_SUBBANDS_1_2, s16X[ChOffset + 47], s32Hi2; \
        MLA s32Hi, WIND_8_SUBBANDS_1_3, s16X[ChOffset + 49], s32Hi; \
        MLA s32Hi2, WIND_8_SUBBANDS_1_3, s16X[ChOffset + 31], s32Hi2; \
        MLA s32Hi, WIND_8_SUBBANDS_1_4, s16X[ChOffset + 65], s32Hi; \
        MLA s32Hi2, WIND_8_SUBBANDS_1_4, s16X[ChOffset + 15], s32Hi2; \
        MOV s32DCTY[1], s32Hi; \
        MOV s32DCTY[15], s32Hi2; \
    } \
  }
/*
 * ARM inline-assembly variant, 8 subbands, output slots 2 and 14.
 * Both sums use coefficients WIND_8_SUBBANDS_2_0..2_4 in that order:
 *   s32DCTY[2]  <- taps s16X[ChOffset + 2, 18, 34, 50, 66]
 *   s32DCTY[14] <- taps s16X[ChOffset + 78, 62, 46, 30, 14]
 * Uses the caller's s32Hi, s32Hi2 and ChOffset.
 */
#define WINDOW_ACCU_8_2_14 \
  { \
    __asm { \
        MUL s32Hi, WIND_8_SUBBANDS_2_0, s16X[ChOffset + 2]; \
        MUL s32Hi2, WIND_8_SUBBANDS_2_0, s16X[ChOffset + 78]; \
        MLA s32Hi, WIND_8_SUBBANDS_2_1, s16X[ChOffset + 18], s32Hi; \
        MLA s32Hi2, WIND_8_SUBBANDS_2_1, s16X[ChOffset + 62], s32Hi2; \
        MLA s32Hi, WIND_8_SUBBANDS_2_2, s16X[ChOffset + 34], s32Hi; \
        MLA s32Hi2, WIND_8_SUBBANDS_2_2, s16X[ChOffset + 46], s32Hi2; \
        MLA s32Hi, WIND_8_SUBBANDS_2_3, s16X[ChOffset + 50], s32Hi; \
        MLA s32Hi2, WIND_8_SUBBANDS_2_3, s16X[ChOffset + 30], s32Hi2; \
        MLA s32Hi, WIND_8_SUBBANDS_2_4, s16X[ChOffset + 66], s32Hi; \
        MLA s32Hi2, WIND_8_SUBBANDS_2_4, s16X[ChOffset + 14], s32Hi2; \
        MOV s32DCTY[2], s32Hi; \
        MOV s32DCTY[14], s32Hi2; \
    } \
  }
/*
 * ARM inline-assembly variant, 8 subbands, output slots 3 and 13.
 * Both sums use coefficients WIND_8_SUBBANDS_3_0..3_4 in that order:
 *   s32DCTY[3]  <- taps s16X[ChOffset + 3, 19, 35, 51, 67]
 *   s32DCTY[13] <- taps s16X[ChOffset + 77, 61, 45, 29, 13]
 * Uses the caller's s32Hi, s32Hi2 and ChOffset.
 */
#define WINDOW_ACCU_8_3_13 \
  { \
    __asm { \
        MUL s32Hi, WIND_8_SUBBANDS_3_0, s16X[ChOffset + 3]; \
        MUL s32Hi2, WIND_8_SUBBANDS_3_0, s16X[ChOffset + 77]; \
        MLA s32Hi, WIND_8_SUBBANDS_3_1, s16X[ChOffset + 19], s32Hi; \
        MLA s32Hi2, WIND_8_SUBBANDS_3_1, s16X[ChOffset + 61], s32Hi2; \
        MLA s32Hi, WIND_8_SUBBANDS_3_2, s16X[ChOffset + 35], s32Hi; \
        MLA s32Hi2, WIND_8_SUBBANDS_3_2, s16X[ChOffset + 45], s32Hi2; \
        MLA s32Hi, WIND_8_SUBBANDS_3_3, s16X[ChOffset + 51], s32Hi; \
        MLA s32Hi2, WIND_8_SUBBANDS_3_3, s16X[ChOffset + 29], s32Hi2; \
        MLA s32Hi, WIND_8_SUBBANDS_3_4, s16X[ChOffset + 67], s32Hi; \
        MLA s32Hi2, WIND_8_SUBBANDS_3_4, s16X[ChOffset + 13], s32Hi2; \
        MOV s32DCTY[3], s32Hi; \
        MOV s32DCTY[13], s32Hi2; \
    } \
  }
/*
 * ARM inline-assembly variant, 8 subbands, output slots 4 and 12.
 * Both sums use coefficients WIND_8_SUBBANDS_4_0..4_4 in that order:
 *   s32DCTY[4]  <- taps s16X[ChOffset + 4, 20, 36, 52, 68]
 *   s32DCTY[12] <- taps s16X[ChOffset + 76, 60, 44, 28, 12]
 * Uses the caller's s32Hi, s32Hi2 and ChOffset.
 */
#define WINDOW_ACCU_8_4_12 \
  { \
    __asm { \
        MUL s32Hi, WIND_8_SUBBANDS_4_0, s16X[ChOffset + 4]; \
        MUL s32Hi2, WIND_8_SUBBANDS_4_0, s16X[ChOffset + 76]; \
        MLA s32Hi, WIND_8_SUBBANDS_4_1, s16X[ChOffset + 20], s32Hi; \
        MLA s32Hi2, WIND_8_SUBBANDS_4_1, s16X[ChOffset + 60], s32Hi2; \
        MLA s32Hi, WIND_8_SUBBANDS_4_2, s16X[ChOffset + 36], s32Hi; \
        MLA s32Hi2, WIND_8_SUBBANDS_4_2, s16X[ChOffset + 44], s32Hi2; \
        MLA s32Hi, WIND_8_SUBBANDS_4_3, s16X[ChOffset + 52], s32Hi; \
        MLA s32Hi2, WIND_8_SUBBANDS_4_3, s16X[ChOffset + 28], s32Hi2; \
        MLA s32Hi, WIND_8_SUBBANDS_4_4, s16X[ChOffset + 68], s32Hi; \
        MLA s32Hi2, WIND_8_SUBBANDS_4_4, s16X[ChOffset + 12], s32Hi2; \
        MOV s32DCTY[4], s32Hi; \
        MOV s32DCTY[12], s32Hi2; \
    } \
  }
/*
 * ARM inline-assembly variant, 8 subbands, output slots 5 and 11.
 * Both sums use coefficients WIND_8_SUBBANDS_5_0..5_4 in that order:
 *   s32DCTY[5]  <- taps s16X[ChOffset + 5, 21, 37, 53, 69]
 *   s32DCTY[11] <- taps s16X[ChOffset + 75, 59, 43, 27, 11]
 * Uses the caller's s32Hi, s32Hi2 and ChOffset.
 */
#define WINDOW_ACCU_8_5_11 \
  { \
    __asm { \
        MUL s32Hi, WIND_8_SUBBANDS_5_0, s16X[ChOffset + 5]; \
        MUL s32Hi2, WIND_8_SUBBANDS_5_0, s16X[ChOffset + 75]; \
        MLA s32Hi, WIND_8_SUBBANDS_5_1, s16X[ChOffset + 21], s32Hi; \
        MLA s32Hi2, WIND_8_SUBBANDS_5_1, s16X[ChOffset + 59], s32Hi2; \
        MLA s32Hi, WIND_8_SUBBANDS_5_2, s16X[ChOffset + 37], s32Hi; \
        MLA s32Hi2, WIND_8_SUBBANDS_5_2, s16X[ChOffset + 43], s32Hi2; \
        MLA s32Hi, WIND_8_SUBBANDS_5_3, s16X[ChOffset + 53], s32Hi; \
        MLA s32Hi2, WIND_8_SUBBANDS_5_3, s16X[ChOffset + 27], s32Hi2; \
        MLA s32Hi, WIND_8_SUBBANDS_5_4, s16X[ChOffset + 69], s32Hi; \
        MLA s32Hi2, WIND_8_SUBBANDS_5_4, s16X[ChOffset + 11], s32Hi2; \
        MOV s32DCTY[5], s32Hi; \
        MOV s32DCTY[11], s32Hi2; \
    } \
  }
/*
 * ARM inline-assembly variant, 8 subbands, output slots 6 and 10.
 * Both sums use coefficients WIND_8_SUBBANDS_6_0..6_4 in that order:
 *   s32DCTY[6]  <- taps s16X[ChOffset + 6, 22, 38, 54, 70]
 *   s32DCTY[10] <- taps s16X[ChOffset + 74, 58, 42, 26, 10]
 * Uses the caller's s32Hi, s32Hi2 and ChOffset.
 */
#define WINDOW_ACCU_8_6_10 \
  { \
    __asm { \
        MUL s32Hi, WIND_8_SUBBANDS_6_0, s16X[ChOffset + 6]; \
        MUL s32Hi2, WIND_8_SUBBANDS_6_0, s16X[ChOffset + 74]; \
        MLA s32Hi, WIND_8_SUBBANDS_6_1, s16X[ChOffset + 22], s32Hi; \
        MLA s32Hi2, WIND_8_SUBBANDS_6_1, s16X[ChOffset + 58], s32Hi2; \
        MLA s32Hi, WIND_8_SUBBANDS_6_2, s16X[ChOffset + 38], s32Hi; \
        MLA s32Hi2, WIND_8_SUBBANDS_6_2, s16X[ChOffset + 42], s32Hi2; \
        MLA s32Hi, WIND_8_SUBBANDS_6_3, s16X[ChOffset + 54], s32Hi; \
        MLA s32Hi2, WIND_8_SUBBANDS_6_3, s16X[ChOffset + 26], s32Hi2; \
        MLA s32Hi, WIND_8_SUBBANDS_6_4, s16X[ChOffset + 70], s32Hi; \
        MLA s32Hi2, WIND_8_SUBBANDS_6_4, s16X[ChOffset + 10], s32Hi2; \
        MOV s32DCTY[6], s32Hi; \
        MOV s32DCTY[10], s32Hi2; \
    } \
  }
/*
 * ARM inline-assembly variant, 8 subbands, output slots 7 and 9.
 * Both sums use coefficients WIND_8_SUBBANDS_7_0..7_4 in that order:
 *   s32DCTY[7] <- taps s16X[ChOffset + 7, 23, 39, 55, 71]
 *   s32DCTY[9] <- taps s16X[ChOffset + 73, 57, 41, 25, 9]
 * Uses the caller's s32Hi, s32Hi2 and ChOffset.
 */
#define WINDOW_ACCU_8_7_9 \
  { \
    __asm { \
        MUL s32Hi, WIND_8_SUBBANDS_7_0, s16X[ChOffset + 7]; \
        MUL s32Hi2, WIND_8_SUBBANDS_7_0, s16X[ChOffset + 73]; \
        MLA s32Hi, WIND_8_SUBBANDS_7_1, s16X[ChOffset + 23], s32Hi; \
        MLA s32Hi2, WIND_8_SUBBANDS_7_1, s16X[ChOffset + 57], s32Hi2; \
        MLA s32Hi, WIND_8_SUBBANDS_7_2, s16X[ChOffset + 39], s32Hi; \
        MLA s32Hi2, WIND_8_SUBBANDS_7_2, s16X[ChOffset + 41], s32Hi2; \
        MLA s32Hi, WIND_8_SUBBANDS_7_3, s16X[ChOffset + 55], s32Hi; \
        MLA s32Hi2, WIND_8_SUBBANDS_7_3, s16X[ChOffset + 25], s32Hi2; \
        MLA s32Hi, WIND_8_SUBBANDS_7_4, s16X[ChOffset + 71], s32Hi; \
        MLA s32Hi2, WIND_8_SUBBANDS_7_4, s16X[ChOffset + 9], s32Hi2; \
        MOV s32DCTY[7], s32Hi; \
        MOV s32DCTY[9], s32Hi2; \
    } \
  }
/*
 * ARM inline-assembly variant, 8 subbands, output slot 8 (centre slot):
 *   s32DCTY[8] = WIND_8_SUBBANDS_8_0 * (s16X[ChOffset + 8]  + s16X[ChOffset + 72])
 *              + WIND_8_SUBBANDS_8_1 * (s16X[ChOffset + 24] + s16X[ChOffset + 56])
 *              + WIND_8_SUBBANDS_8_2 *  s16X[ChOffset + 40]
 * Uses the caller's s32Hi and ChOffset.
 *
 * The second pair is taps 24 and 56 (ChOffset+8+16 and ChOffset+8+48), as
 * documented on WIND_8_SUBBANDS_8_1 and mirroring the 12/28 pair used by
 * WINDOW_ACCU_4_4.  The previous code added tap 72 (ChOffset+8+64) a second
 * time instead of tap 56.
 */
#define WINDOW_ACCU_8_8 \
  { \
    __asm { \
        MUL s32Hi,WIND_8_SUBBANDS_8_0,(s16X[ChOffset+8]+s16X[ChOffset+8+64]); \
        MLA s32Hi,WIND_8_SUBBANDS_8_1,(s16X[ChOffset+8+16]+s16X[ChOffset+8+48]),s32Hi; \
        MLA s32Hi,WIND_8_SUBBANDS_8_2,s16X[ChOffset+8+32],s32Hi; \
        MOV s32DCTY[8],s32Hi; \
    } \
  }
/*
 * ARM inline-assembly variant, 4 subbands, output slot 0:
 *   s32DCTY[0] = WIND_4_SUBBANDS_0_1 * (s16X[ChOffset + 8]  - s16X[ChOffset + 32])
 *              + WIND_4_SUBBANDS_0_2 * (s16X[ChOffset + 16] - s16X[ChOffset + 24])
 * Uses the caller's s32Hi and ChOffset.
 */
#define WINDOW_ACCU_4_0 \
  { \
    __asm { \
        MUL s32Hi, WIND_4_SUBBANDS_0_1, (s16X[ChOffset + 8] - s16X[ChOffset + 32]); \
        MLA s32Hi, WIND_4_SUBBANDS_0_2, (s16X[ChOffset + 16] - s16X[ChOffset + 24]), s32Hi; \
        MOV s32DCTY[0], s32Hi; \
    } \
  }
/*
 * ARM inline-assembly variant, 4 subbands, output slots 1 and 7.
 * Both sums use coefficients WIND_4_SUBBANDS_1_0..1_4 in that order:
 *   s32DCTY[1] <- taps s16X[ChOffset + 1, 9, 17, 25, 33]
 *   s32DCTY[7] <- taps s16X[ChOffset + 39, 31, 23, 15, 7]
 * Uses the caller's s32Hi, s32Hi2 and ChOffset.
 */
#define WINDOW_ACCU_4_1_7 \
  { \
    __asm { \
        MUL s32Hi, WIND_4_SUBBANDS_1_0, s16X[ChOffset + 1]; \
        MUL s32Hi2, WIND_4_SUBBANDS_1_0, s16X[ChOffset + 39]; \
        MLA s32Hi, WIND_4_SUBBANDS_1_1, s16X[ChOffset + 9], s32Hi; \
        MLA s32Hi2, WIND_4_SUBBANDS_1_1, s16X[ChOffset + 31], s32Hi2; \
        MLA s32Hi, WIND_4_SUBBANDS_1_2, s16X[ChOffset + 17], s32Hi; \
        MLA s32Hi2, WIND_4_SUBBANDS_1_2, s16X[ChOffset + 23], s32Hi2; \
        MLA s32Hi, WIND_4_SUBBANDS_1_3, s16X[ChOffset + 25], s32Hi; \
        MLA s32Hi2, WIND_4_SUBBANDS_1_3, s16X[ChOffset + 15], s32Hi2; \
        MLA s32Hi, WIND_4_SUBBANDS_1_4, s16X[ChOffset + 33], s32Hi; \
        MLA s32Hi2, WIND_4_SUBBANDS_1_4, s16X[ChOffset + 7], s32Hi2; \
        MOV s32DCTY[1], s32Hi; \
        MOV s32DCTY[7], s32Hi2; \
    } \
  }
/*
 * ARM inline-assembly variant, 4 subbands, output slots 2 and 6.
 * Both sums use coefficients WIND_4_SUBBANDS_2_0..2_4 in that order:
 *   s32DCTY[2] <- taps s16X[ChOffset + 2, 10, 18, 26, 34]
 *   s32DCTY[6] <- taps s16X[ChOffset + 38, 30, 22, 14, 6]
 * Uses the caller's s32Hi, s32Hi2 and ChOffset.
 */
#define WINDOW_ACCU_4_2_6 \
  { \
    __asm { \
        MUL s32Hi, WIND_4_SUBBANDS_2_0, s16X[ChOffset + 2]; \
        MUL s32Hi2, WIND_4_SUBBANDS_2_0, s16X[ChOffset + 38]; \
        MLA s32Hi, WIND_4_SUBBANDS_2_1, s16X[ChOffset + 10], s32Hi; \
        MLA s32Hi2, WIND_4_SUBBANDS_2_1, s16X[ChOffset + 30], s32Hi2; \
        MLA s32Hi, WIND_4_SUBBANDS_2_2, s16X[ChOffset + 18], s32Hi; \
        MLA s32Hi2, WIND_4_SUBBANDS_2_2, s16X[ChOffset + 22], s32Hi2; \
        MLA s32Hi, WIND_4_SUBBANDS_2_3, s16X[ChOffset + 26], s32Hi; \
        MLA s32Hi2, WIND_4_SUBBANDS_2_3, s16X[ChOffset + 14], s32Hi2; \
        MLA s32Hi, WIND_4_SUBBANDS_2_4, s16X[ChOffset + 34], s32Hi; \
        MLA s32Hi2, WIND_4_SUBBANDS_2_4, s16X[ChOffset + 6], s32Hi2; \
        MOV s32DCTY[2], s32Hi; \
        MOV s32DCTY[6], s32Hi2; \
    } \
  }
/*
 * ARM inline-assembly variant, 4 subbands, output slots 3 and 5.
 * Both sums use coefficients WIND_4_SUBBANDS_3_0..3_4 in that order:
 *   s32DCTY[3] <- taps s16X[ChOffset + 3, 11, 19, 27, 35]
 *   s32DCTY[5] <- taps s16X[ChOffset + 37, 29, 21, 13, 5]
 * Uses the caller's s32Hi, s32Hi2 and ChOffset.
 */
#define WINDOW_ACCU_4_3_5 \
  { \
    __asm { \
        MUL s32Hi, WIND_4_SUBBANDS_3_0, s16X[ChOffset + 3]; \
        MUL s32Hi2, WIND_4_SUBBANDS_3_0, s16X[ChOffset + 37]; \
        MLA s32Hi, WIND_4_SUBBANDS_3_1, s16X[ChOffset + 11], s32Hi; \
        MLA s32Hi2, WIND_4_SUBBANDS_3_1, s16X[ChOffset + 29], s32Hi2; \
        MLA s32Hi, WIND_4_SUBBANDS_3_2, s16X[ChOffset + 19], s32Hi; \
        MLA s32Hi2, WIND_4_SUBBANDS_3_2, s16X[ChOffset + 21], s32Hi2; \
        MLA s32Hi, WIND_4_SUBBANDS_3_3, s16X[ChOffset + 27], s32Hi; \
        MLA s32Hi2, WIND_4_SUBBANDS_3_3, s16X[ChOffset + 13], s32Hi2; \
        MLA s32Hi, WIND_4_SUBBANDS_3_4, s16X[ChOffset + 35], s32Hi; \
        MLA s32Hi2, WIND_4_SUBBANDS_3_4, s16X[ChOffset + 5], s32Hi2; \
        MOV s32DCTY[3], s32Hi; \
        MOV s32DCTY[5], s32Hi2; \
    } \
  }
/*
 * ARM inline-assembly variant, 4 subbands, output slot 4 (centre slot):
 *   s32DCTY[4] = WIND_4_SUBBANDS_4_0 * (s16X[ChOffset + 4]  + s16X[ChOffset + 36])
 *              + WIND_4_SUBBANDS_4_1 * (s16X[ChOffset + 12] + s16X[ChOffset + 28])
 *              + WIND_4_SUBBANDS_4_2 *  s16X[ChOffset + 20]
 * Uses the caller's s32Hi and ChOffset.
 */
#define WINDOW_ACCU_4_4 \
  { \
    __asm { \
        MUL s32Hi, WIND_4_SUBBANDS_4_0, (s16X[ChOffset + 4] + s16X[ChOffset + 36]); \
        MLA s32Hi, WIND_4_SUBBANDS_4_1, (s16X[ChOffset + 12] + s16X[ChOffset + 28]), s32Hi; \
        MLA s32Hi, WIND_4_SUBBANDS_4_2, s16X[ChOffset + 20], s32Hi; \
        MOV s32DCTY[4], s32Hi; \
    } \
  }
446 
/*
 * Runs all five 4-subband accumulation macros in slot order
 * (0, 1/7, 2/6, 3/5, 4), which together write s32DCTY[0..7].
 */
#define WINDOW_PARTIAL_4 \
  { \
    WINDOW_ACCU_4_0; \
    WINDOW_ACCU_4_1_7; \
    WINDOW_ACCU_4_2_6; \
    WINDOW_ACCU_4_3_5; \
    WINDOW_ACCU_4_4; \
  }
455 
/*
 * Runs all nine 8-subband accumulation macros in slot order
 * (0, 1/15, 2/14, 3/13, 4/12, 5/11, 6/10, 7/9, 8), which together write
 * s32DCTY[0..15].
 */
#define WINDOW_PARTIAL_8 \
  { \
    WINDOW_ACCU_8_0; \
    WINDOW_ACCU_8_1_15; \
    WINDOW_ACCU_8_2_14; \
    WINDOW_ACCU_8_3_13; \
    WINDOW_ACCU_8_4_12; \
    WINDOW_ACCU_8_5_11; \
    WINDOW_ACCU_8_6_10; \
    WINDOW_ACCU_8_7_9; \
    WINDOW_ACCU_8_8; \
  }
468 
469 #else
470 #if (SBC_IPAQ_OPT == TRUE)
471 
472 #if (SBC_IS_64_MULT_IN_WINDOW_ACCU == TRUE)
/*
 * 64-bit C variant, 8 subbands, output slot 0.
 * Accumulates both products in s64Temp, then stores the sum shifted right by
 * 16 bits into s32DCTY[0]:
 *   WIND_8_SUBBANDS_0_1 * (s16X[ChOffset + 16] - s16X[ChOffset + 64])
 * + WIND_8_SUBBANDS_0_2 * (s16X[ChOffset + 32] - s16X[ChOffset + 48])
 * Uses the caller's s64Temp and ChOffset.
 */
#define WINDOW_ACCU_8_0 \
  { \
    s64Temp = (int64_t)WIND_8_SUBBANDS_0_1 * (int64_t)(s16X[ChOffset + 16] - s16X[ChOffset + 64]) + \
              (int64_t)WIND_8_SUBBANDS_0_2 * (int64_t)(s16X[ChOffset + 32] - s16X[ChOffset + 48]); \
    s32DCTY[0] = (int32_t)(s64Temp >> 16); \
  }
/*
 * 64-bit C variant, 8 subbands, output slots 1 and 15.
 * Both sums use coefficients WIND_8_SUBBANDS_1_0..1_4 in that order and are
 * stored shifted right by 16 bits:
 *   s64Temp  -> s32DCTY[1],  taps s16X[ChOffset + 1, 17, 33, 49, 65]
 *   s64Temp2 -> s32DCTY[15], taps s16X[ChOffset + 79, 63, 47, 31, 15]
 * Uses the caller's s64Temp, s64Temp2 and ChOffset.
 */
#define WINDOW_ACCU_8_1_15 \
  { \
    s64Temp = (int64_t)WIND_8_SUBBANDS_1_0 * (int64_t)s16X[ChOffset + 1] + \
              (int64_t)WIND_8_SUBBANDS_1_1 * (int64_t)s16X[ChOffset + 17] + \
              (int64_t)WIND_8_SUBBANDS_1_2 * (int64_t)s16X[ChOffset + 33] + \
              (int64_t)WIND_8_SUBBANDS_1_3 * (int64_t)s16X[ChOffset + 49] + \
              (int64_t)WIND_8_SUBBANDS_1_4 * (int64_t)s16X[ChOffset + 65]; \
    s64Temp2 = (int64_t)WIND_8_SUBBANDS_1_0 * (int64_t)s16X[ChOffset + 79] + \
               (int64_t)WIND_8_SUBBANDS_1_1 * (int64_t)s16X[ChOffset + 63] + \
               (int64_t)WIND_8_SUBBANDS_1_2 * (int64_t)s16X[ChOffset + 47] + \
               (int64_t)WIND_8_SUBBANDS_1_3 * (int64_t)s16X[ChOffset + 31] + \
               (int64_t)WIND_8_SUBBANDS_1_4 * (int64_t)s16X[ChOffset + 15]; \
    s32DCTY[1] = (int32_t)(s64Temp >> 16); \
    s32DCTY[15] = (int32_t)(s64Temp2 >> 16); \
  }
/*
 * 64-bit C variant, 8 subbands, output slots 2 and 14.
 * Both sums use coefficients WIND_8_SUBBANDS_2_0..2_4 in that order and are
 * stored shifted right by 16 bits:
 *   s64Temp  -> s32DCTY[2],  taps s16X[ChOffset + 2, 18, 34, 50, 66]
 *   s64Temp2 -> s32DCTY[14], taps s16X[ChOffset + 78, 62, 46, 30, 14]
 * Uses the caller's s64Temp, s64Temp2 and ChOffset.
 */
#define WINDOW_ACCU_8_2_14 \
  { \
    s64Temp = (int64_t)WIND_8_SUBBANDS_2_0 * (int64_t)s16X[ChOffset + 2] + \
              (int64_t)WIND_8_SUBBANDS_2_1 * (int64_t)s16X[ChOffset + 18] + \
              (int64_t)WIND_8_SUBBANDS_2_2 * (int64_t)s16X[ChOffset + 34] + \
              (int64_t)WIND_8_SUBBANDS_2_3 * (int64_t)s16X[ChOffset + 50] + \
              (int64_t)WIND_8_SUBBANDS_2_4 * (int64_t)s16X[ChOffset + 66]; \
    s64Temp2 = (int64_t)WIND_8_SUBBANDS_2_0 * (int64_t)s16X[ChOffset + 78] + \
               (int64_t)WIND_8_SUBBANDS_2_1 * (int64_t)s16X[ChOffset + 62] + \
               (int64_t)WIND_8_SUBBANDS_2_2 * (int64_t)s16X[ChOffset + 46] + \
               (int64_t)WIND_8_SUBBANDS_2_3 * (int64_t)s16X[ChOffset + 30] + \
               (int64_t)WIND_8_SUBBANDS_2_4 * (int64_t)s16X[ChOffset + 14]; \
    s32DCTY[2] = (int32_t)(s64Temp >> 16); \
    s32DCTY[14] = (int32_t)(s64Temp2 >> 16); \
  }
/*
 * 64-bit C variant, 8 subbands, output slots 3 and 13.
 * Both sums use coefficients WIND_8_SUBBANDS_3_0..3_4 in that order and are
 * stored shifted right by 16 bits:
 *   s64Temp  -> s32DCTY[3],  taps s16X[ChOffset + 3, 19, 35, 51, 67]
 *   s64Temp2 -> s32DCTY[13], taps s16X[ChOffset + 77, 61, 45, 29, 13]
 * Uses the caller's s64Temp, s64Temp2 and ChOffset.
 */
#define WINDOW_ACCU_8_3_13 \
  { \
    s64Temp = (int64_t)WIND_8_SUBBANDS_3_0 * (int64_t)s16X[ChOffset + 3] + \
              (int64_t)WIND_8_SUBBANDS_3_1 * (int64_t)s16X[ChOffset + 19] + \
              (int64_t)WIND_8_SUBBANDS_3_2 * (int64_t)s16X[ChOffset + 35] + \
              (int64_t)WIND_8_SUBBANDS_3_3 * (int64_t)s16X[ChOffset + 51] + \
              (int64_t)WIND_8_SUBBANDS_3_4 * (int64_t)s16X[ChOffset + 67]; \
    s64Temp2 = (int64_t)WIND_8_SUBBANDS_3_0 * (int64_t)s16X[ChOffset + 77] + \
               (int64_t)WIND_8_SUBBANDS_3_1 * (int64_t)s16X[ChOffset + 61] + \
               (int64_t)WIND_8_SUBBANDS_3_2 * (int64_t)s16X[ChOffset + 45] + \
               (int64_t)WIND_8_SUBBANDS_3_3 * (int64_t)s16X[ChOffset + 29] + \
               (int64_t)WIND_8_SUBBANDS_3_4 * (int64_t)s16X[ChOffset + 13]; \
    s32DCTY[3] = (int32_t)(s64Temp >> 16); \
    s32DCTY[13] = (int32_t)(s64Temp2 >> 16); \
  }
/*
 * 64-bit C variant, 8 subbands, output slots 4 and 12.
 * Both sums use coefficients WIND_8_SUBBANDS_4_0..4_4 in that order and are
 * stored shifted right by 16 bits:
 *   s64Temp  -> s32DCTY[4],  taps s16X[ChOffset + 4, 20, 36, 52, 68]
 *   s64Temp2 -> s32DCTY[12], taps s16X[ChOffset + 76, 60, 44, 28, 12]
 * Uses the caller's s64Temp, s64Temp2 and ChOffset.
 */
#define WINDOW_ACCU_8_4_12 \
  { \
    s64Temp = (int64_t)WIND_8_SUBBANDS_4_0 * (int64_t)s16X[ChOffset + 4] + \
              (int64_t)WIND_8_SUBBANDS_4_1 * (int64_t)s16X[ChOffset + 20] + \
              (int64_t)WIND_8_SUBBANDS_4_2 * (int64_t)s16X[ChOffset + 36] + \
              (int64_t)WIND_8_SUBBANDS_4_3 * (int64_t)s16X[ChOffset + 52] + \
              (int64_t)WIND_8_SUBBANDS_4_4 * (int64_t)s16X[ChOffset + 68]; \
    s64Temp2 = (int64_t)WIND_8_SUBBANDS_4_0 * (int64_t)s16X[ChOffset + 76] + \
               (int64_t)WIND_8_SUBBANDS_4_1 * (int64_t)s16X[ChOffset + 60] + \
               (int64_t)WIND_8_SUBBANDS_4_2 * (int64_t)s16X[ChOffset + 44] + \
               (int64_t)WIND_8_SUBBANDS_4_3 * (int64_t)s16X[ChOffset + 28] + \
               (int64_t)WIND_8_SUBBANDS_4_4 * (int64_t)s16X[ChOffset + 12]; \
    s32DCTY[4] = (int32_t)(s64Temp >> 16); \
    s32DCTY[12] = (int32_t)(s64Temp2 >> 16); \
  }
/*
 * WINDOW_ACCU_8_5_11 - 8-subband window accumulation for outputs 5 and 11
 * (64-bit multiply build).
 *
 * s32DCTY[5]  = (sum over k = 0..4 of WIND_8_SUBBANDS_5_k *
 *                s16X[ChOffset + 16 * k + 5]) >> 16
 * s32DCTY[11] = (sum over k = 0..4 of WIND_8_SUBBANDS_5_k *
 *                s16X[ChOffset + 64 - 16 * k + 11]) >> 16
 *
 * Every product is formed in int64_t.  The expansion site must provide
 * s64Temp, s64Temp2, s16X, ChOffset and s32DCTY.  Expands to a braced
 * compound statement.
 */
#define WINDOW_ACCU_8_5_11                                                        \
  {                                                                               \
    s64Temp = (int64_t)WIND_8_SUBBANDS_5_0 * (int64_t)s16X[ChOffset + 5];         \
    s64Temp2 = (int64_t)WIND_8_SUBBANDS_5_0 * (int64_t)s16X[ChOffset + 64 + 11];  \
    s64Temp += (int64_t)WIND_8_SUBBANDS_5_1 * (int64_t)s16X[ChOffset + 16 + 5];   \
    s64Temp2 += (int64_t)WIND_8_SUBBANDS_5_1 * (int64_t)s16X[ChOffset + 48 + 11]; \
    s64Temp += (int64_t)WIND_8_SUBBANDS_5_2 * (int64_t)s16X[ChOffset + 32 + 5];   \
    s64Temp2 += (int64_t)WIND_8_SUBBANDS_5_2 * (int64_t)s16X[ChOffset + 32 + 11]; \
    s64Temp += (int64_t)WIND_8_SUBBANDS_5_3 * (int64_t)s16X[ChOffset + 48 + 5];   \
    s64Temp2 += (int64_t)WIND_8_SUBBANDS_5_3 * (int64_t)s16X[ChOffset + 16 + 11]; \
    s64Temp += (int64_t)WIND_8_SUBBANDS_5_4 * (int64_t)s16X[ChOffset + 64 + 5];   \
    s64Temp2 += (int64_t)WIND_8_SUBBANDS_5_4 * (int64_t)s16X[ChOffset + 11];      \
    s32DCTY[5] = (int32_t)(s64Temp >> 16);                                        \
    s32DCTY[11] = (int32_t)(s64Temp2 >> 16);                                      \
  }
/*
 * WINDOW_ACCU_8_6_10 - 8-subband window accumulation for outputs 6 and 10
 * (64-bit multiply build).
 *
 * s32DCTY[6]  = (sum over k = 0..4 of WIND_8_SUBBANDS_6_k *
 *                s16X[ChOffset + 16 * k + 6]) >> 16
 * s32DCTY[10] = (sum over k = 0..4 of WIND_8_SUBBANDS_6_k *
 *                s16X[ChOffset + 64 - 16 * k + 10]) >> 16
 *
 * Every product is formed in int64_t.  The expansion site must provide
 * s64Temp, s64Temp2, s16X, ChOffset and s32DCTY.  Expands to a braced
 * compound statement.
 */
#define WINDOW_ACCU_8_6_10                                                        \
  {                                                                               \
    s64Temp = (int64_t)WIND_8_SUBBANDS_6_0 * (int64_t)s16X[ChOffset + 6];         \
    s64Temp2 = (int64_t)WIND_8_SUBBANDS_6_0 * (int64_t)s16X[ChOffset + 64 + 10];  \
    s64Temp += (int64_t)WIND_8_SUBBANDS_6_1 * (int64_t)s16X[ChOffset + 16 + 6];   \
    s64Temp2 += (int64_t)WIND_8_SUBBANDS_6_1 * (int64_t)s16X[ChOffset + 48 + 10]; \
    s64Temp += (int64_t)WIND_8_SUBBANDS_6_2 * (int64_t)s16X[ChOffset + 32 + 6];   \
    s64Temp2 += (int64_t)WIND_8_SUBBANDS_6_2 * (int64_t)s16X[ChOffset + 32 + 10]; \
    s64Temp += (int64_t)WIND_8_SUBBANDS_6_3 * (int64_t)s16X[ChOffset + 48 + 6];   \
    s64Temp2 += (int64_t)WIND_8_SUBBANDS_6_3 * (int64_t)s16X[ChOffset + 16 + 10]; \
    s64Temp += (int64_t)WIND_8_SUBBANDS_6_4 * (int64_t)s16X[ChOffset + 64 + 6];   \
    s64Temp2 += (int64_t)WIND_8_SUBBANDS_6_4 * (int64_t)s16X[ChOffset + 10];      \
    s32DCTY[6] = (int32_t)(s64Temp >> 16);                                        \
    s32DCTY[10] = (int32_t)(s64Temp2 >> 16);                                      \
  }
/*
 * WINDOW_ACCU_8_7_9 - 8-subband window accumulation for outputs 7 and 9
 * (64-bit multiply build).
 *
 * s32DCTY[7] = (sum over k = 0..4 of WIND_8_SUBBANDS_7_k *
 *               s16X[ChOffset + 16 * k + 7]) >> 16
 * s32DCTY[9] = (sum over k = 0..4 of WIND_8_SUBBANDS_7_k *
 *               s16X[ChOffset + 64 - 16 * k + 9]) >> 16
 *
 * Every product is formed in int64_t.  The expansion site must provide
 * s64Temp, s64Temp2, s16X, ChOffset and s32DCTY.  Expands to a braced
 * compound statement.
 */
#define WINDOW_ACCU_8_7_9                                                        \
  {                                                                              \
    s64Temp = (int64_t)WIND_8_SUBBANDS_7_0 * (int64_t)s16X[ChOffset + 7];        \
    s64Temp2 = (int64_t)WIND_8_SUBBANDS_7_0 * (int64_t)s16X[ChOffset + 64 + 9];  \
    s64Temp += (int64_t)WIND_8_SUBBANDS_7_1 * (int64_t)s16X[ChOffset + 16 + 7];  \
    s64Temp2 += (int64_t)WIND_8_SUBBANDS_7_1 * (int64_t)s16X[ChOffset + 48 + 9]; \
    s64Temp += (int64_t)WIND_8_SUBBANDS_7_2 * (int64_t)s16X[ChOffset + 32 + 7];  \
    s64Temp2 += (int64_t)WIND_8_SUBBANDS_7_2 * (int64_t)s16X[ChOffset + 32 + 9]; \
    s64Temp += (int64_t)WIND_8_SUBBANDS_7_3 * (int64_t)s16X[ChOffset + 48 + 7];  \
    s64Temp2 += (int64_t)WIND_8_SUBBANDS_7_3 * (int64_t)s16X[ChOffset + 16 + 9]; \
    s64Temp += (int64_t)WIND_8_SUBBANDS_7_4 * (int64_t)s16X[ChOffset + 64 + 7];  \
    s64Temp2 += (int64_t)WIND_8_SUBBANDS_7_4 * (int64_t)s16X[ChOffset + 9];      \
    s32DCTY[7] = (int32_t)(s64Temp >> 16);                                       \
    s32DCTY[9] = (int32_t)(s64Temp2 >> 16);                                      \
  }
/*
 * WINDOW_ACCU_8_8 - 8-subband window accumulation for output 8
 * (64-bit multiply build).
 *
 * s32DCTY[8] = (WIND_8_SUBBANDS_8_0 * (s16X[ChOffset + 8] + s16X[ChOffset + 72])
 *             + WIND_8_SUBBANDS_8_1 * (s16X[ChOffset + 24] + s16X[ChOffset + 56])
 *             + WIND_8_SUBBANDS_8_2 * s16X[ChOffset + 40]) >> 16
 *
 * Sample pairs that share a coefficient are added before the multiply.
 * The expansion site must provide s64Temp, s16X, ChOffset and s32DCTY.
 * Expands to a braced compound statement.
 */
#define WINDOW_ACCU_8_8                                                         \
  {                                                                             \
    s64Temp = (int64_t)WIND_8_SUBBANDS_8_0 *                                    \
              (int64_t)(s16X[ChOffset + 8] + s16X[ChOffset + 64 + 8]);          \
    s64Temp += (int64_t)WIND_8_SUBBANDS_8_1 *                                   \
               (int64_t)(s16X[ChOffset + 16 + 8] + s16X[ChOffset + 48 + 8]);    \
    s64Temp += (int64_t)WIND_8_SUBBANDS_8_2 * (int64_t)s16X[ChOffset + 32 + 8]; \
    s32DCTY[8] = (int32_t)(s64Temp >> 16);                                      \
  }
/*
 * WINDOW_ACCU_4_0 - 4-subband window accumulation for output 0
 * (64-bit multiply build).
 *
 * s32DCTY[0] = (WIND_4_SUBBANDS_0_1 * (s16X[ChOffset + 8] - s16X[ChOffset + 32])
 *             + WIND_4_SUBBANDS_0_2 * (s16X[ChOffset + 16] - s16X[ChOffset + 24]))
 *              >> 16
 *
 * Each sample pair is subtracted before the multiply, so one coefficient
 * serves both taps.  The expansion site must provide s64Temp, s16X, ChOffset
 * and s32DCTY.  Expands to a braced compound statement.
 */
#define WINDOW_ACCU_4_0                                                                           \
  {                                                                                               \
    s64Temp = (int64_t)WIND_4_SUBBANDS_0_1 * (int64_t)(s16X[ChOffset + 8] - s16X[ChOffset + 32]); \
    s64Temp +=                                                                                    \
            (int64_t)WIND_4_SUBBANDS_0_2 * (int64_t)(s16X[ChOffset + 16] - s16X[ChOffset + 24]);  \
    s32DCTY[0] = (int32_t)(s64Temp >> 16);                                                        \
  }
/*
 * WINDOW_ACCU_4_1_7 - 4-subband window accumulation for outputs 1 and 7
 * (64-bit multiply build).
 *
 * s32DCTY[1] = (sum over k = 0..4 of WIND_4_SUBBANDS_1_k *
 *               s16X[ChOffset + 8 * k + 1]) >> 16
 * s32DCTY[7] = (sum over k = 0..4 of WIND_4_SUBBANDS_1_k *
 *               s16X[ChOffset + 32 - 8 * k + 7]) >> 16
 *
 * Every product is formed in int64_t.  The expansion site must provide
 * s64Temp, s64Temp2, s16X, ChOffset and s32DCTY.  Expands to a braced
 * compound statement.
 */
#define WINDOW_ACCU_4_1_7                                                        \
  {                                                                              \
    s64Temp = (int64_t)WIND_4_SUBBANDS_1_0 * (int64_t)s16X[ChOffset + 1];        \
    s64Temp2 = (int64_t)WIND_4_SUBBANDS_1_0 * (int64_t)s16X[ChOffset + 32 + 7];  \
    s64Temp += (int64_t)WIND_4_SUBBANDS_1_1 * (int64_t)s16X[ChOffset + 8 + 1];   \
    s64Temp2 += (int64_t)WIND_4_SUBBANDS_1_1 * (int64_t)s16X[ChOffset + 24 + 7]; \
    s64Temp += (int64_t)WIND_4_SUBBANDS_1_2 * (int64_t)s16X[ChOffset + 16 + 1];  \
    s64Temp2 += (int64_t)WIND_4_SUBBANDS_1_2 * (int64_t)s16X[ChOffset + 16 + 7]; \
    s64Temp += (int64_t)WIND_4_SUBBANDS_1_3 * (int64_t)s16X[ChOffset + 24 + 1];  \
    s64Temp2 += (int64_t)WIND_4_SUBBANDS_1_3 * (int64_t)s16X[ChOffset + 8 + 7];  \
    s64Temp += (int64_t)WIND_4_SUBBANDS_1_4 * (int64_t)s16X[ChOffset + 32 + 1];  \
    s64Temp2 += (int64_t)WIND_4_SUBBANDS_1_4 * (int64_t)s16X[ChOffset + 7];      \
    s32DCTY[1] = (int32_t)(s64Temp >> 16);                                       \
    s32DCTY[7] = (int32_t)(s64Temp2 >> 16);                                      \
  }
/*
 * WINDOW_ACCU_4_2_6 - 4-subband window accumulation for outputs 2 and 6
 * (64-bit multiply build).
 *
 * s32DCTY[2] = (sum over k = 0..4 of WIND_4_SUBBANDS_2_k *
 *               s16X[ChOffset + 8 * k + 2]) >> 16
 * s32DCTY[6] = (sum over k = 0..4 of WIND_4_SUBBANDS_2_k *
 *               s16X[ChOffset + 32 - 8 * k + 6]) >> 16
 *
 * Every product is formed in int64_t.  The expansion site must provide
 * s64Temp, s64Temp2, s16X, ChOffset and s32DCTY.  Expands to a braced
 * compound statement.
 */
#define WINDOW_ACCU_4_2_6                                                        \
  {                                                                              \
    s64Temp = (int64_t)WIND_4_SUBBANDS_2_0 * (int64_t)s16X[ChOffset + 2];        \
    s64Temp2 = (int64_t)WIND_4_SUBBANDS_2_0 * (int64_t)s16X[ChOffset + 32 + 6];  \
    s64Temp += (int64_t)WIND_4_SUBBANDS_2_1 * (int64_t)s16X[ChOffset + 8 + 2];   \
    s64Temp2 += (int64_t)WIND_4_SUBBANDS_2_1 * (int64_t)s16X[ChOffset + 24 + 6]; \
    s64Temp += (int64_t)WIND_4_SUBBANDS_2_2 * (int64_t)s16X[ChOffset + 16 + 2];  \
    s64Temp2 += (int64_t)WIND_4_SUBBANDS_2_2 * (int64_t)s16X[ChOffset + 16 + 6]; \
    s64Temp += (int64_t)WIND_4_SUBBANDS_2_3 * (int64_t)s16X[ChOffset + 24 + 2];  \
    s64Temp2 += (int64_t)WIND_4_SUBBANDS_2_3 * (int64_t)s16X[ChOffset + 8 + 6];  \
    s64Temp += (int64_t)WIND_4_SUBBANDS_2_4 * (int64_t)s16X[ChOffset + 32 + 2];  \
    s64Temp2 += (int64_t)WIND_4_SUBBANDS_2_4 * (int64_t)s16X[ChOffset + 6];      \
    s32DCTY[2] = (int32_t)(s64Temp >> 16);                                       \
    s32DCTY[6] = (int32_t)(s64Temp2 >> 16);                                      \
  }
/*
 * WINDOW_ACCU_4_3_5 - 4-subband window accumulation for outputs 3 and 5
 * (64-bit multiply build).
 *
 * s32DCTY[3] = (sum over k = 0..4 of WIND_4_SUBBANDS_3_k *
 *               s16X[ChOffset + 8 * k + 3]) >> 16
 * s32DCTY[5] = (sum over k = 0..4 of WIND_4_SUBBANDS_3_k *
 *               s16X[ChOffset + 32 - 8 * k + 5]) >> 16
 *
 * Every product is formed in int64_t.  The expansion site must provide
 * s64Temp, s64Temp2, s16X, ChOffset and s32DCTY.  Expands to a braced
 * compound statement.
 */
#define WINDOW_ACCU_4_3_5                                                        \
  {                                                                              \
    s64Temp = (int64_t)WIND_4_SUBBANDS_3_0 * (int64_t)s16X[ChOffset + 3];        \
    s64Temp2 = (int64_t)WIND_4_SUBBANDS_3_0 * (int64_t)s16X[ChOffset + 32 + 5];  \
    s64Temp += (int64_t)WIND_4_SUBBANDS_3_1 * (int64_t)s16X[ChOffset + 8 + 3];   \
    s64Temp2 += (int64_t)WIND_4_SUBBANDS_3_1 * (int64_t)s16X[ChOffset + 24 + 5]; \
    s64Temp += (int64_t)WIND_4_SUBBANDS_3_2 * (int64_t)s16X[ChOffset + 16 + 3];  \
    s64Temp2 += (int64_t)WIND_4_SUBBANDS_3_2 * (int64_t)s16X[ChOffset + 16 + 5]; \
    s64Temp += (int64_t)WIND_4_SUBBANDS_3_3 * (int64_t)s16X[ChOffset + 24 + 3];  \
    s64Temp2 += (int64_t)WIND_4_SUBBANDS_3_3 * (int64_t)s16X[ChOffset + 8 + 5];  \
    s64Temp += (int64_t)WIND_4_SUBBANDS_3_4 * (int64_t)s16X[ChOffset + 32 + 3];  \
    s64Temp2 += (int64_t)WIND_4_SUBBANDS_3_4 * (int64_t)s16X[ChOffset + 5];      \
    s32DCTY[3] = (int32_t)(s64Temp >> 16);                                       \
    s32DCTY[5] = (int32_t)(s64Temp2 >> 16);                                      \
  }
646 
/*
 * WINDOW_ACCU_4_4 - 4-subband window accumulation for output 4
 * (64-bit multiply build).
 *
 * s32DCTY[4] = (WIND_4_SUBBANDS_4_0 * (s16X[ChOffset + 4] + s16X[ChOffset + 36])
 *             + WIND_4_SUBBANDS_4_1 * (s16X[ChOffset + 12] + s16X[ChOffset + 28])
 *             + WIND_4_SUBBANDS_4_2 * s16X[ChOffset + 20]) >> 16
 *
 * Sample pairs that share a coefficient are added before the multiply.
 * The expansion site must provide s64Temp, s16X, ChOffset and s32DCTY.
 * Expands to a braced compound statement.
 */
#define WINDOW_ACCU_4_4                                                         \
  {                                                                             \
    s64Temp = (int64_t)WIND_4_SUBBANDS_4_0 *                                    \
              (int64_t)(s16X[ChOffset + 4] + s16X[ChOffset + 4 + 32]);          \
    s64Temp += (int64_t)WIND_4_SUBBANDS_4_1 *                                   \
               (int64_t)(s16X[ChOffset + 4 + 8] + s16X[ChOffset + 4 + 24]);     \
    s64Temp += (int64_t)WIND_4_SUBBANDS_4_2 * (int64_t)s16X[ChOffset + 4 + 16]; \
    s32DCTY[4] = (int32_t)(s64Temp >> 16);                                      \
  }
656 #else /* SBC_IS_64_MULT_IN_WINDOW_ACCU == FALSE */
/*
 * WINDOW_ACCU_8_0 - 8-subband window accumulation for output 0
 * (32-bit multiply build, SBC_IS_64_MULT_IN_WINDOW_ACCU == FALSE).
 *
 * s32DCTY[0] = WIND_8_SUBBANDS_0_1 * (s16X[ChOffset + 16] - s16X[ChOffset + 64])
 *            + WIND_8_SUBBANDS_0_2 * (s16X[ChOffset + 32] - s16X[ChOffset + 48])
 *
 * All arithmetic is int32_t and the sum is stored without a shift.
 * NOTE(review): the WIND_8_SUBBANDS_* definitions for this build are not
 * visible here; presumably they are narrow enough that the products fit in
 * 32 bits - confirm.
 * The expansion site must provide s32Temp, s16X, ChOffset and s32DCTY.
 */
#define WINDOW_ACCU_8_0                                                                            \
  {                                                                                                \
    s32Temp = (int32_t)WIND_8_SUBBANDS_0_1 * (int32_t)(s16X[ChOffset + 16] - s16X[ChOffset + 64]); \
    s32Temp +=                                                                                     \
            (int32_t)WIND_8_SUBBANDS_0_2 * (int32_t)(s16X[ChOffset + 32] - s16X[ChOffset + 48]);   \
    s32DCTY[0] = (int32_t)s32Temp;                                                                 \
  }
/*
 * WINDOW_ACCU_8_1_15 - 8-subband window accumulation for outputs 1 and 15
 * (32-bit multiply build).
 *
 * s32DCTY[1]  = sum over k = 0..4 of WIND_8_SUBBANDS_1_k *
 *               s16X[ChOffset + 16 * k + 1]
 * s32DCTY[15] = sum over k = 0..4 of WIND_8_SUBBANDS_1_k *
 *               s16X[ChOffset + 64 - 16 * k + 15]
 *
 * All arithmetic is int32_t and the sums are stored without a shift.
 * The expansion site must provide s32Temp, s32Temp2, s16X, ChOffset and
 * s32DCTY.  Expands to a braced compound statement.
 */
#define WINDOW_ACCU_8_1_15                                                        \
  {                                                                               \
    s32Temp = (int32_t)WIND_8_SUBBANDS_1_0 * (int32_t)s16X[ChOffset + 1];         \
    s32Temp2 = (int32_t)WIND_8_SUBBANDS_1_0 * (int32_t)s16X[ChOffset + 64 + 15];  \
    s32Temp += (int32_t)WIND_8_SUBBANDS_1_1 * (int32_t)s16X[ChOffset + 16 + 1];   \
    s32Temp2 += (int32_t)WIND_8_SUBBANDS_1_1 * (int32_t)s16X[ChOffset + 48 + 15]; \
    s32Temp += (int32_t)WIND_8_SUBBANDS_1_2 * (int32_t)s16X[ChOffset + 32 + 1];   \
    s32Temp2 += (int32_t)WIND_8_SUBBANDS_1_2 * (int32_t)s16X[ChOffset + 32 + 15]; \
    s32Temp += (int32_t)WIND_8_SUBBANDS_1_3 * (int32_t)s16X[ChOffset + 48 + 1];   \
    s32Temp2 += (int32_t)WIND_8_SUBBANDS_1_3 * (int32_t)s16X[ChOffset + 16 + 15]; \
    s32Temp += (int32_t)WIND_8_SUBBANDS_1_4 * (int32_t)s16X[ChOffset + 64 + 1];   \
    s32Temp2 += (int32_t)WIND_8_SUBBANDS_1_4 * (int32_t)s16X[ChOffset + 15];      \
    s32DCTY[1] = (int32_t)s32Temp;                                                \
    s32DCTY[15] = (int32_t)s32Temp2;                                              \
  }
/*
 * WINDOW_ACCU_8_2_14 - 8-subband window accumulation for outputs 2 and 14
 * (32-bit multiply build).
 *
 * s32DCTY[2]  = sum over k = 0..4 of WIND_8_SUBBANDS_2_k *
 *               s16X[ChOffset + 16 * k + 2]
 * s32DCTY[14] = sum over k = 0..4 of WIND_8_SUBBANDS_2_k *
 *               s16X[ChOffset + 64 - 16 * k + 14]
 *
 * All arithmetic is int32_t and the sums are stored without a shift.
 * The expansion site must provide s32Temp, s32Temp2, s16X, ChOffset and
 * s32DCTY.  Expands to a braced compound statement.
 */
#define WINDOW_ACCU_8_2_14                                                        \
  {                                                                               \
    s32Temp = (int32_t)WIND_8_SUBBANDS_2_0 * (int32_t)s16X[ChOffset + 2];         \
    s32Temp2 = (int32_t)WIND_8_SUBBANDS_2_0 * (int32_t)s16X[ChOffset + 64 + 14];  \
    s32Temp += (int32_t)WIND_8_SUBBANDS_2_1 * (int32_t)s16X[ChOffset + 16 + 2];   \
    s32Temp2 += (int32_t)WIND_8_SUBBANDS_2_1 * (int32_t)s16X[ChOffset + 48 + 14]; \
    s32Temp += (int32_t)WIND_8_SUBBANDS_2_2 * (int32_t)s16X[ChOffset + 32 + 2];   \
    s32Temp2 += (int32_t)WIND_8_SUBBANDS_2_2 * (int32_t)s16X[ChOffset + 32 + 14]; \
    s32Temp += (int32_t)WIND_8_SUBBANDS_2_3 * (int32_t)s16X[ChOffset + 48 + 2];   \
    s32Temp2 += (int32_t)WIND_8_SUBBANDS_2_3 * (int32_t)s16X[ChOffset + 16 + 14]; \
    s32Temp += (int32_t)WIND_8_SUBBANDS_2_4 * (int32_t)s16X[ChOffset + 64 + 2];   \
    s32Temp2 += (int32_t)WIND_8_SUBBANDS_2_4 * (int32_t)s16X[ChOffset + 14];      \
    s32DCTY[2] = (int32_t)s32Temp;                                                \
    s32DCTY[14] = (int32_t)s32Temp2;                                              \
  }
/*
 * WINDOW_ACCU_8_3_13 - 8-subband window accumulation for outputs 3 and 13
 * (32-bit multiply build).
 *
 * s32DCTY[3]  = sum over k = 0..4 of WIND_8_SUBBANDS_3_k *
 *               s16X[ChOffset + 16 * k + 3]
 * s32DCTY[13] = sum over k = 0..4 of WIND_8_SUBBANDS_3_k *
 *               s16X[ChOffset + 64 - 16 * k + 13]
 *
 * All arithmetic is int32_t and the sums are stored without a shift.
 * The expansion site must provide s32Temp, s32Temp2, s16X, ChOffset and
 * s32DCTY.  Expands to a braced compound statement.
 */
#define WINDOW_ACCU_8_3_13                                                        \
  {                                                                               \
    s32Temp = (int32_t)WIND_8_SUBBANDS_3_0 * (int32_t)s16X[ChOffset + 3];         \
    s32Temp2 = (int32_t)WIND_8_SUBBANDS_3_0 * (int32_t)s16X[ChOffset + 64 + 13];  \
    s32Temp += (int32_t)WIND_8_SUBBANDS_3_1 * (int32_t)s16X[ChOffset + 16 + 3];   \
    s32Temp2 += (int32_t)WIND_8_SUBBANDS_3_1 * (int32_t)s16X[ChOffset + 48 + 13]; \
    s32Temp += (int32_t)WIND_8_SUBBANDS_3_2 * (int32_t)s16X[ChOffset + 32 + 3];   \
    s32Temp2 += (int32_t)WIND_8_SUBBANDS_3_2 * (int32_t)s16X[ChOffset + 32 + 13]; \
    s32Temp += (int32_t)WIND_8_SUBBANDS_3_3 * (int32_t)s16X[ChOffset + 48 + 3];   \
    s32Temp2 += (int32_t)WIND_8_SUBBANDS_3_3 * (int32_t)s16X[ChOffset + 16 + 13]; \
    s32Temp += (int32_t)WIND_8_SUBBANDS_3_4 * (int32_t)s16X[ChOffset + 64 + 3];   \
    s32Temp2 += (int32_t)WIND_8_SUBBANDS_3_4 * (int32_t)s16X[ChOffset + 13];      \
    s32DCTY[3] = (int32_t)s32Temp;                                                \
    s32DCTY[13] = (int32_t)s32Temp2;                                              \
  }
/*
 * WINDOW_ACCU_8_4_12 - 8-subband window accumulation for outputs 4 and 12
 * (32-bit multiply build).
 *
 * s32DCTY[4]  = sum over k = 0..4 of WIND_8_SUBBANDS_4_k *
 *               s16X[ChOffset + 16 * k + 4]
 * s32DCTY[12] = sum over k = 0..4 of WIND_8_SUBBANDS_4_k *
 *               s16X[ChOffset + 64 - 16 * k + 12]
 *
 * All arithmetic is int32_t and the sums are stored without a shift.
 * The expansion site must provide s32Temp, s32Temp2, s16X, ChOffset and
 * s32DCTY.  Expands to a braced compound statement.
 */
#define WINDOW_ACCU_8_4_12                                                        \
  {                                                                               \
    s32Temp = (int32_t)WIND_8_SUBBANDS_4_0 * (int32_t)s16X[ChOffset + 4];         \
    s32Temp2 = (int32_t)WIND_8_SUBBANDS_4_0 * (int32_t)s16X[ChOffset + 64 + 12];  \
    s32Temp += (int32_t)WIND_8_SUBBANDS_4_1 * (int32_t)s16X[ChOffset + 16 + 4];   \
    s32Temp2 += (int32_t)WIND_8_SUBBANDS_4_1 * (int32_t)s16X[ChOffset + 48 + 12]; \
    s32Temp += (int32_t)WIND_8_SUBBANDS_4_2 * (int32_t)s16X[ChOffset + 32 + 4];   \
    s32Temp2 += (int32_t)WIND_8_SUBBANDS_4_2 * (int32_t)s16X[ChOffset + 32 + 12]; \
    s32Temp += (int32_t)WIND_8_SUBBANDS_4_3 * (int32_t)s16X[ChOffset + 48 + 4];   \
    s32Temp2 += (int32_t)WIND_8_SUBBANDS_4_3 * (int32_t)s16X[ChOffset + 16 + 12]; \
    s32Temp += (int32_t)WIND_8_SUBBANDS_4_4 * (int32_t)s16X[ChOffset + 64 + 4];   \
    s32Temp2 += (int32_t)WIND_8_SUBBANDS_4_4 * (int32_t)s16X[ChOffset + 12];      \
    s32DCTY[4] = (int32_t)s32Temp;                                                \
    s32DCTY[12] = (int32_t)s32Temp2;                                              \
  }
/*
 * WINDOW_ACCU_8_5_11 - 8-subband window accumulation for outputs 5 and 11
 * (32-bit multiply build).
 *
 * s32DCTY[5]  = sum over k = 0..4 of WIND_8_SUBBANDS_5_k *
 *               s16X[ChOffset + 16 * k + 5]
 * s32DCTY[11] = sum over k = 0..4 of WIND_8_SUBBANDS_5_k *
 *               s16X[ChOffset + 64 - 16 * k + 11]
 *
 * All arithmetic is int32_t and the sums are stored without a shift.
 * The expansion site must provide s32Temp, s32Temp2, s16X, ChOffset and
 * s32DCTY.  Expands to a braced compound statement.
 */
#define WINDOW_ACCU_8_5_11                                                        \
  {                                                                               \
    s32Temp = (int32_t)WIND_8_SUBBANDS_5_0 * (int32_t)s16X[ChOffset + 5];         \
    s32Temp2 = (int32_t)WIND_8_SUBBANDS_5_0 * (int32_t)s16X[ChOffset + 64 + 11];  \
    s32Temp += (int32_t)WIND_8_SUBBANDS_5_1 * (int32_t)s16X[ChOffset + 16 + 5];   \
    s32Temp2 += (int32_t)WIND_8_SUBBANDS_5_1 * (int32_t)s16X[ChOffset + 48 + 11]; \
    s32Temp += (int32_t)WIND_8_SUBBANDS_5_2 * (int32_t)s16X[ChOffset + 32 + 5];   \
    s32Temp2 += (int32_t)WIND_8_SUBBANDS_5_2 * (int32_t)s16X[ChOffset + 32 + 11]; \
    s32Temp += (int32_t)WIND_8_SUBBANDS_5_3 * (int32_t)s16X[ChOffset + 48 + 5];   \
    s32Temp2 += (int32_t)WIND_8_SUBBANDS_5_3 * (int32_t)s16X[ChOffset + 16 + 11]; \
    s32Temp += (int32_t)WIND_8_SUBBANDS_5_4 * (int32_t)s16X[ChOffset + 64 + 5];   \
    s32Temp2 += (int32_t)WIND_8_SUBBANDS_5_4 * (int32_t)s16X[ChOffset + 11];      \
    s32DCTY[5] = (int32_t)s32Temp;                                                \
    s32DCTY[11] = (int32_t)s32Temp2;                                              \
  }
/*
 * WINDOW_ACCU_8_6_10 - 8-subband window accumulation for outputs 6 and 10
 * (32-bit multiply build).
 *
 * s32DCTY[6]  = sum over k = 0..4 of WIND_8_SUBBANDS_6_k *
 *               s16X[ChOffset + 16 * k + 6]
 * s32DCTY[10] = sum over k = 0..4 of WIND_8_SUBBANDS_6_k *
 *               s16X[ChOffset + 64 - 16 * k + 10]
 *
 * All arithmetic is int32_t and the sums are stored without a shift.
 * The expansion site must provide s32Temp, s32Temp2, s16X, ChOffset and
 * s32DCTY.  Expands to a braced compound statement.
 */
#define WINDOW_ACCU_8_6_10                                                        \
  {                                                                               \
    s32Temp = (int32_t)WIND_8_SUBBANDS_6_0 * (int32_t)s16X[ChOffset + 6];         \
    s32Temp2 = (int32_t)WIND_8_SUBBANDS_6_0 * (int32_t)s16X[ChOffset + 64 + 10];  \
    s32Temp += (int32_t)WIND_8_SUBBANDS_6_1 * (int32_t)s16X[ChOffset + 16 + 6];   \
    s32Temp2 += (int32_t)WIND_8_SUBBANDS_6_1 * (int32_t)s16X[ChOffset + 48 + 10]; \
    s32Temp += (int32_t)WIND_8_SUBBANDS_6_2 * (int32_t)s16X[ChOffset + 32 + 6];   \
    s32Temp2 += (int32_t)WIND_8_SUBBANDS_6_2 * (int32_t)s16X[ChOffset + 32 + 10]; \
    s32Temp += (int32_t)WIND_8_SUBBANDS_6_3 * (int32_t)s16X[ChOffset + 48 + 6];   \
    s32Temp2 += (int32_t)WIND_8_SUBBANDS_6_3 * (int32_t)s16X[ChOffset + 16 + 10]; \
    s32Temp += (int32_t)WIND_8_SUBBANDS_6_4 * (int32_t)s16X[ChOffset + 64 + 6];   \
    s32Temp2 += (int32_t)WIND_8_SUBBANDS_6_4 * (int32_t)s16X[ChOffset + 10];      \
    s32DCTY[6] = (int32_t)s32Temp;                                                \
    s32DCTY[10] = (int32_t)s32Temp2;                                              \
  }
/*
 * WINDOW_ACCU_8_7_9 - 8-subband window accumulation for outputs 7 and 9
 * (32-bit multiply build).
 *
 * s32DCTY[7] = sum over k = 0..4 of WIND_8_SUBBANDS_7_k *
 *              s16X[ChOffset + 16 * k + 7]
 * s32DCTY[9] = sum over k = 0..4 of WIND_8_SUBBANDS_7_k *
 *              s16X[ChOffset + 64 - 16 * k + 9]
 *
 * All arithmetic is int32_t and the sums are stored without a shift.
 * The expansion site must provide s32Temp, s32Temp2, s16X, ChOffset and
 * s32DCTY.  Expands to a braced compound statement.
 */
#define WINDOW_ACCU_8_7_9                                                        \
  {                                                                              \
    s32Temp = (int32_t)WIND_8_SUBBANDS_7_0 * (int32_t)s16X[ChOffset + 7];        \
    s32Temp2 = (int32_t)WIND_8_SUBBANDS_7_0 * (int32_t)s16X[ChOffset + 64 + 9];  \
    s32Temp += (int32_t)WIND_8_SUBBANDS_7_1 * (int32_t)s16X[ChOffset + 16 + 7];  \
    s32Temp2 += (int32_t)WIND_8_SUBBANDS_7_1 * (int32_t)s16X[ChOffset + 48 + 9]; \
    s32Temp += (int32_t)WIND_8_SUBBANDS_7_2 * (int32_t)s16X[ChOffset + 32 + 7];  \
    s32Temp2 += (int32_t)WIND_8_SUBBANDS_7_2 * (int32_t)s16X[ChOffset + 32 + 9]; \
    s32Temp += (int32_t)WIND_8_SUBBANDS_7_3 * (int32_t)s16X[ChOffset + 48 + 7];  \
    s32Temp2 += (int32_t)WIND_8_SUBBANDS_7_3 * (int32_t)s16X[ChOffset + 16 + 9]; \
    s32Temp += (int32_t)WIND_8_SUBBANDS_7_4 * (int32_t)s16X[ChOffset + 64 + 7];  \
    s32Temp2 += (int32_t)WIND_8_SUBBANDS_7_4 * (int32_t)s16X[ChOffset + 9];      \
    s32DCTY[7] = (int32_t)s32Temp;                                               \
    s32DCTY[9] = (int32_t)s32Temp2;                                              \
  }
/*
 * WINDOW_ACCU_8_8 - 8-subband window accumulation for output 8
 * (32-bit multiply build).
 *
 * s32DCTY[8] = WIND_8_SUBBANDS_8_0 * (s16X[ChOffset + 8] + s16X[ChOffset + 72])
 *            + WIND_8_SUBBANDS_8_1 * (s16X[ChOffset + 24] + s16X[ChOffset + 56])
 *            + WIND_8_SUBBANDS_8_2 * s16X[ChOffset + 40]
 *
 * All arithmetic is int32_t and the sum is stored without a shift.
 * The expansion site must provide s32Temp, s16X, ChOffset and s32DCTY.
 * Expands to a braced compound statement.
 */
#define WINDOW_ACCU_8_8                                                         \
  {                                                                             \
    s32Temp = (int32_t)WIND_8_SUBBANDS_8_0 *                                    \
              (int32_t)(s16X[ChOffset + 8] + s16X[ChOffset + 64 + 8]);          \
    s32Temp += (int32_t)WIND_8_SUBBANDS_8_1 *                                   \
               (int32_t)(s16X[ChOffset + 16 + 8] + s16X[ChOffset + 48 + 8]);    \
    s32Temp += (int32_t)WIND_8_SUBBANDS_8_2 * (int32_t)s16X[ChOffset + 32 + 8]; \
    s32DCTY[8] = (int32_t)s32Temp;                                              \
  }
/*
 * WINDOW_ACCU_4_0 - 4-subband window accumulation for output 0
 * (32-bit multiply build).
 *
 * s32DCTY[0] = WIND_4_SUBBANDS_0_1 * (s16X[ChOffset + 8] - s16X[ChOffset + 32])
 *            + WIND_4_SUBBANDS_0_2 * (s16X[ChOffset + 16] - s16X[ChOffset + 24])
 *
 * All arithmetic is int32_t and the sum is stored without a shift.
 * The expansion site must provide s32Temp, s16X, ChOffset and s32DCTY.
 * Expands to a braced compound statement.
 */
#define WINDOW_ACCU_4_0                                                                           \
  {                                                                                               \
    s32Temp = (int32_t)WIND_4_SUBBANDS_0_1 * (int32_t)(s16X[ChOffset + 8] - s16X[ChOffset + 32]); \
    s32Temp +=                                                                                    \
            (int32_t)WIND_4_SUBBANDS_0_2 * (int32_t)(s16X[ChOffset + 16] - s16X[ChOffset + 24]);  \
    s32DCTY[0] = (int32_t)(s32Temp);                                                              \
  }
/*
 * WINDOW_ACCU_4_1_7 - 4-subband window accumulation for outputs 1 and 7
 * (32-bit multiply build).
 *
 * s32DCTY[1] = sum over k = 0..4 of WIND_4_SUBBANDS_1_k *
 *              s16X[ChOffset + 8 * k + 1]
 * s32DCTY[7] = sum over k = 0..4 of WIND_4_SUBBANDS_1_k *
 *              s16X[ChOffset + 32 - 8 * k + 7]
 *
 * All arithmetic is int32_t and the sums are stored without a shift.
 * The expansion site must provide s32Temp, s32Temp2, s16X, ChOffset and
 * s32DCTY.  Expands to a braced compound statement.
 */
#define WINDOW_ACCU_4_1_7                                                        \
  {                                                                              \
    s32Temp = (int32_t)WIND_4_SUBBANDS_1_0 * (int32_t)s16X[ChOffset + 1];        \
    s32Temp2 = (int32_t)WIND_4_SUBBANDS_1_0 * (int32_t)s16X[ChOffset + 32 + 7];  \
    s32Temp += (int32_t)WIND_4_SUBBANDS_1_1 * (int32_t)s16X[ChOffset + 8 + 1];   \
    s32Temp2 += (int32_t)WIND_4_SUBBANDS_1_1 * (int32_t)s16X[ChOffset + 24 + 7]; \
    s32Temp += (int32_t)WIND_4_SUBBANDS_1_2 * (int32_t)s16X[ChOffset + 16 + 1];  \
    s32Temp2 += (int32_t)WIND_4_SUBBANDS_1_2 * (int32_t)s16X[ChOffset + 16 + 7]; \
    s32Temp += (int32_t)WIND_4_SUBBANDS_1_3 * (int32_t)s16X[ChOffset + 24 + 1];  \
    s32Temp2 += (int32_t)WIND_4_SUBBANDS_1_3 * (int32_t)s16X[ChOffset + 8 + 7];  \
    s32Temp += (int32_t)WIND_4_SUBBANDS_1_4 * (int32_t)s16X[ChOffset + 32 + 1];  \
    s32Temp2 += (int32_t)WIND_4_SUBBANDS_1_4 * (int32_t)s16X[ChOffset + 7];      \
    s32DCTY[1] = (int32_t)(s32Temp);                                             \
    s32DCTY[7] = (int32_t)(s32Temp2);                                            \
  }
/*
 * WINDOW_ACCU_4_2_6 - 4-subband window accumulation for outputs 2 and 6
 * (32-bit multiply build).
 *
 * s32DCTY[2] = sum over k = 0..4 of WIND_4_SUBBANDS_2_k *
 *              s16X[ChOffset + 8 * k + 2]
 * s32DCTY[6] = sum over k = 0..4 of WIND_4_SUBBANDS_2_k *
 *              s16X[ChOffset + 32 - 8 * k + 6]
 *
 * All arithmetic is int32_t and the sums are stored without a shift.
 * The expansion site must provide s32Temp, s32Temp2, s16X, ChOffset and
 * s32DCTY.  Expands to a braced compound statement.
 */
#define WINDOW_ACCU_4_2_6                                                        \
  {                                                                              \
    s32Temp = (int32_t)WIND_4_SUBBANDS_2_0 * (int32_t)s16X[ChOffset + 2];        \
    s32Temp2 = (int32_t)WIND_4_SUBBANDS_2_0 * (int32_t)s16X[ChOffset + 32 + 6];  \
    s32Temp += (int32_t)WIND_4_SUBBANDS_2_1 * (int32_t)s16X[ChOffset + 8 + 2];   \
    s32Temp2 += (int32_t)WIND_4_SUBBANDS_2_1 * (int32_t)s16X[ChOffset + 24 + 6]; \
    s32Temp += (int32_t)WIND_4_SUBBANDS_2_2 * (int32_t)s16X[ChOffset + 16 + 2];  \
    s32Temp2 += (int32_t)WIND_4_SUBBANDS_2_2 * (int32_t)s16X[ChOffset + 16 + 6]; \
    s32Temp += (int32_t)WIND_4_SUBBANDS_2_3 * (int32_t)s16X[ChOffset + 24 + 2];  \
    s32Temp2 += (int32_t)WIND_4_SUBBANDS_2_3 * (int32_t)s16X[ChOffset + 8 + 6];  \
    s32Temp += (int32_t)WIND_4_SUBBANDS_2_4 * (int32_t)s16X[ChOffset + 32 + 2];  \
    s32Temp2 += (int32_t)WIND_4_SUBBANDS_2_4 * (int32_t)s16X[ChOffset + 6];      \
    s32DCTY[2] = (int32_t)(s32Temp);                                             \
    s32DCTY[6] = (int32_t)(s32Temp2);                                            \
  }
/*
 * WINDOW_ACCU_4_3_5 - 4-subband window accumulation for outputs 3 and 5
 * (32-bit multiply build).
 *
 * s32DCTY[3] = sum over k = 0..4 of WIND_4_SUBBANDS_3_k *
 *              s16X[ChOffset + 8 * k + 3]
 * s32DCTY[5] = sum over k = 0..4 of WIND_4_SUBBANDS_3_k *
 *              s16X[ChOffset + 32 - 8 * k + 5]
 *
 * All arithmetic is int32_t and the sums are stored without a shift.
 * The expansion site must provide s32Temp, s32Temp2, s16X, ChOffset and
 * s32DCTY.  Expands to a braced compound statement.
 */
#define WINDOW_ACCU_4_3_5                                                        \
  {                                                                              \
    s32Temp = (int32_t)WIND_4_SUBBANDS_3_0 * (int32_t)s16X[ChOffset + 3];        \
    s32Temp2 = (int32_t)WIND_4_SUBBANDS_3_0 * (int32_t)s16X[ChOffset + 32 + 5];  \
    s32Temp += (int32_t)WIND_4_SUBBANDS_3_1 * (int32_t)s16X[ChOffset + 8 + 3];   \
    s32Temp2 += (int32_t)WIND_4_SUBBANDS_3_1 * (int32_t)s16X[ChOffset + 24 + 5]; \
    s32Temp += (int32_t)WIND_4_SUBBANDS_3_2 * (int32_t)s16X[ChOffset + 16 + 3];  \
    s32Temp2 += (int32_t)WIND_4_SUBBANDS_3_2 * (int32_t)s16X[ChOffset + 16 + 5]; \
    s32Temp += (int32_t)WIND_4_SUBBANDS_3_3 * (int32_t)s16X[ChOffset + 24 + 3];  \
    s32Temp2 += (int32_t)WIND_4_SUBBANDS_3_3 * (int32_t)s16X[ChOffset + 8 + 5];  \
    s32Temp += (int32_t)WIND_4_SUBBANDS_3_4 * (int32_t)s16X[ChOffset + 32 + 3];  \
    s32Temp2 += (int32_t)WIND_4_SUBBANDS_3_4 * (int32_t)s16X[ChOffset + 5];      \
    s32DCTY[3] = (int32_t)(s32Temp);                                             \
    s32DCTY[5] = (int32_t)(s32Temp2);                                            \
  }
830 
/*
 * WINDOW_ACCU_4_4 - 4-subband window accumulation for output 4
 * (32-bit multiply build).
 *
 * s32DCTY[4] = WIND_4_SUBBANDS_4_0 * (s16X[ChOffset + 4] + s16X[ChOffset + 36])
 *            + WIND_4_SUBBANDS_4_1 * (s16X[ChOffset + 12] + s16X[ChOffset + 28])
 *            + WIND_4_SUBBANDS_4_2 * s16X[ChOffset + 20]
 *
 * All arithmetic is int32_t and the sum is stored without a shift.
 * The expansion site must provide s32Temp, s16X, ChOffset and s32DCTY.
 * Expands to a braced compound statement.
 */
#define WINDOW_ACCU_4_4                                                         \
  {                                                                             \
    s32Temp = (int32_t)WIND_4_SUBBANDS_4_0 *                                    \
              (int32_t)(s16X[ChOffset + 4] + s16X[ChOffset + 4 + 32]);          \
    s32Temp += (int32_t)WIND_4_SUBBANDS_4_1 *                                   \
               (int32_t)(s16X[ChOffset + 4 + 8] + s16X[ChOffset + 4 + 24]);     \
    s32Temp += (int32_t)WIND_4_SUBBANDS_4_2 * (int32_t)s16X[ChOffset + 4 + 16]; \
    s32DCTY[4] = (int32_t)(s32Temp);                                            \
  }
840 #endif
/*
 * WINDOW_PARTIAL_4 - complete 4-subband windowing pass (unrolled variant).
 *
 * Runs the five hand-unrolled accumulators in order: output 0, the 1/7,
 * 2/6 and 3/5 pairs, then output 4.  Per the accumulator macros defined in
 * this file, these write s32DCTY[0] through s32DCTY[7].
 *
 * Deliberately a bare braced block rather than do { } while (0), so it
 * remains valid when invoked without a trailing semicolon.
 */
#define WINDOW_PARTIAL_4 \
  {                      \
    WINDOW_ACCU_4_0;     \
    WINDOW_ACCU_4_1_7;   \
    WINDOW_ACCU_4_2_6;   \
    WINDOW_ACCU_4_3_5;   \
    WINDOW_ACCU_4_4;     \
  }
849 
/*
 * WINDOW_PARTIAL_8 - complete 8-subband windowing pass (unrolled variant).
 *
 * Runs the nine hand-unrolled accumulators in order: output 0, the 1/15
 * through 7/9 pairs, then output 8.  Per the accumulator macros defined in
 * this file, these write s32DCTY[0] through s32DCTY[15].
 *
 * Deliberately a bare braced block rather than do { } while (0), so it
 * remains valid when invoked without a trailing semicolon.
 */
#define WINDOW_PARTIAL_8 \
  {                      \
    WINDOW_ACCU_8_0;     \
    WINDOW_ACCU_8_1_15;  \
    WINDOW_ACCU_8_2_14;  \
    WINDOW_ACCU_8_3_13;  \
    WINDOW_ACCU_8_4_12;  \
    WINDOW_ACCU_8_5_11;  \
    WINDOW_ACCU_8_6_10;  \
    WINDOW_ACCU_8_7_9;   \
    WINDOW_ACCU_8_8;     \
  }
862 #else
863 #if (SBC_IS_64_MULT_IN_WINDOW_ACCU == TRUE)
/*
 * WINDOW_ACCU_4(i) - table-driven 4-subband window accumulation for output i
 * (64-bit multiply build).
 *
 * s32DCTY[i] = (sum over k = 0..4 of gas32CoeffFor4SBs[i + 8 * k] *
 *               s16X[ChOffset + i + 8 * k]) >> 16
 *
 * Products are formed in int64_t.  The parameter is parenthesized at every
 * use so that a compound argument (for example 1 << 1) indexes correctly.
 * i is evaluated more than once, so it must have no side effects.
 * The expansion site must provide s64Temp, gas32CoeffFor4SBs, s16X, ChOffset
 * and s32DCTY.  Expands to a braced compound statement.
 */
#define WINDOW_ACCU_4(i)                                                                      \
  {                                                                                           \
    s64Temp = ((int64_t)gas32CoeffFor4SBs[(i)] * (int64_t)s16X[ChOffset + (i)]);              \
    s64Temp += ((int64_t)gas32CoeffFor4SBs[((i) + 8)] * (int64_t)s16X[ChOffset + (i) + 8]);   \
    s64Temp += ((int64_t)gas32CoeffFor4SBs[((i) + 16)] * (int64_t)s16X[ChOffset + (i) + 16]); \
    s64Temp += ((int64_t)gas32CoeffFor4SBs[((i) + 24)] * (int64_t)s16X[ChOffset + (i) + 24]); \
    s64Temp += ((int64_t)gas32CoeffFor4SBs[((i) + 32)] * (int64_t)s16X[ChOffset + (i) + 32]); \
    s32DCTY[(i)] = (int32_t)(s64Temp >> 16);                                                  \
  }
874 #else
/*
 * WINDOW_ACCU_4(i) - table-driven 4-subband window accumulation for output i
 * (32-bit multiply build).
 *
 * Each tap idx = i + 8 * k (k = 0..4) reads two consecutive table elements:
 *   gas32CoeffFor4SBs[2 * idx]     is multiplied by the sample directly;
 *   gas32CoeffFor4SBs[2 * idx + 1] is reinterpreted as uint16_t, multiplied
 *                                  by the sample, then shifted right by 16.
 * The two partial products are added and accumulated into s32DCTY[i].
 * NOTE(review): presumably the two elements are the high and low 16-bit
 * halves of one 32-bit coefficient - confirm against the table definition.
 *
 * The parameter is parenthesized at every use: the previous "i * 2" form
 * mis-indexed for a compound argument such as 1 + 1.  i is evaluated more
 * than once, so it must have no side effects.
 * The expansion site must provide gas32CoeffFor4SBs, s16X, ChOffset and
 * s32DCTY.  Expands to a braced compound statement.
 */
#define WINDOW_ACCU_4(i)                                                                  \
  {                                                                                       \
    s32DCTY[(i)] = (gas32CoeffFor4SBs[(i) * 2] * s16X[ChOffset + (i)]) +                  \
                   (((int32_t)(uint16_t)(gas32CoeffFor4SBs[((i) * 2) + 1]) *              \
                     s16X[ChOffset + (i)]) >>                                             \
                    16);                                                                  \
    s32DCTY[(i)] += (gas32CoeffFor4SBs[((i) + 8) * 2] * s16X[ChOffset + (i) + 8]) +       \
                    (((int32_t)(uint16_t)(gas32CoeffFor4SBs[(((i) + 8) * 2) + 1]) *       \
                      s16X[ChOffset + (i) + 8]) >>                                        \
                     16);                                                                 \
    s32DCTY[(i)] += (gas32CoeffFor4SBs[((i) + 16) * 2] * s16X[ChOffset + (i) + 16]) +     \
                    (((int32_t)(uint16_t)(gas32CoeffFor4SBs[(((i) + 16) * 2) + 1]) *      \
                      s16X[ChOffset + (i) + 16]) >>                                       \
                     16);                                                                 \
    s32DCTY[(i)] += (gas32CoeffFor4SBs[((i) + 24) * 2] * s16X[ChOffset + (i) + 24]) +     \
                    (((int32_t)(uint16_t)(gas32CoeffFor4SBs[(((i) + 24) * 2) + 1]) *      \
                      s16X[ChOffset + (i) + 24]) >>                                       \
                     16);                                                                 \
    s32DCTY[(i)] += (gas32CoeffFor4SBs[((i) + 32) * 2] * s16X[ChOffset + (i) + 32]) +     \
                    (((int32_t)(uint16_t)(gas32CoeffFor4SBs[(((i) + 32) * 2) + 1]) *      \
                      s16X[ChOffset + (i) + 32]) >>                                       \
                     16);                                                                 \
  }
897 #endif
/*
 * WINDOW_PARTIAL_4 - complete 4-subband windowing pass (table-driven
 * variant).
 *
 * Invokes WINDOW_ACCU_4(i) for i = 0..7 in ascending order, one invocation
 * per s32DCTY element.
 *
 * Deliberately a bare braced block rather than do { } while (0), so it
 * remains valid when invoked without a trailing semicolon.
 */
#define WINDOW_PARTIAL_4 \
  {                      \
    WINDOW_ACCU_4(0);    \
    WINDOW_ACCU_4(1);    \
    WINDOW_ACCU_4(2);    \
    WINDOW_ACCU_4(3);    \
    WINDOW_ACCU_4(4);    \
    WINDOW_ACCU_4(5);    \
    WINDOW_ACCU_4(6);    \
    WINDOW_ACCU_4(7);    \
  }
909 
910 #if (SBC_IS_64_MULT_IN_WINDOW_ACCU == TRUE)
/*
 * WINDOW_ACCU_8(i) - table-driven 8-subband window accumulation for output i
 * (64-bit multiply build).
 *
 * s32DCTY[i] = (sum over k = 0..4 of gas32CoeffFor8SBs[i + 16 * k] *
 *               s16X[ChOffset + i + 16 * k]) >> 16
 *
 * Products are formed in int64_t.  The parameter is parenthesized at every
 * use so that a compound argument (for example 1 << 1) indexes correctly.
 * i is evaluated more than once, so it must have no side effects.
 * The expansion site must provide s64Temp, gas32CoeffFor8SBs, s16X, ChOffset
 * and s32DCTY.  Expands to a braced compound statement.
 */
#define WINDOW_ACCU_8(i)                                                                      \
  {                                                                                           \
    s64Temp = ((int64_t)gas32CoeffFor8SBs[(i)] * (int64_t)s16X[ChOffset + (i)]);              \
    s64Temp += ((int64_t)gas32CoeffFor8SBs[((i) + 16)] * (int64_t)s16X[ChOffset + (i) + 16]); \
    s64Temp += ((int64_t)gas32CoeffFor8SBs[((i) + 32)] * (int64_t)s16X[ChOffset + (i) + 32]); \
    s64Temp += ((int64_t)gas32CoeffFor8SBs[((i) + 48)] * (int64_t)s16X[ChOffset + (i) + 48]); \
    s64Temp += ((int64_t)gas32CoeffFor8SBs[((i) + 64)] * (int64_t)s16X[ChOffset + (i) + 64]); \
    s32DCTY[(i)] = (int32_t)(s64Temp >> 16);                                                  \
  }
922 #else
/*
 * WINDOW_ACCU_8(i) - table-driven 8-subband window accumulation for output i
 * (32-bit multiply build).
 *
 * Each tap idx = i + 16 * k (k = 0..4) reads two consecutive table elements:
 *   gas32CoeffFor8SBs[2 * idx]     is multiplied by the sample directly;
 *   gas32CoeffFor8SBs[2 * idx + 1] is reinterpreted as uint16_t, multiplied
 *                                  by the sample, then shifted right by 16.
 * The two partial products are added and accumulated into s32DCTY[i].
 * NOTE(review): presumably the two elements are the high and low 16-bit
 * halves of one 32-bit coefficient - confirm against the table definition.
 *
 * The parameter is parenthesized at every use: the previous "i * 2" form
 * mis-indexed for a compound argument such as 1 + 1.  i is evaluated more
 * than once, so it must have no side effects.
 * The expansion site must provide gas32CoeffFor8SBs, s16X, ChOffset and
 * s32DCTY.  Expands to a braced compound statement.
 */
#define WINDOW_ACCU_8(i)                                                                  \
  {                                                                                       \
    s32DCTY[(i)] = (gas32CoeffFor8SBs[(i) * 2] * s16X[ChOffset + (i)]) +                  \
                   (((int32_t)(uint16_t)(gas32CoeffFor8SBs[((i) * 2) + 1]) *              \
                     s16X[ChOffset + (i)]) >>                                             \
                    16);                                                                  \
    s32DCTY[(i)] += (gas32CoeffFor8SBs[((i) + 16) * 2] * s16X[ChOffset + (i) + 16]) +     \
                    (((int32_t)(uint16_t)(gas32CoeffFor8SBs[(((i) + 16) * 2) + 1]) *      \
                      s16X[ChOffset + (i) + 16]) >>                                       \
                     16);                                                                 \
    s32DCTY[(i)] += (gas32CoeffFor8SBs[((i) + 32) * 2] * s16X[ChOffset + (i) + 32]) +     \
                    (((int32_t)(uint16_t)(gas32CoeffFor8SBs[(((i) + 32) * 2) + 1]) *      \
                      s16X[ChOffset + (i) + 32]) >>                                       \
                     16);                                                                 \
    s32DCTY[(i)] += (gas32CoeffFor8SBs[((i) + 48) * 2] * s16X[ChOffset + (i) + 48]) +     \
                    (((int32_t)(uint16_t)(gas32CoeffFor8SBs[(((i) + 48) * 2) + 1]) *      \
                      s16X[ChOffset + (i) + 48]) >>                                       \
                     16);                                                                 \
    s32DCTY[(i)] += (gas32CoeffFor8SBs[((i) + 64) * 2] * s16X[ChOffset + (i) + 64]) +     \
                    (((int32_t)(uint16_t)(gas32CoeffFor8SBs[(((i) + 64) * 2) + 1]) *      \
                      s16X[ChOffset + (i) + 64]) >>                                       \
                     16);                                                                 \
  }
948 #endif
/*
 * WINDOW_PARTIAL_8 - runs WINDOW_ACCU_8 for indices 0 through 15, in
 * ascending order. The invocations stay unrolled with constant indices.
 *
 * Expands to a braced compound statement, so it is used without a trailing
 * semicolon.
 */
#define WINDOW_PARTIAL_8                                                     \
  {                                                                          \
    WINDOW_ACCU_8(0);                                                        \
    WINDOW_ACCU_8(1);                                                        \
    WINDOW_ACCU_8(2);                                                        \
    WINDOW_ACCU_8(3);                                                        \
                                                                             \
    WINDOW_ACCU_8(4);                                                        \
    WINDOW_ACCU_8(5);                                                        \
    WINDOW_ACCU_8(6);                                                        \
    WINDOW_ACCU_8(7);                                                        \
                                                                             \
    WINDOW_ACCU_8(8);                                                        \
    WINDOW_ACCU_8(9);                                                        \
    WINDOW_ACCU_8(10);                                                       \
    WINDOW_ACCU_8(11);                                                       \
                                                                             \
    WINDOW_ACCU_8(12);                                                       \
    WINDOW_ACCU_8(13);                                                       \
    WINDOW_ACCU_8(14);                                                       \
    WINDOW_ACCU_8(15);                                                       \
  }
968 #endif
969 #endif
970 
/*
 * ShiftCounter: how far (in s16X entries) the write position for new samples
 * currently sits below EncMaxShiftCounter. The analysis filters advance it by
 * the subband count after every block and reset it to 0 once it has reached
 * EncMaxShiftCounter.
 */
static int16_t ShiftCounter = 0;
/* Upper bound for ShiftCounter; defined in another translation unit. */
extern int16_t EncMaxShiftCounter;
973 /****************************************************************************
974  * SbcAnalysisFilter - performs Analysis of the input audio stream
975  *
976  * RETURNS : N/A
977  */
SbcAnalysisFilter4(SBC_ENC_PARAMS * pstrEncParams,int16_t * input)978 void SbcAnalysisFilter4(SBC_ENC_PARAMS* pstrEncParams, int16_t* input) {
979   int16_t* ps16PcmBuf;
980   int32_t* ps32SbBuf;
981   int32_t s32Blk, s32Ch;
982   int32_t s32NumOfChannels, s32NumOfBlocks;
983   int32_t i, *ps32X, *ps32X2;
984   int32_t Offset, Offset2, ChOffset;
985 #if (SBC_ARM_ASM_OPT == TRUE)
986   register int32_t s32Hi, s32Hi2;
987 #else
988 #if (SBC_IPAQ_OPT == TRUE)
989 #if (SBC_IS_64_MULT_IN_WINDOW_ACCU == TRUE)
990   register int64_t s64Temp, s64Temp2;
991 #else
992   register int32_t s32Temp, s32Temp2;
993 #endif
994 #else
995 
996 #if (SBC_IS_64_MULT_IN_WINDOW_ACCU == TRUE)
997   int64_t s64Temp;
998 #endif
999 
1000 #endif
1001 #endif
1002 
1003   s32NumOfChannels = pstrEncParams->s16NumOfChannels;
1004   s32NumOfBlocks = pstrEncParams->s16NumOfBlocks;
1005 
1006   ps16PcmBuf = input;
1007 
1008   ps32SbBuf = pstrEncParams->s32SbBuffer;
1009   Offset2 = (int32_t)(EncMaxShiftCounter + 40);
1010   for (s32Blk = 0; s32Blk < s32NumOfBlocks; s32Blk++) {
1011     Offset = (int32_t)(EncMaxShiftCounter - ShiftCounter);
1012     /* Store new samples */
1013     if (s32NumOfChannels == 1) {
1014       s16X[3 + Offset] = *ps16PcmBuf;
1015       ps16PcmBuf++;
1016       s16X[2 + Offset] = *ps16PcmBuf;
1017       ps16PcmBuf++;
1018       s16X[1 + Offset] = *ps16PcmBuf;
1019       ps16PcmBuf++;
1020       s16X[0 + Offset] = *ps16PcmBuf;
1021       ps16PcmBuf++;
1022     } else {
1023       s16X[3 + Offset] = *ps16PcmBuf;
1024       ps16PcmBuf++;
1025       s16X[Offset2 + 3 + Offset] = *ps16PcmBuf;
1026       ps16PcmBuf++;
1027       s16X[2 + Offset] = *ps16PcmBuf;
1028       ps16PcmBuf++;
1029       s16X[Offset2 + 2 + Offset] = *ps16PcmBuf;
1030       ps16PcmBuf++;
1031       s16X[1 + Offset] = *ps16PcmBuf;
1032       ps16PcmBuf++;
1033       s16X[Offset2 + 1 + Offset] = *ps16PcmBuf;
1034       ps16PcmBuf++;
1035       s16X[0 + Offset] = *ps16PcmBuf;
1036       ps16PcmBuf++;
1037       s16X[Offset2 + 0 + Offset] = *ps16PcmBuf;
1038       ps16PcmBuf++;
1039     }
1040     for (s32Ch = 0; s32Ch < s32NumOfChannels; s32Ch++) {
1041       ChOffset = s32Ch * Offset2 + Offset;
1042 
1043       WINDOW_PARTIAL_4
1044 
1045       SBC_FastIDCT4(s32DCTY, ps32SbBuf);
1046 
1047       ps32SbBuf += SUB_BANDS_4;
1048     }
1049     if (s32NumOfChannels == 1) {
1050       if (ShiftCounter >= EncMaxShiftCounter) {
1051         SHIFTUP_X4;
1052         ShiftCounter = 0;
1053       } else {
1054         ShiftCounter += SUB_BANDS_4;
1055       }
1056     } else {
1057       if (ShiftCounter >= EncMaxShiftCounter) {
1058         SHIFTUP_X4_2;
1059         ShiftCounter = 0;
1060       } else {
1061         ShiftCounter += SUB_BANDS_4;
1062       }
1063     }
1064   }
1065 }
1066 
1067 /* ////////////////////////////////////////////////////////////////////////// */
SbcAnalysisFilter8(SBC_ENC_PARAMS * pstrEncParams,int16_t * input)1068 void SbcAnalysisFilter8(SBC_ENC_PARAMS* pstrEncParams, int16_t* input) {
1069   int16_t* ps16PcmBuf;
1070   int32_t* ps32SbBuf;
1071   int32_t s32Blk, s32Ch; /* counter for block*/
1072   int32_t Offset, Offset2;
1073   int32_t s32NumOfChannels, s32NumOfBlocks;
1074   int32_t i, *ps32X, *ps32X2;
1075   int32_t ChOffset;
1076 #if (SBC_ARM_ASM_OPT == TRUE)
1077   register int32_t s32Hi, s32Hi2;
1078 #else
1079 #if (SBC_IPAQ_OPT == TRUE)
1080 #if (SBC_IS_64_MULT_IN_WINDOW_ACCU == TRUE)
1081   register int64_t s64Temp, s64Temp2;
1082 #else
1083   register int32_t s32Temp, s32Temp2;
1084 #endif
1085 #else
1086 #if (SBC_IS_64_MULT_IN_WINDOW_ACCU == TRUE)
1087   int64_t s64Temp;
1088 #endif
1089 #endif
1090 #endif
1091 
1092   s32NumOfChannels = pstrEncParams->s16NumOfChannels;
1093   s32NumOfBlocks = pstrEncParams->s16NumOfBlocks;
1094 
1095   ps16PcmBuf = input;
1096 
1097   ps32SbBuf = pstrEncParams->s32SbBuffer;
1098   Offset2 = (int32_t)(EncMaxShiftCounter + 80);
1099   for (s32Blk = 0; s32Blk < s32NumOfBlocks; s32Blk++) {
1100     Offset = (int32_t)(EncMaxShiftCounter - ShiftCounter);
1101     /* Store new samples */
1102     if (s32NumOfChannels == 1) {
1103       s16X[7 + Offset] = *ps16PcmBuf;
1104       ps16PcmBuf++;
1105       s16X[6 + Offset] = *ps16PcmBuf;
1106       ps16PcmBuf++;
1107       s16X[5 + Offset] = *ps16PcmBuf;
1108       ps16PcmBuf++;
1109       s16X[4 + Offset] = *ps16PcmBuf;
1110       ps16PcmBuf++;
1111       s16X[3 + Offset] = *ps16PcmBuf;
1112       ps16PcmBuf++;
1113       s16X[2 + Offset] = *ps16PcmBuf;
1114       ps16PcmBuf++;
1115       s16X[1 + Offset] = *ps16PcmBuf;
1116       ps16PcmBuf++;
1117       s16X[0 + Offset] = *ps16PcmBuf;
1118       ps16PcmBuf++;
1119     } else {
1120       s16X[7 + Offset] = *ps16PcmBuf;
1121       ps16PcmBuf++;
1122       s16X[Offset2 + 7 + Offset] = *ps16PcmBuf;
1123       ps16PcmBuf++;
1124       s16X[6 + Offset] = *ps16PcmBuf;
1125       ps16PcmBuf++;
1126       s16X[Offset2 + 6 + Offset] = *ps16PcmBuf;
1127       ps16PcmBuf++;
1128       s16X[5 + Offset] = *ps16PcmBuf;
1129       ps16PcmBuf++;
1130       s16X[Offset2 + 5 + Offset] = *ps16PcmBuf;
1131       ps16PcmBuf++;
1132       s16X[4 + Offset] = *ps16PcmBuf;
1133       ps16PcmBuf++;
1134       s16X[Offset2 + 4 + Offset] = *ps16PcmBuf;
1135       ps16PcmBuf++;
1136       s16X[3 + Offset] = *ps16PcmBuf;
1137       ps16PcmBuf++;
1138       s16X[Offset2 + 3 + Offset] = *ps16PcmBuf;
1139       ps16PcmBuf++;
1140       s16X[2 + Offset] = *ps16PcmBuf;
1141       ps16PcmBuf++;
1142       s16X[Offset2 + 2 + Offset] = *ps16PcmBuf;
1143       ps16PcmBuf++;
1144       s16X[1 + Offset] = *ps16PcmBuf;
1145       ps16PcmBuf++;
1146       s16X[Offset2 + 1 + Offset] = *ps16PcmBuf;
1147       ps16PcmBuf++;
1148       s16X[0 + Offset] = *ps16PcmBuf;
1149       ps16PcmBuf++;
1150       s16X[Offset2 + 0 + Offset] = *ps16PcmBuf;
1151       ps16PcmBuf++;
1152     }
1153     for (s32Ch = 0; s32Ch < s32NumOfChannels; s32Ch++) {
1154       ChOffset = s32Ch * Offset2 + Offset;
1155 
1156       WINDOW_PARTIAL_8
1157 
1158       SBC_FastIDCT8(s32DCTY, ps32SbBuf);
1159 
1160       ps32SbBuf += SUB_BANDS_8;
1161     }
1162     if (s32NumOfChannels == 1) {
1163       if (ShiftCounter >= EncMaxShiftCounter) {
1164         SHIFTUP_X8;
1165         ShiftCounter = 0;
1166       } else {
1167         ShiftCounter += SUB_BANDS_8;
1168       }
1169     } else {
1170       if (ShiftCounter >= EncMaxShiftCounter) {
1171         SHIFTUP_X8_2;
1172         ShiftCounter = 0;
1173       } else {
1174         ShiftCounter += SUB_BANDS_8;
1175       }
1176     }
1177   }
1178 }
1179 
SbcAnalysisInit(void)1180 void SbcAnalysisInit(void) {
1181   memset(s16X, 0, ENC_VX_BUFFER_SIZE * sizeof(int16_t));
1182   ShiftCounter = 0;
1183 }
1184