1 /******************************************************************************
2  *
3  *  Copyright 1999-2012 Broadcom Corporation
4  *
5  *  Licensed under the Apache License, Version 2.0 (the "License");
6  *  you may not use this file except in compliance with the License.
7  *  You may obtain a copy of the License at:
8  *
9  *  http://www.apache.org/licenses/LICENSE-2.0
10  *
11  *  Unless required by applicable law or agreed to in writing, software
12  *  distributed under the License is distributed on an "AS IS" BASIS,
13  *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  *  See the License for the specific language governing permissions and
15  *  limitations under the License.
16  *
17  ******************************************************************************/
18 
19 /******************************************************************************
20  *
21  *  source file for fast dct operations
22  *
23  ******************************************************************************/
24 
25 #include "sbc_dct.h"
26 
27 #include "sbc_enc_func_declare.h"
28 #include "sbc_encoder.h"
29 
/*******************************************************************************
 *
 * Function         SBC_FastIDCT8
 *
 * Description      Implementation of the fast DCT algorithm by Feig and
 *                  Winograd.
 *
 * Returns          Nothing (void). The result, y = dct(pInVect), is written
 *                  to pOutVect.
 *
 ******************************************************************************/
41 
/*
 * Cosine constants and the multiply primitive used by the IDCT routines in
 * this file. SBC_IS_64_MULT_IN_IDCT selects the variant:
 *   == FALSE  : constants are scaled by 0x8000 and SBC_IDCT_MULT maps to
 *               SBC_MULT_32_16_SIMPLIFIED.
 *   otherwise : constants are scaled by 0x80000000 and SBC_IDCT_MULT maps to
 *               SBC_MULT_32_32.
 */
#if (SBC_IS_64_MULT_IN_IDCT == FALSE)
#define SBC_COS_PI_SUR_4 (0x00005a82)   /* 0x8000 * 0.7071 = cos(pi/4)    */
#define SBC_COS_PI_SUR_8 (0x00007641)   /* 0x8000 * 0.9239 = cos(pi/8)    */
#define SBC_COS_3PI_SUR_8 (0x000030fb)  /* 0x8000 * 0.3827 = cos(3*pi/8)  */
#define SBC_COS_PI_SUR_16 (0x00007d8a)  /* 0x8000 * 0.9808 = cos(pi/16)   */
#define SBC_COS_3PI_SUR_16 (0x00006a6d) /* 0x8000 * 0.8315 = cos(3*pi/16) */
#define SBC_COS_5PI_SUR_16 (0x0000471c) /* 0x8000 * 0.5556 = cos(5*pi/16) */
#define SBC_COS_7PI_SUR_16 (0x000018f8) /* 0x8000 * 0.1951 = cos(7*pi/16) */
#define SBC_IDCT_MULT(a, b, c) SBC_MULT_32_16_SIMPLIFIED(a, b, c)
#else
#define SBC_COS_PI_SUR_4 (0x5A827999)   /* 0x80000000 * 0.707106781 = cos(pi/4)    */
#define SBC_COS_PI_SUR_8 (0x7641AF3C)   /* 0x80000000 * 0.923879533 = cos(pi/8)    */
#define SBC_COS_3PI_SUR_8 (0x30FBC54D)  /* 0x80000000 * 0.382683432 = cos(3*pi/8)  */
#define SBC_COS_PI_SUR_16 (0x7D8A5F3F)  /* 0x80000000 * 0.98078528  = cos(pi/16)   */
#define SBC_COS_3PI_SUR_16 (0x6A6D98A4) /* 0x80000000 * 0.831469612 = cos(3*pi/16) */
#define SBC_COS_5PI_SUR_16 (0x471CECE6) /* 0x80000000 * 0.555570233 = cos(5*pi/16) */
#define SBC_COS_7PI_SUR_16 (0x18F8B83C) /* 0x80000000 * 0.195090322 = cos(7*pi/16) */
#define SBC_IDCT_MULT(a, b, c) SBC_MULT_32_32(a, b, c)
#endif /* SBC_IS_64_MULT_IN_IDCT */

/*
 * Coefficient tables read by the direct-matrixing (non fast DCT) code paths
 * of SBC_FastIDCT8 and SBC_FastIDCT4; they are defined outside this file.
 */
#if (SBC_FAST_DCT == FALSE)
extern const int16_t gas16AnalDCTcoeff8[];
extern const int16_t gas16AnalDCTcoeff4[];
#endif
68 
SBC_FastIDCT8(int32_t * pInVect,int32_t * pOutVect)69 void SBC_FastIDCT8(int32_t* pInVect, int32_t* pOutVect) {
70 #if (SBC_FAST_DCT == TRUE)
71 #if (SBC_ARM_ASM_OPT == TRUE)
72 #else
73 #if (SBC_IPAQ_OPT == TRUE)
74 #if (SBC_IS_64_MULT_IN_IDCT == TRUE)
75   int64_t s64Temp;
76 #endif
77 #else
78 #if (SBC_IS_64_MULT_IN_IDCT == TRUE)
79   int32_t s32HiTemp;
80 #else
81   int32_t s32In2Temp;
82   register int32_t s32In1Temp;
83 #endif
84 #endif
85 #endif
86 
87   register int32_t x0, x1, x2, x3, x4, x5, x6, x7, temp;
88   int32_t res_even[4], res_odd[4];
89   /*x0= (pInVect[4])/2 ;*/
90   SBC_IDCT_MULT(SBC_COS_PI_SUR_4, pInVect[4], x0);
91   /*printf("x0 0x%x = %d = %d * %d\n", x0, x0, SBC_COS_PI_SUR_4, pInVect[4]);*/
92 
93   x1 = (pInVect[3] + pInVect[5]) >> 1;
94   x2 = (pInVect[2] + pInVect[6]) >> 1;
95   x3 = (pInVect[1] + pInVect[7]) >> 1;
96   x4 = (pInVect[0] + pInVect[8]) >> 1;
97   x5 = (pInVect[9] - pInVect[15]) >> 1;
98   x6 = (pInVect[10] - pInVect[14]) >> 1;
99   x7 = (pInVect[11] - pInVect[13]) >> 1;
100 
101   /* 2-point IDCT of x0 and x4 as in (11) */
102   temp = x0;
103   SBC_IDCT_MULT(SBC_COS_PI_SUR_4, (x0 + x4), x0);   /*x0 = ( x0 + x4 ) * cos(1*pi/4) ; */
104   SBC_IDCT_MULT(SBC_COS_PI_SUR_4, (temp - x4), x4); /*x4 = ( temp - x4 ) * cos(1*pi/4) ; */
105 
106   /* rearrangement of x2 and x6 as in (15) */
107   x2 -= x6;
108   x6 <<= 1;
109 
110   /* 2-point IDCT of x2 and x6 and post-multiplication as in (15) */
111   SBC_IDCT_MULT(SBC_COS_PI_SUR_4, x6, x6); /*x6 = x6 * cos(1*pi/4) ; */
112   temp = x2;
113   SBC_IDCT_MULT(SBC_COS_PI_SUR_8, (x2 + x6), x2);    /*x2 = ( x2 + x6 ) * cos(1*pi/8) ; */
114   SBC_IDCT_MULT(SBC_COS_3PI_SUR_8, (temp - x6), x6); /*x6 = ( temp - x6 ) * cos(3*pi/8) ;*/
115 
116   /* 4-point IDCT of x0,x2,x4 and x6 as in (11) */
117   res_even[0] = x0 + x2;
118   res_even[1] = x4 + x6;
119   res_even[2] = x4 - x6;
120   res_even[3] = x0 - x2;
121 
122   /* rearrangement of x1,x3,x5,x7 as in (15) */
123   x7 <<= 1;
124   x5 = (x5 << 1) - x7;
125   x3 = (x3 << 1) - x5;
126   x1 -= x3 >> 1;
127 
128   /* two-dimensional IDCT of x1 and x5 */
129   SBC_IDCT_MULT(SBC_COS_PI_SUR_4, x5, x5); /*x5 = x5 * cos(1*pi/4) ; */
130   temp = x1;
131   x1 = x1 + x5;
132   x5 = temp - x5;
133 
134   /* rearrangement of x3 and x7 as in (15) */
135   x3 -= x7;
136   x7 <<= 1;
137   SBC_IDCT_MULT(SBC_COS_PI_SUR_4, x7, x7); /*x7 = x7 * cos(1*pi/4) ; */
138 
139   /* 2-point IDCT of x3 and x7 and post-multiplication as in (15) */
140   temp = x3;
141   SBC_IDCT_MULT(SBC_COS_PI_SUR_8, (x3 + x7), x3);    /*x3 = ( x3 + x7 ) * cos(1*pi/8)  ; */
142   SBC_IDCT_MULT(SBC_COS_3PI_SUR_8, (temp - x7), x7); /*x7 = ( temp - x7 ) * cos(3*pi/8) ;*/
143 
144   /* 4-point IDCT of x1,x3,x5 and x7 and post multiplication by diagonal matrix
145    * as in (14) */
146   SBC_IDCT_MULT((SBC_COS_PI_SUR_16), (x1 + x3),
147                 res_odd[0]); /*res_odd[ 0 ] = ( x1 + x3 ) * cos(1*pi/16) ; */
148   SBC_IDCT_MULT((SBC_COS_3PI_SUR_16), (x5 + x7),
149                 res_odd[1]); /*res_odd[ 1 ] = ( x5 + x7 ) * cos(3*pi/16) ; */
150   SBC_IDCT_MULT((SBC_COS_5PI_SUR_16), (x5 - x7),
151                 res_odd[2]); /*res_odd[ 2 ] = ( x5 - x7 ) * cos(5*pi/16) ; */
152   SBC_IDCT_MULT((SBC_COS_7PI_SUR_16), (x1 - x3),
153                 res_odd[3]); /*res_odd[ 3 ] = ( x1 - x3 ) * cos(7*pi/16) ; */
154 
155   /* additions and subtractions as in (9) */
156   pOutVect[0] = (res_even[0] + res_odd[0]);
157   pOutVect[1] = (res_even[1] + res_odd[1]);
158   pOutVect[2] = (res_even[2] + res_odd[2]);
159   pOutVect[3] = (res_even[3] + res_odd[3]);
160   pOutVect[7] = (res_even[0] - res_odd[0]);
161   pOutVect[6] = (res_even[1] - res_odd[1]);
162   pOutVect[5] = (res_even[2] - res_odd[2]);
163   pOutVect[4] = (res_even[3] - res_odd[3]);
164 #else
165   uint8_t Index, k;
166   int32_t temp;
167   /*Calculate 4 subband samples by matrixing*/
168   for (Index = 0; Index < 8; Index++) {
169     temp = 0;
170     for (k = 0; k < 16; k++) {
171       /*temp += (int32_t)(((int64_t)M[(Index*strEncParams->numOfSubBands*2)+k] *
172        * Y[k]) >> 16 );*/
173       temp += (gas16AnalDCTcoeff8[(Index * 8 * 2) + k] * (pInVect[k] >> 16));
174       temp += ((gas16AnalDCTcoeff8[(Index * 8 * 2) + k] * (pInVect[k] & 0xFFFF)) >> 16);
175     }
176     pOutVect[Index] = temp;
177   }
178 #endif
179   /*    printf("pOutVect: 0x%x;0x%x;0x%x;0x%x;0x%x;0x%x;0x%x;0x%x\n",\
180           pOutVect[0],pOutVect[1],pOutVect[2],pOutVect[3],pOutVect[4],pOutVect[5],pOutVect[6],pOutVect[7]);*/
181 }
182 
183 /*******************************************************************************
184  *
185  * Function         SBC_FastIDCT4
186  *
187  * Description      implementation of fast DCT algorithm by Feig and Winograd
188  *
189  *
190  * Returns          y = dct(x0)
191  *
192  *
193  ******************************************************************************/
SBC_FastIDCT4(int32_t * pInVect,int32_t * pOutVect)194 void SBC_FastIDCT4(int32_t* pInVect, int32_t* pOutVect) {
195 #if (SBC_FAST_DCT == TRUE)
196 #if (SBC_ARM_ASM_OPT == TRUE)
197 #else
198 #if (SBC_IPAQ_OPT == TRUE)
199 #if (SBC_IS_64_MULT_IN_IDCT == TRUE)
200   int64_t s64Temp;
201 #endif
202 #else
203 #if (SBC_IS_64_MULT_IN_IDCT == TRUE)
204   int32_t s32HiTemp;
205 #else
206   uint16_t s32In2Temp;
207   int32_t s32In1Temp;
208 #endif
209 #endif
210 #endif
211   int32_t temp, x2;
212   int32_t tmp[8];
213 
214   x2 = pInVect[2] >> 1;
215   temp = (pInVect[0] + pInVect[4]);
216   SBC_IDCT_MULT((SBC_COS_PI_SUR_4 >> 1), temp, tmp[0]);
217   tmp[1] = x2 - tmp[0];
218   tmp[0] += x2;
219   temp = (pInVect[1] + pInVect[3]);
220   SBC_IDCT_MULT((SBC_COS_3PI_SUR_8 >> 1), temp, tmp[3]);
221   SBC_IDCT_MULT((SBC_COS_PI_SUR_8 >> 1), temp, tmp[2]);
222   temp = (pInVect[5] - pInVect[7]);
223   SBC_IDCT_MULT((SBC_COS_3PI_SUR_8 >> 1), temp, tmp[5]);
224   SBC_IDCT_MULT((SBC_COS_PI_SUR_8 >> 1), temp, tmp[4]);
225   tmp[6] = tmp[2] + tmp[5];
226   tmp[7] = tmp[3] - tmp[4];
227   pOutVect[0] = (tmp[0] + tmp[6]);
228   pOutVect[1] = (tmp[1] + tmp[7]);
229   pOutVect[2] = (tmp[1] - tmp[7]);
230   pOutVect[3] = (tmp[0] - tmp[6]);
231 #else
232   uint8_t Index, k;
233   int32_t temp;
234   /*Calculate 4 subband samples by matrixing*/
235   for (Index = 0; Index < 4; Index++) {
236     temp = 0;
237     for (k = 0; k < 8; k++) {
238       /*temp += (int32_t)(((int64_t)M[(Index*strEncParams->numOfSubBands*2)+k] *
239        * Y[k]) >> 16 ); */
240       temp += (gas16AnalDCTcoeff4[(Index * 4 * 2) + k] * (pInVect[k] >> 16));
241       temp += ((gas16AnalDCTcoeff4[(Index * 4 * 2) + k] * (pInVect[k] & 0xFFFF)) >> 16);
242     }
243     pOutVect[Index] = temp;
244   }
245 #endif
246 }
247