1*a58d3d2aSXin Li /* Copyright (c) 2002-2008 Jean-Marc Valin
2*a58d3d2aSXin Li Copyright (c) 2007-2008 CSIRO
3*a58d3d2aSXin Li Copyright (c) 2007-2009 Xiph.Org Foundation
4*a58d3d2aSXin Li Written by Jean-Marc Valin */
5*a58d3d2aSXin Li /**
6*a58d3d2aSXin Li @file mathops.h
7*a58d3d2aSXin Li @brief Various math functions
8*a58d3d2aSXin Li */
9*a58d3d2aSXin Li /*
10*a58d3d2aSXin Li Redistribution and use in source and binary forms, with or without
11*a58d3d2aSXin Li modification, are permitted provided that the following conditions
12*a58d3d2aSXin Li are met:
13*a58d3d2aSXin Li
14*a58d3d2aSXin Li - Redistributions of source code must retain the above copyright
15*a58d3d2aSXin Li notice, this list of conditions and the following disclaimer.
16*a58d3d2aSXin Li
17*a58d3d2aSXin Li - Redistributions in binary form must reproduce the above copyright
18*a58d3d2aSXin Li notice, this list of conditions and the following disclaimer in the
19*a58d3d2aSXin Li documentation and/or other materials provided with the distribution.
20*a58d3d2aSXin Li
21*a58d3d2aSXin Li THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22*a58d3d2aSXin Li ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23*a58d3d2aSXin Li LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24*a58d3d2aSXin Li A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
25*a58d3d2aSXin Li OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
26*a58d3d2aSXin Li EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
27*a58d3d2aSXin Li PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
28*a58d3d2aSXin Li PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
29*a58d3d2aSXin Li LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
30*a58d3d2aSXin Li NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
31*a58d3d2aSXin Li SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32*a58d3d2aSXin Li */
33*a58d3d2aSXin Li
34*a58d3d2aSXin Li #ifndef MATHOPS_H
35*a58d3d2aSXin Li #define MATHOPS_H
36*a58d3d2aSXin Li
37*a58d3d2aSXin Li #include "arch.h"
38*a58d3d2aSXin Li #include "entcode.h"
39*a58d3d2aSXin Li #include "os_support.h"
40*a58d3d2aSXin Li
41*a58d3d2aSXin Li #define PI 3.141592653f
42*a58d3d2aSXin Li
/* Multiplies two 16-bit fractional values. Bit-exactness of this macro is important.
   Both operands are cast to opus_int16, the product is formed as an opus_int32,
   16384 (half of 1<<15) is added so that the result is rounded, and the sum is
   shifted right by 15 bits. */
#define FRAC_MUL16(a,b) ((16384+((opus_int32)(opus_int16)(a)*(opus_int16)(b)))>>15)
45*a58d3d2aSXin Li
46*a58d3d2aSXin Li unsigned isqrt32(opus_uint32 _val);
47*a58d3d2aSXin Li
/* CELT doesn't need it for fixed-point, but analysis.c does. */
49*a58d3d2aSXin Li #if !defined(FIXED_POINT) || defined(ANALYSIS_C)
50*a58d3d2aSXin Li #define cA 0.43157974f
51*a58d3d2aSXin Li #define cB 0.67848403f
52*a58d3d2aSXin Li #define cC 0.08595542f
53*a58d3d2aSXin Li #define cE ((float)PI/2)
fast_atan2f(float y,float x)54*a58d3d2aSXin Li static OPUS_INLINE float fast_atan2f(float y, float x) {
55*a58d3d2aSXin Li float x2, y2;
56*a58d3d2aSXin Li x2 = x*x;
57*a58d3d2aSXin Li y2 = y*y;
58*a58d3d2aSXin Li /* For very small values, we don't care about the answer, so
59*a58d3d2aSXin Li we can just return 0. */
60*a58d3d2aSXin Li if (x2 + y2 < 1e-18f)
61*a58d3d2aSXin Li {
62*a58d3d2aSXin Li return 0;
63*a58d3d2aSXin Li }
64*a58d3d2aSXin Li if(x2<y2){
65*a58d3d2aSXin Li float den = (y2 + cB*x2) * (y2 + cC*x2);
66*a58d3d2aSXin Li return -x*y*(y2 + cA*x2) / den + (y<0 ? -cE : cE);
67*a58d3d2aSXin Li }else{
68*a58d3d2aSXin Li float den = (x2 + cB*y2) * (x2 + cC*y2);
69*a58d3d2aSXin Li return x*y*(x2 + cA*y2) / den + (y<0 ? -cE : cE) - (x*y<0 ? -cE : cE);
70*a58d3d2aSXin Li }
71*a58d3d2aSXin Li }
72*a58d3d2aSXin Li #undef cA
73*a58d3d2aSXin Li #undef cB
74*a58d3d2aSXin Li #undef cC
75*a58d3d2aSXin Li #undef cE
76*a58d3d2aSXin Li #endif
77*a58d3d2aSXin Li
78*a58d3d2aSXin Li
79*a58d3d2aSXin Li #ifndef OVERRIDE_CELT_MAXABS16
celt_maxabs16(const opus_val16 * x,int len)80*a58d3d2aSXin Li static OPUS_INLINE opus_val32 celt_maxabs16(const opus_val16 *x, int len)
81*a58d3d2aSXin Li {
82*a58d3d2aSXin Li int i;
83*a58d3d2aSXin Li opus_val16 maxval = 0;
84*a58d3d2aSXin Li opus_val16 minval = 0;
85*a58d3d2aSXin Li for (i=0;i<len;i++)
86*a58d3d2aSXin Li {
87*a58d3d2aSXin Li maxval = MAX16(maxval, x[i]);
88*a58d3d2aSXin Li minval = MIN16(minval, x[i]);
89*a58d3d2aSXin Li }
90*a58d3d2aSXin Li return MAX32(EXTEND32(maxval),-EXTEND32(minval));
91*a58d3d2aSXin Li }
92*a58d3d2aSXin Li #endif
93*a58d3d2aSXin Li
94*a58d3d2aSXin Li #ifndef OVERRIDE_CELT_MAXABS32
95*a58d3d2aSXin Li #ifdef FIXED_POINT
celt_maxabs32(const opus_val32 * x,int len)96*a58d3d2aSXin Li static OPUS_INLINE opus_val32 celt_maxabs32(const opus_val32 *x, int len)
97*a58d3d2aSXin Li {
98*a58d3d2aSXin Li int i;
99*a58d3d2aSXin Li opus_val32 maxval = 0;
100*a58d3d2aSXin Li opus_val32 minval = 0;
101*a58d3d2aSXin Li for (i=0;i<len;i++)
102*a58d3d2aSXin Li {
103*a58d3d2aSXin Li maxval = MAX32(maxval, x[i]);
104*a58d3d2aSXin Li minval = MIN32(minval, x[i]);
105*a58d3d2aSXin Li }
106*a58d3d2aSXin Li return MAX32(maxval, -minval);
107*a58d3d2aSXin Li }
108*a58d3d2aSXin Li #else
109*a58d3d2aSXin Li #define celt_maxabs32(x,len) celt_maxabs16(x,len)
110*a58d3d2aSXin Li #endif
111*a58d3d2aSXin Li #endif
112*a58d3d2aSXin Li
113*a58d3d2aSXin Li
114*a58d3d2aSXin Li #ifndef FIXED_POINT
115*a58d3d2aSXin Li
/* Math helpers for builds where FIXED_POINT is not defined: each one maps
   onto the C math library or onto plain float arithmetic. */
/* Square root, narrowed to float. */
#define celt_sqrt(x) ((float)sqrt(x))
/* Reciprocal square root: 1/sqrt(x). */
#define celt_rsqrt(x) (1.f/celt_sqrt(x))
/* Same as celt_rsqrt() in this build. */
#define celt_rsqrt_norm(x) (celt_rsqrt(x))
/* cos(x*pi/2), i.e. the argument is expressed in quarter turns. */
#define celt_cos_norm(x) ((float)cos((.5f*PI)*(x)))
/* Reciprocal: 1/x. */
#define celt_rcp(x) (1.f/(x))
/* Plain division a/b. */
#define celt_div(a,b) ((a)/(b))
/* Division with the numerator cast to float first. */
#define frac_div32(a,b) ((float)(a)/(b))
123*a58d3d2aSXin Li
124*a58d3d2aSXin Li #ifdef FLOAT_APPROX
125*a58d3d2aSXin Li
126*a58d3d2aSXin Li /* Note: This assumes radix-2 floating point with the exponent at bits 23..30 and an offset of 127
127*a58d3d2aSXin Li denorm, +/- inf and NaN are *not* handled */
128*a58d3d2aSXin Li
129*a58d3d2aSXin Li /** Base-2 log approximation (log2(x)). */
celt_log2(float x)130*a58d3d2aSXin Li static OPUS_INLINE float celt_log2(float x)
131*a58d3d2aSXin Li {
132*a58d3d2aSXin Li int integer;
133*a58d3d2aSXin Li float frac;
134*a58d3d2aSXin Li union {
135*a58d3d2aSXin Li float f;
136*a58d3d2aSXin Li opus_uint32 i;
137*a58d3d2aSXin Li } in;
138*a58d3d2aSXin Li in.f = x;
139*a58d3d2aSXin Li integer = (in.i>>23)-127;
140*a58d3d2aSXin Li in.i -= (opus_uint32)integer<<23;
141*a58d3d2aSXin Li frac = in.f - 1.5f;
142*a58d3d2aSXin Li frac = -0.41445418f + frac*(0.95909232f
143*a58d3d2aSXin Li + frac*(-0.33951290f + frac*0.16541097f));
144*a58d3d2aSXin Li return 1+integer+frac;
145*a58d3d2aSXin Li }
146*a58d3d2aSXin Li
147*a58d3d2aSXin Li /** Base-2 exponential approximation (2^x). */
celt_exp2(float x)148*a58d3d2aSXin Li static OPUS_INLINE float celt_exp2(float x)
149*a58d3d2aSXin Li {
150*a58d3d2aSXin Li int integer;
151*a58d3d2aSXin Li float frac;
152*a58d3d2aSXin Li union {
153*a58d3d2aSXin Li float f;
154*a58d3d2aSXin Li opus_uint32 i;
155*a58d3d2aSXin Li } res;
156*a58d3d2aSXin Li integer = (int)floor(x);
157*a58d3d2aSXin Li if (integer < -50)
158*a58d3d2aSXin Li return 0;
159*a58d3d2aSXin Li frac = x-integer;
160*a58d3d2aSXin Li /* K0 = 1, K1 = log(2), K2 = 3-4*log(2), K3 = 3*log(2) - 2 */
161*a58d3d2aSXin Li res.f = 0.99992522f + frac * (0.69583354f
162*a58d3d2aSXin Li + frac * (0.22606716f + 0.078024523f*frac));
163*a58d3d2aSXin Li res.i = (res.i + ((opus_uint32)integer<<23)) & 0x7fffffff;
164*a58d3d2aSXin Li return res.f;
165*a58d3d2aSXin Li }
166*a58d3d2aSXin Li
167*a58d3d2aSXin Li #else
/* C math library versions, used when FLOAT_APPROX is not defined:
   log2(x) = log(x)/ln(2) and 2^x = exp(x*ln(2)).
   1.442695040888963387 is 1/ln(2) and 0.6931471805599453094 is ln(2). */
#define celt_log2(x) ((float)(1.442695040888963387*log(x)))
#define celt_exp2(x) ((float)exp(0.6931471805599453094*(x)))
170*a58d3d2aSXin Li #endif
171*a58d3d2aSXin Li
172*a58d3d2aSXin Li #endif
173*a58d3d2aSXin Li
174*a58d3d2aSXin Li #ifdef FIXED_POINT
175*a58d3d2aSXin Li
176*a58d3d2aSXin Li #include "os_support.h"
177*a58d3d2aSXin Li
178*a58d3d2aSXin Li #ifndef OVERRIDE_CELT_ILOG2
179*a58d3d2aSXin Li /** Integer log in base2. Undefined for zero and negative numbers */
celt_ilog2(opus_int32 x)180*a58d3d2aSXin Li static OPUS_INLINE opus_int16 celt_ilog2(opus_int32 x)
181*a58d3d2aSXin Li {
182*a58d3d2aSXin Li celt_sig_assert(x>0);
183*a58d3d2aSXin Li return EC_ILOG(x)-1;
184*a58d3d2aSXin Li }
185*a58d3d2aSXin Li #endif
186*a58d3d2aSXin Li
187*a58d3d2aSXin Li
188*a58d3d2aSXin Li /** Integer log in base2. Defined for zero, but not for negative numbers */
celt_zlog2(opus_val32 x)189*a58d3d2aSXin Li static OPUS_INLINE opus_int16 celt_zlog2(opus_val32 x)
190*a58d3d2aSXin Li {
191*a58d3d2aSXin Li return x <= 0 ? 0 : celt_ilog2(x);
192*a58d3d2aSXin Li }
193*a58d3d2aSXin Li
194*a58d3d2aSXin Li opus_val16 celt_rsqrt_norm(opus_val32 x);
195*a58d3d2aSXin Li
196*a58d3d2aSXin Li opus_val32 celt_sqrt(opus_val32 x);
197*a58d3d2aSXin Li
198*a58d3d2aSXin Li opus_val16 celt_cos_norm(opus_val32 x);
199*a58d3d2aSXin Li
200*a58d3d2aSXin Li /** Base-2 logarithm approximation (log2(x)). (Q14 input, Q10 output) */
celt_log2(opus_val32 x)201*a58d3d2aSXin Li static OPUS_INLINE opus_val16 celt_log2(opus_val32 x)
202*a58d3d2aSXin Li {
203*a58d3d2aSXin Li int i;
204*a58d3d2aSXin Li opus_val16 n, frac;
205*a58d3d2aSXin Li /* -0.41509302963303146, 0.9609890551383969, -0.31836011537636605,
206*a58d3d2aSXin Li 0.15530808010959576, -0.08556153059057618 */
207*a58d3d2aSXin Li static const opus_val16 C[5] = {-6801+(1<<(13-DB_SHIFT)), 15746, -5217, 2545, -1401};
208*a58d3d2aSXin Li if (x==0)
209*a58d3d2aSXin Li return -32767;
210*a58d3d2aSXin Li i = celt_ilog2(x);
211*a58d3d2aSXin Li n = VSHR32(x,i-15)-32768-16384;
212*a58d3d2aSXin Li frac = ADD16(C[0], MULT16_16_Q15(n, ADD16(C[1], MULT16_16_Q15(n, ADD16(C[2], MULT16_16_Q15(n, ADD16(C[3], MULT16_16_Q15(n, C[4]))))))));
213*a58d3d2aSXin Li return SHL16(i-13,DB_SHIFT)+SHR16(frac,14-DB_SHIFT);
214*a58d3d2aSXin Li }
215*a58d3d2aSXin Li
216*a58d3d2aSXin Li /*
217*a58d3d2aSXin Li K0 = 1
218*a58d3d2aSXin Li K1 = log(2)
219*a58d3d2aSXin Li K2 = 3-4*log(2)
220*a58d3d2aSXin Li K3 = 3*log(2) - 2
221*a58d3d2aSXin Li */
222*a58d3d2aSXin Li #define D0 16383
223*a58d3d2aSXin Li #define D1 22804
224*a58d3d2aSXin Li #define D2 14819
225*a58d3d2aSXin Li #define D3 10204
226*a58d3d2aSXin Li
celt_exp2_frac(opus_val16 x)227*a58d3d2aSXin Li static OPUS_INLINE opus_val32 celt_exp2_frac(opus_val16 x)
228*a58d3d2aSXin Li {
229*a58d3d2aSXin Li opus_val16 frac;
230*a58d3d2aSXin Li frac = SHL16(x, 4);
231*a58d3d2aSXin Li return ADD16(D0, MULT16_16_Q15(frac, ADD16(D1, MULT16_16_Q15(frac, ADD16(D2 , MULT16_16_Q15(D3,frac))))));
232*a58d3d2aSXin Li }
233*a58d3d2aSXin Li
234*a58d3d2aSXin Li #undef D0
235*a58d3d2aSXin Li #undef D1
236*a58d3d2aSXin Li #undef D2
237*a58d3d2aSXin Li #undef D3
238*a58d3d2aSXin Li
239*a58d3d2aSXin Li /** Base-2 exponential approximation (2^x). (Q10 input, Q16 output) */
celt_exp2(opus_val16 x)240*a58d3d2aSXin Li static OPUS_INLINE opus_val32 celt_exp2(opus_val16 x)
241*a58d3d2aSXin Li {
242*a58d3d2aSXin Li int integer;
243*a58d3d2aSXin Li opus_val16 frac;
244*a58d3d2aSXin Li integer = SHR16(x,10);
245*a58d3d2aSXin Li if (integer>14)
246*a58d3d2aSXin Li return 0x7f000000;
247*a58d3d2aSXin Li else if (integer < -15)
248*a58d3d2aSXin Li return 0;
249*a58d3d2aSXin Li frac = celt_exp2_frac(x-SHL16(integer,10));
250*a58d3d2aSXin Li return VSHR32(EXTEND32(frac), -integer-2);
251*a58d3d2aSXin Li }
252*a58d3d2aSXin Li
253*a58d3d2aSXin Li opus_val32 celt_rcp(opus_val32 x);
254*a58d3d2aSXin Li
255*a58d3d2aSXin Li #define celt_div(a,b) MULT32_32_Q31((opus_val32)(a),celt_rcp(b))
256*a58d3d2aSXin Li
257*a58d3d2aSXin Li opus_val32 frac_div32(opus_val32 a, opus_val32 b);
258*a58d3d2aSXin Li
259*a58d3d2aSXin Li #define M1 32767
260*a58d3d2aSXin Li #define M2 -21
261*a58d3d2aSXin Li #define M3 -11943
262*a58d3d2aSXin Li #define M4 4936
263*a58d3d2aSXin Li
264*a58d3d2aSXin Li /* Atan approximation using a 4th order polynomial. Input is in Q15 format
265*a58d3d2aSXin Li and normalized by pi/4. Output is in Q15 format */
celt_atan01(opus_val16 x)266*a58d3d2aSXin Li static OPUS_INLINE opus_val16 celt_atan01(opus_val16 x)
267*a58d3d2aSXin Li {
268*a58d3d2aSXin Li return MULT16_16_P15(x, ADD32(M1, MULT16_16_P15(x, ADD32(M2, MULT16_16_P15(x, ADD32(M3, MULT16_16_P15(M4, x)))))));
269*a58d3d2aSXin Li }
270*a58d3d2aSXin Li
271*a58d3d2aSXin Li #undef M1
272*a58d3d2aSXin Li #undef M2
273*a58d3d2aSXin Li #undef M3
274*a58d3d2aSXin Li #undef M4
275*a58d3d2aSXin Li
276*a58d3d2aSXin Li /* atan2() approximation valid for positive input values */
celt_atan2p(opus_val16 y,opus_val16 x)277*a58d3d2aSXin Li static OPUS_INLINE opus_val16 celt_atan2p(opus_val16 y, opus_val16 x)
278*a58d3d2aSXin Li {
279*a58d3d2aSXin Li if (y < x)
280*a58d3d2aSXin Li {
281*a58d3d2aSXin Li opus_val32 arg;
282*a58d3d2aSXin Li arg = celt_div(SHL32(EXTEND32(y),15),x);
283*a58d3d2aSXin Li if (arg >= 32767)
284*a58d3d2aSXin Li arg = 32767;
285*a58d3d2aSXin Li return SHR16(celt_atan01(EXTRACT16(arg)),1);
286*a58d3d2aSXin Li } else {
287*a58d3d2aSXin Li opus_val32 arg;
288*a58d3d2aSXin Li arg = celt_div(SHL32(EXTEND32(x),15),y);
289*a58d3d2aSXin Li if (arg >= 32767)
290*a58d3d2aSXin Li arg = 32767;
291*a58d3d2aSXin Li return 25736-SHR16(celt_atan01(EXTRACT16(arg)),1);
292*a58d3d2aSXin Li }
293*a58d3d2aSXin Li }
294*a58d3d2aSXin Li
295*a58d3d2aSXin Li #endif /* FIXED_POINT */
296*a58d3d2aSXin Li #endif /* MATHOPS_H */
297