xref: /aosp_15_r20/external/libopus/celt/celt_lpc.c (revision a58d3d2adb790c104798cd88c8a3aff4fa8b82cc)
1*a58d3d2aSXin Li /* Copyright (c) 2009-2010 Xiph.Org Foundation
2*a58d3d2aSXin Li    Written by Jean-Marc Valin */
3*a58d3d2aSXin Li /*
4*a58d3d2aSXin Li    Redistribution and use in source and binary forms, with or without
5*a58d3d2aSXin Li    modification, are permitted provided that the following conditions
6*a58d3d2aSXin Li    are met:
7*a58d3d2aSXin Li 
8*a58d3d2aSXin Li    - Redistributions of source code must retain the above copyright
9*a58d3d2aSXin Li    notice, this list of conditions and the following disclaimer.
10*a58d3d2aSXin Li 
11*a58d3d2aSXin Li    - Redistributions in binary form must reproduce the above copyright
12*a58d3d2aSXin Li    notice, this list of conditions and the following disclaimer in the
13*a58d3d2aSXin Li    documentation and/or other materials provided with the distribution.
14*a58d3d2aSXin Li 
15*a58d3d2aSXin Li    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
16*a58d3d2aSXin Li    ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
17*a58d3d2aSXin Li    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
18*a58d3d2aSXin Li    A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
19*a58d3d2aSXin Li    OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
20*a58d3d2aSXin Li    EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
21*a58d3d2aSXin Li    PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
22*a58d3d2aSXin Li    PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
23*a58d3d2aSXin Li    LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
24*a58d3d2aSXin Li    NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25*a58d3d2aSXin Li    SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26*a58d3d2aSXin Li */
27*a58d3d2aSXin Li 
28*a58d3d2aSXin Li #ifdef HAVE_CONFIG_H
29*a58d3d2aSXin Li #include "config.h"
30*a58d3d2aSXin Li #endif
31*a58d3d2aSXin Li 
32*a58d3d2aSXin Li #include "celt_lpc.h"
33*a58d3d2aSXin Li #include "stack_alloc.h"
34*a58d3d2aSXin Li #include "mathops.h"
35*a58d3d2aSXin Li #include "pitch.h"
36*a58d3d2aSXin Li 
_celt_lpc(opus_val16 * _lpc,const opus_val32 * ac,int p)37*a58d3d2aSXin Li void _celt_lpc(
38*a58d3d2aSXin Li       opus_val16       *_lpc, /* out: [0...p-1] LPC coefficients      */
39*a58d3d2aSXin Li const opus_val32 *ac,  /* in:  [0...p] autocorrelation values  */
40*a58d3d2aSXin Li int          p
41*a58d3d2aSXin Li )
42*a58d3d2aSXin Li {
43*a58d3d2aSXin Li    int i, j;
44*a58d3d2aSXin Li    opus_val32 r;
45*a58d3d2aSXin Li    opus_val32 error = ac[0];
46*a58d3d2aSXin Li #ifdef FIXED_POINT
47*a58d3d2aSXin Li    opus_val32 lpc[CELT_LPC_ORDER];
48*a58d3d2aSXin Li #else
49*a58d3d2aSXin Li    float *lpc = _lpc;
50*a58d3d2aSXin Li #endif
51*a58d3d2aSXin Li 
52*a58d3d2aSXin Li    OPUS_CLEAR(lpc, p);
53*a58d3d2aSXin Li #ifdef FIXED_POINT
54*a58d3d2aSXin Li    if (ac[0] != 0)
55*a58d3d2aSXin Li #else
56*a58d3d2aSXin Li    if (ac[0] > 1e-10f)
57*a58d3d2aSXin Li #endif
58*a58d3d2aSXin Li    {
59*a58d3d2aSXin Li       for (i = 0; i < p; i++) {
60*a58d3d2aSXin Li          /* Sum up this iteration's reflection coefficient */
61*a58d3d2aSXin Li          opus_val32 rr = 0;
62*a58d3d2aSXin Li          for (j = 0; j < i; j++)
63*a58d3d2aSXin Li             rr += MULT32_32_Q31(lpc[j],ac[i - j]);
64*a58d3d2aSXin Li          rr += SHR32(ac[i + 1],6);
65*a58d3d2aSXin Li          r = -frac_div32(SHL32(rr,6), error);
66*a58d3d2aSXin Li          /*  Update LPC coefficients and total error */
67*a58d3d2aSXin Li          lpc[i] = SHR32(r,6);
68*a58d3d2aSXin Li          for (j = 0; j < (i+1)>>1; j++)
69*a58d3d2aSXin Li          {
70*a58d3d2aSXin Li             opus_val32 tmp1, tmp2;
71*a58d3d2aSXin Li             tmp1 = lpc[j];
72*a58d3d2aSXin Li             tmp2 = lpc[i-1-j];
73*a58d3d2aSXin Li             lpc[j]     = tmp1 + MULT32_32_Q31(r,tmp2);
74*a58d3d2aSXin Li             lpc[i-1-j] = tmp2 + MULT32_32_Q31(r,tmp1);
75*a58d3d2aSXin Li          }
76*a58d3d2aSXin Li 
77*a58d3d2aSXin Li          error = error - MULT32_32_Q31(MULT32_32_Q31(r,r),error);
78*a58d3d2aSXin Li          /* Bail out once we get 30 dB gain */
79*a58d3d2aSXin Li #ifdef FIXED_POINT
80*a58d3d2aSXin Li          if (error<=SHR32(ac[0],10))
81*a58d3d2aSXin Li             break;
82*a58d3d2aSXin Li #else
83*a58d3d2aSXin Li          if (error<=.001f*ac[0])
84*a58d3d2aSXin Li             break;
85*a58d3d2aSXin Li #endif
86*a58d3d2aSXin Li       }
87*a58d3d2aSXin Li    }
88*a58d3d2aSXin Li #ifdef FIXED_POINT
89*a58d3d2aSXin Li    {
90*a58d3d2aSXin Li       /* Convert the int32 lpcs to int16 and ensure there are no wrap-arounds.
91*a58d3d2aSXin Li          This reuses the logic in silk_LPC_fit() and silk_bwexpander_32(). Any bug
92*a58d3d2aSXin Li          fixes should also be applied there. */
93*a58d3d2aSXin Li       int iter, idx = 0;
94*a58d3d2aSXin Li       opus_val32 maxabs, absval, chirp_Q16, chirp_minus_one_Q16;
95*a58d3d2aSXin Li 
96*a58d3d2aSXin Li       for (iter = 0; iter < 10; iter++) {
97*a58d3d2aSXin Li          maxabs = 0;
98*a58d3d2aSXin Li          for (i = 0; i < p; i++) {
99*a58d3d2aSXin Li             absval = ABS32(lpc[i]);
100*a58d3d2aSXin Li             if (absval > maxabs) {
101*a58d3d2aSXin Li                maxabs = absval;
102*a58d3d2aSXin Li                idx = i;
103*a58d3d2aSXin Li             }
104*a58d3d2aSXin Li          }
105*a58d3d2aSXin Li          maxabs = PSHR32(maxabs, 13);  /* Q25->Q12 */
106*a58d3d2aSXin Li 
107*a58d3d2aSXin Li          if (maxabs > 32767) {
108*a58d3d2aSXin Li             maxabs = MIN32(maxabs, 163838);
109*a58d3d2aSXin Li             chirp_Q16 = QCONST32(0.999, 16) - DIV32(SHL32(maxabs - 32767, 14),
110*a58d3d2aSXin Li                                                     SHR32(MULT32_32_32(maxabs, idx + 1), 2));
111*a58d3d2aSXin Li             chirp_minus_one_Q16 = chirp_Q16 - 65536;
112*a58d3d2aSXin Li 
113*a58d3d2aSXin Li             /* Apply bandwidth expansion. */
114*a58d3d2aSXin Li             for (i = 0; i < p - 1; i++) {
115*a58d3d2aSXin Li                lpc[i] = MULT32_32_Q16(chirp_Q16, lpc[i]);
116*a58d3d2aSXin Li                chirp_Q16 += PSHR32(MULT32_32_32(chirp_Q16, chirp_minus_one_Q16), 16);
117*a58d3d2aSXin Li             }
118*a58d3d2aSXin Li             lpc[p - 1] = MULT32_32_Q16(chirp_Q16, lpc[p - 1]);
119*a58d3d2aSXin Li          } else {
120*a58d3d2aSXin Li             break;
121*a58d3d2aSXin Li          }
122*a58d3d2aSXin Li       }
123*a58d3d2aSXin Li 
124*a58d3d2aSXin Li       if (iter == 10) {
125*a58d3d2aSXin Li          /* If the coeffs still do not fit into the 16 bit range after 10 iterations,
126*a58d3d2aSXin Li             fall back to the A(z)=1 filter. */
127*a58d3d2aSXin Li          OPUS_CLEAR(lpc, p);
128*a58d3d2aSXin Li          _lpc[0] = 4096;  /* Q12 */
129*a58d3d2aSXin Li       } else {
130*a58d3d2aSXin Li          for (i = 0; i < p; i++) {
131*a58d3d2aSXin Li             _lpc[i] = EXTRACT16(PSHR32(lpc[i], 13));  /* Q25->Q12 */
132*a58d3d2aSXin Li          }
133*a58d3d2aSXin Li       }
134*a58d3d2aSXin Li    }
135*a58d3d2aSXin Li #endif
136*a58d3d2aSXin Li }
137*a58d3d2aSXin Li 
138*a58d3d2aSXin Li 
celt_fir_c(const opus_val16 * x,const opus_val16 * num,opus_val16 * y,int N,int ord,int arch)139*a58d3d2aSXin Li void celt_fir_c(
140*a58d3d2aSXin Li          const opus_val16 *x,
141*a58d3d2aSXin Li          const opus_val16 *num,
142*a58d3d2aSXin Li          opus_val16 *y,
143*a58d3d2aSXin Li          int N,
144*a58d3d2aSXin Li          int ord,
145*a58d3d2aSXin Li          int arch)
146*a58d3d2aSXin Li {
147*a58d3d2aSXin Li    int i,j;
148*a58d3d2aSXin Li    VARDECL(opus_val16, rnum);
149*a58d3d2aSXin Li    SAVE_STACK;
150*a58d3d2aSXin Li    celt_assert(x != y);
151*a58d3d2aSXin Li    ALLOC(rnum, ord, opus_val16);
152*a58d3d2aSXin Li    for(i=0;i<ord;i++)
153*a58d3d2aSXin Li       rnum[i] = num[ord-i-1];
154*a58d3d2aSXin Li    for (i=0;i<N-3;i+=4)
155*a58d3d2aSXin Li    {
156*a58d3d2aSXin Li       opus_val32 sum[4];
157*a58d3d2aSXin Li       sum[0] = SHL32(EXTEND32(x[i  ]), SIG_SHIFT);
158*a58d3d2aSXin Li       sum[1] = SHL32(EXTEND32(x[i+1]), SIG_SHIFT);
159*a58d3d2aSXin Li       sum[2] = SHL32(EXTEND32(x[i+2]), SIG_SHIFT);
160*a58d3d2aSXin Li       sum[3] = SHL32(EXTEND32(x[i+3]), SIG_SHIFT);
161*a58d3d2aSXin Li #if defined(OPUS_CHECK_ASM) && defined(FIXED_POINT)
162*a58d3d2aSXin Li       {
163*a58d3d2aSXin Li          opus_val32 sum_c[4];
164*a58d3d2aSXin Li          memcpy(sum_c, sum, sizeof(sum_c));
165*a58d3d2aSXin Li          xcorr_kernel_c(rnum, x+i-ord, sum_c, ord);
166*a58d3d2aSXin Li #endif
167*a58d3d2aSXin Li          xcorr_kernel(rnum, x+i-ord, sum, ord, arch);
168*a58d3d2aSXin Li #if defined(OPUS_CHECK_ASM) && defined(FIXED_POINT)
169*a58d3d2aSXin Li          celt_assert(memcmp(sum, sum_c, sizeof(sum)) == 0);
170*a58d3d2aSXin Li       }
171*a58d3d2aSXin Li #endif
172*a58d3d2aSXin Li       y[i  ] = SROUND16(sum[0], SIG_SHIFT);
173*a58d3d2aSXin Li       y[i+1] = SROUND16(sum[1], SIG_SHIFT);
174*a58d3d2aSXin Li       y[i+2] = SROUND16(sum[2], SIG_SHIFT);
175*a58d3d2aSXin Li       y[i+3] = SROUND16(sum[3], SIG_SHIFT);
176*a58d3d2aSXin Li    }
177*a58d3d2aSXin Li    for (;i<N;i++)
178*a58d3d2aSXin Li    {
179*a58d3d2aSXin Li       opus_val32 sum = SHL32(EXTEND32(x[i]), SIG_SHIFT);
180*a58d3d2aSXin Li       for (j=0;j<ord;j++)
181*a58d3d2aSXin Li          sum = MAC16_16(sum,rnum[j],x[i+j-ord]);
182*a58d3d2aSXin Li       y[i] = SROUND16(sum, SIG_SHIFT);
183*a58d3d2aSXin Li    }
184*a58d3d2aSXin Li    RESTORE_STACK;
185*a58d3d2aSXin Li }
186*a58d3d2aSXin Li 
celt_iir(const opus_val32 * _x,const opus_val16 * den,opus_val32 * _y,int N,int ord,opus_val16 * mem,int arch)187*a58d3d2aSXin Li void celt_iir(const opus_val32 *_x,
188*a58d3d2aSXin Li          const opus_val16 *den,
189*a58d3d2aSXin Li          opus_val32 *_y,
190*a58d3d2aSXin Li          int N,
191*a58d3d2aSXin Li          int ord,
192*a58d3d2aSXin Li          opus_val16 *mem,
193*a58d3d2aSXin Li          int arch)
194*a58d3d2aSXin Li {
195*a58d3d2aSXin Li #ifdef SMALL_FOOTPRINT
196*a58d3d2aSXin Li    int i,j;
197*a58d3d2aSXin Li    (void)arch;
198*a58d3d2aSXin Li    for (i=0;i<N;i++)
199*a58d3d2aSXin Li    {
200*a58d3d2aSXin Li       opus_val32 sum = _x[i];
201*a58d3d2aSXin Li       for (j=0;j<ord;j++)
202*a58d3d2aSXin Li       {
203*a58d3d2aSXin Li          sum -= MULT16_16(den[j],mem[j]);
204*a58d3d2aSXin Li       }
205*a58d3d2aSXin Li       for (j=ord-1;j>=1;j--)
206*a58d3d2aSXin Li       {
207*a58d3d2aSXin Li          mem[j]=mem[j-1];
208*a58d3d2aSXin Li       }
209*a58d3d2aSXin Li       mem[0] = SROUND16(sum, SIG_SHIFT);
210*a58d3d2aSXin Li       _y[i] = sum;
211*a58d3d2aSXin Li    }
212*a58d3d2aSXin Li #else
213*a58d3d2aSXin Li    int i,j;
214*a58d3d2aSXin Li    VARDECL(opus_val16, rden);
215*a58d3d2aSXin Li    VARDECL(opus_val16, y);
216*a58d3d2aSXin Li    SAVE_STACK;
217*a58d3d2aSXin Li 
218*a58d3d2aSXin Li    celt_assert((ord&3)==0);
219*a58d3d2aSXin Li    ALLOC(rden, ord, opus_val16);
220*a58d3d2aSXin Li    ALLOC(y, N+ord, opus_val16);
221*a58d3d2aSXin Li    for(i=0;i<ord;i++)
222*a58d3d2aSXin Li       rden[i] = den[ord-i-1];
223*a58d3d2aSXin Li    for(i=0;i<ord;i++)
224*a58d3d2aSXin Li       y[i] = -mem[ord-i-1];
225*a58d3d2aSXin Li    for(;i<N+ord;i++)
226*a58d3d2aSXin Li       y[i]=0;
227*a58d3d2aSXin Li    for (i=0;i<N-3;i+=4)
228*a58d3d2aSXin Li    {
229*a58d3d2aSXin Li       /* Unroll by 4 as if it were an FIR filter */
230*a58d3d2aSXin Li       opus_val32 sum[4];
231*a58d3d2aSXin Li       sum[0]=_x[i];
232*a58d3d2aSXin Li       sum[1]=_x[i+1];
233*a58d3d2aSXin Li       sum[2]=_x[i+2];
234*a58d3d2aSXin Li       sum[3]=_x[i+3];
235*a58d3d2aSXin Li #if defined(OPUS_CHECK_ASM) && defined(FIXED_POINT)
236*a58d3d2aSXin Li       {
237*a58d3d2aSXin Li          opus_val32 sum_c[4];
238*a58d3d2aSXin Li          memcpy(sum_c, sum, sizeof(sum_c));
239*a58d3d2aSXin Li          xcorr_kernel_c(rden, y+i, sum_c, ord);
240*a58d3d2aSXin Li #endif
241*a58d3d2aSXin Li          xcorr_kernel(rden, y+i, sum, ord, arch);
242*a58d3d2aSXin Li #if defined(OPUS_CHECK_ASM) && defined(FIXED_POINT)
243*a58d3d2aSXin Li          celt_assert(memcmp(sum, sum_c, sizeof(sum)) == 0);
244*a58d3d2aSXin Li       }
245*a58d3d2aSXin Li #endif
246*a58d3d2aSXin Li       /* Patch up the result to compensate for the fact that this is an IIR */
247*a58d3d2aSXin Li       y[i+ord  ] = -SROUND16(sum[0],SIG_SHIFT);
248*a58d3d2aSXin Li       _y[i  ] = sum[0];
249*a58d3d2aSXin Li       sum[1] = MAC16_16(sum[1], y[i+ord  ], den[0]);
250*a58d3d2aSXin Li       y[i+ord+1] = -SROUND16(sum[1],SIG_SHIFT);
251*a58d3d2aSXin Li       _y[i+1] = sum[1];
252*a58d3d2aSXin Li       sum[2] = MAC16_16(sum[2], y[i+ord+1], den[0]);
253*a58d3d2aSXin Li       sum[2] = MAC16_16(sum[2], y[i+ord  ], den[1]);
254*a58d3d2aSXin Li       y[i+ord+2] = -SROUND16(sum[2],SIG_SHIFT);
255*a58d3d2aSXin Li       _y[i+2] = sum[2];
256*a58d3d2aSXin Li 
257*a58d3d2aSXin Li       sum[3] = MAC16_16(sum[3], y[i+ord+2], den[0]);
258*a58d3d2aSXin Li       sum[3] = MAC16_16(sum[3], y[i+ord+1], den[1]);
259*a58d3d2aSXin Li       sum[3] = MAC16_16(sum[3], y[i+ord  ], den[2]);
260*a58d3d2aSXin Li       y[i+ord+3] = -SROUND16(sum[3],SIG_SHIFT);
261*a58d3d2aSXin Li       _y[i+3] = sum[3];
262*a58d3d2aSXin Li    }
263*a58d3d2aSXin Li    for (;i<N;i++)
264*a58d3d2aSXin Li    {
265*a58d3d2aSXin Li       opus_val32 sum = _x[i];
266*a58d3d2aSXin Li       for (j=0;j<ord;j++)
267*a58d3d2aSXin Li          sum -= MULT16_16(rden[j],y[i+j]);
268*a58d3d2aSXin Li       y[i+ord] = SROUND16(sum,SIG_SHIFT);
269*a58d3d2aSXin Li       _y[i] = sum;
270*a58d3d2aSXin Li    }
271*a58d3d2aSXin Li    for(i=0;i<ord;i++)
272*a58d3d2aSXin Li       mem[i] = _y[N-i-1];
273*a58d3d2aSXin Li    RESTORE_STACK;
274*a58d3d2aSXin Li #endif
275*a58d3d2aSXin Li }
276*a58d3d2aSXin Li 
_celt_autocorr(const opus_val16 * x,opus_val32 * ac,const opus_val16 * window,int overlap,int lag,int n,int arch)277*a58d3d2aSXin Li int _celt_autocorr(
278*a58d3d2aSXin Li                    const opus_val16 *x,   /*  in: [0...n-1] samples x   */
279*a58d3d2aSXin Li                    opus_val32       *ac,  /* out: [0...lag-1] ac values */
280*a58d3d2aSXin Li                    const opus_val16       *window,
281*a58d3d2aSXin Li                    int          overlap,
282*a58d3d2aSXin Li                    int          lag,
283*a58d3d2aSXin Li                    int          n,
284*a58d3d2aSXin Li                    int          arch
285*a58d3d2aSXin Li                   )
286*a58d3d2aSXin Li {
287*a58d3d2aSXin Li    opus_val32 d;
288*a58d3d2aSXin Li    int i, k;
289*a58d3d2aSXin Li    int fastN=n-lag;
290*a58d3d2aSXin Li    int shift;
291*a58d3d2aSXin Li    const opus_val16 *xptr;
292*a58d3d2aSXin Li    VARDECL(opus_val16, xx);
293*a58d3d2aSXin Li    SAVE_STACK;
294*a58d3d2aSXin Li    ALLOC(xx, n, opus_val16);
295*a58d3d2aSXin Li    celt_assert(n>0);
296*a58d3d2aSXin Li    celt_assert(overlap>=0);
297*a58d3d2aSXin Li    if (overlap == 0)
298*a58d3d2aSXin Li    {
299*a58d3d2aSXin Li       xptr = x;
300*a58d3d2aSXin Li    } else {
301*a58d3d2aSXin Li       for (i=0;i<n;i++)
302*a58d3d2aSXin Li          xx[i] = x[i];
303*a58d3d2aSXin Li       for (i=0;i<overlap;i++)
304*a58d3d2aSXin Li       {
305*a58d3d2aSXin Li          xx[i] = MULT16_16_Q15(x[i],window[i]);
306*a58d3d2aSXin Li          xx[n-i-1] = MULT16_16_Q15(x[n-i-1],window[i]);
307*a58d3d2aSXin Li       }
308*a58d3d2aSXin Li       xptr = xx;
309*a58d3d2aSXin Li    }
310*a58d3d2aSXin Li    shift=0;
311*a58d3d2aSXin Li #ifdef FIXED_POINT
312*a58d3d2aSXin Li    {
313*a58d3d2aSXin Li       opus_val32 ac0;
314*a58d3d2aSXin Li       ac0 = 1+(n<<7);
315*a58d3d2aSXin Li       if (n&1) ac0 += SHR32(MULT16_16(xptr[0],xptr[0]),9);
316*a58d3d2aSXin Li       for(i=(n&1);i<n;i+=2)
317*a58d3d2aSXin Li       {
318*a58d3d2aSXin Li          ac0 += SHR32(MULT16_16(xptr[i],xptr[i]),9);
319*a58d3d2aSXin Li          ac0 += SHR32(MULT16_16(xptr[i+1],xptr[i+1]),9);
320*a58d3d2aSXin Li       }
321*a58d3d2aSXin Li 
322*a58d3d2aSXin Li       shift = celt_ilog2(ac0)-30+10;
323*a58d3d2aSXin Li       shift = (shift)/2;
324*a58d3d2aSXin Li       if (shift>0)
325*a58d3d2aSXin Li       {
326*a58d3d2aSXin Li          for(i=0;i<n;i++)
327*a58d3d2aSXin Li             xx[i] = PSHR32(xptr[i], shift);
328*a58d3d2aSXin Li          xptr = xx;
329*a58d3d2aSXin Li       } else
330*a58d3d2aSXin Li          shift = 0;
331*a58d3d2aSXin Li    }
332*a58d3d2aSXin Li #endif
333*a58d3d2aSXin Li    celt_pitch_xcorr(xptr, xptr, ac, fastN, lag+1, arch);
334*a58d3d2aSXin Li    for (k=0;k<=lag;k++)
335*a58d3d2aSXin Li    {
336*a58d3d2aSXin Li       for (i = k+fastN, d = 0; i < n; i++)
337*a58d3d2aSXin Li          d = MAC16_16(d, xptr[i], xptr[i-k]);
338*a58d3d2aSXin Li       ac[k] += d;
339*a58d3d2aSXin Li    }
340*a58d3d2aSXin Li #ifdef FIXED_POINT
341*a58d3d2aSXin Li    shift = 2*shift;
342*a58d3d2aSXin Li    if (shift<=0)
343*a58d3d2aSXin Li       ac[0] += SHL32((opus_int32)1, -shift);
344*a58d3d2aSXin Li    if (ac[0] < 268435456)
345*a58d3d2aSXin Li    {
346*a58d3d2aSXin Li       int shift2 = 29 - EC_ILOG(ac[0]);
347*a58d3d2aSXin Li       for (i=0;i<=lag;i++)
348*a58d3d2aSXin Li          ac[i] = SHL32(ac[i], shift2);
349*a58d3d2aSXin Li       shift -= shift2;
350*a58d3d2aSXin Li    } else if (ac[0] >= 536870912)
351*a58d3d2aSXin Li    {
352*a58d3d2aSXin Li       int shift2=1;
353*a58d3d2aSXin Li       if (ac[0] >= 1073741824)
354*a58d3d2aSXin Li          shift2++;
355*a58d3d2aSXin Li       for (i=0;i<=lag;i++)
356*a58d3d2aSXin Li          ac[i] = SHR32(ac[i], shift2);
357*a58d3d2aSXin Li       shift += shift2;
358*a58d3d2aSXin Li    }
359*a58d3d2aSXin Li #endif
360*a58d3d2aSXin Li 
361*a58d3d2aSXin Li    RESTORE_STACK;
362*a58d3d2aSXin Li    return shift;
363*a58d3d2aSXin Li }
364