/* Copyright (c) 2007-2008 CSIRO
   Copyright (c) 2007-2009 Xiph.Org Foundation
   Written by Jean-Marc Valin */
/**
   @file pitch.c
   @brief Pitch analysis
 */
8*a58d3d2aSXin Li
/*
   Redistribution and use in source and binary forms, with or without
   modification, are permitted provided that the following conditions
   are met:

   - Redistributions of source code must retain the above copyright
   notice, this list of conditions and the following disclaimer.

   - Redistributions in binary form must reproduce the above copyright
   notice, this list of conditions and the following disclaimer in the
   documentation and/or other materials provided with the distribution.

   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
33*a58d3d2aSXin Li
34*a58d3d2aSXin Li #ifdef HAVE_CONFIG_H
35*a58d3d2aSXin Li #include "config.h"
36*a58d3d2aSXin Li #endif
37*a58d3d2aSXin Li
38*a58d3d2aSXin Li #include "pitch.h"
39*a58d3d2aSXin Li #include "os_support.h"
40*a58d3d2aSXin Li #include "modes.h"
41*a58d3d2aSXin Li #include "stack_alloc.h"
42*a58d3d2aSXin Li #include "mathops.h"
43*a58d3d2aSXin Li #include "celt_lpc.h"
44*a58d3d2aSXin Li
/* Picks the two best pitch candidates out of a cross-correlation curve.

   Every lag i in [0, max_pitch) with xcorr[i] > 0 is scored by
   xcorr[i]^2 / Syy(i), where Syy(i) is a running energy of y[i .. i+len-1]
   (seeded with 1 and clamped to at least 1).  Scores are compared by
   cross-multiplication, so no division is performed.

   xcorr      - correlation values; max_pitch entries are read
   y          - delayed signal; entries 0 .. len+max_pitch-1 are read
   len        - length of the energy window
   max_pitch  - number of lags to examine
   best_pitch - output: [0] is the best lag, [1] the runner-up; they keep
                their initial values 0 and 1 when no lag qualifies
   yshift     - (fixed-point only) right shift applied to each y[j]^2 term
   maxcorr    - (fixed-point only) used to derive the shift applied to
                xcorr[i] before it is squared */
static void find_best_pitch(opus_val32 *xcorr, opus_val16 *y, int len,
                            int max_pitch, int *best_pitch
#ifdef FIXED_POINT
                            , int yshift, opus_val32 maxcorr
#endif
                            )
{
   int lag, n;
   opus_val32 energy=1;
   opus_val16 top_num[2];
   opus_val32 top_den[2];
#ifdef FIXED_POINT
   int xshift;

   xshift = celt_ilog2(maxcorr)-14;
#endif

   /* Start with an unbeatable-by-nothing pair: negative numerator, zero
      denominator, so the first positive candidate always wins. */
   top_num[0] = top_num[1] = -1;
   top_den[0] = top_den[1] = 0;
   best_pitch[0] = 0;
   best_pitch[1] = 1;

   /* Energy of the window at lag 0. */
   for (n=0;n<len;n++)
      energy = ADD32(energy, SHR32(MULT16_16(y[n],y[n]), yshift));

   for (lag=0;lag<max_pitch;lag++)
   {
      if (xcorr[lag]>0)
      {
         opus_val16 num;
         opus_val32 xcorr16;
         xcorr16 = EXTRACT16(VSHR32(xcorr[lag], xshift));
#ifndef FIXED_POINT
         /* Considering the range of xcorr16, this should avoid both underflows
            and overflows (inf) when squaring xcorr16 */
         xcorr16 *= 1e-12f;
#endif
         num = MULT16_16_Q15(xcorr16,xcorr16);
         /* num/energy > top_num[k]/top_den[k], written without division. */
         if (MULT16_32_Q15(num,top_den[1]) > MULT16_32_Q15(top_num[1],energy))
         {
            int slot = 1;
            if (MULT16_32_Q15(num,top_den[0]) > MULT16_32_Q15(top_num[0],energy))
            {
               /* New overall best: demote the previous best to runner-up. */
               top_num[1] = top_num[0];
               top_den[1] = top_den[0];
               best_pitch[1] = best_pitch[0];
               slot = 0;
            }
            top_num[slot] = num;
            top_den[slot] = energy;
            best_pitch[slot] = lag;
         }
      }
      /* Slide the energy window by one sample. */
      energy += SHR32(MULT16_16(y[lag+len],y[lag+len]),yshift) - SHR32(MULT16_16(y[lag],y[lag]),yshift);
      energy = MAX32(1, energy);
   }
}
104*a58d3d2aSXin Li
/* In-place 5-tap FIR filter with zero initial history.

   For each sample the output is x[n] (scaled up by SIG_SHIFT) plus the sum
   of num[k] times the (k+1)-th previous *input* sample, rounded back down
   by SIG_SHIFT.

   x   - signal buffer of N samples, overwritten with the filtered signal
   num - the five filter taps num[0..4]
   N   - number of samples to process */
static void celt_fir5(opus_val16 *x,
         const opus_val16 *num,
         int N)
{
   int n, k;
   opus_val16 tap[5];
   opus_val32 hist[5];

   for (k=0;k<5;k++)
   {
      tap[k] = num[k];
      hist[k] = 0;
   }
   for (n=0;n<N;n++)
   {
      opus_val32 acc = SHL32(EXTEND32(x[n]), SIG_SHIFT);
      /* Accumulate taps from the most recent to the oldest sample. */
      for (k=0;k<5;k++)
         acc = MAC16_16(acc,tap[k],hist[k]);
      /* Age the history and record the unfiltered input. */
      for (k=4;k>0;k--)
         hist[k] = hist[k-1];
      hist[0] = x[n];
      x[n] = ROUND16(acc, SIG_SHIFT);
   }
}
138*a58d3d2aSXin Li
139*a58d3d2aSXin Li
/* Builds the half-rate signal used by the pitch search.

   Steps, in order:
    1. decimate by two with a (1/4, 1/2, 1/4) kernel, summing both channels
       when C==2 (fixed-point builds also scale down by a shift derived
       from the peak absolute input value);
    2. compute a 4th-order autocorrelation of the result, add a small noise
       floor and apply lag windowing;
    3. derive 4 LPC coefficients, damp them by successive powers of 0.9,
       add one extra zero, and run the resulting 5-tap FIR over x_lp.

   x    - per-channel input pointers; x[0] (and x[1] when C==2) are read
          over len samples
   x_lp - output buffer receiving len>>1 samples
   len  - number of input samples per channel
   C    - channel count; only the value 2 triggers use of x[1]
   arch - passed through to _celt_autocorr */
void pitch_downsample(celt_sig * OPUS_RESTRICT x[], opus_val16 * OPUS_RESTRICT x_lp,
      int len, int C, int arch)
{
   int n;
   const int half = len>>1;
   const celt_sig *ch;
   opus_val32 ac[5];
   opus_val16 decay=Q15ONE;
   opus_val16 lpc[4];
   opus_val16 lpc2[5];
   opus_val16 c1 = QCONST16(.8f,15);
#ifdef FIXED_POINT
   int shift;
   opus_val32 maxabs = celt_maxabs32(x[0], len);
   if (C==2)
   {
      opus_val32 maxabs_1 = celt_maxabs32(x[1], len);
      maxabs = MAX32(maxabs, maxabs_1);
   }
   if (maxabs<1)
      maxabs=1;
   /* Scale according to the input peak; never shift left. */
   shift = celt_ilog2(maxabs)-10;
   if (shift<0)
      shift=0;
   /* One more bit when two channels are summed. */
   if (C==2)
      shift++;

   ch = x[0];
   /* Sample 0 has no left neighbour. */
   x_lp[0] = SHR32(ch[1], shift+2) + SHR32(ch[0], shift+1);
   for (n=1;n<half;n++)
      x_lp[n] = SHR32(ch[(2*n-1)], shift+2) + SHR32(ch[(2*n+1)], shift+2) + SHR32(ch[2*n], shift+1);
   if (C==2)
   {
      ch = x[1];
      x_lp[0] += SHR32(ch[1], shift+2) + SHR32(ch[0], shift+1);
      for (n=1;n<half;n++)
         x_lp[n] += SHR32(ch[(2*n-1)], shift+2) + SHR32(ch[(2*n+1)], shift+2) + SHR32(ch[2*n], shift+1);
   }
#else
   ch = x[0];
   /* Sample 0 has no left neighbour. */
   x_lp[0] = .25f*ch[1] + .5f*ch[0];
   for (n=1;n<half;n++)
      x_lp[n] = .25f*ch[(2*n-1)] + .25f*ch[(2*n+1)] + .5f*ch[2*n];
   if (C==2)
   {
      ch = x[1];
      x_lp[0] += .25f*ch[1] + .5f*ch[0];
      for (n=1;n<half;n++)
         x_lp[n] += .25f*ch[(2*n-1)] + .25f*ch[(2*n+1)] + .5f*ch[2*n];
   }
#endif
   _celt_autocorr(x_lp, ac, NULL, 0,
                  4, half, arch);

   /* Noise floor -40 dB */
#ifdef FIXED_POINT
   ac[0] += SHR32(ac[0],13);
#else
   ac[0] *= 1.0001f;
#endif
   /* Lag windowing */
   for (n=1;n<=4;n++)
   {
      /*ac[n] *= exp(-.5*(2*M_PI*.002*n)*(2*M_PI*.002*n));*/
#ifdef FIXED_POINT
      ac[n] -= MULT16_32_Q15(2*n*n, ac[n]);
#else
      ac[n] -= ac[n]*(.008f*n)*(.008f*n);
#endif
   }

   _celt_lpc(lpc, ac, 4);
   /* Scale coefficient k by 0.9^(k+1). */
   for (n=0;n<4;n++)
   {
      decay = MULT16_16_Q15(QCONST16(.9f,15), decay);
      lpc[n] = MULT16_16_Q15(lpc[n], decay);
   }
   /* Add a zero */
   lpc2[0] = lpc[0] + QCONST16(.8f,SIG_SHIFT);
   for (n=1;n<4;n++)
      lpc2[n] = lpc[n] + MULT16_16_Q15(c1,lpc[n-1]);
   lpc2[4] = MULT16_16_Q15(c1,lpc[3]);
   celt_fir5(x_lp, lpc2, half);
}
218*a58d3d2aSXin Li
219*a58d3d2aSXin Li /* Pure C implementation. */
220*a58d3d2aSXin Li #ifdef FIXED_POINT
221*a58d3d2aSXin Li opus_val32
222*a58d3d2aSXin Li #else
223*a58d3d2aSXin Li void
224*a58d3d2aSXin Li #endif
celt_pitch_xcorr_c(const opus_val16 * _x,const opus_val16 * _y,opus_val32 * xcorr,int len,int max_pitch,int arch)225*a58d3d2aSXin Li celt_pitch_xcorr_c(const opus_val16 *_x, const opus_val16 *_y,
226*a58d3d2aSXin Li opus_val32 *xcorr, int len, int max_pitch, int arch)
227*a58d3d2aSXin Li {
228*a58d3d2aSXin Li
229*a58d3d2aSXin Li #if 0 /* This is a simple version of the pitch correlation that should work
230*a58d3d2aSXin Li well on DSPs like Blackfin and TI C5x/C6x */
231*a58d3d2aSXin Li int i, j;
232*a58d3d2aSXin Li #ifdef FIXED_POINT
233*a58d3d2aSXin Li opus_val32 maxcorr=1;
234*a58d3d2aSXin Li #endif
235*a58d3d2aSXin Li #if !defined(OVERRIDE_PITCH_XCORR)
236*a58d3d2aSXin Li (void)arch;
237*a58d3d2aSXin Li #endif
238*a58d3d2aSXin Li for (i=0;i<max_pitch;i++)
239*a58d3d2aSXin Li {
240*a58d3d2aSXin Li opus_val32 sum = 0;
241*a58d3d2aSXin Li for (j=0;j<len;j++)
242*a58d3d2aSXin Li sum = MAC16_16(sum, _x[j], _y[i+j]);
243*a58d3d2aSXin Li xcorr[i] = sum;
244*a58d3d2aSXin Li #ifdef FIXED_POINT
245*a58d3d2aSXin Li maxcorr = MAX32(maxcorr, sum);
246*a58d3d2aSXin Li #endif
247*a58d3d2aSXin Li }
248*a58d3d2aSXin Li #ifdef FIXED_POINT
249*a58d3d2aSXin Li return maxcorr;
250*a58d3d2aSXin Li #endif
251*a58d3d2aSXin Li
252*a58d3d2aSXin Li #else /* Unrolled version of the pitch correlation -- runs faster on x86 and ARM */
253*a58d3d2aSXin Li int i;
254*a58d3d2aSXin Li /*The EDSP version requires that max_pitch is at least 1, and that _x is
255*a58d3d2aSXin Li 32-bit aligned.
256*a58d3d2aSXin Li Since it's hard to put asserts in assembly, put them here.*/
257*a58d3d2aSXin Li #ifdef FIXED_POINT
258*a58d3d2aSXin Li opus_val32 maxcorr=1;
259*a58d3d2aSXin Li #endif
260*a58d3d2aSXin Li celt_assert(max_pitch>0);
261*a58d3d2aSXin Li celt_sig_assert(((size_t)_x&3)==0);
262*a58d3d2aSXin Li for (i=0;i<max_pitch-3;i+=4)
263*a58d3d2aSXin Li {
264*a58d3d2aSXin Li opus_val32 sum[4]={0,0,0,0};
265*a58d3d2aSXin Li #if defined(OPUS_CHECK_ASM) && defined(FIXED_POINT)
266*a58d3d2aSXin Li {
267*a58d3d2aSXin Li opus_val32 sum_c[4]={0,0,0,0};
268*a58d3d2aSXin Li xcorr_kernel_c(_x, _y+i, sum_c, len);
269*a58d3d2aSXin Li #endif
270*a58d3d2aSXin Li xcorr_kernel(_x, _y+i, sum, len, arch);
271*a58d3d2aSXin Li #if defined(OPUS_CHECK_ASM) && defined(FIXED_POINT)
272*a58d3d2aSXin Li celt_assert(memcmp(sum, sum_c, sizeof(sum)) == 0);
273*a58d3d2aSXin Li }
274*a58d3d2aSXin Li #endif
275*a58d3d2aSXin Li xcorr[i]=sum[0];
276*a58d3d2aSXin Li xcorr[i+1]=sum[1];
277*a58d3d2aSXin Li xcorr[i+2]=sum[2];
278*a58d3d2aSXin Li xcorr[i+3]=sum[3];
279*a58d3d2aSXin Li #ifdef FIXED_POINT
280*a58d3d2aSXin Li sum[0] = MAX32(sum[0], sum[1]);
281*a58d3d2aSXin Li sum[2] = MAX32(sum[2], sum[3]);
282*a58d3d2aSXin Li sum[0] = MAX32(sum[0], sum[2]);
283*a58d3d2aSXin Li maxcorr = MAX32(maxcorr, sum[0]);
284*a58d3d2aSXin Li #endif
285*a58d3d2aSXin Li }
286*a58d3d2aSXin Li /* In case max_pitch isn't a multiple of 4, do non-unrolled version. */
287*a58d3d2aSXin Li for (;i<max_pitch;i++)
288*a58d3d2aSXin Li {
289*a58d3d2aSXin Li opus_val32 sum;
290*a58d3d2aSXin Li sum = celt_inner_prod(_x, _y+i, len, arch);
291*a58d3d2aSXin Li xcorr[i] = sum;
292*a58d3d2aSXin Li #ifdef FIXED_POINT
293*a58d3d2aSXin Li maxcorr = MAX32(maxcorr, sum);
294*a58d3d2aSXin Li #endif
295*a58d3d2aSXin Li }
296*a58d3d2aSXin Li #ifdef FIXED_POINT
297*a58d3d2aSXin Li return maxcorr;
298*a58d3d2aSXin Li #endif
299*a58d3d2aSXin Li #endif
300*a58d3d2aSXin Li }
301*a58d3d2aSXin Li
/* Two-stage open-loop pitch search.

   Stage 1 takes every second sample of x_lp and y, correlates them over
   max_pitch>>2 lags and keeps the two best lags.  Stage 2 recomputes the
   correlation on x_lp / y directly, but only for lags within +/-2 of twice
   each stage-1 candidate, and picks the best again.  Finally the winner is
   nudged by -1, 0 or +1 based on the correlation values at its neighbours.

   x_lp      - target signal; len>>1 samples are read
   y         - delayed signal; (len+max_pitch)>>1 samples are read
   len       - asserted > 0
   max_pitch - asserted > 0
   pitch     - output: 2*best_lag - offset
   arch      - forwarded to the correlation helpers */
void pitch_search(const opus_val16 * OPUS_RESTRICT x_lp, opus_val16 * OPUS_RESTRICT y,
                  int len, int max_pitch, int *pitch, int arch)
{
   int lag, n;
   int span;
   int len2, len4, span4, pitch2, pitch4;
   int offset;
   int best_pitch[2]={0,0};
   VARDECL(opus_val16, x_lp4);
   VARDECL(opus_val16, y_lp4);
   VARDECL(opus_val32, xcorr);
#ifdef FIXED_POINT
   opus_val32 maxcorr;
   opus_val32 xmax, ymax;
   int shift=0;
#endif

   SAVE_STACK;

   celt_assert(len>0);
   celt_assert(max_pitch>0);
   span = len+max_pitch;
   len2 = len>>1;
   len4 = len>>2;
   span4 = span>>2;
   pitch2 = max_pitch>>1;
   pitch4 = max_pitch>>2;

   ALLOC(x_lp4, len4, opus_val16);
   ALLOC(y_lp4, span4, opus_val16);
   ALLOC(xcorr, pitch2, opus_val32);

   /* Downsample by 2 again */
   for (n=0;n<len4;n++)
      x_lp4[n] = x_lp[2*n];
   for (n=0;n<span4;n++)
      y_lp4[n] = y[2*n];

#ifdef FIXED_POINT
   xmax = celt_maxabs16(x_lp4, len4);
   ymax = celt_maxabs16(y_lp4, span4);
   shift = celt_ilog2(MAX32(1, MAX32(xmax, ymax)))-11;
   if (shift<=0)
   {
      shift = 0;
   } else {
      for (n=0;n<len4;n++)
         x_lp4[n] = SHR16(x_lp4[n], shift);
      for (n=0;n<span4;n++)
         y_lp4[n] = SHR16(y_lp4[n], shift);
      /* Use double the shift for a MAC */
      shift *= 2;
   }
#endif

   /* Coarse search with 4x decimation */

#ifdef FIXED_POINT
   maxcorr =
#endif
   celt_pitch_xcorr(x_lp4, y_lp4, xcorr, len4, pitch4, arch);

   find_best_pitch(xcorr, y_lp4, len4, pitch4, best_pitch
#ifdef FIXED_POINT
                   , 0, maxcorr
#endif
                   );

   /* Finer search with 2x decimation */
#ifdef FIXED_POINT
   maxcorr=1;
#endif
   for (lag=0;lag<pitch2;lag++)
   {
      opus_val32 sum;
      /* Only lags close to one of the coarse candidates are evaluated;
         every other entry stays at zero. */
      int near_first = abs(lag-2*best_pitch[0])<=2;
      int near_second = abs(lag-2*best_pitch[1])<=2;
      xcorr[lag] = 0;
      if (!(near_first || near_second))
         continue;
#ifdef FIXED_POINT
      sum = 0;
      for (n=0;n<len2;n++)
         sum += SHR32(MULT16_16(x_lp[n],y[lag+n]), shift);
#else
      sum = celt_inner_prod(x_lp, y+lag, len2, arch);
#endif
      xcorr[lag] = MAX32(-1, sum);
#ifdef FIXED_POINT
      maxcorr = MAX32(maxcorr, sum);
#endif
   }
   find_best_pitch(xcorr, y, len2, pitch2, best_pitch
#ifdef FIXED_POINT
                   , shift+1, maxcorr
#endif
                   );

   /* Refine by pseudo-interpolation */
   offset = 0;
   if (best_pitch[0]>0 && best_pitch[0]<pitch2-1)
   {
      opus_val32 before, peak, after;
      before = xcorr[best_pitch[0]-1];
      peak = xcorr[best_pitch[0]];
      after = xcorr[best_pitch[0]+1];
      if ((after-before) > MULT16_32_Q15(QCONST16(.7f,15),peak-before))
         offset = 1;
      else if ((before-after) > MULT16_32_Q15(QCONST16(.7f,15),peak-after))
         offset = -1;
   }
   *pitch = 2*best_pitch[0]-offset;

   RESTORE_STACK;
}
412*a58d3d2aSXin Li
413*a58d3d2aSXin Li #ifdef FIXED_POINT
compute_pitch_gain(opus_val32 xy,opus_val32 xx,opus_val32 yy)414*a58d3d2aSXin Li static opus_val16 compute_pitch_gain(opus_val32 xy, opus_val32 xx, opus_val32 yy)
415*a58d3d2aSXin Li {
416*a58d3d2aSXin Li opus_val32 x2y2;
417*a58d3d2aSXin Li int sx, sy, shift;
418*a58d3d2aSXin Li opus_val32 g;
419*a58d3d2aSXin Li opus_val16 den;
420*a58d3d2aSXin Li if (xy == 0 || xx == 0 || yy == 0)
421*a58d3d2aSXin Li return 0;
422*a58d3d2aSXin Li sx = celt_ilog2(xx)-14;
423*a58d3d2aSXin Li sy = celt_ilog2(yy)-14;
424*a58d3d2aSXin Li shift = sx + sy;
425*a58d3d2aSXin Li x2y2 = SHR32(MULT16_16(VSHR32(xx, sx), VSHR32(yy, sy)), 14);
426*a58d3d2aSXin Li if (shift & 1) {
427*a58d3d2aSXin Li if (x2y2 < 32768)
428*a58d3d2aSXin Li {
429*a58d3d2aSXin Li x2y2 <<= 1;
430*a58d3d2aSXin Li shift--;
431*a58d3d2aSXin Li } else {
432*a58d3d2aSXin Li x2y2 >>= 1;
433*a58d3d2aSXin Li shift++;
434*a58d3d2aSXin Li }
435*a58d3d2aSXin Li }
436*a58d3d2aSXin Li den = celt_rsqrt_norm(x2y2);
437*a58d3d2aSXin Li g = MULT16_32_Q15(den, xy);
438*a58d3d2aSXin Li g = VSHR32(g, (shift>>1)-1);
439*a58d3d2aSXin Li return EXTRACT16(MIN32(g, Q15ONE));
440*a58d3d2aSXin Li }
441*a58d3d2aSXin Li #else
compute_pitch_gain(opus_val32 xy,opus_val32 xx,opus_val32 yy)442*a58d3d2aSXin Li static opus_val16 compute_pitch_gain(opus_val32 xy, opus_val32 xx, opus_val32 yy)
443*a58d3d2aSXin Li {
444*a58d3d2aSXin Li return xy/celt_sqrt(1+xx*yy);
445*a58d3d2aSXin Li }
446*a58d3d2aSXin Li #endif
447*a58d3d2aSXin Li
/* For each divisor k, the multiple of T0/k used as the second lag that
   must also correlate well (indices 0 and 1 are unused placeholders). */
static const int second_check[16] = {0, 0, 3, 2, 3, 2, 5, 2, 3, 2, 3, 2, 5, 2, 3, 2};

/* Checks whether the detected pitch period is really a multiple of a
   shorter period and, if so, switches to the shorter one.

   All period/length arguments are halved on entry, so the search runs at
   half the resolution of the values passed in; the result is doubled
   again before being written back.

   x           - signal buffer; the analysis frame starts at
                 x + maxperiod/2 and samples before it are read as history
   maxperiod   - largest period considered
   minperiod   - smallest period considered; also the lower bound applied
                 to the returned period
   N           - frame length
   T0_         - in: initial period estimate; out: refined period
   prev_period - period of the previous frame, used to favour continuity
   prev_gain   - gain of the previous frame, used to relax the threshold
                 when a candidate is close to prev_period
   arch        - forwarded to the inner-product helpers

   Returns the pitch gain for the selected period: the ratio
   best_xy/(best_yy+1) (Q15ONE when best_yy <= best_xy), limited to the
   normalised gain g of that period. */
opus_val16 remove_doubling(opus_val16 *x, int maxperiod, int minperiod,
      int N, int *T0_, int prev_period, opus_val16 prev_gain, int arch)
{
   int k, i, T, T0;
   opus_val16 g, g0;
   opus_val16 pg;
   opus_val32 xy,xx,yy,xy2;
   opus_val32 xcorr[3];
   opus_val32 best_xy, best_yy;
   int offset;
   int minperiod0;
   VARDECL(opus_val32, yy_lookup);
   SAVE_STACK;

   minperiod0 = minperiod;
   maxperiod /= 2;
   minperiod /= 2;
   *T0_ /= 2;
   prev_period /= 2;
   N /= 2;
   x += maxperiod;
   if (*T0_>=maxperiod)
      *T0_=maxperiod-1;

   T = T0 = *T0_;
   ALLOC(yy_lookup, maxperiod+1, opus_val32);
   dual_inner_prod(x, x, x-T0, N, &xx, &xy, arch);
   /* yy_lookup[i] is the energy of the N-sample window delayed by i,
      built incrementally and clamped at zero. */
   yy_lookup[0] = xx;
   yy=xx;
   for (i=1;i<=maxperiod;i++)
   {
      yy = yy+MULT16_16(x[-i],x[-i])-MULT16_16(x[N-i],x[N-i]);
      yy_lookup[i] = MAX32(0, yy);
   }
   yy = yy_lookup[T0];
   best_xy = xy;
   best_yy = yy;
   g = g0 = compute_pitch_gain(xy, xx, yy);
   /* Look for any pitch at T/k */
   for (k=2;k<=15;k++)
   {
      int T1, T1b;
      opus_val16 g1;
      opus_val16 cont=0;
      opus_val16 thresh;
      /* T0/k rounded to nearest. */
      T1 = celt_udiv(2*T0+k, 2*k);
      if (T1 < minperiod)
         break;
      /* Look for another strong correlation at T1b */
      if (k==2)
      {
         if (T1+T0>maxperiod)
            T1b = T0;
         else
            T1b = T0+T1;
      } else
      {
         T1b = celt_udiv(2*second_check[k]*T0+k, 2*k);
      }
      dual_inner_prod(x, &x[-T1], &x[-T1b], N, &xy, &xy2, arch);
      xy = HALF32(xy + xy2);
      yy = HALF32(yy_lookup[T1] + yy_lookup[T1b]);
      g1 = compute_pitch_gain(xy, xx, yy);
      /* Continuity bonus: lower the bar when the candidate is close to the
         previous frame's period. */
      if (abs(T1-prev_period)<=1)
         cont = prev_gain;
      else if (abs(T1-prev_period)<=2 && 5*k*k < T0)
         cont = HALF16(prev_gain);
      else
         cont = 0;
      thresh = MAX16(QCONST16(.3f,15), MULT16_16_Q15(QCONST16(.7f,15),g0)-cont);
      /* Bias against very high pitch (very short period) to avoid false-positives
         due to short-term correlation.  The tighter 2*minperiod bound has to be
         tested first: with the 3*minperiod test in front, the stricter branch
         could never be reached. */
      if (T1<2*minperiod)
         thresh = MAX16(QCONST16(.5f,15), MULT16_16_Q15(QCONST16(.9f,15),g0)-cont);
      else if (T1<3*minperiod)
         thresh = MAX16(QCONST16(.4f,15), MULT16_16_Q15(QCONST16(.85f,15),g0)-cont);
      if (g1 > thresh)
      {
         best_xy = xy;
         best_yy = yy;
         T = T1;
         g = g1;
      }
   }
   best_xy = MAX32(0, best_xy);
   if (best_yy <= best_xy)
      pg = Q15ONE;
   else
      pg = SHR32(frac_div32(best_xy,best_yy+1),16);

   /* Correlations at T-1, T and T+1 decide the sub-sample offset. */
   for (k=0;k<3;k++)
      xcorr[k] = celt_inner_prod(x, x-(T+k-1), N, arch);
   if ((xcorr[2]-xcorr[0]) > MULT16_32_Q15(QCONST16(.7f,15),xcorr[1]-xcorr[0]))
      offset = 1;
   else if ((xcorr[0]-xcorr[2]) > MULT16_32_Q15(QCONST16(.7f,15),xcorr[1]-xcorr[2]))
      offset = -1;
   else
      offset = 0;
   if (pg > g)
      pg = g;
   *T0_ = 2*T+offset;

   if (*T0_<minperiod0)
      *T0_=minperiod0;
   RESTORE_STACK;
   return pg;
}
556