xref: /aosp_15_r20/external/pffft/test_pffastconv.c (revision 3f1979aa0d7ad34fcf3763de7b7b8f8cd67e5bdd)
1*3f1979aaSAndroid Build Coastguard Worker /*
2*3f1979aaSAndroid Build Coastguard Worker   Copyright (c) 2013 Julien Pommier.
3*3f1979aaSAndroid Build Coastguard Worker   Copyright (c) 2019  Hayati Ayguen ( [email protected] )
4*3f1979aaSAndroid Build Coastguard Worker  */
5*3f1979aaSAndroid Build Coastguard Worker 
6*3f1979aaSAndroid Build Coastguard Worker #define _WANT_SNAN  1
7*3f1979aaSAndroid Build Coastguard Worker 
8*3f1979aaSAndroid Build Coastguard Worker #include "pffft.h"
9*3f1979aaSAndroid Build Coastguard Worker #include "pffastconv.h"
10*3f1979aaSAndroid Build Coastguard Worker 
11*3f1979aaSAndroid Build Coastguard Worker #include <math.h>
12*3f1979aaSAndroid Build Coastguard Worker #include <float.h>
13*3f1979aaSAndroid Build Coastguard Worker #include <limits.h>
14*3f1979aaSAndroid Build Coastguard Worker #include <inttypes.h>
15*3f1979aaSAndroid Build Coastguard Worker #include <stdio.h>
16*3f1979aaSAndroid Build Coastguard Worker #include <stdlib.h>
17*3f1979aaSAndroid Build Coastguard Worker #include <time.h>
18*3f1979aaSAndroid Build Coastguard Worker #include <assert.h>
19*3f1979aaSAndroid Build Coastguard Worker #include <string.h>
20*3f1979aaSAndroid Build Coastguard Worker 
21*3f1979aaSAndroid Build Coastguard Worker #ifdef HAVE_SYS_TIMES
22*3f1979aaSAndroid Build Coastguard Worker #  include <sys/times.h>
23*3f1979aaSAndroid Build Coastguard Worker #  include <unistd.h>
24*3f1979aaSAndroid Build Coastguard Worker #endif
25*3f1979aaSAndroid Build Coastguard Worker 
26*3f1979aaSAndroid Build Coastguard Worker /*
27*3f1979aaSAndroid Build Coastguard Worker    vector support macros: the rest of the code is independent of
28*3f1979aaSAndroid Build Coastguard Worker    SSE/Altivec/NEON -- adding support for other platforms with 4-element
29*3f1979aaSAndroid Build Coastguard Worker    vectors should be limited to these macros
30*3f1979aaSAndroid Build Coastguard Worker */
31*3f1979aaSAndroid Build Coastguard Worker #if 0
32*3f1979aaSAndroid Build Coastguard Worker #include "simd/pf_float.h"
33*3f1979aaSAndroid Build Coastguard Worker #endif
34*3f1979aaSAndroid Build Coastguard Worker 
/* RESTRICT: no-alias pointer qualifier.  MSVC and GCC-compatible compilers
 * both get __restrict; on any other compiler the macro expands to nothing. */
#if defined(_MSC_VER) || defined(__GNUC__)
#  define RESTRICT __restrict
#else
#  define RESTRICT
#endif
42*3f1979aaSAndroid Build Coastguard Worker 
43*3f1979aaSAndroid Build Coastguard Worker 
44*3f1979aaSAndroid Build Coastguard Worker #if defined(_MSC_VER)
45*3f1979aaSAndroid Build Coastguard Worker #pragma warning( disable : 4244 )
46*3f1979aaSAndroid Build Coastguard Worker #endif
47*3f1979aaSAndroid Build Coastguard Worker 
48*3f1979aaSAndroid Build Coastguard Worker 
/* INVALID_FLOAT_VAL: conspicuous float value used as a guard marker
 * (convSetupRev() writes it behind the filter taps to make overruns visible).
 * The first macro that is available wins, in this order:
 * SNANF, SNAN, NAN, INFINITY, and finally FLT_MAX. */
#if defined(SNANF)
#  define INVALID_FLOAT_VAL  SNANF
#elif defined(SNAN)
#  define INVALID_FLOAT_VAL  SNAN
#elif defined(NAN)
#  define INVALID_FLOAT_VAL  NAN
#elif defined(INFINITY)
#  define INVALID_FLOAT_VAL  INFINITY
#else
#  define INVALID_FLOAT_VAL  FLT_MAX
#endif
60*3f1979aaSAndroid Build Coastguard Worker 
61*3f1979aaSAndroid Build Coastguard Worker 
/*
 * uclock_sec(): time stamp in seconds, used for benchmarking.
 *
 * With HAVE_SYS_TIMES defined, the value is the user time of this process
 * as reported by times(), divided by the sysconf(_SC_CLK_TCK) tick rate.
 * Otherwise it is clock() divided by CLOCKS_PER_SEC.
 *
 * Both variants are plain external functions, so the symbol has the same
 * linkage whichever branch is compiled.
 */
double uclock_sec(void)
{
#if defined(HAVE_SYS_TIMES)
  /* ticks per second; queried on the first call and cached afterwards */
  static double ttclk = 0.;
  struct tms t;
  if (ttclk == 0.)
    ttclk = sysconf(_SC_CLK_TCK);
  times(&t);
  /* use only the user time of this process - not realtime, which depends on OS-scheduler .. */
  return ((double)t.tms_utime) / ttclk;
#else
  return (double)clock() / (double)CLOCKS_PER_SEC;
#endif
}
76*3f1979aaSAndroid Build Coastguard Worker 
77*3f1979aaSAndroid Build Coastguard Worker 
78*3f1979aaSAndroid Build Coastguard Worker 
/*
 * Function-pointer signatures shared by the convolution implementations
 * in this file.
 */

/* Convolve len input samples X into Y.  The slow_conv_* implementations in
 * this file return the number of output samples they produced. */
typedef int (*pfnConvolution)(void * setup, const float * X, int len,
                              float *Y, const float *Yref, int applyFlush);

/* Create a setup object from Nf filter taps, a block length and flags. */
typedef void * (*pfnConvSetup)(float *Hfwd, int Nf, int * BlkLen, int flags);

/* Look up the convolution function that belongs to a setup object. */
typedef pfnConvolution (*pfnGetConvFnPtr)(void * setup);

/* Release a setup object. */
typedef void (*pfnConvDestroy)(void * setup);


/* Setup object of the slow reference convolutions; see convSetupRev(). */
struct ConvSetup
{
  pfnConvolution pfn;   /* convolution function; convSetupRev() stores NULL */
  int N;                /* number of filter taps */
  int B;                /* block length, copied from *BlkLen at setup time */
  float * H;            /* reversed filter taps followed by guard values */
  int flags;            /* PFFASTCONV_* flags given at setup time */
};
93*3f1979aaSAndroid Build Coastguard Worker 
94*3f1979aaSAndroid Build Coastguard Worker 
95*3f1979aaSAndroid Build Coastguard Worker void * convSetupRev( float * H, int N, int * BlkLen, int flags )
96*3f1979aaSAndroid Build Coastguard Worker {
97*3f1979aaSAndroid Build Coastguard Worker   struct ConvSetup * s = pffastconv_malloc( sizeof(struct ConvSetup) );
98*3f1979aaSAndroid Build Coastguard Worker   int i, Nr = N;
99*3f1979aaSAndroid Build Coastguard Worker   if (flags & PFFASTCONV_CPLX_INP_OUT)
100*3f1979aaSAndroid Build Coastguard Worker     Nr *= 2;
101*3f1979aaSAndroid Build Coastguard Worker   Nr += 4;
102*3f1979aaSAndroid Build Coastguard Worker   s->pfn = NULL;
103*3f1979aaSAndroid Build Coastguard Worker   s->N = N;
104*3f1979aaSAndroid Build Coastguard Worker   s->B = *BlkLen;
105*3f1979aaSAndroid Build Coastguard Worker   s->H = pffastconv_malloc((unsigned)Nr * sizeof(float));
106*3f1979aaSAndroid Build Coastguard Worker   s->flags = flags;
107*3f1979aaSAndroid Build Coastguard Worker   memset(s->H, 0, (unsigned)Nr * sizeof(float));
108*3f1979aaSAndroid Build Coastguard Worker   if (flags & PFFASTCONV_CPLX_INP_OUT)
109*3f1979aaSAndroid Build Coastguard Worker   {
110*3f1979aaSAndroid Build Coastguard Worker     for ( i = 0; i < N; ++i ) {
111*3f1979aaSAndroid Build Coastguard Worker       s->H[2*(N-1 -i)  ] = H[i];
112*3f1979aaSAndroid Build Coastguard Worker       s->H[2*(N-1 -i)+1] = H[i];
113*3f1979aaSAndroid Build Coastguard Worker     }
114*3f1979aaSAndroid Build Coastguard Worker     /* simpler detection of overruns */
115*3f1979aaSAndroid Build Coastguard Worker     s->H[ 2*N    ] = INVALID_FLOAT_VAL;
116*3f1979aaSAndroid Build Coastguard Worker     s->H[ 2*N +1 ] = INVALID_FLOAT_VAL;
117*3f1979aaSAndroid Build Coastguard Worker     s->H[ 2*N +2 ] = INVALID_FLOAT_VAL;
118*3f1979aaSAndroid Build Coastguard Worker     s->H[ 2*N +3 ] = INVALID_FLOAT_VAL;
119*3f1979aaSAndroid Build Coastguard Worker   }
120*3f1979aaSAndroid Build Coastguard Worker   else
121*3f1979aaSAndroid Build Coastguard Worker   {
122*3f1979aaSAndroid Build Coastguard Worker     for ( i = 0; i < N; ++i )
123*3f1979aaSAndroid Build Coastguard Worker       s->H[ N-1 -i ] = H[i];
124*3f1979aaSAndroid Build Coastguard Worker     /* simpler detection of overruns */
125*3f1979aaSAndroid Build Coastguard Worker     s->H[ N    ] = INVALID_FLOAT_VAL;
126*3f1979aaSAndroid Build Coastguard Worker     s->H[ N +1 ] = INVALID_FLOAT_VAL;
127*3f1979aaSAndroid Build Coastguard Worker     s->H[ N +2 ] = INVALID_FLOAT_VAL;
128*3f1979aaSAndroid Build Coastguard Worker     s->H[ N +3 ] = INVALID_FLOAT_VAL;
129*3f1979aaSAndroid Build Coastguard Worker   }
130*3f1979aaSAndroid Build Coastguard Worker   return s;
131*3f1979aaSAndroid Build Coastguard Worker }
132*3f1979aaSAndroid Build Coastguard Worker 
133*3f1979aaSAndroid Build Coastguard Worker void convDestroyRev( void * setup )
134*3f1979aaSAndroid Build Coastguard Worker {
135*3f1979aaSAndroid Build Coastguard Worker   struct ConvSetup * s = (struct ConvSetup*)setup;
136*3f1979aaSAndroid Build Coastguard Worker   pffastconv_free(s->H);
137*3f1979aaSAndroid Build Coastguard Worker   pffastconv_free(setup);
138*3f1979aaSAndroid Build Coastguard Worker }
139*3f1979aaSAndroid Build Coastguard Worker 
140*3f1979aaSAndroid Build Coastguard Worker 
141*3f1979aaSAndroid Build Coastguard Worker pfnConvolution ConvGetFnPtrRev( void * setup )
142*3f1979aaSAndroid Build Coastguard Worker {
143*3f1979aaSAndroid Build Coastguard Worker   struct ConvSetup * s = (struct ConvSetup*)setup;
144*3f1979aaSAndroid Build Coastguard Worker   if (!s)
145*3f1979aaSAndroid Build Coastguard Worker     return NULL;
146*3f1979aaSAndroid Build Coastguard Worker   return s->pfn;
147*3f1979aaSAndroid Build Coastguard Worker }
148*3f1979aaSAndroid Build Coastguard Worker 
149*3f1979aaSAndroid Build Coastguard Worker 
/* Destroy function for the SIMD variant: simply forwards to
 * convDestroyRev(). */
void convSimdDestroy( void * setup )
{
  convDestroyRev( setup );
}
154*3f1979aaSAndroid Build Coastguard Worker 
155*3f1979aaSAndroid Build Coastguard Worker 
/*
 * Create a pffastconv setup via pffastconv_new_setup().
 * When that call yields NULL, the parameters are printed to stdout and
 * NULL is returned.
 */
void * fastConvSetup( float * H, int N, int * BlkLen, int flags )
{
  void * setup = pffastconv_new_setup( H, N, BlkLen, flags );
  if (setup)
    return setup;

  printf("fastConvSetup(N = %d, *BlkLen = %d, flags = %d) = NULL\n", N, *BlkLen, flags);
  return NULL;
}
163*3f1979aaSAndroid Build Coastguard Worker 
164*3f1979aaSAndroid Build Coastguard Worker 
165*3f1979aaSAndroid Build Coastguard Worker void fastConvDestroy( void * setup )
166*3f1979aaSAndroid Build Coastguard Worker {
167*3f1979aaSAndroid Build Coastguard Worker   pffastconv_destroy_setup( (PFFASTCONV_Setup*)setup );
168*3f1979aaSAndroid Build Coastguard Worker }
169*3f1979aaSAndroid Build Coastguard Worker 
170*3f1979aaSAndroid Build Coastguard Worker 
171*3f1979aaSAndroid Build Coastguard Worker 
172*3f1979aaSAndroid Build Coastguard Worker int slow_conv_R(void * setup, const float * input, int len, float *output, const float *Yref, int applyFlush)
173*3f1979aaSAndroid Build Coastguard Worker {
174*3f1979aaSAndroid Build Coastguard Worker   struct ConvSetup * p = (struct ConvSetup*)setup;
175*3f1979aaSAndroid Build Coastguard Worker   const float * RESTRICT X = input;
176*3f1979aaSAndroid Build Coastguard Worker   const float * RESTRICT Hrev = p->H;
177*3f1979aaSAndroid Build Coastguard Worker   float * RESTRICT Y = output;
178*3f1979aaSAndroid Build Coastguard Worker   const int Nr = ((p->flags & PFFASTCONV_CPLX_INP_OUT) ? 2 : 1) * p->N;
179*3f1979aaSAndroid Build Coastguard Worker   const int lenNr = ((p->flags & PFFASTCONV_CPLX_INP_OUT) ? 2 : 1) * (len - p->N);
180*3f1979aaSAndroid Build Coastguard Worker   int i, j;
181*3f1979aaSAndroid Build Coastguard Worker   (void)Yref;
182*3f1979aaSAndroid Build Coastguard Worker   (void)applyFlush;
183*3f1979aaSAndroid Build Coastguard Worker 
184*3f1979aaSAndroid Build Coastguard Worker   if (p->flags & PFFASTCONV_CPLX_INP_OUT)
185*3f1979aaSAndroid Build Coastguard Worker   {
186*3f1979aaSAndroid Build Coastguard Worker     for ( i = 0; i <= lenNr; i += 2 )
187*3f1979aaSAndroid Build Coastguard Worker     {
188*3f1979aaSAndroid Build Coastguard Worker       float sumRe = 0.0F, sumIm = 0.0F;
189*3f1979aaSAndroid Build Coastguard Worker       for ( j = 0; j < Nr; j += 2 )
190*3f1979aaSAndroid Build Coastguard Worker       {
191*3f1979aaSAndroid Build Coastguard Worker         sumRe += X[i+j  ] * Hrev[j];
192*3f1979aaSAndroid Build Coastguard Worker         sumIm += X[i+j+1] * Hrev[j+1];
193*3f1979aaSAndroid Build Coastguard Worker       }
194*3f1979aaSAndroid Build Coastguard Worker       Y[i  ] = sumRe;
195*3f1979aaSAndroid Build Coastguard Worker       Y[i+1] = sumIm;
196*3f1979aaSAndroid Build Coastguard Worker     }
197*3f1979aaSAndroid Build Coastguard Worker     return i/2;
198*3f1979aaSAndroid Build Coastguard Worker   }
199*3f1979aaSAndroid Build Coastguard Worker   else
200*3f1979aaSAndroid Build Coastguard Worker   {
201*3f1979aaSAndroid Build Coastguard Worker     for ( i = 0; i <= lenNr; ++i )
202*3f1979aaSAndroid Build Coastguard Worker     {
203*3f1979aaSAndroid Build Coastguard Worker       float sum = 0.0F;
204*3f1979aaSAndroid Build Coastguard Worker       for (j = 0; j < Nr; ++j )
205*3f1979aaSAndroid Build Coastguard Worker         sum += X[i+j]   * Hrev[j];
206*3f1979aaSAndroid Build Coastguard Worker       Y[i] = sum;
207*3f1979aaSAndroid Build Coastguard Worker     }
208*3f1979aaSAndroid Build Coastguard Worker     return i;
209*3f1979aaSAndroid Build Coastguard Worker   }
210*3f1979aaSAndroid Build Coastguard Worker }
211*3f1979aaSAndroid Build Coastguard Worker 
212*3f1979aaSAndroid Build Coastguard Worker 
213*3f1979aaSAndroid Build Coastguard Worker 
214*3f1979aaSAndroid Build Coastguard Worker int slow_conv_A(void * setup, const float * input, int len, float *output, const float *Yref, int applyFlush)
215*3f1979aaSAndroid Build Coastguard Worker {
216*3f1979aaSAndroid Build Coastguard Worker   float sum[4];
217*3f1979aaSAndroid Build Coastguard Worker   struct ConvSetup * p = (struct ConvSetup*)setup;
218*3f1979aaSAndroid Build Coastguard Worker   const float * RESTRICT X = input;
219*3f1979aaSAndroid Build Coastguard Worker   const float * RESTRICT Hrev = p->H;
220*3f1979aaSAndroid Build Coastguard Worker   float * RESTRICT Y = output;
221*3f1979aaSAndroid Build Coastguard Worker   const int Nr = ((p->flags & PFFASTCONV_CPLX_INP_OUT) ? 2 : 1) * p->N;
222*3f1979aaSAndroid Build Coastguard Worker   const int lenNr = ((p->flags & PFFASTCONV_CPLX_INP_OUT) ? 2 : 1) * (len - p->N);
223*3f1979aaSAndroid Build Coastguard Worker   int i, j;
224*3f1979aaSAndroid Build Coastguard Worker   (void)Yref;
225*3f1979aaSAndroid Build Coastguard Worker   (void)applyFlush;
226*3f1979aaSAndroid Build Coastguard Worker 
227*3f1979aaSAndroid Build Coastguard Worker   if (p->flags & PFFASTCONV_CPLX_INP_OUT)
228*3f1979aaSAndroid Build Coastguard Worker   {
229*3f1979aaSAndroid Build Coastguard Worker     if ( (Nr & 3) == 0 )
230*3f1979aaSAndroid Build Coastguard Worker     {
231*3f1979aaSAndroid Build Coastguard Worker       for ( i = 0; i <= lenNr; i += 2 )
232*3f1979aaSAndroid Build Coastguard Worker       {
233*3f1979aaSAndroid Build Coastguard Worker         sum[0] = sum[1] = sum[2] = sum[3] = 0.0F;
234*3f1979aaSAndroid Build Coastguard Worker         for (j = 0; j < Nr; j += 4 )
235*3f1979aaSAndroid Build Coastguard Worker         {
236*3f1979aaSAndroid Build Coastguard Worker           sum[0] += X[i+j]   * Hrev[j];
237*3f1979aaSAndroid Build Coastguard Worker           sum[1] += X[i+j+1] * Hrev[j+1];
238*3f1979aaSAndroid Build Coastguard Worker           sum[2] += X[i+j+2] * Hrev[j+2];
239*3f1979aaSAndroid Build Coastguard Worker           sum[3] += X[i+j+3] * Hrev[j+3];
240*3f1979aaSAndroid Build Coastguard Worker         }
241*3f1979aaSAndroid Build Coastguard Worker         Y[i  ] = sum[0] + sum[2];
242*3f1979aaSAndroid Build Coastguard Worker         Y[i+1] = sum[1] + sum[3];
243*3f1979aaSAndroid Build Coastguard Worker       }
244*3f1979aaSAndroid Build Coastguard Worker     }
245*3f1979aaSAndroid Build Coastguard Worker     else
246*3f1979aaSAndroid Build Coastguard Worker     {
247*3f1979aaSAndroid Build Coastguard Worker       const int M = Nr & (~3);
248*3f1979aaSAndroid Build Coastguard Worker       for ( i = 0; i <= lenNr; i += 2 )
249*3f1979aaSAndroid Build Coastguard Worker       {
250*3f1979aaSAndroid Build Coastguard Worker         float tailSumRe = 0.0F, tailSumIm = 0.0F;
251*3f1979aaSAndroid Build Coastguard Worker         sum[0] = sum[1] = sum[2] = sum[3] = 0.0F;
252*3f1979aaSAndroid Build Coastguard Worker         for (j = 0; j < M; j += 4 )
253*3f1979aaSAndroid Build Coastguard Worker         {
254*3f1979aaSAndroid Build Coastguard Worker           sum[0] += X[i+j  ] * Hrev[j  ];
255*3f1979aaSAndroid Build Coastguard Worker           sum[1] += X[i+j+1] * Hrev[j+1];
256*3f1979aaSAndroid Build Coastguard Worker           sum[2] += X[i+j+2] * Hrev[j+2];
257*3f1979aaSAndroid Build Coastguard Worker           sum[3] += X[i+j+3] * Hrev[j+3];
258*3f1979aaSAndroid Build Coastguard Worker         }
259*3f1979aaSAndroid Build Coastguard Worker         for ( ; j < Nr; j += 2 ) {
260*3f1979aaSAndroid Build Coastguard Worker           tailSumRe += X[i+j  ] * Hrev[j  ];
261*3f1979aaSAndroid Build Coastguard Worker           tailSumIm += X[i+j+1] * Hrev[j+1];
262*3f1979aaSAndroid Build Coastguard Worker         }
263*3f1979aaSAndroid Build Coastguard Worker         Y[i  ] = ( sum[0] + sum[2] ) + tailSumRe;
264*3f1979aaSAndroid Build Coastguard Worker         Y[i+1] = ( sum[1] + sum[3] ) + tailSumIm;
265*3f1979aaSAndroid Build Coastguard Worker       }
266*3f1979aaSAndroid Build Coastguard Worker     }
267*3f1979aaSAndroid Build Coastguard Worker     return i/2;
268*3f1979aaSAndroid Build Coastguard Worker   }
269*3f1979aaSAndroid Build Coastguard Worker   else
270*3f1979aaSAndroid Build Coastguard Worker   {
271*3f1979aaSAndroid Build Coastguard Worker     if ( (Nr & 3) == 0 )
272*3f1979aaSAndroid Build Coastguard Worker     {
273*3f1979aaSAndroid Build Coastguard Worker       for ( i = 0; i <= lenNr; ++i )
274*3f1979aaSAndroid Build Coastguard Worker       {
275*3f1979aaSAndroid Build Coastguard Worker         sum[0] = sum[1] = sum[2] = sum[3] = 0.0F;
276*3f1979aaSAndroid Build Coastguard Worker         for (j = 0; j < Nr; j += 4 )
277*3f1979aaSAndroid Build Coastguard Worker         {
278*3f1979aaSAndroid Build Coastguard Worker           sum[0] += X[i+j]   * Hrev[j];
279*3f1979aaSAndroid Build Coastguard Worker           sum[1] += X[i+j+1] * Hrev[j+1];
280*3f1979aaSAndroid Build Coastguard Worker           sum[2] += X[i+j+2] * Hrev[j+2];
281*3f1979aaSAndroid Build Coastguard Worker           sum[3] += X[i+j+3] * Hrev[j+3];
282*3f1979aaSAndroid Build Coastguard Worker         }
283*3f1979aaSAndroid Build Coastguard Worker         Y[i] = sum[0] + sum[1] + sum[2] + sum[3];
284*3f1979aaSAndroid Build Coastguard Worker       }
285*3f1979aaSAndroid Build Coastguard Worker       return i;
286*3f1979aaSAndroid Build Coastguard Worker     }
287*3f1979aaSAndroid Build Coastguard Worker     else
288*3f1979aaSAndroid Build Coastguard Worker     {
289*3f1979aaSAndroid Build Coastguard Worker       const int M = Nr & (~3);
290*3f1979aaSAndroid Build Coastguard Worker       /* printf("A: Nr = %d, M = %d, H[M] = %f, H[M+1] = %f, H[M+2] = %f, H[M+3] = %f\n", Nr, M, Hrev[M], Hrev[M+1], Hrev[M+2], Hrev[M+3] ); */
291*3f1979aaSAndroid Build Coastguard Worker       for ( i = 0; i <= lenNr; ++i )
292*3f1979aaSAndroid Build Coastguard Worker       {
293*3f1979aaSAndroid Build Coastguard Worker         float tailSum = 0.0;
294*3f1979aaSAndroid Build Coastguard Worker         sum[0] = sum[1] = sum[2] = sum[3] = 0.0F;
295*3f1979aaSAndroid Build Coastguard Worker         for (j = 0; j < M; j += 4 )
296*3f1979aaSAndroid Build Coastguard Worker         {
297*3f1979aaSAndroid Build Coastguard Worker           sum[0] += X[i+j]   * Hrev[j];
298*3f1979aaSAndroid Build Coastguard Worker           sum[1] += X[i+j+1] * Hrev[j+1];
299*3f1979aaSAndroid Build Coastguard Worker           sum[2] += X[i+j+2] * Hrev[j+2];
300*3f1979aaSAndroid Build Coastguard Worker           sum[3] += X[i+j+3] * Hrev[j+3];
301*3f1979aaSAndroid Build Coastguard Worker         }
302*3f1979aaSAndroid Build Coastguard Worker         for ( ; j < Nr; ++j )
303*3f1979aaSAndroid Build Coastguard Worker           tailSum += X[i+j] * Hrev[j];
304*3f1979aaSAndroid Build Coastguard Worker         Y[i] = (sum[0] + sum[1]) + (sum[2] + sum[3]) + tailSum;
305*3f1979aaSAndroid Build Coastguard Worker       }
306*3f1979aaSAndroid Build Coastguard Worker       return i;
307*3f1979aaSAndroid Build Coastguard Worker     }
308*3f1979aaSAndroid Build Coastguard Worker   }
309*3f1979aaSAndroid Build Coastguard Worker }
310*3f1979aaSAndroid Build Coastguard Worker 
311*3f1979aaSAndroid Build Coastguard Worker 
312*3f1979aaSAndroid Build Coastguard Worker int slow_conv_B(void * setup, const float * input, int len, float *output, const float *Yref, int applyFlush)
313*3f1979aaSAndroid Build Coastguard Worker {
314*3f1979aaSAndroid Build Coastguard Worker   float sum[4];
315*3f1979aaSAndroid Build Coastguard Worker   struct ConvSetup * p = (struct ConvSetup*)setup;
316*3f1979aaSAndroid Build Coastguard Worker   (void)Yref;
317*3f1979aaSAndroid Build Coastguard Worker   (void)applyFlush;
318*3f1979aaSAndroid Build Coastguard Worker   if (p->flags & PFFASTCONV_SYMMETRIC)
319*3f1979aaSAndroid Build Coastguard Worker   {
320*3f1979aaSAndroid Build Coastguard Worker     const float * RESTRICT X = input;
321*3f1979aaSAndroid Build Coastguard Worker     const float * RESTRICT Hrev = p->H;
322*3f1979aaSAndroid Build Coastguard Worker     float * RESTRICT Y = output;
323*3f1979aaSAndroid Build Coastguard Worker     const int Nr = ((p->flags & PFFASTCONV_CPLX_INP_OUT) ? 2 : 1) * p->N;
324*3f1979aaSAndroid Build Coastguard Worker     const int lenNr = ((p->flags & PFFASTCONV_CPLX_INP_OUT) ? 2 : 1) * (len - p->N);
325*3f1979aaSAndroid Build Coastguard Worker     const int h = Nr / 2 -4;
326*3f1979aaSAndroid Build Coastguard Worker     const int E = Nr -4;
327*3f1979aaSAndroid Build Coastguard Worker     int i, j;
328*3f1979aaSAndroid Build Coastguard Worker 
329*3f1979aaSAndroid Build Coastguard Worker     if (p->flags & PFFASTCONV_CPLX_INP_OUT)
330*3f1979aaSAndroid Build Coastguard Worker     {
331*3f1979aaSAndroid Build Coastguard Worker       for ( i = 0; i <= lenNr; i += 2 )
332*3f1979aaSAndroid Build Coastguard Worker       {
333*3f1979aaSAndroid Build Coastguard Worker         const int k = i + E;
334*3f1979aaSAndroid Build Coastguard Worker         sum[0] = sum[1] = sum[2] = sum[3] = 0.0F;
335*3f1979aaSAndroid Build Coastguard Worker         for (j = 0; j <= h; j += 4 )
336*3f1979aaSAndroid Build Coastguard Worker         {
337*3f1979aaSAndroid Build Coastguard Worker           sum[0] += Hrev[j  ] * ( X[i+j  ] + X[k-j+2] );
338*3f1979aaSAndroid Build Coastguard Worker           sum[1] += Hrev[j+1] * ( X[i+j+1] + X[k-j+3] );
339*3f1979aaSAndroid Build Coastguard Worker           sum[2] += Hrev[j+2] * ( X[i+j+2] + X[k-j  ] );
340*3f1979aaSAndroid Build Coastguard Worker           sum[3] += Hrev[j+3] * ( X[i+j+3] + X[k-j+1] );
341*3f1979aaSAndroid Build Coastguard Worker         }
342*3f1979aaSAndroid Build Coastguard Worker         Y[i  ] = sum[0] + sum[2];
343*3f1979aaSAndroid Build Coastguard Worker         Y[i+1] = sum[1] + sum[3];
344*3f1979aaSAndroid Build Coastguard Worker       }
345*3f1979aaSAndroid Build Coastguard Worker       return i/2;
346*3f1979aaSAndroid Build Coastguard Worker     }
347*3f1979aaSAndroid Build Coastguard Worker     else
348*3f1979aaSAndroid Build Coastguard Worker     {
349*3f1979aaSAndroid Build Coastguard Worker       for ( i = 0; i <= lenNr; ++i )
350*3f1979aaSAndroid Build Coastguard Worker       {
351*3f1979aaSAndroid Build Coastguard Worker         const int k = i + E;
352*3f1979aaSAndroid Build Coastguard Worker         sum[0] = sum[1] = sum[2] = sum[3] = 0.0F;
353*3f1979aaSAndroid Build Coastguard Worker         for (j = 0; j <= h; j += 4 )
354*3f1979aaSAndroid Build Coastguard Worker         {
355*3f1979aaSAndroid Build Coastguard Worker           sum[0] += Hrev[j  ] * ( X[i+j  ] + X[k-j+3] );
356*3f1979aaSAndroid Build Coastguard Worker           sum[1] += Hrev[j+1] * ( X[i+j+1] + X[k-j+2] );
357*3f1979aaSAndroid Build Coastguard Worker           sum[2] += Hrev[j+2] * ( X[i+j+2] + X[k-j+1] );
358*3f1979aaSAndroid Build Coastguard Worker           sum[3] += Hrev[j+3] * ( X[i+j+3] + X[k-j  ] );
359*3f1979aaSAndroid Build Coastguard Worker         }
360*3f1979aaSAndroid Build Coastguard Worker         Y[i] = sum[0] + sum[1] + sum[2] + sum[3];
361*3f1979aaSAndroid Build Coastguard Worker       }
362*3f1979aaSAndroid Build Coastguard Worker       return i;
363*3f1979aaSAndroid Build Coastguard Worker     }
364*3f1979aaSAndroid Build Coastguard Worker   }
365*3f1979aaSAndroid Build Coastguard Worker   else
366*3f1979aaSAndroid Build Coastguard Worker   {
367*3f1979aaSAndroid Build Coastguard Worker     const float * RESTRICT X = input;
368*3f1979aaSAndroid Build Coastguard Worker     const float * RESTRICT Hrev = p->H;
369*3f1979aaSAndroid Build Coastguard Worker     float * RESTRICT Y = output;
370*3f1979aaSAndroid Build Coastguard Worker     const int Nr = ((p->flags & PFFASTCONV_CPLX_INP_OUT) ? 2 : 1) * p->N;
371*3f1979aaSAndroid Build Coastguard Worker     const int lenNr = ((p->flags & PFFASTCONV_CPLX_INP_OUT) ? 2 : 1) * (len - p->N);
372*3f1979aaSAndroid Build Coastguard Worker     int i, j;
373*3f1979aaSAndroid Build Coastguard Worker 
374*3f1979aaSAndroid Build Coastguard Worker     if (p->flags & PFFASTCONV_CPLX_INP_OUT)
375*3f1979aaSAndroid Build Coastguard Worker     {
376*3f1979aaSAndroid Build Coastguard Worker       for ( i = 0; i <= lenNr; i += 2 )
377*3f1979aaSAndroid Build Coastguard Worker       {
378*3f1979aaSAndroid Build Coastguard Worker         sum[0] = sum[1] = sum[2] = sum[3] = 0.0F;
379*3f1979aaSAndroid Build Coastguard Worker         for (j = 0; j < Nr; j += 4 )
380*3f1979aaSAndroid Build Coastguard Worker         {
381*3f1979aaSAndroid Build Coastguard Worker           sum[0] += X[i+j]   * Hrev[j];
382*3f1979aaSAndroid Build Coastguard Worker           sum[1] += X[i+j+1] * Hrev[j+1];
383*3f1979aaSAndroid Build Coastguard Worker           sum[2] += X[i+j+2] * Hrev[j+2];
384*3f1979aaSAndroid Build Coastguard Worker           sum[3] += X[i+j+3] * Hrev[j+3];
385*3f1979aaSAndroid Build Coastguard Worker         }
386*3f1979aaSAndroid Build Coastguard Worker         Y[i  ] = sum[0] + sum[2];
387*3f1979aaSAndroid Build Coastguard Worker         Y[i+1] = sum[1] + sum[3];
388*3f1979aaSAndroid Build Coastguard Worker       }
389*3f1979aaSAndroid Build Coastguard Worker       return i/2;
390*3f1979aaSAndroid Build Coastguard Worker     }
391*3f1979aaSAndroid Build Coastguard Worker     else
392*3f1979aaSAndroid Build Coastguard Worker     {
393*3f1979aaSAndroid Build Coastguard Worker       if ( (Nr & 3) == 0 )
394*3f1979aaSAndroid Build Coastguard Worker       {
395*3f1979aaSAndroid Build Coastguard Worker         for ( i = 0; i <= lenNr; ++i )
396*3f1979aaSAndroid Build Coastguard Worker         {
397*3f1979aaSAndroid Build Coastguard Worker           sum[0] = sum[1] = sum[2] = sum[3] = 0.0F;
398*3f1979aaSAndroid Build Coastguard Worker           for (j = 0; j < Nr; j += 4 )
399*3f1979aaSAndroid Build Coastguard Worker           {
400*3f1979aaSAndroid Build Coastguard Worker             sum[0] += X[i+j]   * Hrev[j];
401*3f1979aaSAndroid Build Coastguard Worker             sum[1] += X[i+j+1] * Hrev[j+1];
402*3f1979aaSAndroid Build Coastguard Worker             sum[2] += X[i+j+2] * Hrev[j+2];
403*3f1979aaSAndroid Build Coastguard Worker             sum[3] += X[i+j+3] * Hrev[j+3];
404*3f1979aaSAndroid Build Coastguard Worker           }
405*3f1979aaSAndroid Build Coastguard Worker           Y[i] = (sum[0] + sum[1]) + (sum[2] + sum[3]);
406*3f1979aaSAndroid Build Coastguard Worker         }
407*3f1979aaSAndroid Build Coastguard Worker         return i;
408*3f1979aaSAndroid Build Coastguard Worker       }
409*3f1979aaSAndroid Build Coastguard Worker       else
410*3f1979aaSAndroid Build Coastguard Worker       {
411*3f1979aaSAndroid Build Coastguard Worker         const int M = Nr & (~3);
412*3f1979aaSAndroid Build Coastguard Worker         /* printf("B: Nr = %d\n", Nr ); */
413*3f1979aaSAndroid Build Coastguard Worker         for ( i = 0; i <= lenNr; ++i )
414*3f1979aaSAndroid Build Coastguard Worker         {
415*3f1979aaSAndroid Build Coastguard Worker           float tailSum = 0.0;
416*3f1979aaSAndroid Build Coastguard Worker           sum[0] = sum[1] = sum[2] = sum[3] = 0.0F;
417*3f1979aaSAndroid Build Coastguard Worker           for (j = 0; j < M; j += 4 )
418*3f1979aaSAndroid Build Coastguard Worker           {
419*3f1979aaSAndroid Build Coastguard Worker             sum[0] += X[i+j]   * Hrev[j];
420*3f1979aaSAndroid Build Coastguard Worker             sum[1] += X[i+j+1] * Hrev[j+1];
421*3f1979aaSAndroid Build Coastguard Worker             sum[2] += X[i+j+2] * Hrev[j+2];
422*3f1979aaSAndroid Build Coastguard Worker             sum[3] += X[i+j+3] * Hrev[j+3];
423*3f1979aaSAndroid Build Coastguard Worker           }
424*3f1979aaSAndroid Build Coastguard Worker           for ( ; j < Nr; ++j )
425*3f1979aaSAndroid Build Coastguard Worker             tailSum += X[i+j] * Hrev[j];
426*3f1979aaSAndroid Build Coastguard Worker           Y[i] = (sum[0] + sum[1]) + (sum[2] + sum[3]) + tailSum;
427*3f1979aaSAndroid Build Coastguard Worker         }
428*3f1979aaSAndroid Build Coastguard Worker         return i;
429*3f1979aaSAndroid Build Coastguard Worker       }
430*3f1979aaSAndroid Build Coastguard Worker     }
431*3f1979aaSAndroid Build Coastguard Worker   }
432*3f1979aaSAndroid Build Coastguard Worker 
433*3f1979aaSAndroid Build Coastguard Worker }
434*3f1979aaSAndroid Build Coastguard Worker 
435*3f1979aaSAndroid Build Coastguard Worker 
436*3f1979aaSAndroid Build Coastguard Worker int fast_conv(void * setup, const float * X, int len, float *Y, const float *Yref, int applyFlush)
437*3f1979aaSAndroid Build Coastguard Worker {
438*3f1979aaSAndroid Build Coastguard Worker   (void)Yref;
439*3f1979aaSAndroid Build Coastguard Worker   return pffastconv_apply( (PFFASTCONV_Setup*)setup, X, len, Y, applyFlush );
440*3f1979aaSAndroid Build Coastguard Worker }
441*3f1979aaSAndroid Build Coastguard Worker 
442*3f1979aaSAndroid Build Coastguard Worker 
443*3f1979aaSAndroid Build Coastguard Worker 
/*
 * Debug helper that is currently switched off: as compiled, it ignores all
 * of its arguments and prints nothing.
 *
 * The disabled branch would print the first N values of V, perLine values
 * per output line, each line prefixed with the label st and the index of
 * its first value.
 */
void printFirst( const float * V, const char * st, const int N, const int perLine )
{
#if 0
  int i;
  for ( i = 0; i < N; ++i )
  {
    if ( (i % perLine) == 0 )
      printf("\n%s[%d]", st, i);
    printf("\t%.1f", V[i]);
  }
  printf("\n");
#else
  (void)V;
  (void)st;
  (void)N;
  (void)perLine;
#endif
}
459*3f1979aaSAndroid Build Coastguard Worker 
460*3f1979aaSAndroid Build Coastguard Worker 
461*3f1979aaSAndroid Build Coastguard Worker 
462*3f1979aaSAndroid Build Coastguard Worker #define NUMY       11
463*3f1979aaSAndroid Build Coastguard Worker 
464*3f1979aaSAndroid Build Coastguard Worker 
465*3f1979aaSAndroid Build Coastguard Worker int test(int FILTERLEN, int convFlags, const int testOutLen, int printDbg, int printSpeed) {
466*3f1979aaSAndroid Build Coastguard Worker   double t0, t1, tstop, td, tdref;
467*3f1979aaSAndroid Build Coastguard Worker   float *X, *H;
468*3f1979aaSAndroid Build Coastguard Worker   float *Y[NUMY];
469*3f1979aaSAndroid Build Coastguard Worker   int64_t outN[NUMY];
470*3f1979aaSAndroid Build Coastguard Worker   /* 256 KFloats or 16 MFloats data */
471*3f1979aaSAndroid Build Coastguard Worker #if 1
472*3f1979aaSAndroid Build Coastguard Worker   const int len = testOutLen ? (1 << 18) : (1 << 24);
473*3f1979aaSAndroid Build Coastguard Worker #elif 0
474*3f1979aaSAndroid Build Coastguard Worker   const int len = testOutLen ? (1 << 18) : (1 << 13);
475*3f1979aaSAndroid Build Coastguard Worker #else
476*3f1979aaSAndroid Build Coastguard Worker   const int len = testOutLen ? (1 << 18) : (1024);
477*3f1979aaSAndroid Build Coastguard Worker #endif
478*3f1979aaSAndroid Build Coastguard Worker   const int cplxFactor = ( convFlags & PFFASTCONV_CPLX_INP_OUT ) ? 2 : 1;
479*3f1979aaSAndroid Build Coastguard Worker   const int lenC = len / cplxFactor;
480*3f1979aaSAndroid Build Coastguard Worker 
481*3f1979aaSAndroid Build Coastguard Worker   int yi, yc, posMaxErr;
482*3f1979aaSAndroid Build Coastguard Worker   float yRangeMin, yRangeMax, yErrLimit, maxErr = 0.0;
483*3f1979aaSAndroid Build Coastguard Worker   int i, j, numErrOverLimit, iter;
484*3f1979aaSAndroid Build Coastguard Worker   int retErr = 0;
485*3f1979aaSAndroid Build Coastguard Worker 
486*3f1979aaSAndroid Build Coastguard Worker   /*                                  0               1               2               3                   4                   5                   6                   7                   8                      9  */
487*3f1979aaSAndroid Build Coastguard Worker   pfnConvSetup   aSetup[NUMY]     = { convSetupRev,   convSetupRev,   convSetupRev,   fastConvSetup,      fastConvSetup,      fastConvSetup,      fastConvSetup,      fastConvSetup,      fastConvSetup,         fastConvSetup   };
488*3f1979aaSAndroid Build Coastguard Worker   pfnConvDestroy aDestroy[NUMY]   = { convDestroyRev, convDestroyRev, convDestroyRev, fastConvDestroy,    fastConvDestroy,    fastConvDestroy,    fastConvDestroy,    fastConvDestroy,    fastConvDestroy,       fastConvDestroy };
489*3f1979aaSAndroid Build Coastguard Worker   pfnGetConvFnPtr aGetFnPtr[NUMY] = { NULL,           NULL,           NULL,           NULL,               NULL,               NULL,               NULL,               NULL,               NULL,                  NULL,           };
490*3f1979aaSAndroid Build Coastguard Worker   pfnConvolution aConv[NUMY]      = { slow_conv_R,    slow_conv_A,    slow_conv_B,    fast_conv,          fast_conv,          fast_conv,          fast_conv,          fast_conv,          fast_conv,             fast_conv       };
491*3f1979aaSAndroid Build Coastguard Worker   const char * convText[NUMY]     = { "R(non-simd)",  "A(non-simd)",  "B(non-simd)",  "fast_conv_64",     "fast_conv_128",    "fast_conv_256",    "fast_conv_512",    "fast_conv_1K",     "fast_conv_2K",        "fast_conv_4K"  };
492*3f1979aaSAndroid Build Coastguard Worker   int    aFastAlgo[NUMY]          = { 0,              0,              0,              1,                  1,                  1,                  1,                  1,                  1,                     1               };
493*3f1979aaSAndroid Build Coastguard Worker   void * aSetupCfg[NUMY]          = { NULL,           NULL,           NULL,           NULL,               NULL,               NULL,               NULL,               NULL,               NULL,                  NULL            };
494*3f1979aaSAndroid Build Coastguard Worker   int    aBlkLen[NUMY]            = { 1024,           1024,           1024,           64,                 128,                256,                512,                1024,               2048,                  4096            };
495*3f1979aaSAndroid Build Coastguard Worker #if 1
496*3f1979aaSAndroid Build Coastguard Worker   int    aRunAlgo[NUMY]           = { 1,              1,              1,              FILTERLEN<64,       FILTERLEN<128,      FILTERLEN<256,      FILTERLEN<512,      FILTERLEN<1024,     FILTERLEN<2048,        FILTERLEN<4096  };
497*3f1979aaSAndroid Build Coastguard Worker #elif 0
498*3f1979aaSAndroid Build Coastguard Worker   int    aRunAlgo[NUMY]           = { 1,              0,              0,              0 && FILTERLEN<64,  1 && FILTERLEN<128, 1 && FILTERLEN<256, 0 && FILTERLEN<512, 0 && FILTERLEN<1024, 0 && FILTERLEN<2048,  0 && FILTERLEN<4096  };
499*3f1979aaSAndroid Build Coastguard Worker #else
500*3f1979aaSAndroid Build Coastguard Worker   int    aRunAlgo[NUMY]           = { 1,              1,              1,              0 && FILTERLEN<64,  0 && FILTERLEN<128, 1 && FILTERLEN<256, 0 && FILTERLEN<512, 0 && FILTERLEN<1024, 0 && FILTERLEN<2048,  0 && FILTERLEN<4096  };
501*3f1979aaSAndroid Build Coastguard Worker #endif
502*3f1979aaSAndroid Build Coastguard Worker   double aSpeedFactor[NUMY], aDuration[NUMY], procSmpPerSec[NUMY];
503*3f1979aaSAndroid Build Coastguard Worker 
504*3f1979aaSAndroid Build Coastguard Worker   X = pffastconv_malloc( (unsigned)(len+4) * sizeof(float) );
505*3f1979aaSAndroid Build Coastguard Worker   for ( i=0; i < NUMY; ++i)
506*3f1979aaSAndroid Build Coastguard Worker   {
507*3f1979aaSAndroid Build Coastguard Worker     if ( 1 || i < 2 )
508*3f1979aaSAndroid Build Coastguard Worker       Y[i] = pffastconv_malloc( (unsigned)len * sizeof(float) );
509*3f1979aaSAndroid Build Coastguard Worker     else
510*3f1979aaSAndroid Build Coastguard Worker       Y[i] = Y[1];
511*3f1979aaSAndroid Build Coastguard Worker 
512*3f1979aaSAndroid Build Coastguard Worker     Y[i][0] = 123.F;  /* test for pffft_zconvolve_no_accu() */
513*3f1979aaSAndroid Build Coastguard Worker     aSpeedFactor[i] = -1.0;
514*3f1979aaSAndroid Build Coastguard Worker     aDuration[i] = -1.0;
515*3f1979aaSAndroid Build Coastguard Worker     procSmpPerSec[i] = -1.0;
516*3f1979aaSAndroid Build Coastguard Worker   }
517*3f1979aaSAndroid Build Coastguard Worker 
518*3f1979aaSAndroid Build Coastguard Worker   H = pffastconv_malloc((unsigned)FILTERLEN * sizeof(float));
519*3f1979aaSAndroid Build Coastguard Worker 
520*3f1979aaSAndroid Build Coastguard Worker   /* initialize input */
521*3f1979aaSAndroid Build Coastguard Worker   if ( convFlags & PFFASTCONV_CPLX_INP_OUT )
522*3f1979aaSAndroid Build Coastguard Worker   {
523*3f1979aaSAndroid Build Coastguard Worker     for ( i = 0; i < lenC; ++i )
524*3f1979aaSAndroid Build Coastguard Worker     {
525*3f1979aaSAndroid Build Coastguard Worker       X[2*i  ] = (float)(i % 4093);  /* 4094 is a prime number. see https://en.wikipedia.org/wiki/List_of_prime_numbers */
526*3f1979aaSAndroid Build Coastguard Worker       X[2*i+1] = (float)((i+2048) % 4093);
527*3f1979aaSAndroid Build Coastguard Worker     }
528*3f1979aaSAndroid Build Coastguard Worker   }
529*3f1979aaSAndroid Build Coastguard Worker   else
530*3f1979aaSAndroid Build Coastguard Worker   {
531*3f1979aaSAndroid Build Coastguard Worker     for ( i = 0; i < len; ++i )
532*3f1979aaSAndroid Build Coastguard Worker       X[i] = (float)(i % 4093);  /* 4094 is a prime number. see https://en.wikipedia.org/wiki/List_of_prime_numbers */
533*3f1979aaSAndroid Build Coastguard Worker   }
534*3f1979aaSAndroid Build Coastguard Worker   X[ len    ] = INVALID_FLOAT_VAL;
535*3f1979aaSAndroid Build Coastguard Worker   X[ len +1 ] = INVALID_FLOAT_VAL;
536*3f1979aaSAndroid Build Coastguard Worker   X[ len +2 ] = INVALID_FLOAT_VAL;
537*3f1979aaSAndroid Build Coastguard Worker   X[ len +3 ] = INVALID_FLOAT_VAL;
538*3f1979aaSAndroid Build Coastguard Worker 
539*3f1979aaSAndroid Build Coastguard Worker   if (!testOutLen)
540*3f1979aaSAndroid Build Coastguard Worker     printFirst( X, "X", 64, 8 );
541*3f1979aaSAndroid Build Coastguard Worker 
542*3f1979aaSAndroid Build Coastguard Worker   /* filter coeffs */
543*3f1979aaSAndroid Build Coastguard Worker   memset( H, 0, FILTERLEN * sizeof(float) );
544*3f1979aaSAndroid Build Coastguard Worker #if 1
545*3f1979aaSAndroid Build Coastguard Worker   if ( convFlags & PFFASTCONV_SYMMETRIC )
546*3f1979aaSAndroid Build Coastguard Worker   {
547*3f1979aaSAndroid Build Coastguard Worker     const int half = FILTERLEN / 2;
548*3f1979aaSAndroid Build Coastguard Worker     for ( j = 0; j < half; ++j ) {
549*3f1979aaSAndroid Build Coastguard Worker       switch (j % 3) {
550*3f1979aaSAndroid Build Coastguard Worker         case 0: H[j] = H[FILTERLEN-1-j] = -1.0F;  break;
551*3f1979aaSAndroid Build Coastguard Worker         case 1: H[j] = H[FILTERLEN-1-j] =  1.0F;  break;
552*3f1979aaSAndroid Build Coastguard Worker         case 2: H[j] = H[FILTERLEN-1-j] =  0.5F;  break;
553*3f1979aaSAndroid Build Coastguard Worker       }
554*3f1979aaSAndroid Build Coastguard Worker     }
555*3f1979aaSAndroid Build Coastguard Worker   }
556*3f1979aaSAndroid Build Coastguard Worker   else
557*3f1979aaSAndroid Build Coastguard Worker   {
558*3f1979aaSAndroid Build Coastguard Worker     for ( j = 0; j < FILTERLEN; ++j ) {
559*3f1979aaSAndroid Build Coastguard Worker       switch (j % 3) {
560*3f1979aaSAndroid Build Coastguard Worker         case 0: H[j] = -1.0F;  break;
561*3f1979aaSAndroid Build Coastguard Worker         case 1: H[j] = 1.0F;   break;
562*3f1979aaSAndroid Build Coastguard Worker         case 2: H[j] = 0.5F;   break;
563*3f1979aaSAndroid Build Coastguard Worker       }
564*3f1979aaSAndroid Build Coastguard Worker     }
565*3f1979aaSAndroid Build Coastguard Worker   }
566*3f1979aaSAndroid Build Coastguard Worker #else
567*3f1979aaSAndroid Build Coastguard Worker   H[0] = 1.0F;
568*3f1979aaSAndroid Build Coastguard Worker   H[FILTERLEN -1] = 1.0F;
569*3f1979aaSAndroid Build Coastguard Worker #endif
570*3f1979aaSAndroid Build Coastguard Worker   if (!testOutLen)
571*3f1979aaSAndroid Build Coastguard Worker     printFirst( H, "H", FILTERLEN, 8 );
572*3f1979aaSAndroid Build Coastguard Worker 
573*3f1979aaSAndroid Build Coastguard Worker   printf("\n");
574*3f1979aaSAndroid Build Coastguard Worker   printf("filterLen = %d\t%s%s\t%s:\n", FILTERLEN,
575*3f1979aaSAndroid Build Coastguard Worker     ((convFlags & PFFASTCONV_CPLX_INP_OUT)?"cplx":"real"),
576*3f1979aaSAndroid Build Coastguard Worker     (convFlags & PFFASTCONV_CPLX_INP_OUT)?((convFlags & PFFASTCONV_CPLX_SINGLE_FFT)?" single":" 2x") : "",
577*3f1979aaSAndroid Build Coastguard Worker     ((convFlags & PFFASTCONV_SYMMETRIC)?"symmetric":"non-sym") );
578*3f1979aaSAndroid Build Coastguard Worker 
579*3f1979aaSAndroid Build Coastguard Worker   while (1)
580*3f1979aaSAndroid Build Coastguard Worker   {
581*3f1979aaSAndroid Build Coastguard Worker 
582*3f1979aaSAndroid Build Coastguard Worker     for ( yi = 0; yi < NUMY; ++yi )
583*3f1979aaSAndroid Build Coastguard Worker     {
584*3f1979aaSAndroid Build Coastguard Worker       if (!aRunAlgo[yi])
585*3f1979aaSAndroid Build Coastguard Worker         continue;
586*3f1979aaSAndroid Build Coastguard Worker 
587*3f1979aaSAndroid Build Coastguard Worker       aSetupCfg[yi] = aSetup[yi]( H, FILTERLEN, &aBlkLen[yi], convFlags );
588*3f1979aaSAndroid Build Coastguard Worker 
589*3f1979aaSAndroid Build Coastguard Worker       /* get effective apply function ptr */
590*3f1979aaSAndroid Build Coastguard Worker       if ( aSetupCfg[yi] && aGetFnPtr[yi] )
591*3f1979aaSAndroid Build Coastguard Worker         aConv[yi] = aGetFnPtr[yi]( aSetupCfg[yi] );
592*3f1979aaSAndroid Build Coastguard Worker 
593*3f1979aaSAndroid Build Coastguard Worker       if ( aSetupCfg[yi] && aConv[yi] ) {
594*3f1979aaSAndroid Build Coastguard Worker         if (testOutLen)
595*3f1979aaSAndroid Build Coastguard Worker         {
596*3f1979aaSAndroid Build Coastguard Worker           t0 = uclock_sec();
597*3f1979aaSAndroid Build Coastguard Worker           outN[yi] = aConv[yi]( aSetupCfg[yi], X, lenC, Y[yi], Y[0], 1 /* applyFlush */ );
598*3f1979aaSAndroid Build Coastguard Worker           t1 = uclock_sec();
599*3f1979aaSAndroid Build Coastguard Worker           td = t1 - t0;
600*3f1979aaSAndroid Build Coastguard Worker         }
601*3f1979aaSAndroid Build Coastguard Worker         else
602*3f1979aaSAndroid Build Coastguard Worker         {
603*3f1979aaSAndroid Build Coastguard Worker           const int blkLen = 4096;  /* required for 'fast_conv_4K' */
604*3f1979aaSAndroid Build Coastguard Worker           int64_t offC = 0, offS, Nout;
605*3f1979aaSAndroid Build Coastguard Worker           int k;
606*3f1979aaSAndroid Build Coastguard Worker           iter = 0;
607*3f1979aaSAndroid Build Coastguard Worker           outN[yi] = 0;
608*3f1979aaSAndroid Build Coastguard Worker           t0 = uclock_sec();
609*3f1979aaSAndroid Build Coastguard Worker           tstop = t0 + 0.25;  /* benchmark duration: 250 ms */
610*3f1979aaSAndroid Build Coastguard Worker           do {
611*3f1979aaSAndroid Build Coastguard Worker             for ( k = 0; k < 128 && offC +blkLen < lenC; ++k )
612*3f1979aaSAndroid Build Coastguard Worker             {
613*3f1979aaSAndroid Build Coastguard Worker               offS = cplxFactor * offC;
614*3f1979aaSAndroid Build Coastguard Worker               Nout = aConv[yi]( aSetupCfg[yi], X +offS, blkLen, Y[yi] +offS, Y[0], (offC +blkLen >= lenC) /* applyFlush */ );
615*3f1979aaSAndroid Build Coastguard Worker               offC += Nout;
616*3f1979aaSAndroid Build Coastguard Worker               ++iter;
617*3f1979aaSAndroid Build Coastguard Worker               if ( !Nout )
618*3f1979aaSAndroid Build Coastguard Worker                 break;
619*3f1979aaSAndroid Build Coastguard Worker               if ( offC +blkLen >= lenC )
620*3f1979aaSAndroid Build Coastguard Worker               {
621*3f1979aaSAndroid Build Coastguard Worker                 outN[yi] += offC;
622*3f1979aaSAndroid Build Coastguard Worker                 offC = 0;
623*3f1979aaSAndroid Build Coastguard Worker               }
624*3f1979aaSAndroid Build Coastguard Worker             }
625*3f1979aaSAndroid Build Coastguard Worker             t1 = uclock_sec();
626*3f1979aaSAndroid Build Coastguard Worker           } while ( t1 < tstop );
627*3f1979aaSAndroid Build Coastguard Worker           outN[yi] += offC;
628*3f1979aaSAndroid Build Coastguard Worker           td = t1 - t0;
629*3f1979aaSAndroid Build Coastguard Worker           procSmpPerSec[yi] = cplxFactor * (double)outN[yi] / td;
630*3f1979aaSAndroid Build Coastguard Worker         }
631*3f1979aaSAndroid Build Coastguard Worker       }
632*3f1979aaSAndroid Build Coastguard Worker       else
633*3f1979aaSAndroid Build Coastguard Worker       {
634*3f1979aaSAndroid Build Coastguard Worker         t0 = t1 = td = 0.0;
635*3f1979aaSAndroid Build Coastguard Worker         outN[yi] = 0;
636*3f1979aaSAndroid Build Coastguard Worker       }
637*3f1979aaSAndroid Build Coastguard Worker       aDuration[yi] = td;
638*3f1979aaSAndroid Build Coastguard Worker       if ( yi == 0 ) {
639*3f1979aaSAndroid Build Coastguard Worker         const float * Yvals = Y[0];
640*3f1979aaSAndroid Build Coastguard Worker         const int64_t refOutLen = cplxFactor * outN[0];
641*3f1979aaSAndroid Build Coastguard Worker         tdref = td;
642*3f1979aaSAndroid Build Coastguard Worker         if (printDbg) {
643*3f1979aaSAndroid Build Coastguard Worker           printf("convolution '%s' took: %f ms\n", convText[yi], td*1000.0);
644*3f1979aaSAndroid Build Coastguard Worker           printf("  convolution '%s' output size %" PRId64 " == (cplx) len %d + %" PRId64 "\n", convText[yi], outN[yi], len / cplxFactor, outN[yi] - len / cplxFactor);
645*3f1979aaSAndroid Build Coastguard Worker         }
646*3f1979aaSAndroid Build Coastguard Worker         aSpeedFactor[yi] = 1.0;
647*3f1979aaSAndroid Build Coastguard Worker         /*  */
648*3f1979aaSAndroid Build Coastguard Worker         yRangeMin = FLT_MAX;
649*3f1979aaSAndroid Build Coastguard Worker         yRangeMax = FLT_MIN;
650*3f1979aaSAndroid Build Coastguard Worker         for ( i = 0; i < refOutLen; ++i )
651*3f1979aaSAndroid Build Coastguard Worker         {
652*3f1979aaSAndroid Build Coastguard Worker           if ( yRangeMax < Yvals[i] )  yRangeMax = Yvals[i];
653*3f1979aaSAndroid Build Coastguard Worker           if ( yRangeMin > Yvals[i] )  yRangeMin = Yvals[i];
654*3f1979aaSAndroid Build Coastguard Worker         }
655*3f1979aaSAndroid Build Coastguard Worker         yErrLimit = fabsf(yRangeMax - yRangeMin) / ( 100.0F * 1000.0F );
656*3f1979aaSAndroid Build Coastguard Worker         /* yErrLimit = 0.01F; */
657*3f1979aaSAndroid Build Coastguard Worker         if (testOutLen) {
658*3f1979aaSAndroid Build Coastguard Worker           if (1) {
659*3f1979aaSAndroid Build Coastguard Worker             printf("reference output len = %" PRId64 " smp\n", outN[0]);
660*3f1979aaSAndroid Build Coastguard Worker             printf("reference output range |%.1f ..%.1f| = %.1f ==> err limit = %f\n", yRangeMin, yRangeMax, yRangeMax - yRangeMin, yErrLimit);
661*3f1979aaSAndroid Build Coastguard Worker           }
662*3f1979aaSAndroid Build Coastguard Worker           printFirst( Yvals, "Yref", 64, 8 );
663*3f1979aaSAndroid Build Coastguard Worker         }
664*3f1979aaSAndroid Build Coastguard Worker       }
665*3f1979aaSAndroid Build Coastguard Worker       else
666*3f1979aaSAndroid Build Coastguard Worker       {
667*3f1979aaSAndroid Build Coastguard Worker         aSpeedFactor[yi] = tdref / td;
668*3f1979aaSAndroid Build Coastguard Worker         if (printDbg) {
669*3f1979aaSAndroid Build Coastguard Worker           printf("\nconvolution '%s' took: %f ms == %f %% == %f X\n", convText[yi], td*1000.0, td * 100 / tdref, tdref / td);
670*3f1979aaSAndroid Build Coastguard Worker           printf("  convolution '%s' output size %" PRId64 " == (cplx) len %d + %" PRId64 "\n", convText[yi], outN[yi], len / cplxFactor, outN[yi] - len / cplxFactor);
671*3f1979aaSAndroid Build Coastguard Worker         }
672*3f1979aaSAndroid Build Coastguard Worker       }
673*3f1979aaSAndroid Build Coastguard Worker     }
674*3f1979aaSAndroid Build Coastguard Worker 
675*3f1979aaSAndroid Build Coastguard Worker     int iMaxSpeedSlowAlgo = -1;
676*3f1979aaSAndroid Build Coastguard Worker     int iFirstFastAlgo = -1;
677*3f1979aaSAndroid Build Coastguard Worker     int iMaxSpeedFastAlgo = -1;
678*3f1979aaSAndroid Build Coastguard Worker     int iPrintedRefOutLen = 0;
679*3f1979aaSAndroid Build Coastguard Worker     {
680*3f1979aaSAndroid Build Coastguard Worker       for ( yc = 1; yc < NUMY; ++yc )
681*3f1979aaSAndroid Build Coastguard Worker       {
682*3f1979aaSAndroid Build Coastguard Worker         if (!aRunAlgo[yc])
683*3f1979aaSAndroid Build Coastguard Worker           continue;
684*3f1979aaSAndroid Build Coastguard Worker         if (aFastAlgo[yc]) {
685*3f1979aaSAndroid Build Coastguard Worker           if ( iMaxSpeedFastAlgo < 0 || aSpeedFactor[yc] > aSpeedFactor[iMaxSpeedFastAlgo] )
686*3f1979aaSAndroid Build Coastguard Worker             iMaxSpeedFastAlgo = yc;
687*3f1979aaSAndroid Build Coastguard Worker 
688*3f1979aaSAndroid Build Coastguard Worker           if (iFirstFastAlgo < 0)
689*3f1979aaSAndroid Build Coastguard Worker             iFirstFastAlgo = yc;
690*3f1979aaSAndroid Build Coastguard Worker         }
691*3f1979aaSAndroid Build Coastguard Worker         else
692*3f1979aaSAndroid Build Coastguard Worker         {
693*3f1979aaSAndroid Build Coastguard Worker           if ( iMaxSpeedSlowAlgo < 0 || aSpeedFactor[yc] > aSpeedFactor[iMaxSpeedSlowAlgo] )
694*3f1979aaSAndroid Build Coastguard Worker             iMaxSpeedSlowAlgo = yc;
695*3f1979aaSAndroid Build Coastguard Worker         }
696*3f1979aaSAndroid Build Coastguard Worker       }
697*3f1979aaSAndroid Build Coastguard Worker 
698*3f1979aaSAndroid Build Coastguard Worker       if (printSpeed)
699*3f1979aaSAndroid Build Coastguard Worker       {
700*3f1979aaSAndroid Build Coastguard Worker         if (testOutLen)
701*3f1979aaSAndroid Build Coastguard Worker         {
702*3f1979aaSAndroid Build Coastguard Worker           if (iMaxSpeedSlowAlgo >= 0 )
703*3f1979aaSAndroid Build Coastguard Worker             printf("fastest slow algorithm is '%s' at speed %f X ; abs duration %f ms\n", convText[iMaxSpeedSlowAlgo], aSpeedFactor[iMaxSpeedSlowAlgo], 1000.0 * aDuration[iMaxSpeedSlowAlgo]);
704*3f1979aaSAndroid Build Coastguard Worker           if (0 != iMaxSpeedSlowAlgo && aRunAlgo[0])
705*3f1979aaSAndroid Build Coastguard Worker             printf("slow algorithm '%s' at speed %f X ; abs duration %f ms\n", convText[0], aSpeedFactor[0], 1000.0 * aDuration[0]);
706*3f1979aaSAndroid Build Coastguard Worker           if (1 != iMaxSpeedSlowAlgo && aRunAlgo[1])
707*3f1979aaSAndroid Build Coastguard Worker             printf("slow algorithm '%s' at speed %f X ; abs duration %f ms\n", convText[1], aSpeedFactor[1], 1000.0 * aDuration[1]);
708*3f1979aaSAndroid Build Coastguard Worker 
709*3f1979aaSAndroid Build Coastguard Worker           if (iFirstFastAlgo >= 0 && iFirstFastAlgo != iMaxSpeedFastAlgo && aRunAlgo[iFirstFastAlgo])
710*3f1979aaSAndroid Build Coastguard Worker             printf("first   fast algorithm is '%s' at speed %f X ; abs duration %f ms\n", convText[iFirstFastAlgo],    aSpeedFactor[iFirstFastAlgo],    1000.0 * aDuration[iFirstFastAlgo]);
711*3f1979aaSAndroid Build Coastguard Worker           if (iFirstFastAlgo >= 0 && iFirstFastAlgo+1 != iMaxSpeedFastAlgo && iFirstFastAlgo+1 < NUMY && aRunAlgo[iFirstFastAlgo+1])
712*3f1979aaSAndroid Build Coastguard Worker             printf("2nd     fast algorithm is '%s' at speed %f X ; abs duration %f ms\n", convText[iFirstFastAlgo+1],  aSpeedFactor[iFirstFastAlgo+1],  1000.0 * aDuration[iFirstFastAlgo+1]);
713*3f1979aaSAndroid Build Coastguard Worker 
714*3f1979aaSAndroid Build Coastguard Worker           if ( 0 <= iMaxSpeedFastAlgo && iMaxSpeedFastAlgo < NUMY && aRunAlgo[iMaxSpeedFastAlgo] )
715*3f1979aaSAndroid Build Coastguard Worker           {
716*3f1979aaSAndroid Build Coastguard Worker             printf("fastest fast algorithm is '%s' at speed %f X ; abs duration %f ms\n", convText[iMaxSpeedFastAlgo], aSpeedFactor[iMaxSpeedFastAlgo], 1000.0 * aDuration[iMaxSpeedFastAlgo]);
717*3f1979aaSAndroid Build Coastguard Worker             if ( 0 <= iMaxSpeedSlowAlgo && iMaxSpeedSlowAlgo < NUMY && aRunAlgo[iMaxSpeedSlowAlgo] )
718*3f1979aaSAndroid Build Coastguard Worker               printf("fast / slow ratio: %f X\n", aSpeedFactor[iMaxSpeedFastAlgo] / aSpeedFactor[iMaxSpeedSlowAlgo] );
719*3f1979aaSAndroid Build Coastguard Worker           }
720*3f1979aaSAndroid Build Coastguard Worker           printf("\n");
721*3f1979aaSAndroid Build Coastguard Worker         }
722*3f1979aaSAndroid Build Coastguard Worker         else
723*3f1979aaSAndroid Build Coastguard Worker         {
724*3f1979aaSAndroid Build Coastguard Worker           for ( yc = 0; yc < NUMY; ++yc )
725*3f1979aaSAndroid Build Coastguard Worker           {
726*3f1979aaSAndroid Build Coastguard Worker             if (!aRunAlgo[yc] || procSmpPerSec[yc] <= 0.0)
727*3f1979aaSAndroid Build Coastguard Worker               continue;
728*3f1979aaSAndroid Build Coastguard Worker             printf("algo '%s':\t%.2f MSmp\tin\t%.1f ms\t= %g kSmpPerSec\n",
729*3f1979aaSAndroid Build Coastguard Worker               convText[yc], (double)outN[yc]/(1000.0 * 1000.0), 1000.0 * aDuration[yc], procSmpPerSec[yc] * 0.001 );
730*3f1979aaSAndroid Build Coastguard Worker           }
731*3f1979aaSAndroid Build Coastguard Worker         }
732*3f1979aaSAndroid Build Coastguard Worker 
733*3f1979aaSAndroid Build Coastguard Worker       }
734*3f1979aaSAndroid Build Coastguard Worker     }
735*3f1979aaSAndroid Build Coastguard Worker 
736*3f1979aaSAndroid Build Coastguard Worker 
737*3f1979aaSAndroid Build Coastguard Worker     for ( yc = 1; yc < NUMY; ++yc )
738*3f1979aaSAndroid Build Coastguard Worker     {
739*3f1979aaSAndroid Build Coastguard Worker       const float * Yref;
740*3f1979aaSAndroid Build Coastguard Worker       const float * Ycurr;
741*3f1979aaSAndroid Build Coastguard Worker       int outMin;
742*3f1979aaSAndroid Build Coastguard Worker 
743*3f1979aaSAndroid Build Coastguard Worker       if (!aRunAlgo[yc])
744*3f1979aaSAndroid Build Coastguard Worker         continue;
745*3f1979aaSAndroid Build Coastguard Worker 
746*3f1979aaSAndroid Build Coastguard Worker       if (printDbg)
747*3f1979aaSAndroid Build Coastguard Worker         printf("\n");
748*3f1979aaSAndroid Build Coastguard Worker 
749*3f1979aaSAndroid Build Coastguard Worker       if ( outN[yc] == 0 )
750*3f1979aaSAndroid Build Coastguard Worker       {
751*3f1979aaSAndroid Build Coastguard Worker         printf("output size 0: '%s' not implemented\n", convText[yc]);
752*3f1979aaSAndroid Build Coastguard Worker       }
753*3f1979aaSAndroid Build Coastguard Worker       else if ( outN[0] != outN[yc] /* && aFastAlgo[yc] */ && testOutLen )
754*3f1979aaSAndroid Build Coastguard Worker       {
755*3f1979aaSAndroid Build Coastguard Worker         if (!iPrintedRefOutLen)
756*3f1979aaSAndroid Build Coastguard Worker         {
757*3f1979aaSAndroid Build Coastguard Worker           printf("reference output size = %" PRId64 ", delta to (cplx) input length = %" PRId64 " smp\n", outN[0], (len / cplxFactor) - outN[0]);
758*3f1979aaSAndroid Build Coastguard Worker           iPrintedRefOutLen = 1;
759*3f1979aaSAndroid Build Coastguard Worker         }
760*3f1979aaSAndroid Build Coastguard Worker         printf("output size doesn't match!: ref (FILTERLEN %d) returned %" PRId64 " smp, '%s' returned %" PRId64 " smp : delta = %" PRId64 " smp\n",
761*3f1979aaSAndroid Build Coastguard Worker           FILTERLEN, outN[0], convText[yc], outN[yc], outN[yc] - outN[0] );
762*3f1979aaSAndroid Build Coastguard Worker         retErr = 1;
763*3f1979aaSAndroid Build Coastguard Worker       }
764*3f1979aaSAndroid Build Coastguard Worker 
765*3f1979aaSAndroid Build Coastguard Worker       posMaxErr = 0;
766*3f1979aaSAndroid Build Coastguard Worker       maxErr = -1.0;
767*3f1979aaSAndroid Build Coastguard Worker       Yref = Y[0];
768*3f1979aaSAndroid Build Coastguard Worker       Ycurr = Y[yc];
769*3f1979aaSAndroid Build Coastguard Worker       outMin = ( outN[yc] < outN[0] ) ? outN[yc] : outN[0];
770*3f1979aaSAndroid Build Coastguard Worker       numErrOverLimit = 0;
771*3f1979aaSAndroid Build Coastguard Worker       for ( i = 0; i < outMin; ++i )
772*3f1979aaSAndroid Build Coastguard Worker       {
773*3f1979aaSAndroid Build Coastguard Worker         if ( numErrOverLimit < 6 && fabs(Ycurr[i] - Yref[i]) >= yErrLimit )
774*3f1979aaSAndroid Build Coastguard Worker         {
775*3f1979aaSAndroid Build Coastguard Worker           printf("algo '%s': at %d: ***ERROR*** = %f, errLimit = %f, ref = %f, actual = %f\n",
776*3f1979aaSAndroid Build Coastguard Worker             convText[yc], i, fabs(Ycurr[i] - Yref[i]), yErrLimit, Yref[i], Ycurr[i] );
777*3f1979aaSAndroid Build Coastguard Worker           ++numErrOverLimit;
778*3f1979aaSAndroid Build Coastguard Worker         }
779*3f1979aaSAndroid Build Coastguard Worker 
780*3f1979aaSAndroid Build Coastguard Worker         if ( fabs(Ycurr[i] - Yref[i]) > maxErr )
781*3f1979aaSAndroid Build Coastguard Worker         {
782*3f1979aaSAndroid Build Coastguard Worker           maxErr = fabsf(Ycurr[i] - Yref[i]);
783*3f1979aaSAndroid Build Coastguard Worker           posMaxErr = i;
784*3f1979aaSAndroid Build Coastguard Worker         }
785*3f1979aaSAndroid Build Coastguard Worker       }
786*3f1979aaSAndroid Build Coastguard Worker 
787*3f1979aaSAndroid Build Coastguard Worker       if ( printDbg || (iMaxSpeedSlowAlgo == i) || (iMaxSpeedFastAlgo == i) )
788*3f1979aaSAndroid Build Coastguard Worker         printf("max difference for '%s' is %g at sample idx %d of max inp 4093-1 == %f %%\n", convText[yc], maxErr, posMaxErr, maxErr * 100.0 / 4092.0 );
789*3f1979aaSAndroid Build Coastguard Worker     }
790*3f1979aaSAndroid Build Coastguard Worker 
791*3f1979aaSAndroid Build Coastguard Worker     break;
792*3f1979aaSAndroid Build Coastguard Worker   }
793*3f1979aaSAndroid Build Coastguard Worker 
794*3f1979aaSAndroid Build Coastguard Worker   pffastconv_free(X);
795*3f1979aaSAndroid Build Coastguard Worker   for ( i=0; i < NUMY; ++i)
796*3f1979aaSAndroid Build Coastguard Worker   {
797*3f1979aaSAndroid Build Coastguard Worker     if ( 1 || i < 2 )
798*3f1979aaSAndroid Build Coastguard Worker       pffastconv_free( Y[i] );
799*3f1979aaSAndroid Build Coastguard Worker     if (!aRunAlgo[i])
800*3f1979aaSAndroid Build Coastguard Worker       continue;
801*3f1979aaSAndroid Build Coastguard Worker     aDestroy[i]( aSetupCfg[i] );
802*3f1979aaSAndroid Build Coastguard Worker   }
803*3f1979aaSAndroid Build Coastguard Worker 
804*3f1979aaSAndroid Build Coastguard Worker   pffastconv_free(H);
805*3f1979aaSAndroid Build Coastguard Worker 
806*3f1979aaSAndroid Build Coastguard Worker   return retErr;
807*3f1979aaSAndroid Build Coastguard Worker }
808*3f1979aaSAndroid Build Coastguard Worker 
/* small functions inside pffft.c that will detect (compiler) bugs with respect to simd instructions.
 * Declared with explicit parameter lists so that calls are type-checked. */
void validate_pffft_simd(void);
int  validate_pffft_simd_ex(FILE * DbgOut);
812*3f1979aaSAndroid Build Coastguard Worker 
813*3f1979aaSAndroid Build Coastguard Worker 
814*3f1979aaSAndroid Build Coastguard Worker int main(int argc, char **argv)
815*3f1979aaSAndroid Build Coastguard Worker {
816*3f1979aaSAndroid Build Coastguard Worker   int result = 0;
817*3f1979aaSAndroid Build Coastguard Worker   int i, k, M, flagsA, flagsB, flagsC, testOutLen, printDbg, printSpeed;
818*3f1979aaSAndroid Build Coastguard Worker   int testOutLens = 1, benchConv = 1, quickTest = 0, slowTest = 0;
819*3f1979aaSAndroid Build Coastguard Worker   int testReal = 1, testCplx = 1, testSymetric = 0;
820*3f1979aaSAndroid Build Coastguard Worker 
821*3f1979aaSAndroid Build Coastguard Worker   for ( i = 1; i < argc; ++i ) {
822*3f1979aaSAndroid Build Coastguard Worker 
823*3f1979aaSAndroid Build Coastguard Worker     if (!strcmp(argv[i], "--test-simd")) {
824*3f1979aaSAndroid Build Coastguard Worker       int numErrs = validate_pffft_simd_ex(stdout);
825*3f1979aaSAndroid Build Coastguard Worker       fprintf( ( numErrs != 0 ? stderr : stdout ), "validate_pffft_simd_ex() returned %d errors!\n", numErrs);
826*3f1979aaSAndroid Build Coastguard Worker       return ( numErrs > 0 ? 1 : 0 );
827*3f1979aaSAndroid Build Coastguard Worker     }
828*3f1979aaSAndroid Build Coastguard Worker 
829*3f1979aaSAndroid Build Coastguard Worker     if (!strcmp(argv[i], "--no-len")) {
830*3f1979aaSAndroid Build Coastguard Worker       testOutLens = 0;
831*3f1979aaSAndroid Build Coastguard Worker     }
832*3f1979aaSAndroid Build Coastguard Worker     else if (!strcmp(argv[i], "--no-bench")) {
833*3f1979aaSAndroid Build Coastguard Worker       benchConv = 0;
834*3f1979aaSAndroid Build Coastguard Worker     }
835*3f1979aaSAndroid Build Coastguard Worker     else if (!strcmp(argv[i], "--quick")) {
836*3f1979aaSAndroid Build Coastguard Worker       quickTest = 1;
837*3f1979aaSAndroid Build Coastguard Worker     }
838*3f1979aaSAndroid Build Coastguard Worker     else if (!strcmp(argv[i], "--slow")) {
839*3f1979aaSAndroid Build Coastguard Worker       slowTest = 1;
840*3f1979aaSAndroid Build Coastguard Worker     }
841*3f1979aaSAndroid Build Coastguard Worker     else if (!strcmp(argv[i], "--real")) {
842*3f1979aaSAndroid Build Coastguard Worker       testCplx = 0;
843*3f1979aaSAndroid Build Coastguard Worker     }
844*3f1979aaSAndroid Build Coastguard Worker     else if (!strcmp(argv[i], "--cplx")) {
845*3f1979aaSAndroid Build Coastguard Worker       testReal = 0;
846*3f1979aaSAndroid Build Coastguard Worker     }
847*3f1979aaSAndroid Build Coastguard Worker     else if (!strcmp(argv[i], "--sym")) {
848*3f1979aaSAndroid Build Coastguard Worker       testSymetric = 1;
849*3f1979aaSAndroid Build Coastguard Worker     }
850*3f1979aaSAndroid Build Coastguard Worker     else /* if (!strcmp(argv[i], "--help")) */ {
851*3f1979aaSAndroid Build Coastguard Worker       printf("usage: %s [--test-simd] [--no-len] [--no-bench] [--quick|--slow] [--real|--cplx] [--sym]\n", argv[0]);
852*3f1979aaSAndroid Build Coastguard Worker       exit(1);
853*3f1979aaSAndroid Build Coastguard Worker     }
854*3f1979aaSAndroid Build Coastguard Worker   }
855*3f1979aaSAndroid Build Coastguard Worker 
856*3f1979aaSAndroid Build Coastguard Worker 
857*3f1979aaSAndroid Build Coastguard Worker   if (testOutLens)
858*3f1979aaSAndroid Build Coastguard Worker   {
859*3f1979aaSAndroid Build Coastguard Worker     for ( k = 0; k < 3; ++k )
860*3f1979aaSAndroid Build Coastguard Worker     {
861*3f1979aaSAndroid Build Coastguard Worker       if ( (k == 0 && !testReal) || (k > 0 && !testCplx) )
862*3f1979aaSAndroid Build Coastguard Worker         continue;
863*3f1979aaSAndroid Build Coastguard Worker       printf("\n\n==========\n");
864*3f1979aaSAndroid Build Coastguard Worker       printf("testing %s %s output lengths ..\n", (k == 0 ? "real" : "cplx"), ( k == 0 ? "" : (k==1 ? "2x" : "single") ) );
865*3f1979aaSAndroid Build Coastguard Worker       printf("==========\n");
866*3f1979aaSAndroid Build Coastguard Worker       flagsA = (k == 0) ? 0 : PFFASTCONV_CPLX_INP_OUT;
867*3f1979aaSAndroid Build Coastguard Worker       flagsB = flagsA | ( testSymetric ? PFFASTCONV_SYMMETRIC : 0 );
868*3f1979aaSAndroid Build Coastguard Worker       flagsC = flagsB | PFFASTCONV_CPLX_SINGLE_FFT;
869*3f1979aaSAndroid Build Coastguard Worker       testOutLen = 1;
870*3f1979aaSAndroid Build Coastguard Worker       printDbg = 0;
871*3f1979aaSAndroid Build Coastguard Worker       printSpeed = 0;
872*3f1979aaSAndroid Build Coastguard Worker       for ( M = 128 - 4; M <= (quickTest ? 128+16 : 256); ++M )
873*3f1979aaSAndroid Build Coastguard Worker       {
874*3f1979aaSAndroid Build Coastguard Worker         if ( (M % 16) != 0 && testSymetric )
875*3f1979aaSAndroid Build Coastguard Worker           continue;
876*3f1979aaSAndroid Build Coastguard Worker         result |= test(M, flagsB, testOutLen, printDbg, printSpeed);
877*3f1979aaSAndroid Build Coastguard Worker       }
878*3f1979aaSAndroid Build Coastguard Worker     }
879*3f1979aaSAndroid Build Coastguard Worker   }
880*3f1979aaSAndroid Build Coastguard Worker 
881*3f1979aaSAndroid Build Coastguard Worker   if (benchConv)
882*3f1979aaSAndroid Build Coastguard Worker   {
883*3f1979aaSAndroid Build Coastguard Worker     for ( k = 0; k < 3; ++k )
884*3f1979aaSAndroid Build Coastguard Worker     {
885*3f1979aaSAndroid Build Coastguard Worker       if ( (k == 0 && !testReal) || (k > 0 && !testCplx) )
886*3f1979aaSAndroid Build Coastguard Worker         continue;
887*3f1979aaSAndroid Build Coastguard Worker       printf("\n\n==========\n");
888*3f1979aaSAndroid Build Coastguard Worker       printf("starting %s %s benchmark against linear convolutions ..\n", (k == 0 ? "real" : "cplx"), ( k == 0 ? "" : (k==1 ? "2x" : "single") ) );
889*3f1979aaSAndroid Build Coastguard Worker       printf("==========\n");
890*3f1979aaSAndroid Build Coastguard Worker       flagsA = (k == 0) ? 0 : PFFASTCONV_CPLX_INP_OUT;
891*3f1979aaSAndroid Build Coastguard Worker       flagsB = flagsA | ( testSymetric ? PFFASTCONV_SYMMETRIC : 0 );
892*3f1979aaSAndroid Build Coastguard Worker       flagsC = flagsB | ( k == 2 ? PFFASTCONV_CPLX_SINGLE_FFT : 0 );
893*3f1979aaSAndroid Build Coastguard Worker       testOutLen = 0;
894*3f1979aaSAndroid Build Coastguard Worker       printDbg = 0;
895*3f1979aaSAndroid Build Coastguard Worker       printSpeed = 1;
896*3f1979aaSAndroid Build Coastguard Worker       if (!slowTest) {
897*3f1979aaSAndroid Build Coastguard Worker         result |= test( 32,     flagsC, testOutLen, printDbg, printSpeed);
898*3f1979aaSAndroid Build Coastguard Worker         result |= test( 32+ 16, flagsC, testOutLen, printDbg, printSpeed);
899*3f1979aaSAndroid Build Coastguard Worker         result |= test( 64,     flagsC, testOutLen, printDbg, printSpeed);
900*3f1979aaSAndroid Build Coastguard Worker         result |= test( 64+ 32, flagsC, testOutLen, printDbg, printSpeed);
901*3f1979aaSAndroid Build Coastguard Worker         result |= test(128,     flagsC, testOutLen, printDbg, printSpeed);
902*3f1979aaSAndroid Build Coastguard Worker       }
903*3f1979aaSAndroid Build Coastguard Worker       if (!quickTest) {
904*3f1979aaSAndroid Build Coastguard Worker         result |= test(128+ 64, flagsC, testOutLen, printDbg, printSpeed);
905*3f1979aaSAndroid Build Coastguard Worker         result |= test(256,     flagsC, testOutLen, printDbg, printSpeed);
906*3f1979aaSAndroid Build Coastguard Worker         result |= test(256+128, flagsC, testOutLen, printDbg, printSpeed);
907*3f1979aaSAndroid Build Coastguard Worker         result |= test(512,     flagsC, testOutLen, printDbg, printSpeed);
908*3f1979aaSAndroid Build Coastguard Worker         result |= test(1024,    flagsC, testOutLen, printDbg, printSpeed);
909*3f1979aaSAndroid Build Coastguard Worker       }
910*3f1979aaSAndroid Build Coastguard Worker     }
911*3f1979aaSAndroid Build Coastguard Worker   }
912*3f1979aaSAndroid Build Coastguard Worker 
913*3f1979aaSAndroid Build Coastguard Worker   return result;
914*3f1979aaSAndroid Build Coastguard Worker }
915*3f1979aaSAndroid Build Coastguard Worker 
916